From 720eaf1e1aabb4301444c0b0185bb2006a0ae6b8 Mon Sep 17 00:00:00 2001 From: yao Date: Sat, 23 Feb 2013 15:19:46 +0800 Subject: [PATCH 1/2] fix the haar kernel problems on Nvidia and Intel OCL --- modules/ocl/src/haar.cpp | 2 +- modules/ocl/src/kernels/haarobjectdetect.cl | 433 ++++++++++---------- 2 files changed, 220 insertions(+), 215 deletions(-) diff --git a/modules/ocl/src/haar.cpp b/modules/ocl/src/haar.cpp index 5c9b75bf52..26e6a40704 100644 --- a/modules/ocl/src/haar.cpp +++ b/modules/ocl/src/haar.cpp @@ -926,7 +926,7 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS if( gimg.cols < minSize.width || gimg.rows < minSize.height ) CV_Error(CV_StsError, "Image too small"); - if( (flags & CV_HAAR_SCALE_IMAGE) && gimg.clCxt->impl->devName.find("Intel(R) HD Graphics") == string::npos ) + if( (flags & CV_HAAR_SCALE_IMAGE) ) { CvSize winSize0 = cascade->orig_window_size; //float scalefactor = 1.1f; diff --git a/modules/ocl/src/kernels/haarobjectdetect.cl b/modules/ocl/src/kernels/haarobjectdetect.cl index 95cfa63c1f..7835b4bcc5 100644 --- a/modules/ocl/src/kernels/haarobjectdetect.cl +++ b/modules/ocl/src/kernels/haarobjectdetect.cl @@ -9,6 +9,7 @@ // Niko Li, newlife20080214@gmail.com // Wang Weiyan, wangweiyanster@gmail.com // Jia Haipeng, jiahaipeng95@gmail.com +// Nathan, liujun@multicorewareinc.com // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: // @@ -47,14 +48,14 @@ typedef float sqsumtype; typedef struct __attribute__((aligned (128))) GpuHidHaarFeature { struct __attribute__((aligned (32))) - { - int p0 __attribute__((aligned (4))); - int p1 __attribute__((aligned (4))); - int p2 __attribute__((aligned (4))); - int p3 __attribute__((aligned (4))); - float weight __attribute__((aligned (4))); - } - rect[CV_HAAR_FEATURE_MAX] __attribute__((aligned (32))); +{ + int p0 __attribute__((aligned (4))); + int p1 __attribute__((aligned (4))); + int p2 __attribute__((aligned (4))); + int p3 __attribute__((aligned (4))); + float weight __attribute__((aligned (4))); +} +rect[CV_HAAR_FEATURE_MAX] __attribute__((aligned (32))); } GpuHidHaarFeature; @@ -108,31 +109,31 @@ typedef struct __attribute__((aligned (64))) GpuHidHaarClassifierCascade int p2 __attribute__((aligned (4))); int p3 __attribute__((aligned (4))); float inv_window_area __attribute__((aligned (4))); -}GpuHidHaarClassifierCascade; +} GpuHidHaarClassifierCascade; __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCascade(//constant GpuHidHaarClassifierCascade * cascade, - global GpuHidHaarStageClassifier * stagecascadeptr, - global int4 * info, - global GpuHidHaarTreeNode * nodeptr, - global const int * restrict sum1, - global const float * restrict sqsum1, - global int4 * candidate, - const int pixelstep, - const int loopcount, - const int start_stage, - const int split_stage, - const int end_stage, - const int startnode, - const int splitnode, - const int4 p, - const int4 pq, - const float correction - //const int width, - //const int height, - //const int grpnumperline, - //const int totalgrp - ) + global GpuHidHaarStageClassifier * stagecascadeptr, + global int4 * info, + global GpuHidHaarTreeNode * nodeptr, + global const int * restrict sum1, + global const float * restrict sqsum1, + global int4 * candidate, + const int pixelstep, + const int loopcount, + const int start_stage, + const int split_stage, + const int end_stage, + const int startnode, + const int splitnode, + const int4 p, + const int4 pq, + const float correction + //const int width, + //const int height, + //const int grpnumperline, + //const int totalgrp +) { int grpszx = get_local_size(0); int grpszy = get_local_size(1); @@ -184,7 +185,7 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa __global const int * sum = sum1 + imgoff; __global const float * sqsum = sqsum1 + imgoff; - for(int grploop=grpidx;grploop=0.f ? sqrt(variance_norm_factor) : 1.f; @@ -270,19 +271,19 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa info2.z +=lcl_off; float classsum = (lcldata[mad24(info1.y,readwidth,info1.x)] - lcldata[mad24(info1.y,readwidth,info1.z)] - - lcldata[mad24(info1.w,readwidth,info1.x)] + lcldata[mad24(info1.w,readwidth,info1.z)]) * w.x; + lcldata[mad24(info1.w,readwidth,info1.x)] + lcldata[mad24(info1.w,readwidth,info1.z)]) * w.x; classsum += (lcldata[mad24(info2.y,readwidth,info2.x)] - lcldata[mad24(info2.y,readwidth,info2.z)] - - lcldata[mad24(info2.w,readwidth,info2.x)] + lcldata[mad24(info2.w,readwidth,info2.z)]) * w.y; + lcldata[mad24(info2.w,readwidth,info2.x)] + lcldata[mad24(info2.w,readwidth,info2.z)]) * w.y; //if((info3.z - info3.x) && (!stageinfo.z)) //{ - info3.x +=lcl_off; - info3.z +=lcl_off; - classsum += (lcldata[mad24(info3.y,readwidth,info3.x)] - lcldata[mad24(info3.y,readwidth,info3.z)] - - lcldata[mad24(info3.w,readwidth,info3.x)] + lcldata[mad24(info3.w,readwidth,info3.z)]) * w.z; + info3.x +=lcl_off; + info3.z +=lcl_off; + classsum += (lcldata[mad24(info3.y,readwidth,info3.x)] - lcldata[mad24(info3.y,readwidth,info3.z)] - + lcldata[mad24(info3.w,readwidth,info3.x)] + lcldata[mad24(info3.w,readwidth,info3.z)]) * w.z; //} stage_sum += classsum >= nodethreshold ? alpha2.y : alpha2.x; nodecounter++; @@ -299,12 +300,13 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa } barrier(CLK_LOCAL_MEM_FENCE); int queuecount = lclcount[0]; + barrier(CLK_LOCAL_MEM_FENCE); nodecounter = splitnode; - for(int stageloop = split_stage; stageloop< end_stage && queuecount>0;stageloop++) + for(int stageloop = split_stage; stageloop< end_stage && queuecount>0; stageloop++) { - //barrier(CLK_LOCAL_MEM_FENCE); + //barrier(CLK_LOCAL_MEM_FENCE); //if(lcl_id == 0) - lclcount[0]=0; + lclcount[0]=0; barrier(CLK_LOCAL_MEM_FENCE); int2 stageinfo = *(global int2*)(stagecascadeptr+stageloop); @@ -316,70 +318,73 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa int lcl_compute_win_id = (lcl_id >>(6-perfscale)); int lcl_loops = (stageinfo.x + lcl_compute_win -1) >> (6-perfscale); int lcl_compute_id = lcl_id - (lcl_compute_win_id << (6-perfscale)); - for(int queueloop=0;queueloop>16),readwidth,temp_coord & 0xffff); - //barrier(CLK_LOCAL_MEM_FENCE); - if(lcl_compute_win_id < queuecount) { - - int tempnodecounter = lcl_compute_id; - float part_sum = 0.f; - for(int lcl_loop=0;lcl_loopp[0][0])); - int4 info2 = *(__global int4*)(&(currentnodeptr->p[1][0])); - int4 info3 = *(__global int4*)(&(currentnodeptr->p[2][0])); - float4 w = *(__global float4*)(&(currentnodeptr->weight[0])); - float2 alpha2 = *(__global float2*)(&(currentnodeptr->alpha[0])); - float nodethreshold = w.w * variance_norm_factor; + int tempnodecounter = lcl_compute_id; + float part_sum = 0.f; + for(int lcl_loop=0; lcl_loopp[0][0])); + int4 info2 = *(__global int4*)(&(currentnodeptr->p[1][0])); + int4 info3 = *(__global int4*)(&(currentnodeptr->p[2][0])); + float4 w = *(__global float4*)(&(currentnodeptr->weight[0])); + float2 alpha2 = *(__global float2*)(&(currentnodeptr->alpha[0])); + float nodethreshold = w.w * variance_norm_factor; - float classsum = (lcldata[mad24(info1.y,readwidth,info1.x)] - lcldata[mad24(info1.y,readwidth,info1.z)] - - lcldata[mad24(info1.w,readwidth,info1.x)] + lcldata[mad24(info1.w,readwidth,info1.z)]) * w.x; + info1.x +=queue_pixel; + info1.z +=queue_pixel; + info2.x +=queue_pixel; + info2.z +=queue_pixel; + + float classsum = (lcldata[mad24(info1.y,readwidth,info1.x)] - lcldata[mad24(info1.y,readwidth,info1.z)] - + lcldata[mad24(info1.w,readwidth,info1.x)] + lcldata[mad24(info1.w,readwidth,info1.z)]) * w.x; - classsum += (lcldata[mad24(info2.y,readwidth,info2.x)] - lcldata[mad24(info2.y,readwidth,info2.z)] - - lcldata[mad24(info2.w,readwidth,info2.x)] + lcldata[mad24(info2.w,readwidth,info2.z)]) * w.y; - //if((info3.z - info3.x) && (!stageinfo.z)) - //{ + classsum += (lcldata[mad24(info2.y,readwidth,info2.x)] - lcldata[mad24(info2.y,readwidth,info2.z)] - + lcldata[mad24(info2.w,readwidth,info2.x)] + lcldata[mad24(info2.w,readwidth,info2.z)]) * w.y; + //if((info3.z - info3.x) && (!stageinfo.z)) + //{ info3.x +=queue_pixel; info3.z +=queue_pixel; classsum += (lcldata[mad24(info3.y,readwidth,info3.x)] - lcldata[mad24(info3.y,readwidth,info3.z)] - - lcldata[mad24(info3.w,readwidth,info3.x)] + lcldata[mad24(info3.w,readwidth,info3.z)]) * w.z; - //} - part_sum += classsum >= nodethreshold ? alpha2.y : alpha2.x; - tempnodecounter +=lcl_compute_win; - }//end for(int lcl_loop=0;lcl_loop= nodethreshold ? alpha2.y : alpha2.x; + tempnodecounter +=lcl_compute_win; + }//end for(int lcl_loop=0;lcl_loop= stagethreshold && (lcl_compute_id==0)) + { + int queueindex = atomic_inc(lclcount); + lcloutindex[queueindex<<1] = temp_coord; + lcloutindex[(queueindex<<1)+1] = as_int(variance_norm_factor); + } + lcl_compute_win_id +=(1<= stagethreshold && (lcl_compute_id==0)) - { - int queueindex = atomic_inc(lclcount); - lcloutindex[queueindex<<1] = temp_coord; - lcloutindex[(queueindex<<1)+1] = as_int(variance_norm_factor); - } - lcl_compute_win_id +=(1<0;stageloop++) //barrier(CLK_LOCAL_MEM_FENCE); @@ -420,139 +425,139 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa - /* - if(stagecascade->two_rects) - { - #pragma unroll - for( n = 0; n < stagecascade->count; n++ ) - { - t1 = *(node + counter); - t = t1.threshold * variance_norm_factor; - classsum = calc_sum1(t1,p_offset,0) * t1.weight[0]; - - classsum += calc_sum1(t1, p_offset,1) * t1.weight[1]; - stage_sum += classsum >= t ? t1.alpha[1]:t1.alpha[0]; - - counter++; - } - } - else - { - #pragma unroll - for( n = 0; n < stagecascade->count; n++ ) - { - t = node[counter].threshold*variance_norm_factor; - classsum = calc_sum1(node[counter],p_offset,0) * node[counter].weight[0]; - classsum += calc_sum1(node[counter],p_offset,1) * node[counter].weight[1]; - - if( node[counter].p0[2] ) - classsum += calc_sum1(node[counter],p_offset,2) * node[counter].weight[2]; - - stage_sum += classsum >= t ? node[counter].alpha[1]:node[counter].alpha[0];// modify - - counter++; - } - } - */ - /* -__kernel void gpuRunHaarClassifierCascade_ScaleWindow( - constant GpuHidHaarClassifierCascade * _cascade, - global GpuHidHaarStageClassifier * stagecascadeptr, - //global GpuHidHaarClassifier * classifierptr, - global GpuHidHaarTreeNode * nodeptr, - global int * sum, - global float * sqsum, - global int * _candidate, - int pixel_step, - int cols, - int rows, - int start_stage, - int end_stage, - //int counts, - int nodenum, - int ystep, - int detect_width, - //int detect_height, - int loopcount, - int outputstep) - //float scalefactor) +/* +if(stagecascade->two_rects) { - unsigned int x1 = get_global_id(0); - unsigned int y1 = get_global_id(1); - int p_offset; - int m, n; - int result; - int counter; - float mean, variance_norm_factor; - for(int i=0;icount; n++ ) { - constant GpuHidHaarClassifierCascade * cascade = _cascade + i; - global int * candidate = _candidate + i*outputstep; - int window_width = cascade->p1 - cascade->p0; - int window_height = window_width; - result = 1; - counter = 0; - unsigned int x = mul24(x1,ystep); - unsigned int y = mul24(y1,ystep); - if((x < cols - window_width - 1) && (y < rows - window_height -1)) - { - global GpuHidHaarStageClassifier *stagecascade = stagecascadeptr +cascade->count*i+ start_stage; - //global GpuHidHaarClassifier *classifier = classifierptr; - global GpuHidHaarTreeNode *node = nodeptr + nodenum*i; + t1 = *(node + counter); + t = t1.threshold * variance_norm_factor; + classsum = calc_sum1(t1,p_offset,0) * t1.weight[0]; - p_offset = mad24(y, pixel_step, x);// modify + classsum += calc_sum1(t1, p_offset,1) * t1.weight[1]; + stage_sum += classsum >= t ? t1.alpha[1]:t1.alpha[0]; - mean = (*(sum + p_offset + (int)cascade->p0) - *(sum + p_offset + (int)cascade->p1) - - *(sum + p_offset + (int)cascade->p2) + *(sum + p_offset + (int)cascade->p3)) - *cascade->inv_window_area; - - variance_norm_factor = *(sqsum + p_offset + cascade->p0) - *(sqsum + cascade->p1 + p_offset) - - *(sqsum + p_offset + cascade->p2) + *(sqsum + cascade->p3 + p_offset); - variance_norm_factor = variance_norm_factor * cascade->inv_window_area - mean * mean; - variance_norm_factor = variance_norm_factor >=0.f ? sqrt(variance_norm_factor) : 1;//modify - - // if( cascade->is_stump_based ) - //{ - for( m = start_stage; m < end_stage; m++ ) - { - float stage_sum = 0.f; - float t, classsum; - GpuHidHaarTreeNode t1; - - //#pragma unroll - for( n = 0; n < stagecascade->count; n++ ) - { - t1 = *(node + counter); - t = t1.threshold * variance_norm_factor; - classsum = calc_sum1(t1, p_offset ,0) * t1.weight[0] + calc_sum1(t1, p_offset ,1) * t1.weight[1]; - - if((t1.p0[2]) && (!stagecascade->two_rects)) - classsum += calc_sum1(t1, p_offset, 2) * t1.weight[2]; - - stage_sum += classsum >= t ? t1.alpha[1] : t1.alpha[0];// modify - counter++; - } - - if (stage_sum < stagecascade->threshold) - { - result = 0; - break; - } - - stagecascade++; - - } - if(result) - { - candidate[4 * (y1 * detect_width + x1)] = x; - candidate[4 * (y1 * detect_width + x1) + 1] = y; - candidate[4 * (y1 * detect_width + x1)+2] = window_width; - candidate[4 * (y1 * detect_width + x1) + 3] = window_height; - } - //} - } + counter++; } } +else +{ + #pragma unroll + for( n = 0; n < stagecascade->count; n++ ) + { + t = node[counter].threshold*variance_norm_factor; + classsum = calc_sum1(node[counter],p_offset,0) * node[counter].weight[0]; + classsum += calc_sum1(node[counter],p_offset,1) * node[counter].weight[1]; + + if( node[counter].p0[2] ) + classsum += calc_sum1(node[counter],p_offset,2) * node[counter].weight[2]; + + stage_sum += classsum >= t ? node[counter].alpha[1]:node[counter].alpha[0];// modify + + counter++; + } +} +*/ +/* +__kernel void gpuRunHaarClassifierCascade_ScaleWindow( + constant GpuHidHaarClassifierCascade * _cascade, + global GpuHidHaarStageClassifier * stagecascadeptr, + //global GpuHidHaarClassifier * classifierptr, + global GpuHidHaarTreeNode * nodeptr, + global int * sum, + global float * sqsum, + global int * _candidate, + int pixel_step, + int cols, + int rows, + int start_stage, + int end_stage, + //int counts, + int nodenum, + int ystep, + int detect_width, + //int detect_height, + int loopcount, + int outputstep) + //float scalefactor) +{ +unsigned int x1 = get_global_id(0); +unsigned int y1 = get_global_id(1); +int p_offset; +int m, n; +int result; +int counter; +float mean, variance_norm_factor; +for(int i=0;ip1 - cascade->p0; +int window_height = window_width; +result = 1; +counter = 0; +unsigned int x = mul24(x1,ystep); +unsigned int y = mul24(y1,ystep); +if((x < cols - window_width - 1) && (y < rows - window_height -1)) +{ +global GpuHidHaarStageClassifier *stagecascade = stagecascadeptr +cascade->count*i+ start_stage; +//global GpuHidHaarClassifier *classifier = classifierptr; +global GpuHidHaarTreeNode *node = nodeptr + nodenum*i; + +p_offset = mad24(y, pixel_step, x);// modify + +mean = (*(sum + p_offset + (int)cascade->p0) - *(sum + p_offset + (int)cascade->p1) - + *(sum + p_offset + (int)cascade->p2) + *(sum + p_offset + (int)cascade->p3)) + *cascade->inv_window_area; + +variance_norm_factor = *(sqsum + p_offset + cascade->p0) - *(sqsum + cascade->p1 + p_offset) - + *(sqsum + p_offset + cascade->p2) + *(sqsum + cascade->p3 + p_offset); +variance_norm_factor = variance_norm_factor * cascade->inv_window_area - mean * mean; +variance_norm_factor = variance_norm_factor >=0.f ? sqrt(variance_norm_factor) : 1;//modify + +// if( cascade->is_stump_based ) +//{ +for( m = start_stage; m < end_stage; m++ ) +{ +float stage_sum = 0.f; +float t, classsum; +GpuHidHaarTreeNode t1; + +//#pragma unroll +for( n = 0; n < stagecascade->count; n++ ) +{ + t1 = *(node + counter); + t = t1.threshold * variance_norm_factor; + classsum = calc_sum1(t1, p_offset ,0) * t1.weight[0] + calc_sum1(t1, p_offset ,1) * t1.weight[1]; + + if((t1.p0[2]) && (!stagecascade->two_rects)) + classsum += calc_sum1(t1, p_offset, 2) * t1.weight[2]; + + stage_sum += classsum >= t ? t1.alpha[1] : t1.alpha[0];// modify + counter++; +} + +if (stage_sum < stagecascade->threshold) +{ + result = 0; + break; +} + +stagecascade++; + +} +if(result) +{ + candidate[4 * (y1 * detect_width + x1)] = x; + candidate[4 * (y1 * detect_width + x1) + 1] = y; + candidate[4 * (y1 * detect_width + x1)+2] = window_width; + candidate[4 * (y1 * detect_width + x1) + 3] = window_height; +} +//} +} +} +} */ From 9bbf170054ba9017459858e711c5b51b625d76e4 Mon Sep 17 00:00:00 2001 From: yao Date: Sat, 23 Feb 2013 15:34:43 +0800 Subject: [PATCH 2/2] some cleanup --- modules/ocl/src/haar.cpp | 1252 +------------------------------------- 1 file changed, 2 insertions(+), 1250 deletions(-) diff --git a/modules/ocl/src/haar.cpp b/modules/ocl/src/haar.cpp index 26e6a40704..506dc6b0c4 100644 --- a/modules/ocl/src/haar.cpp +++ b/modules/ocl/src/haar.cpp @@ -47,15 +47,10 @@ // //M*/ -/* Haar features calculation */ -//#define EMU - #include "precomp.hpp" #include #include -#ifdef EMU -#include "runCL.h" -#endif + using namespace cv; using namespace cv::ocl; using namespace std; @@ -1114,30 +1109,7 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS args.push_back ( make_pair(sizeof(cl_int4) , (void *)&p )); args.push_back ( make_pair(sizeof(cl_int4) , (void *)&pq )); args.push_back ( make_pair(sizeof(cl_float) , (void *)&correction )); - /* - openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&stagebuffer)); - openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&scaleinfobuffer)); - openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&nodebuffer)); - openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&gsum.data)); - openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&gsqsum.data)); - openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&candidatebuffer)); - openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&pixelstep)); - openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&loopcount)); - openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&startstage)); - openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&splitstage)); - openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&endstage)); - openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&startnode)); - openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&splitnode)); - openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int4), (void *)&p)); - openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int4), (void *)&pq)); - openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_float), (void *)&correction));*/ - //openCLSafeCall(clSetKernelArg(kernel,argcount++,sizeof(cl_int),(void*)&n)); - //openCLSafeCall(clSetKernelArg(kernel,argcount++,sizeof(cl_int),(void*)&grpnumperline)); - //openCLSafeCall(clSetKernelArg(kernel,argcount++,sizeof(cl_int),(void*)&totalgrp)); - // openCLSafeCall(clEnqueueNDRangeKernel(gsum.clCxt->impl->clCmdQueue, kernel, 2, NULL, globalThreads, localThreads, 0, NULL, NULL)); - - // openCLSafeCall(clFinish(gsum.clCxt->impl->clCmdQueue)); openCLExecuteKernel(gsum.clCxt, &haarobjectdetect, "gpuRunHaarClassifierCascade", globalThreads, localThreads, args, -1, -1); //t = (double)cvGetTickCount() - t; //printf( "detection time = %g ms\n", t/((double)cvGetTickFrequency()*1000.) ); @@ -1258,13 +1230,6 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS scaleinfo[i].factor = factor; int startnodenum = nodenum * i; float factor2 = (float)factor; - /* - openCLSafeCall(clSetKernelArg(kernel2, argcounts++, sizeof(cl_mem), (void *)&nodebuffer)); - openCLSafeCall(clSetKernelArg(kernel2, argcounts++, sizeof(cl_mem), (void *)&newnodebuffer)); - openCLSafeCall(clSetKernelArg(kernel2, argcounts++, sizeof(cl_float), (void *)&factor2)); - openCLSafeCall(clSetKernelArg(kernel2, argcounts++, sizeof(cl_float), (void *)&correction[i])); - openCLSafeCall(clSetKernelArg(kernel2, argcounts++, sizeof(cl_int), (void *)&startnodenum)); - */ vector > args1; args1.push_back ( make_pair(sizeof(cl_mem) , (void *)&nodebuffer )); @@ -1298,22 +1263,6 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS correctionbuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_ONLY, sizeof(cl_float) * loopcount); openCLSafeCall(clEnqueueWriteBuffer(gsum.clCxt->impl->clCmdQueue, correctionbuffer, 1, 0, sizeof(cl_float)*loopcount, correction, 0, NULL, NULL)); //int argcount = 0; - /*openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&stagebuffer)); - openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&scaleinfobuffer)); - openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&newnodebuffer)); - openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&gsum.data)); - openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&gsqsum.data)); - openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&candidatebuffer)); - openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&step)); - openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&loopcount)); - openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&startstage)); - openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&splitstage)); - openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&endstage)); - openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&startnode)); - openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&splitnode)); - openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&pbuffer)); - openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&correctionbuffer)); - openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&nodenum));*/ vector > args; args.push_back ( make_pair(sizeof(cl_mem) , (void *)&stagebuffer )); @@ -1335,8 +1284,6 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS openCLExecuteKernel(gsum.clCxt, &haarobjectdetect_scaled2, "gpuRunHaarClassifierCascade_scaled2", globalThreads, localThreads, args, -1, -1); - //openCLSafeCall(clEnqueueNDRangeKernel(gsum.clCxt->impl->clCmdQueue, kernel, 2, NULL, globalThreads, localThreads, 0, NULL, NULL)); - //openCLSafeCall(clFinish(gsum.clCxt->impl->clCmdQueue)); //openCLSafeCall(clEnqueueReadBuffer(gsum.clCxt->clCmdQueue,candidatebuffer,1,0,4*sizeof(int)*outputsz,candidate,0,NULL,NULL)); candidate = (int *)clEnqueueMapBuffer(gsum.clCxt->impl->clCmdQueue, candidatebuffer, 1, CL_MAP_READ, 0, 4 * sizeof(int), 0, 0, 0, &status); @@ -1407,204 +1354,10 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS } -// static CvHaarClassifierCascade * gpuLoadCascadeCART( const char **input_cascade, int n, CvSize orig_window_size ) -// { -// int i; -// CvHaarClassifierCascade *cascade = gpuCreateHaarClassifierCascade(n); -// cascade->orig_window_size = orig_window_size; - -// for( i = 0; i < n; i++ ) -// { -// int j, count, l; -// float threshold = 0; -// const char *stage = input_cascade[i]; -// int dl = 0; - -// /* tree links */ -// int parent = -1; -// int next = -1; - -// sscanf( stage, "%d%n", &count, &dl ); -// stage += dl; - -// assert( count > 0 ); -// cascade->stage_classifier[i].count = count; -// cascade->stage_classifier[i].classifier = -// (CvHaarClassifier *)cvAlloc( count * sizeof(cascade->stage_classifier[i].classifier[0])); - -// for( j = 0; j < count; j++ ) -// { -// CvHaarClassifier *classifier = cascade->stage_classifier[i].classifier + j; -// int k, rects = 0; -// char str[100]; - -// sscanf( stage, "%d%n", &classifier->count, &dl ); -// stage += dl; - -// classifier->haar_feature = (CvHaarFeature *) cvAlloc( -// classifier->count * ( sizeof( *classifier->haar_feature ) + -// sizeof( *classifier->threshold ) + -// sizeof( *classifier->left ) + -// sizeof( *classifier->right ) ) + -// (classifier->count + 1) * sizeof( *classifier->alpha ) ); -// classifier->threshold = (float *) (classifier->haar_feature + classifier->count); -// classifier->left = (int *) (classifier->threshold + classifier->count); -// classifier->right = (int *) (classifier->left + classifier->count); -// classifier->alpha = (float *) (classifier->right + classifier->count); - -// for( l = 0; l < classifier->count; l++ ) -// { -// sscanf( stage, "%d%n", &rects, &dl ); -// stage += dl; - -// assert( rects >= 2 && rects <= CV_HAAR_FEATURE_MAX ); - -// for( k = 0; k < rects; k++ ) -// { -// CvRect r; -// int band = 0; -// sscanf( stage, "%d%d%d%d%d%f%n", -// &r.x, &r.y, &r.width, &r.height, &band, -// &(classifier->haar_feature[l].rect[k].weight), &dl ); -// stage += dl; -// classifier->haar_feature[l].rect[k].r = r; -// } -// sscanf( stage, "%s%n", str, &dl ); -// stage += dl; - -// classifier->haar_feature[l].tilted = strncmp( str, "tilted", 6 ) == 0; - -// for( k = rects; k < CV_HAAR_FEATURE_MAX; k++ ) -// { -// memset( classifier->haar_feature[l].rect + k, 0, -// sizeof(classifier->haar_feature[l].rect[k]) ); -// } - -// sscanf( stage, "%f%d%d%n", &(classifier->threshold[l]), -// &(classifier->left[l]), -// &(classifier->right[l]), &dl ); -// stage += dl; -// } -// for( l = 0; l <= classifier->count; l++ ) -// { -// sscanf( stage, "%f%n", &(classifier->alpha[l]), &dl ); -// stage += dl; -// } -// } - -// sscanf( stage, "%f%n", &threshold, &dl ); -// stage += dl; - -// cascade->stage_classifier[i].threshold = threshold; - -// /* load tree links */ -// if( sscanf( stage, "%d%d%n", &parent, &next, &dl ) != 2 ) -// { -// parent = i - 1; -// next = -1; -// } -// stage += dl; - -// cascade->stage_classifier[i].parent = parent; -// cascade->stage_classifier[i].next = next; -// cascade->stage_classifier[i].child = -1; - -// if( parent != -1 && cascade->stage_classifier[parent].child == -1 ) -// { -// cascade->stage_classifier[parent].child = i; -// } -// } - -// return cascade; -// } - #ifndef _MAX_PATH #define _MAX_PATH 1024 #endif -// static CvHaarClassifierCascade * gpuLoadHaarClassifierCascade( const char *directory, CvSize orig_window_size ) -// { -// const char **input_cascade = 0; -// CvHaarClassifierCascade *cascade = 0; - -// int i, n; -// const char *slash; -// char name[_MAX_PATH]; -// int size = 0; -// char *ptr = 0; - -// if( !directory ) -// CV_Error( CV_StsNullPtr, "Null path is passed" ); - -// n = (int)strlen(directory) - 1; -// slash = directory[n] == '\\' || directory[n] == '/' ? "" : "/"; - -// /* try to read the classifier from directory */ -// for( n = 0; ; n++ ) -// { -// sprintf( name, "%s%s%d/AdaBoostCARTHaarClassifier.txt", directory, slash, n ); -// FILE *f = fopen( name, "rb" ); -// if( !f ) -// break; -// fseek( f, 0, SEEK_END ); -// size += ftell( f ) + 1; -// fclose(f); -// } - -// if( n == 0 && slash[0] ) -// return (CvHaarClassifierCascade *)cvLoad( directory ); - -// if( n == 0 ) -// CV_Error( CV_StsBadArg, "Invalid path" ); - -// size += (n + 1) * sizeof(char *); -// input_cascade = (const char **)cvAlloc( size ); -// ptr = (char *)(input_cascade + n + 1); - -// for( i = 0; i < n; i++ ) -// { -// sprintf( name, "%s/%d/AdaBoostCARTHaarClassifier.txt", directory, i ); -// FILE *f = fopen( name, "rb" ); -// if( !f ) -// CV_Error( CV_StsError, "" ); -// fseek( f, 0, SEEK_END ); -// size = ftell( f ); -// fseek( f, 0, SEEK_SET ); -// CV_Assert((size_t)size == fread( ptr, 1, size, f )); -// fclose(f); -// input_cascade[i] = ptr; -// ptr += size; -// *ptr++ = '\0'; -// } - -// input_cascade[n] = 0; -// cascade = gpuLoadCascadeCART( input_cascade, n, orig_window_size ); - -// if( input_cascade ) -// cvFree( &input_cascade ); - -// return cascade; -// } - - -// static void gpuReleaseHaarClassifierCascade( CvHaarClassifierCascade **_cascade ) -// { -// if( _cascade && *_cascade ) -// { -// int i, j; -// CvHaarClassifierCascade *cascade = *_cascade; - -// for( i = 0; i < cascade->count; i++ ) -// { -// for( j = 0; j < cascade->stage_classifier[i].count; j++ ) -// cvFree( &cascade->stage_classifier[i].classifier[j].haar_feature ); -// cvFree( &cascade->stage_classifier[i].classifier ); -// } -// gpuReleaseHidHaarClassifierCascade( (GpuHidHaarClassifierCascade **)&cascade->hid_cascade ); -// cvFree( _cascade ); -// } -// } - /****************************************************************************************\ * Persistence functions * @@ -1627,937 +1380,11 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS #define ICV_HAAR_PARENT_NAME "parent" #define ICV_HAAR_NEXT_NAME "next" -// static int gpuIsHaarClassifier( const void *struct_ptr ) -// { -// return CV_IS_HAAR_CLASSIFIER( struct_ptr ); -// } - -// static void * gpuReadHaarClassifier( CvFileStorage *fs, CvFileNode *node ) -// { -// CvHaarClassifierCascade *cascade = NULL; - -// char buf[256]; -// CvFileNode *seq_fn = NULL; /* sequence */ -// CvFileNode *fn = NULL; -// CvFileNode *stages_fn = NULL; -// CvSeqReader stages_reader; -// int n; -// int i, j, k, l; -// int parent, next; - -// stages_fn = cvGetFileNodeByName( fs, node, ICV_HAAR_STAGES_NAME ); -// if( !stages_fn || !CV_NODE_IS_SEQ( stages_fn->tag) ) -// CV_Error( CV_StsError, "Invalid stages node" ); - -// n = stages_fn->data.seq->total; -// cascade = gpuCreateHaarClassifierCascade(n); - -// /* read size */ -// seq_fn = cvGetFileNodeByName( fs, node, ICV_HAAR_SIZE_NAME ); -// if( !seq_fn || !CV_NODE_IS_SEQ( seq_fn->tag ) || seq_fn->data.seq->total != 2 ) -// CV_Error( CV_StsError, "size node is not a valid sequence." ); -// fn = (CvFileNode *) cvGetSeqElem( seq_fn->data.seq, 0 ); -// if( !CV_NODE_IS_INT( fn->tag ) || fn->data.i <= 0 ) -// CV_Error( CV_StsError, "Invalid size node: width must be positive integer" ); -// cascade->orig_window_size.width = fn->data.i; -// fn = (CvFileNode *) cvGetSeqElem( seq_fn->data.seq, 1 ); -// if( !CV_NODE_IS_INT( fn->tag ) || fn->data.i <= 0 ) -// CV_Error( CV_StsError, "Invalid size node: height must be positive integer" ); -// cascade->orig_window_size.height = fn->data.i; - -// cvStartReadSeq( stages_fn->data.seq, &stages_reader ); -// for( i = 0; i < n; ++i ) -// { -// CvFileNode *stage_fn; -// CvFileNode *trees_fn; -// CvSeqReader trees_reader; - -// stage_fn = (CvFileNode *) stages_reader.ptr; -// if( !CV_NODE_IS_MAP( stage_fn->tag ) ) -// { -// sprintf( buf, "Invalid stage %d", i ); -// CV_Error( CV_StsError, buf ); -// } - -// trees_fn = cvGetFileNodeByName( fs, stage_fn, ICV_HAAR_TREES_NAME ); -// if( !trees_fn || !CV_NODE_IS_SEQ( trees_fn->tag ) -// || trees_fn->data.seq->total <= 0 ) -// { -// sprintf( buf, "Trees node is not a valid sequence. (stage %d)", i ); -// CV_Error( CV_StsError, buf ); -// } - -// cascade->stage_classifier[i].classifier = -// (CvHaarClassifier *) cvAlloc( trees_fn->data.seq->total -// * sizeof( cascade->stage_classifier[i].classifier[0] ) ); -// for( j = 0; j < trees_fn->data.seq->total; ++j ) -// { -// cascade->stage_classifier[i].classifier[j].haar_feature = NULL; -// } -// cascade->stage_classifier[i].count = trees_fn->data.seq->total; - -// cvStartReadSeq( trees_fn->data.seq, &trees_reader ); -// for( j = 0; j < trees_fn->data.seq->total; ++j ) -// { -// CvFileNode *tree_fn; -// CvSeqReader tree_reader; -// CvHaarClassifier *classifier; -// int last_idx; - -// classifier = &cascade->stage_classifier[i].classifier[j]; -// tree_fn = (CvFileNode *) trees_reader.ptr; -// if( !CV_NODE_IS_SEQ( tree_fn->tag ) || tree_fn->data.seq->total <= 0 ) -// { -// sprintf( buf, "Tree node is not a valid sequence." -// " (stage %d, tree %d)", i, j ); -// CV_Error( CV_StsError, buf ); -// } - -// classifier->count = tree_fn->data.seq->total; -// classifier->haar_feature = (CvHaarFeature *) cvAlloc( -// classifier->count * ( sizeof( *classifier->haar_feature ) + -// sizeof( *classifier->threshold ) + -// sizeof( *classifier->left ) + -// sizeof( *classifier->right ) ) + -// (classifier->count + 1) * sizeof( *classifier->alpha ) ); -// classifier->threshold = (float *) (classifier->haar_feature + classifier->count); -// classifier->left = (int *) (classifier->threshold + classifier->count); -// classifier->right = (int *) (classifier->left + classifier->count); -// classifier->alpha = (float *) (classifier->right + classifier->count); - -// cvStartReadSeq( tree_fn->data.seq, &tree_reader ); -// for( k = 0, last_idx = 0; k < tree_fn->data.seq->total; ++k ) -// { -// CvFileNode *node_fn; -// CvFileNode *feature_fn; -// CvFileNode *rects_fn; -// CvSeqReader rects_reader; - -// node_fn = (CvFileNode *) tree_reader.ptr; -// if( !CV_NODE_IS_MAP( node_fn->tag ) ) -// { -// sprintf( buf, "Tree node %d is not a valid map. (stage %d, tree %d)", -// k, i, j ); -// CV_Error( CV_StsError, buf ); -// } -// feature_fn = cvGetFileNodeByName( fs, node_fn, ICV_HAAR_FEATURE_NAME ); -// if( !feature_fn || !CV_NODE_IS_MAP( feature_fn->tag ) ) -// { -// sprintf( buf, "Feature node is not a valid map. " -// "(stage %d, tree %d, node %d)", i, j, k ); -// CV_Error( CV_StsError, buf ); -// } -// rects_fn = cvGetFileNodeByName( fs, feature_fn, ICV_HAAR_RECTS_NAME ); -// if( !rects_fn || !CV_NODE_IS_SEQ( rects_fn->tag ) -// || rects_fn->data.seq->total < 1 -// || rects_fn->data.seq->total > CV_HAAR_FEATURE_MAX ) -// { -// sprintf( buf, "Rects node is not a valid sequence. " -// "(stage %d, tree %d, node %d)", i, j, k ); -// CV_Error( CV_StsError, buf ); -// } -// cvStartReadSeq( rects_fn->data.seq, &rects_reader ); -// for( l = 0; l < rects_fn->data.seq->total; ++l ) -// { -// CvFileNode *rect_fn; -// CvRect r; - -// rect_fn = (CvFileNode *) rects_reader.ptr; -// if( !CV_NODE_IS_SEQ( rect_fn->tag ) || rect_fn->data.seq->total != 5 ) -// { -// sprintf( buf, "Rect %d is not a valid sequence. " -// "(stage %d, tree %d, node %d)", l, i, j, k ); -// CV_Error( CV_StsError, buf ); -// } - -// fn = CV_SEQ_ELEM( rect_fn->data.seq, CvFileNode, 0 ); -// if( !CV_NODE_IS_INT( fn->tag ) || fn->data.i < 0 ) -// { -// sprintf( buf, "x coordinate must be non-negative integer. " -// "(stage %d, tree %d, node %d, rect %d)", i, j, k, l ); -// CV_Error( CV_StsError, buf ); -// } -// r.x = fn->data.i; -// fn = CV_SEQ_ELEM( rect_fn->data.seq, CvFileNode, 1 ); -// if( !CV_NODE_IS_INT( fn->tag ) || fn->data.i < 0 ) -// { -// sprintf( buf, "y coordinate must be non-negative integer. " -// "(stage %d, tree %d, node %d, rect %d)", i, j, k, l ); -// CV_Error( CV_StsError, buf ); -// } -// r.y = fn->data.i; -// fn = CV_SEQ_ELEM( rect_fn->data.seq, CvFileNode, 2 ); -// if( !CV_NODE_IS_INT( fn->tag ) || fn->data.i <= 0 -// || r.x + fn->data.i > cascade->orig_window_size.width ) -// { -// sprintf( buf, "width must be positive integer and " -// "(x + width) must not exceed window width. " -// "(stage %d, tree %d, node %d, rect %d)", i, j, k, l ); -// CV_Error( CV_StsError, buf ); -// } -// r.width = fn->data.i; -// fn = CV_SEQ_ELEM( rect_fn->data.seq, CvFileNode, 3 ); -// if( !CV_NODE_IS_INT( fn->tag ) || fn->data.i <= 0 -// || r.y + fn->data.i > cascade->orig_window_size.height ) -// { -// sprintf( buf, "height must be positive integer and " -// "(y + height) must not exceed window height. " -// "(stage %d, tree %d, node %d, rect %d)", i, j, k, l ); -// CV_Error( CV_StsError, buf ); -// } -// r.height = fn->data.i; -// fn = CV_SEQ_ELEM( rect_fn->data.seq, CvFileNode, 4 ); -// if( !CV_NODE_IS_REAL( fn->tag ) ) -// { -// sprintf( buf, "weight must be real number. " -// "(stage %d, tree %d, node %d, rect %d)", i, j, k, l ); -// CV_Error( CV_StsError, buf ); -// } - -// classifier->haar_feature[k].rect[l].weight = (float) fn->data.f; -// classifier->haar_feature[k].rect[l].r = r; - -// CV_NEXT_SEQ_ELEM( sizeof( *rect_fn ), rects_reader ); -// } /* for each rect */ -// for( l = rects_fn->data.seq->total; l < CV_HAAR_FEATURE_MAX; ++l ) -// { -// classifier->haar_feature[k].rect[l].weight = 0; -// classifier->haar_feature[k].rect[l].r = cvRect( 0, 0, 0, 0 ); -// } - -// fn = cvGetFileNodeByName( fs, feature_fn, ICV_HAAR_TILTED_NAME); -// if( !fn || !CV_NODE_IS_INT( fn->tag ) ) -// { -// sprintf( buf, "tilted must be 0 or 1. " -// "(stage %d, tree %d, node %d)", i, j, k ); -// CV_Error( CV_StsError, buf ); -// } -// classifier->haar_feature[k].tilted = ( fn->data.i != 0 ); -// fn = cvGetFileNodeByName( fs, node_fn, ICV_HAAR_THRESHOLD_NAME); -// if( !fn || !CV_NODE_IS_REAL( fn->tag ) ) -// { -// sprintf( buf, "threshold must be real number. " -// "(stage %d, tree %d, node %d)", i, j, k ); -// CV_Error( CV_StsError, buf ); -// } -// classifier->threshold[k] = (float) fn->data.f; -// fn = cvGetFileNodeByName( fs, node_fn, ICV_HAAR_LEFT_NODE_NAME); -// if( fn ) -// { -// if( !CV_NODE_IS_INT( fn->tag ) || fn->data.i <= k -// || fn->data.i >= tree_fn->data.seq->total ) -// { -// sprintf( buf, "left node must be valid node number. " -// "(stage %d, tree %d, node %d)", i, j, k ); -// CV_Error( CV_StsError, buf ); -// } -// /* left node */ -// classifier->left[k] = fn->data.i; -// } -// else -// { -// fn = cvGetFileNodeByName( fs, node_fn, ICV_HAAR_LEFT_VAL_NAME ); -// if( !fn ) -// { -// sprintf( buf, "left node or left value must be specified. " -// "(stage %d, tree %d, node %d)", i, j, k ); -// CV_Error( CV_StsError, buf ); -// } -// if( !CV_NODE_IS_REAL( fn->tag ) ) -// { -// sprintf( buf, "left value must be real number. " -// "(stage %d, tree %d, node %d)", i, j, k ); -// CV_Error( CV_StsError, buf ); -// } -// /* left value */ -// if( last_idx >= classifier->count + 1 ) -// { -// sprintf( buf, "Tree structure is broken: too many values. " -// "(stage %d, tree %d, node %d)", i, j, k ); -// CV_Error( CV_StsError, buf ); -// } -// classifier->left[k] = -last_idx; -// classifier->alpha[last_idx++] = (float) fn->data.f; -// } -// fn = cvGetFileNodeByName( fs, node_fn, ICV_HAAR_RIGHT_NODE_NAME); -// if( fn ) -// { -// if( !CV_NODE_IS_INT( fn->tag ) || fn->data.i <= k -// || fn->data.i >= tree_fn->data.seq->total ) -// { -// sprintf( buf, "right node must be valid node number. " -// "(stage %d, tree %d, node %d)", i, j, k ); -// CV_Error( CV_StsError, buf ); -// } -// /* right node */ -// classifier->right[k] = fn->data.i; -// } -// else -// { -// fn = cvGetFileNodeByName( fs, node_fn, ICV_HAAR_RIGHT_VAL_NAME ); -// if( !fn ) -// { -// sprintf( buf, "right node or right value must be specified. " -// "(stage %d, tree %d, node %d)", i, j, k ); -// CV_Error( CV_StsError, buf ); -// } -// if( !CV_NODE_IS_REAL( fn->tag ) ) -// { -// sprintf( buf, "right value must be real number. " -// "(stage %d, tree %d, node %d)", i, j, k ); -// CV_Error( CV_StsError, buf ); -// } -// /* right value */ -// if( last_idx >= classifier->count + 1 ) -// { -// sprintf( buf, "Tree structure is broken: too many values. " -// "(stage %d, tree %d, node %d)", i, j, k ); -// CV_Error( CV_StsError, buf ); -// } -// classifier->right[k] = -last_idx; -// classifier->alpha[last_idx++] = (float) fn->data.f; -// } - -// CV_NEXT_SEQ_ELEM( sizeof( *node_fn ), tree_reader ); -// } /* for each node */ -// if( last_idx != classifier->count + 1 ) -// { -// sprintf( buf, "Tree structure is broken: too few values. " -// "(stage %d, tree %d)", i, j ); -// CV_Error( CV_StsError, buf ); -// } - -// CV_NEXT_SEQ_ELEM( sizeof( *tree_fn ), trees_reader ); -// } /* for each tree */ - -// fn = cvGetFileNodeByName( fs, stage_fn, ICV_HAAR_STAGE_THRESHOLD_NAME); -// if( !fn || !CV_NODE_IS_REAL( fn->tag ) ) -// { -// sprintf( buf, "stage threshold must be real number. (stage %d)", i ); -// CV_Error( CV_StsError, buf ); -// } -// cascade->stage_classifier[i].threshold = (float) fn->data.f; - -// parent = i - 1; -// next = -1; - -// fn = cvGetFileNodeByName( fs, stage_fn, ICV_HAAR_PARENT_NAME ); -// if( !fn || !CV_NODE_IS_INT( fn->tag ) -// || fn->data.i < -1 || fn->data.i >= cascade->count ) -// { -// sprintf( buf, "parent must be integer number. (stage %d)", i ); -// CV_Error( CV_StsError, buf ); -// } -// parent = fn->data.i; -// fn = cvGetFileNodeByName( fs, stage_fn, ICV_HAAR_NEXT_NAME ); -// if( !fn || !CV_NODE_IS_INT( fn->tag ) -// || fn->data.i < -1 || fn->data.i >= cascade->count ) -// { -// sprintf( buf, "next must be integer number. (stage %d)", i ); -// CV_Error( CV_StsError, buf ); -// } -// next = fn->data.i; - -// cascade->stage_classifier[i].parent = parent; -// cascade->stage_classifier[i].next = next; -// cascade->stage_classifier[i].child = -1; - -// if( parent != -1 && cascade->stage_classifier[parent].child == -1 ) -// { -// cascade->stage_classifier[parent].child = i; -// } - -// CV_NEXT_SEQ_ELEM( sizeof( *stage_fn ), stages_reader ); -// } /* for each stage */ - -// return cascade; -// } - -// static void gpuWriteHaarClassifier( CvFileStorage *fs, const char *name, const void *struct_ptr, -// CvAttrList attributes ) -// { -// int i, j, k, l; -// char buf[256]; -// const CvHaarClassifierCascade *cascade = (const CvHaarClassifierCascade *) struct_ptr; - -// /* TODO: parameters check */ - -// cvStartWriteStruct( fs, name, CV_NODE_MAP, CV_TYPE_NAME_HAAR, attributes ); - -// cvStartWriteStruct( fs, ICV_HAAR_SIZE_NAME, CV_NODE_SEQ | CV_NODE_FLOW ); -// cvWriteInt( fs, NULL, cascade->orig_window_size.width ); -// cvWriteInt( fs, NULL, cascade->orig_window_size.height ); -// cvEndWriteStruct( fs ); /* size */ - -// cvStartWriteStruct( fs, ICV_HAAR_STAGES_NAME, CV_NODE_SEQ ); -// for( i = 0; i < cascade->count; ++i ) -// { -// cvStartWriteStruct( fs, NULL, CV_NODE_MAP ); -// sprintf( buf, "stage %d", i ); -// cvWriteComment( fs, buf, 1 ); - -// cvStartWriteStruct( fs, ICV_HAAR_TREES_NAME, CV_NODE_SEQ ); - -// for( j = 0; j < cascade->stage_classifier[i].count; ++j ) -// { -// CvHaarClassifier *tree = &cascade->stage_classifier[i].classifier[j]; - -// cvStartWriteStruct( fs, NULL, CV_NODE_SEQ ); -// sprintf( buf, "tree %d", j ); -// cvWriteComment( fs, buf, 1 ); - -// for( k = 0; k < tree->count; ++k ) -// { -// CvHaarFeature *feature = &tree->haar_feature[k]; - -// cvStartWriteStruct( fs, NULL, CV_NODE_MAP ); -// if( k ) -// { -// sprintf( buf, "node %d", k ); -// } -// else -// { -// sprintf( buf, "root node" ); -// } -// cvWriteComment( fs, buf, 1 ); - -// cvStartWriteStruct( fs, ICV_HAAR_FEATURE_NAME, CV_NODE_MAP ); - -// cvStartWriteStruct( fs, ICV_HAAR_RECTS_NAME, CV_NODE_SEQ ); -// for( l = 0; l < CV_HAAR_FEATURE_MAX && feature->rect[l].r.width != 0; ++l ) -// { -// cvStartWriteStruct( fs, NULL, CV_NODE_SEQ | CV_NODE_FLOW ); -// cvWriteInt( fs, NULL, feature->rect[l].r.x ); -// cvWriteInt( fs, NULL, feature->rect[l].r.y ); -// cvWriteInt( fs, NULL, feature->rect[l].r.width ); -// cvWriteInt( fs, NULL, feature->rect[l].r.height ); -// cvWriteReal( fs, NULL, feature->rect[l].weight ); -// cvEndWriteStruct( fs ); /* rect */ -// } -// cvEndWriteStruct( fs ); /* rects */ -// cvWriteInt( fs, ICV_HAAR_TILTED_NAME, feature->tilted ); -// cvEndWriteStruct( fs ); /* feature */ - -// cvWriteReal( fs, ICV_HAAR_THRESHOLD_NAME, tree->threshold[k]); - -// if( tree->left[k] > 0 ) -// { -// cvWriteInt( fs, ICV_HAAR_LEFT_NODE_NAME, tree->left[k] ); -// } -// else -// { -// cvWriteReal( fs, ICV_HAAR_LEFT_VAL_NAME, -// tree->alpha[-tree->left[k]] ); -// } - -// if( tree->right[k] > 0 ) -// { -// cvWriteInt( fs, ICV_HAAR_RIGHT_NODE_NAME, tree->right[k] ); -// } -// else -// { -// cvWriteReal( fs, ICV_HAAR_RIGHT_VAL_NAME, -// tree->alpha[-tree->right[k]] ); -// } - -// cvEndWriteStruct( fs ); /* split */ -// } - -// cvEndWriteStruct( fs ); /* tree */ -// } - -// cvEndWriteStruct( fs ); /* trees */ - -// cvWriteReal( fs, ICV_HAAR_STAGE_THRESHOLD_NAME, cascade->stage_classifier[i].threshold); -// cvWriteInt( fs, ICV_HAAR_PARENT_NAME, cascade->stage_classifier[i].parent ); -// cvWriteInt( fs, ICV_HAAR_NEXT_NAME, cascade->stage_classifier[i].next ); - -// cvEndWriteStruct( fs ); /* stage */ -// } /* for each stage */ - -// cvEndWriteStruct( fs ); /* stages */ -// cvEndWriteStruct( fs ); /* root */ -// } - -// static void * gpuCloneHaarClassifier( const void *struct_ptr ) -// { -// CvHaarClassifierCascade *cascade = NULL; - -// int i, j, k, n; -// const CvHaarClassifierCascade *cascade_src = -// (const CvHaarClassifierCascade *) struct_ptr; - -// n = cascade_src->count; -// cascade = gpuCreateHaarClassifierCascade(n); -// cascade->orig_window_size = cascade_src->orig_window_size; - -// for( i = 0; i < n; ++i ) -// { -// cascade->stage_classifier[i].parent = cascade_src->stage_classifier[i].parent; -// cascade->stage_classifier[i].next = cascade_src->stage_classifier[i].next; -// cascade->stage_classifier[i].child = cascade_src->stage_classifier[i].child; -// cascade->stage_classifier[i].threshold = cascade_src->stage_classifier[i].threshold; - -// cascade->stage_classifier[i].count = 0; -// cascade->stage_classifier[i].classifier = -// (CvHaarClassifier *) cvAlloc( cascade_src->stage_classifier[i].count -// * sizeof( cascade->stage_classifier[i].classifier[0] ) ); - -// cascade->stage_classifier[i].count = cascade_src->stage_classifier[i].count; - -// for( j = 0; j < cascade->stage_classifier[i].count; ++j ) -// cascade->stage_classifier[i].classifier[j].haar_feature = NULL; - -// for( j = 0; j < cascade->stage_classifier[i].count; ++j ) -// { -// const CvHaarClassifier *classifier_src = -// &cascade_src->stage_classifier[i].classifier[j]; -// CvHaarClassifier *classifier = -// &cascade->stage_classifier[i].classifier[j]; - -// classifier->count = classifier_src->count; -// classifier->haar_feature = (CvHaarFeature *) cvAlloc( -// classifier->count * ( sizeof( *classifier->haar_feature ) + -// sizeof( *classifier->threshold ) + -// sizeof( *classifier->left ) + -// sizeof( *classifier->right ) ) + -// (classifier->count + 1) * sizeof( *classifier->alpha ) ); -// classifier->threshold = (float *) (classifier->haar_feature + classifier->count); -// classifier->left = (int *) (classifier->threshold + classifier->count); -// classifier->right = (int *) (classifier->left + classifier->count); -// classifier->alpha = (float *) (classifier->right + classifier->count); -// for( k = 0; k < classifier->count; ++k ) -// { -// classifier->haar_feature[k] = classifier_src->haar_feature[k]; -// classifier->threshold[k] = classifier_src->threshold[k]; -// classifier->left[k] = classifier_src->left[k]; -// classifier->right[k] = classifier_src->right[k]; -// classifier->alpha[k] = classifier_src->alpha[k]; -// } -// classifier->alpha[classifier->count] = -// classifier_src->alpha[classifier->count]; -// } -// } - -// return cascade; -// } - -#if 0 -CvType haar_type( CV_TYPE_NAME_HAAR, gpuIsHaarClassifier, - (CvReleaseFunc)gpuReleaseHaarClassifierCascade, - gpuReadHaarClassifier, gpuWriteHaarClassifier, - gpuCloneHaarClassifier ); - - -namespace cv +static int gpuRunHaarClassifierCascade( /*const CvHaarClassifierCascade *_cascade, CvPoint pt, int start_stage */) { - -HaarClassifierCascade::HaarClassifierCascade() {} -HaarClassifierCascade::HaarClassifierCascade(const String &filename) -{ - load(filename); -} - -bool HaarClassifierCascade::load(const String &filename) -{ - cascade = Ptr((CvHaarClassifierCascade *)cvLoad(filename.c_str(), 0, 0, 0)); - return (CvHaarClassifierCascade *)cascade != 0; -} - -void HaarClassifierCascade::detectMultiScale( const Mat &image, - Vector &objects, double scaleFactor, - int minNeighbors, int flags, - Size minSize ) -{ - MemStorage storage(cvCreateMemStorage(0)); - CvMat _image = image; - CvSeq *_objects = gpuHaarDetectObjects( &_image, cascade, storage, scaleFactor, - minNeighbors, flags, minSize ); - Seq(_objects).copyTo(objects); -} - -int HaarClassifierCascade::runAt(Point pt, int startStage, int) const -{ - return gpuRunHaarClassifierCascade(cascade, pt, startStage); -} - -void HaarClassifierCascade::setImages( const Mat &sum, const Mat &sqsum, - const Mat &tilted, double scale ) -{ - CvMat _sum = sum, _sqsum = sqsum, _tilted = tilted; - gpuSetImagesForHaarClassifierCascade( cascade, &_sum, &_sqsum, &_tilted, scale ); -} - -} -#endif - - - - - - - - - - - - - - - -/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -////////////////////////////////////////////reserved functios////////////////////////////////////////////////////////////////////////// -///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - - -/*#if CV_SSE2 -# if CV_SSE4 || defined __SSE4__ -# include -# else -# define _mm_blendv_pd(a, b, m) _mm_xor_pd(a, _mm_and_pd(_mm_xor_pd(b, a), m)) -# define _mm_blendv_ps(a, b, m) _mm_xor_ps(a, _mm_and_ps(_mm_xor_ps(b, a), m)) -# endif -#if defined CV_ICC -# define CV_HAAR_USE_SSE 1 -#endif -#endif*/ - - -/* -CV_IMPL void -gpuSetImagesForHaarClassifierCascade( CvHaarClassifierCascade* _cascade, -const CvArr* _sum, -const CvArr* _sqsum, -const CvArr* _tilted_sum, -double scale ) -{ -CvMat sum_stub, *sum = (CvMat*)_sum; -CvMat sqsum_stub, *sqsum = (CvMat*)_sqsum; -CvMat tilted_stub, *tilted = (CvMat*)_tilted_sum; -GpuHidHaarClassifierCascade* cascade; -int coi0 = 0, coi1 = 0; -int i; -int datasize; -int totalclassifier; -CvRect equRect; -double weight_scale; -int rows,cols; - -if( !CV_IS_HAAR_CLASSIFIER(_cascade) ) -CV_Error( !_cascade ? CV_StsNullPtr : CV_StsBadArg, "Invalid classifier pointer" ); - -if( scale <= 0 ) -CV_Error( CV_StsOutOfRange, "Scale must be positive" ); - -sum = cvGetMat( sum, &sum_stub, &coi0 ); -sqsum = cvGetMat( sqsum, &sqsum_stub, &coi1 ); - -if( coi0 || coi1 ) -CV_Error( CV_BadCOI, "COI is not supported" ); - -if( !CV_ARE_SIZES_EQ( sum, sqsum )) -CV_Error( CV_StsUnmatchedSizes, "All integral images must have the same size" ); - -if( CV_MAT_TYPE(sqsum->type) != CV_64FC1 || -CV_MAT_TYPE(sum->type) != CV_32SC1 ) -CV_Error( CV_StsUnsupportedFormat, -"Only (32s, 64f, 32s) combination of (sum,sqsum,tilted_sum) formats is allowed" ); - -if( !_cascade->hid_cascade ) -gpuCreateHidHaarClassifierCascade(_cascade,&datasize,&totalclassifier); - -cascade =(GpuHidHaarClassifierCascade *)_cascade->hid_cascade; - -if( cascade->has_tilted_features ) -{ -tilted = cvGetMat( tilted, &tilted_stub, &coi1 ); - -if( CV_MAT_TYPE(tilted->type) != CV_32SC1 ) -CV_Error( CV_StsUnsupportedFormat, -"Only (32s, 64f, 32s) combination of (sum,sqsum,tilted_sum) formats is allowed" ); - -if( sum->step != tilted->step ) -CV_Error( CV_StsUnmatchedSizes, -"Sum and tilted_sum must have the same stride (step, widthStep)" ); - -if( !CV_ARE_SIZES_EQ( sum, tilted )) -CV_Error( CV_StsUnmatchedSizes, "All integral images must have the same size" ); -//cascade->tilted = *tilted; -} - -_cascade->scale = scale; -_cascade->real_window_size.width = cvRound( _cascade->orig_window_size.width * scale ); -_cascade->real_window_size.height = cvRound( _cascade->orig_window_size.height * scale ); - -//cascade->sum = *sum; -//cascade->sqsum = *sqsum; - -equRect.x = equRect.y = cvRound(scale); -equRect.width = cvRound((_cascade->orig_window_size.width-2)*scale); -equRect.height = cvRound((_cascade->orig_window_size.height-2)*scale); -weight_scale = 1./(equRect.width*equRect.height); -cascade->inv_window_area = weight_scale; - -cascade->p0 = sum_elem_ptr(*sum, equRect.y, equRect.x); -cascade->p1 = sum_elem_ptr(*sum, equRect.y, equRect.x + equRect.width ); -cascade->p2 = sum_elem_ptr(*sum, equRect.y + equRect.height, equRect.x ); -cascade->p3 = sum_elem_ptr(*sum, equRect.y + equRect.height, -equRect.x + equRect.width ); -*/ -/* rows=sum->rows; -cols=sum->cols; -cascade->p0 = equRect.y*cols + equRect.x; -cascade->p1 = equRect.y*cols + equRect.x + equRect.width; -cascade->p2 = (equRect.y + equRect.height) * cols + equRect.x; -cascade->p3 = (equRect.y + equRect.height) * cols + equRect.x + equRect.width ; -*/ -/* -cascade->pq0 = sqsum_elem_ptr(*sqsum, equRect.y, equRect.x); -cascade->pq1 = sqsum_elem_ptr(*sqsum, equRect.y, equRect.x + equRect.width ); -cascade->pq2 = sqsum_elem_ptr(*sqsum, equRect.y + equRect.height, equRect.x ); -cascade->pq3 = sqsum_elem_ptr(*sqsum, equRect.y + equRect.height, -equRect.x + equRect.width ); -*/ -/* init pointers in haar features according to real window size and -given image pointers */ -/* for( i = 0; i < _cascade->count; i++ ) -{ -int j, k, l; -for( j = 0; j < cascade->stage_classifier[i].count; j++ ) -{ -for( l = 0; l < cascade->stage_classifier[i].classifier[j].count; l++ ) -{ -CvHaarFeature* feature = -&_cascade->stage_classifier[i].classifier[j].haar_feature[l]; -*/ /* GpuHidHaarClassifier* classifier = -cascade->stage_classifier[i].classifier + j; */ -//GpuHidHaarFeature* hidfeature = -// &cascade->stage_classifier[i].classifier[j].node[l].feature; -/* double sum0 = 0, area0 = 0; -CvRect r[3]; - -int base_w = -1, base_h = -1; -int new_base_w = 0, new_base_h = 0; -int kx, ky; -int flagx = 0, flagy = 0; -int x0 = 0, y0 = 0; -int nr; -*/ -/* align blocks */ -/* for( k = 0; k < CV_HAAR_FEATURE_MAX; k++ ) -{ -//if( !hidfeature->rect[k].p0 ) -// break; -r[k] = feature->rect[k].r; -base_w = (int)CV_IMIN( (unsigned)base_w, (unsigned)(r[k].width-1) ); -base_w = (int)CV_IMIN( (unsigned)base_w, (unsigned)(r[k].x - r[0].x-1) ); -base_h = (int)CV_IMIN( (unsigned)base_h, (unsigned)(r[k].height-1) ); -base_h = (int)CV_IMIN( (unsigned)base_h, (unsigned)(r[k].y - r[0].y-1) ); -} - -nr = k; - -base_w += 1; -base_h += 1; -kx = r[0].width / base_w; -ky = r[0].height / base_h; - -if( kx <= 0 ) -{ -flagx = 1; -new_base_w = cvRound( r[0].width * scale ) / kx; -x0 = cvRound( r[0].x * scale ); -} - -if( ky <= 0 ) -{ -flagy = 1; -new_base_h = cvRound( r[0].height * scale ) / ky; -y0 = cvRound( r[0].y * scale ); -} - -for( k = 0; k < nr; k++ ) -{ -CvRect tr; -double correction_ratio; - -if( flagx ) -{ -tr.x = (r[k].x - r[0].x) * new_base_w / base_w + x0; -tr.width = r[k].width * new_base_w / base_w; -} -else -{ -tr.x = cvRound( r[k].x * scale ); -tr.width = cvRound( r[k].width * scale ); -} - -if( flagy ) -{ -tr.y = (r[k].y - r[0].y) * new_base_h / base_h + y0; -tr.height = r[k].height * new_base_h / base_h; -} -else -{ -tr.y = cvRound( r[k].y * scale ); -tr.height = cvRound( r[k].height * scale ); -} - -#if CV_ADJUST_WEIGHTS -{ -// RAINER START -const float orig_feature_size = (float)(feature->rect[k].r.width)*feature->rect[k].r.height; -const float orig_norm_size = (float)(_cascade->orig_window_size.width)*(_cascade->orig_window_size.height); -const float feature_size = float(tr.width*tr.height); -//const float normSize = float(equRect.width*equRect.height); -float target_ratio = orig_feature_size / orig_norm_size; -//float isRatio = featureSize / normSize; -//correctionRatio = targetRatio / isRatio / normSize; -correction_ratio = target_ratio / feature_size; -// RAINER END -} -#else -correction_ratio = weight_scale * (!feature->tilted ? 1 : 0.5); -#endif - -if( !feature->tilted ) -{ -hidfeature->rect[k].p0 = tr.y * rows + tr.x; -hidfeature->rect[k].p1 = tr.y * rows + tr.x + tr.width; -hidfeature->rect[k].p2 = (tr.y + tr.height) * rows + tr.x; -hidfeature->rect[k].p3 = (tr.y + tr.height) * rows + tr.x + tr.width; - -} -else -{ -hidfeature->rect[k].p2 = (tr.y + tr.width) * rows + tr.x + tr.width; -hidfeature->rect[k].p3 = (tr.y + tr.width + tr.height) * rows + tr.x + tr.width - tr.height; -hidfeature->rect[k].p0 = tr.y*rows + tr.x; -hidfeature->rect[k].p1 = (tr.y + tr.height) * rows + tr.x - tr.height; - -} - -//hidfeature->rect[k].weight = (float)(feature->rect[k].weight * correction_ratio); - -if( k == 0 ) -area0 = tr.width * tr.height; -else -;// sum0 += hidfeature->rect[k].weight * tr.width * tr.height; -} - -//hidfeature->rect[0].weight = (float)(-sum0/area0);*/ -// } /* l */ -// } /* j */ -// } -//} -/* -CV_INLINE -double gpuEvalHidHaarClassifier( GpuHidHaarClassifier *classifier, -double variance_norm_factor, -size_t p_offset ) -{ - - int idx = 0; - do - { - GpuHidHaarTreeNode* node = classifier->node + idx; - double t = node->threshold * variance_norm_factor; - - double sum = calc_sum(node->feature.rect[0],p_offset) * node->feature.rect[0].weight; - sum += calc_sum(node->feature.rect[1],p_offset) * node->feature.rect[1].weight; - - if( node->feature.rect[2].p0 ) - sum += calc_sum(node->feature.rect[2],p_offset) * node->feature.rect[2].weight; - - idx = sum < t ? node->left : node->right; - } - while( idx > 0 ); - return classifier->alpha[-idx]; - - return 0.; -} - - -*/ -static int gpuRunHaarClassifierCascade( /*const CvHaarClassifierCascade *_cascade, -CvPoint pt, int start_stage */) -{ - /* - int result = -1; - - int p_offset, pq_offset; - int i, j; - double mean, variance_norm_factor; - GpuHidHaarClassifierCascade* cascade; - - if( !CV_IS_HAAR_CLASSIFIER(_cascade) ) - CV_Error( !_cascade ? CV_StsNullPtr : CV_StsBadArg, "Invalid cascade pointer" ); - - cascade = (GpuHidHaarClassifierCascade*) _cascade->hid_cascade; - if( !cascade ) - CV_Error( CV_StsNullPtr, "Hidden cascade has not been created.\n" - "Use gpuSetImagesForHaarClassifierCascade" ); - - if( pt.x < 0 || pt.y < 0 || - pt.x + _cascade->real_window_size.width >= cascade->sum.width-2 || - pt.y + _cascade->real_window_size.height >= cascade->sum.height-2 ) - return -1; - - p_offset = pt.y * (cascade->sum.step/sizeof(sumtype)) + pt.x; - pq_offset = pt.y * (cascade->sqsum.step/sizeof(sqsumtype)) + pt.x; - mean = calc_sum(*cascade,p_offset)*cascade->inv_window_area; - variance_norm_factor = cascade->pq0[pq_offset] - cascade->pq1[pq_offset] - - cascade->pq2[pq_offset] + cascade->pq3[pq_offset]; - variance_norm_factor = variance_norm_factor*cascade->inv_window_area - mean*mean; - if( variance_norm_factor >= 0. ) - variance_norm_factor = sqrt(variance_norm_factor); - else - variance_norm_factor = 1.; - - - if( cascade->is_stump_based ) - { - for( i = start_stage; i < cascade->count; i++ ) - { - double stage_sum = 0; - - if( cascade->stage_classifier[i].two_rects ) - { - for( j = 0; j < cascade->stage_classifier[i].count; j++ ) - { - GpuHidHaarClassifier* classifier = cascade->stage_classifier[i].classifier + j; - GpuHidHaarTreeNode* node = classifier->node; - double t = node->threshold*variance_norm_factor; - double sum = calc_sum(node->feature.rect[0],p_offset) * node->feature.rect[0].weight; - sum += calc_sum(node->feature.rect[1],p_offset) * node->feature.rect[1].weight; - stage_sum += classifier->alpha[sum >= t]; - } - } - else - { - for( j = 0; j < cascade->stage_classifier[i].count; j++ ) - { - GpuHidHaarClassifier* classifier = cascade->stage_classifier[i].classifier + j; - GpuHidHaarTreeNode* node = classifier->node; - double t = node->threshold*variance_norm_factor; - double sum = calc_sum(node->feature.rect[0],p_offset) * node->feature.rect[0].weight; - sum += calc_sum(node->feature.rect[1],p_offset) * node->feature.rect[1].weight; - if( node->feature.rect[2].p0 ) - sum += calc_sum(node->feature.rect[2],p_offset) * node->feature.rect[2].weight; - - stage_sum += classifier->alpha[sum >= t]; - } - } - - if( stage_sum < cascade->stage_classifier[i].threshold ) - return -i; - } - } - */ return 1; } - namespace cv { namespace ocl @@ -2671,78 +1498,3 @@ struct gpuHaarDetectObjects_ScaleCascade_Invoker } } - -/* -typedef struct _ALIGNED_ON(128) GpuHidHaarFeature -{ -struct _ALIGNED_ON(32) -{ -int p0 _ALIGNED_ON(4); -int p1 _ALIGNED_ON(4); -int p2 _ALIGNED_ON(4); -int p3 _ALIGNED_ON(4); -float weight _ALIGNED_ON(4); -} -rect[CV_HAAR_FEATURE_MAX] _ALIGNED_ON(32); -} -GpuHidHaarFeature; - - -typedef struct _ALIGNED_ON(128) GpuHidHaarTreeNode -{ -int left _ALIGNED_ON(4); -int right _ALIGNED_ON(4); -float threshold _ALIGNED_ON(4); -int p0[CV_HAAR_FEATURE_MAX] _ALIGNED_ON(16); -int p1[CV_HAAR_FEATURE_MAX] _ALIGNED_ON(16); -int p2[CV_HAAR_FEATURE_MAX] _ALIGNED_ON(16); -int p3[CV_HAAR_FEATURE_MAX] _ALIGNED_ON(16); -float weight[CV_HAAR_FEATURE_MAX] _ALIGNED_ON(16); -float alpha[2] _ALIGNED_ON(8); -// GpuHidHaarFeature feature __attribute__((aligned (128))); -} -GpuHidHaarTreeNode; - - -typedef struct _ALIGNED_ON(32) GpuHidHaarClassifier -{ -int count _ALIGNED_ON(4); -//CvHaarFeature* orig_feature; -GpuHidHaarTreeNode* node _ALIGNED_ON(8); -float* alpha _ALIGNED_ON(8); -} -GpuHidHaarClassifier; - - -typedef struct _ALIGNED_ON(64) __attribute__((aligned (64))) GpuHidHaarStageClassifier -{ -int count _ALIGNED_ON(4); -float threshold _ALIGNED_ON(4); -int two_rects _ALIGNED_ON(4); -GpuHidHaarClassifier* classifier _ALIGNED_ON(8); -struct GpuHidHaarStageClassifier* next _ALIGNED_ON(8); -struct GpuHidHaarStageClassifier* child _ALIGNED_ON(8); -struct GpuHidHaarStageClassifier* parent _ALIGNED_ON(8); -} -GpuHidHaarStageClassifier; - - -typedef struct _ALIGNED_ON(64) GpuHidHaarClassifierCascade -{ -int count _ALIGNED_ON(4); -int is_stump_based _ALIGNED_ON(4); -int has_tilted_features _ALIGNED_ON(4); -int is_tree _ALIGNED_ON(4); -int pq0 _ALIGNED_ON(4); -int pq1 _ALIGNED_ON(4); -int pq2 _ALIGNED_ON(4); -int pq3 _ALIGNED_ON(4); -int p0 _ALIGNED_ON(4); -int p1 _ALIGNED_ON(4); -int p2 _ALIGNED_ON(4); -int p3 _ALIGNED_ON(4); -float inv_window_area _ALIGNED_ON(4); -// GpuHidHaarStageClassifier* stage_classifier __attribute__((aligned (8))); -}GpuHidHaarClassifierCascade; -*/ -/* End of file. */