Merge pull request #536 from bitwangyaoyao:2.4_fixHaar

2025-08-05 05:56:36 +08:00 · 2013-02-24 15:26:06 +04:00 · 2013-02-24 15:26:06 +04:00 · 3b1fc16f36
commit 3b1fc16f36
parent e6dd4e840d 9bbf170054
2 changed files with 222 additions and 1465 deletions
--- a/modules/ocl/src/haar.cpp
+++ b/modules/ocl/src/haar.cpp
--- a/modules/ocl/src/kernels/haarobjectdetect.cl
+++ b/modules/ocl/src/kernels/haarobjectdetect.cl
@ -9,6 +9,7 @@
 //    Niko Li, newlife20080214@gmail.com
 //    Wang Weiyan, wangweiyanster@gmail.com
 //    Jia Haipeng, jiahaipeng95@gmail.com
 //    Nathan, liujun@multicorewareinc.com
 // Redistribution and use in source and binary forms, with or without modification,
 // are permitted provided that the following conditions are met:
 //
@ -299,6 +300,7 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa
                }
                barrier(CLK_LOCAL_MEM_FENCE);
                int queuecount  = lclcount[0];
                barrier(CLK_LOCAL_MEM_FENCE);
                nodecounter = splitnode;
                for(int stageloop = split_stage; stageloop< end_stage && queuecount>0; stageloop++)
                {
@ -324,7 +326,8 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa
                        int queue_pixel = mad24(((temp_coord  & (int)0xffff0000)>>16),readwidth,temp_coord & 0xffff);
                        //barrier(CLK_LOCAL_MEM_FENCE);
-            if(lcl_compute_win_id < queuecount) {
+                        if(lcl_compute_win_id < queuecount)
                        {
                            int tempnodecounter = lcl_compute_id;
                            float part_sum = 0.f;
@ -363,7 +366,8 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa
                            partialsum[lcl_id]=part_sum;
                        }
                        barrier(CLK_LOCAL_MEM_FENCE);
-            if(lcl_compute_win_id < queuecount) {
+                        if(lcl_compute_win_id < queuecount)
                        {
                            for(int i=0; i<lcl_compute_win && (lcl_compute_id==0); i++)
                            {
                                stage_sum += partialsum[lcl_id+i];
@ -378,8 +382,9 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa
                        }
                        barrier(CLK_LOCAL_MEM_FENCE);
                    }//end for(int queueloop=0;queueloop<queuecount_loop;queueloop++)
-                  barrier(CLK_LOCAL_MEM_FENCE);
+                    //barrier(CLK_LOCAL_MEM_FENCE);
                    queuecount = lclcount[0];
                    barrier(CLK_LOCAL_MEM_FENCE);
                    nodecounter += stageinfo.x;
                }//end for(int stageloop = splitstage; stageloop< endstage && queuecount>0;stageloop++)
                //barrier(CLK_LOCAL_MEM_FENCE);