mirror of
https://github.com/opencv/opencv.git
synced 2024-11-25 11:40:44 +08:00
Race condition bug-fix in hog.cu
See https://github.com/Itseez/opencv/issues/5721. COMMENTS: * The second __syncthreads() is necessary; I am sure of that. * The code also works without the first __syncthreads(), but I have added it anyway for symmetry. It does not affect runtime performance; I have checked this by profiling with nvvp.
This commit is contained in:
parent
a0f8645541
commit
5a72be08fd
@ -331,11 +331,13 @@ namespace cv { namespace cuda { namespace device
|
|||||||
if (threadIdx.x < block_hist_size)
|
if (threadIdx.x < block_hist_size)
|
||||||
elem = hist[0];
|
elem = hist[0];
|
||||||
|
|
||||||
|
__syncthreads(); // prevent race condition (redundant?)
|
||||||
float sum = reduce_smem<nthreads>(squares, elem * elem);
|
float sum = reduce_smem<nthreads>(squares, elem * elem);
|
||||||
|
|
||||||
float scale = 1.0f / (::sqrtf(sum) + 0.1f * block_hist_size);
|
float scale = 1.0f / (::sqrtf(sum) + 0.1f * block_hist_size);
|
||||||
elem = ::min(elem * scale, threshold);
|
elem = ::min(elem * scale, threshold);
|
||||||
|
|
||||||
|
__syncthreads(); // prevent race condition
|
||||||
sum = reduce_smem<nthreads>(squares, elem * elem);
|
sum = reduce_smem<nthreads>(squares, elem * elem);
|
||||||
|
|
||||||
scale = 1.0f / (::sqrtf(sum) + 1e-3f);
|
scale = 1.0f / (::sqrtf(sum) + 1e-3f);
|
||||||
|
Loading…
Reference in New Issue
Block a user