mirror of
https://github.com/opencv/opencv.git
synced 2024-11-25 03:30:34 +08:00
Race condition bug-fix in hog.cu
See https://github.com/Itseez/opencv/issues/5721 COMMENTS: * The second __syncthreads() is necessary, since both reductions reuse the same `squares` buffer; I am sure of that. * The code also works without the first __syncthreads(), but I have added it anyway for symmetry. It does not affect run-time performance; I verified this by profiling with nvvp.
This commit is contained in:
parent
a0f8645541
commit
5a72be08fd
@ -331,11 +331,13 @@ namespace cv { namespace cuda { namespace device
|
||||
if (threadIdx.x < block_hist_size)
|
||||
elem = hist[0];
|
||||
|
||||
__syncthreads(); // prevent race condition (redundant?)
|
||||
float sum = reduce_smem<nthreads>(squares, elem * elem);
|
||||
|
||||
float scale = 1.0f / (::sqrtf(sum) + 0.1f * block_hist_size);
|
||||
elem = ::min(elem * scale, threshold);
|
||||
|
||||
__syncthreads(); // prevent race condition
|
||||
sum = reduce_smem<nthreads>(squares, elem * elem);
|
||||
|
||||
scale = 1.0f / (::sqrtf(sum) + 1e-3f);
|
||||
|
Loading…
Reference in New Issue
Block a user