diff --git a/modules/gpu/src/cuda/hog.cu b/modules/gpu/src/cuda/hog.cu index c358ef3c3f..f42cbbc0cd 100644 --- a/modules/gpu/src/cuda/hog.cu +++ b/modules/gpu/src/cuda/hog.cu @@ -507,8 +507,7 @@ void extract_descrs_by_cols(int win_height, int win_width, int block_stride_y, i dim3 threads(nthreads, 1); dim3 grid(img_win_width, img_win_height); - int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) / - block_stride_x; + int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) / block_stride_x; extract_descrs_by_cols_kernel<<>>( img_block_width, win_block_stride_x, win_block_stride_y, block_hists, descriptors); cudaSafeCall( cudaGetLastError() ); @@ -689,7 +688,7 @@ __global__ void compute_gradients_8UC1_kernel(int height, int width, const PtrEl hidx = (hidx + cnbins) % cnbins; ((uchar2*)qangle.ptr(blockIdx.y))[x] = make_uchar2(hidx, (hidx + 1) % cnbins); - ((float2*)grad.ptr(blockIdx.y))[x] = make_float2(mag * (1.f - ang), mag * ang); + ((float2*) grad.ptr(blockIdx.y))[x] = make_float2(mag * (1.f - ang), mag * ang); } } @@ -725,7 +724,7 @@ __global__ void resize_for_hog_kernel(float sx, float sy, DevMem2D_ dst, unsigned int y = blockIdx.y * blockDim.y + threadIdx.y; if (x < dst.cols && y < dst.rows) - ((unsigned char*)dst.ptr(y))[x] = tex2D(resize8UC1_tex, x * sx + colOfs, y * sy) * 255; + dst.ptr(y)[x] = tex2D(resize8UC1_tex, x * sx + colOfs, y * sy) * 255; } __global__ void resize_for_hog_kernel(float sx, float sy, DevMem2D_ dst, int colOfs) @@ -760,12 +759,14 @@ static void resize_for_hog(const DevMem2D& src, DevMem2D dst, TEX& tex) dim3 threads(32, 8); dim3 grid(divUp(dst.cols, threads.x), divUp(dst.rows, threads.y)); - float sx = static_cast(src.cols) / dst.cols; + + float sx = static_cast(src.cols) / dst.cols; float sy = static_cast(src.rows) / dst.rows; + resize_for_hog_kernel<<>>(sx, sy, (DevMem2D_)dst, colOfs); cudaSafeCall( cudaGetLastError() ); - cudaSafeCall(cudaThreadSynchronize()); - cudaSafeCall(cudaUnbindTexture(resize8UC1_tex)); + cudaSafeCall( cudaThreadSynchronize() ); + cudaSafeCall( cudaUnbindTexture(tex) ); } void resize_8UC1(const DevMem2D& src, DevMem2D dst) { resize_for_hog (src, dst, resize8UC1_tex); }