diff --git a/modules/gpu/src/cuda/isf-sc.cu b/modules/gpu/src/cuda/isf-sc.cu index 37c6e30235..b5eb5ad17e 100644 --- a/modules/gpu/src/cuda/isf-sc.cu +++ b/modules/gpu/src/cuda/isf-sc.cu @@ -85,143 +85,6 @@ namespace icf { } } -enum { - HOG_BINS = 6, - HOG_LUV_BINS = 10, - WIDTH = 640, - HEIGHT = 480, - GREY_OFFSET = HEIGHT * HOG_LUV_BINS -}; - -/* Returns the nearest upper power of two, works only for -the typical GPU thread count (pert block) values */ -int power_2up(unsigned int n) -{ - if (n < 1) return 1; - else if (n < 2) return 2; - else if (n < 4) return 4; - else if (n < 8) return 8; - else if (n < 16) return 16; - else if (n < 32) return 32; - else if (n < 64) return 64; - else if (n < 128) return 128; - else if (n < 256) return 256; - else if (n < 512) return 512; - else if (n < 1024) return 1024; - return -1; // Input is too big -} - - -__device__ __forceinline__ uchar grey(const uchar3 rgb) -{ - return saturate_cast(rgb.x * 0.114f + rgb.y * 0.587f + rgb.z * 0.299f); -} - -__device__ __forceinline__ void luv(const uchar3 rgb, uchar& l, uchar& u, uchar& v) -{ - -} - -__global__ void rgb2grayluv(const uchar3* __restrict__ rgb, uchar* __restrict__ hog, - const int rgbPitch, const int hogPitch) -{ - const int y = blockIdx.y * blockDim.y + threadIdx.y; - const int x = blockIdx.x * blockDim.x + threadIdx.x; - - const uchar3 color = rgb[rgbPitch * y + x]; - - uchar l, u, v; - luv(color, l, u, v); - - hog[hogPitch * y + x] = l; - hog[hogPitch * (y + HEIGHT) + x] = u; - hog[hogPitch * (y + 2 * HEIGHT) + x] = v; - hog[hogPitch * (y + 3 * HEIGHT) + x] = grey(color); -} - -__device__ __forceinline__ -int qangle(const float &y, const float &x) -{ - int bin = 0; -// const float2 &bin_vector_zero = const_angle_bins_vectors[0]; -// float max_dot_product = fabs(x*bin_vector_zero.x + y*bin_vector_zero.y); - -// // let us hope this gets unrolled -// #pragma unroll -// for(int i=1; i < num_angles_bin; i+=1) -// { -// const float2 &bin_vector_i = const_angle_bins_vectors[i]; -// //const float2 bin_vector_i = const_angle_bins_vectors[i]; -// //const float2 &bin_vector_i = angle_bins_vectors[i]; -// const float dot_product = fabs(x*bin_vector_i.x + y*bin_vector_i.y); -// if(dot_product > max_dot_product) -// { -// max_dot_product = dot_product; -// index = i; -// } -// } - - return bin; -} - -// texture tgray; -__global__ void gray2hog(const uchar* __restrict__ gray, uchar* __restrict__ hog, const int pitch, const float norm) -{ - const int y = blockIdx.y * blockDim.y + threadIdx.y; - const int x = blockIdx.x * blockDim.x + threadIdx.x; - - // derivative - float dx = gray[y * pitch + x + 1]; - dx -= gray[y * pitch + x - 1]; - - float dy = gray[(y + 1) * pitch + x]; - dy -= gray[(y -1) * pitch + x - 1]; - - // mag and angle - const uchar mag = saturate_cast(sqrtf(dy * dy + dx * dx) * norm); - const int bin = qangle(dx, dy); - -} - -template -__device__ __forceinline__ uchar shrink(const uchar* ptr, const int pitch, const int y, const int x) -{ - int out = 0; -#pragma unroll - for(int dy = 0; dy < FACTOR; ++dy) -#pragma unroll - for(int dx = 0; dx < FACTOR; ++dx) - { - out += ptr[dy * pitch + dx]; - } - - return saturate_cast(out / FACTOR); -} - -template -__global__ void decimate(const uchar* __restrict__ hogluv, uchar* __restrict__ shrank, - const int inPitch, const int outPitch ) -{ - const int y = blockIdx.y * blockDim.y + threadIdx.y; - const int x = blockIdx.x * blockDim.x + threadIdx.x; - - const uchar* ptr = hogluv + (FACTOR * y) * inPitch + (FACTOR * x); - - shrank[ y * outPitch + x]= shrink(ptr, inPitch, y, x); -} - -__global__ void intRow(const uchar* __restrict__ hogluv, ushort* __restrict__ sum, - const int inPitch, const int outPitch) -{ - -} - -__global__ void intCol(ushort* __restrict__ sum, const int pitch) -{ - -} - - __global__ void detect(const cv::gpu::icf::Cascade cascade, const uchar* __restrict__ hogluv, const int pitch, PtrStepSz objects) { @@ -351,46 +214,4 @@ void icf::Cascade::detect(const cv::gpu::PtrStepSzb& hogluv, PtrStepSz o } -//////////////////////////////////////////////////// - - - -void icf::ChannelStorage::frame(const cv::gpu::PtrStepSz& rgb, cudaStream_t stream) -{ -// // // color convertin kernel -// // dim3 block(32, 8); -// // dim3 grid(FRAME_WIDTH / 32, FRAME_HEIGHT / 8); - -// // uchar * channels = (uchar*)dmem.ptr(FRAME_HEIGHT * HOG_BINS); -// // device::rgb2grayluv<<>>((uchar3*)rgb.ptr(), channels, -// // rgb.step / sizeof(uchar3), dmem.step); -// // cudaSafeCall( cudaGetLastError()); - -// // // hog calculation kernel -// // channels = (uchar*)dmem.ptr(FRAME_HEIGHT * HOG_LUV_BINS); -// // device::gray2hog<<>>(channels, (uchar*)dmem.ptr(), dmem.step, magnitudeScaling); -// // cudaSafeCall( cudaGetLastError() ); - -// // const int shrWidth = FRAME_WIDTH / shrinkage; -// // const int shrHeight = FRAME_HEIGHT / shrinkage; - -// // // decimate kernel -// // grid = dim3(shrWidth / 32, shrHeight / 8); -// // device::decimate<4><<>>((uchar*)dmem.ptr(), (uchar*)shrunk.ptr(), dmem.step, shrunk.step); -// // cudaSafeCall( cudaGetLastError() ); - -// // // integrate rows -// // block = dim3(shrWidth, 1); -// // grid = dim3(shrHeight * HOG_LUV_BINS, 1); -// // device::intRow<<>>((uchar*)shrunk.ptr(), (ushort*)hogluv.ptr(), -// // shrunk.step, hogluv.step / sizeof(ushort)); -// // cudaSafeCall( cudaGetLastError() ); - -// // // integrate cols -// // block = dim3(128, 1); -// // grid = dim3(shrWidth * HOG_LUV_BINS, 1); -// // device::intCol<<>>((ushort*)hogluv.ptr(), hogluv.step / hogluv.step / sizeof(ushort)); -// // cudaSafeCall( cudaGetLastError() ); -} - }} \ No newline at end of file diff --git a/modules/gpu/src/icf.hpp b/modules/gpu/src/icf.hpp index 454dad8812..a8ce8d483e 100644 --- a/modules/gpu/src/icf.hpp +++ b/modules/gpu/src/icf.hpp @@ -124,7 +124,7 @@ struct ChannelStorage const cv::gpu::PtrStepSzb& itg, const int s) : dmem (buff), shrunk(shr), hogluv(itg), shrinkage(s) {} - void frame(const cv::gpu::PtrStepSz& rgb, cudaStream_t stream); + void frame(const cv::gpu::PtrStepSz& rgb, cudaStream_t stream){} PtrStepSzb dmem; PtrStepSzb shrunk;