mirror of
https://github.com/opencv/opencv.git
synced 2024-11-28 21:20:18 +08:00
removed division
This commit is contained in:
parent
8fb1f4093b
commit
6c3eeb7d7c
@ -371,8 +371,9 @@ int cv::gpu::CascadeClassifier_GPU_LBP::detectMultiScale(const GpuMat& image, Gp
|
||||
{
|
||||
int acc = level.sFrame.width + 1;
|
||||
float iniScale = level.scale;
|
||||
|
||||
cv::Size area = level.workArea;
|
||||
float step = (float)(1 + (level.scale <= 2.f));
|
||||
int step = 1 + (level.scale <= 2.f);
|
||||
|
||||
int total = 0, prev = 0;
|
||||
|
||||
@ -387,19 +388,22 @@ int cv::gpu::CascadeClassifier_GPU_LBP::detectMultiScale(const GpuMat& image, Gp
|
||||
gpu::resize(image, src, level.sFrame, 0, 0, CV_INTER_LINEAR);
|
||||
gpu::integralBuffered(src, sint, buff);
|
||||
|
||||
total += cvCeil(area.width / step) * cvCeil(area.height / step);
|
||||
// std::cout << "Total for scale: " << total << " this step contribution " << cvCeil(area.width / step) * cvCeil(area.height / step) << " previous width shift " << prev << " acc " << acc << " scales: " << cvCeil(area.width / step) << std::endl;
|
||||
// calculate job
|
||||
int totalWidth = level.workArea.width / step;
|
||||
// totalWidth = ((totalWidth + WARP_MASK) / WARP_SIZE) << WARP_LOG;
|
||||
|
||||
// increment pyramid level
|
||||
total += totalWidth * (level.workArea.height / step);
|
||||
|
||||
// go to next pyramid level
|
||||
level = level.next(scaleFactor, image.size(), NxM);
|
||||
area = level.workArea;
|
||||
|
||||
step = (float)(1 + (level.scale <= 2.f));
|
||||
step = (1 + (level.scale <= 2.f));
|
||||
prev = acc;
|
||||
acc += level.sFrame.width + 1;
|
||||
}
|
||||
|
||||
device::lbp::classifyPyramid(image.cols, image.rows, NxM.width, NxM.height, iniScale, scaleFactor, total, stage_mat, stage_mat.cols / sizeof(Stage), nodes_mat,
|
||||
device::lbp::classifyPyramid(image.cols, image.rows, NxM.width - 1, NxM.height - 1, iniScale, scaleFactor, total, stage_mat, stage_mat.cols / sizeof(Stage), nodes_mat,
|
||||
leaves_mat, subsets_mat, features_mat, subsetSize, candidates, dclassified.ptr<unsigned int>(), integral);
|
||||
}
|
||||
|
||||
@ -412,8 +416,6 @@ int cv::gpu::CascadeClassifier_GPU_LBP::detectMultiScale(const GpuMat& image, Gp
|
||||
// candidates.copyTo(objects);
|
||||
cudaSafeCall( cudaMemcpy(&classified, dclassified.ptr(), sizeof(int), cudaMemcpyDeviceToHost) );
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
// std::cout << classified << " !!!!!!!!!!" << std::endl;
|
||||
|
||||
return classified;
|
||||
}
|
||||
|
||||
|
@ -240,59 +240,47 @@ namespace cv { namespace gpu { namespace device
|
||||
|
||||
// stepShift, scale, width_k, sum_prev => y = tid_k / width_k, x = sum_prev + (tid_k - y * width_k)
|
||||
__global__ void lbp_cascade(const Cascade cascade, int frameW, int frameH, int windowW, int windowH, float scale, const float factor,
|
||||
const int workAmount, int* integral, const int pitch, DevMem2D_<int4> objects, unsigned int* classified)
|
||||
const int total, int* integral, const int pitch, DevMem2D_<int4> objects, unsigned int* classified)
|
||||
{
|
||||
int ftid = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
if (ftid >= workAmount ) return;
|
||||
if (ftid >= total) return;
|
||||
|
||||
int sum = 0;
|
||||
// float scale = 1.0f;
|
||||
float stepShift = (scale <= 2.f) ? 2.0 : 1.0;
|
||||
int w = ceilf( ( __float2int_rn(frameW / scale) - windowW + 1) / stepShift);
|
||||
int h = ceilf( ( __float2int_rn(frameH / scale) - windowH + 1) / stepShift);
|
||||
int step = (scale <= 2.f);
|
||||
|
||||
// if (!ftid)
|
||||
// printf("!!!!: %d %d", w, h);
|
||||
int windowsForLine = (__float2int_rn( __fdividef(frameW, scale)) - windowW) >> step;
|
||||
int stotal = windowsForLine * ( (__float2int_rn( __fdividef(frameH, scale)) - windowH) >> step);
|
||||
int wshift = 0;
|
||||
|
||||
int framTid = ftid;
|
||||
int i = 0;
|
||||
int scaleTid = ftid;
|
||||
|
||||
while (1)
|
||||
while (scaleTid >= stotal)
|
||||
{
|
||||
if (framTid < (w - 1) * (h - 1)) break;
|
||||
i++;
|
||||
sum += __float2int_rn(frameW / scale) + 1;
|
||||
framTid -= w * h;
|
||||
scaleTid -= stotal;
|
||||
wshift += __float2int_rn(__fdividef(frameW, scale)) + 1;
|
||||
scale *= factor;
|
||||
stepShift = (scale <= 2.f) ? 2.0 : 1.0;
|
||||
int w = ceilf( ( __float2int_rn(frameW / scale) - windowW + 1) / stepShift);
|
||||
int h = ceilf( ( __float2int_rn(frameH / scale) - windowH + 1) / stepShift);
|
||||
step = (scale <= 2.f);
|
||||
windowsForLine = ( ((__float2int_rn(__fdividef(frameW, scale)) - windowW) >> step));
|
||||
stotal = windowsForLine * ( (__float2int_rn(__fdividef(frameH, scale)) - windowH) >> step);
|
||||
}
|
||||
|
||||
int y = (framTid / w);
|
||||
int x = (framTid - y * w) * stepShift;
|
||||
y *= stepShift;
|
||||
x += sum;
|
||||
int y = __fdividef(scaleTid, windowsForLine);
|
||||
int x = scaleTid - y * windowsForLine;
|
||||
|
||||
// if (i == 2)
|
||||
// printf("!!!!!!!!!!!!!! %f %d %d %d\n", windowW * scale, sum, y, x);
|
||||
x <<= step;
|
||||
y <<= step;
|
||||
|
||||
if (cascade(y, x, integral, pitch))
|
||||
if (cascade(y, x + wshift, integral, pitch))
|
||||
{
|
||||
if(x >= __float2int_rn(__fdividef(frameW, scale)) - windowW) return;
|
||||
|
||||
int4 rect;
|
||||
rect.x = roundf( (x - sum) * scale);
|
||||
rect.y = roundf(y * scale);
|
||||
rect.z = roundf(windowW * scale);
|
||||
rect.w = roundf(windowH * scale);
|
||||
|
||||
if (rect.x > frameW || rect.y > frameH) return;
|
||||
// printf("OUTLAUER %d %d %d %d %d %d %d %d %d %f %f\n", x, y, ftid, framTid, rect.x, rect.y, sum, w, h, stepShift, scale);
|
||||
|
||||
// printf("passed: %d %d ---- %d %d %d %d %d\n", y, x, rect.x, rect.y, rect.z, rect.w, sum);
|
||||
rect.x = __float2int_rn(x * scale);
|
||||
rect.y = __float2int_rn(y * scale);
|
||||
rect.z = __float2int_rn(windowW * scale);
|
||||
rect.w = __float2int_rn(windowH * scale);
|
||||
|
||||
int res = Emulation::smem::atomicInc(classified, (unsigned int)objects.cols);
|
||||
objects(0, res) = rect;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user