Merge pull request #1740 from ilya-lavrenov:ocl_corners

This commit is contained in:
Andrey Pavlenko 2013-11-06 13:31:43 +04:00 committed by OpenCV Buildbot
commit a3fa7a243d
7 changed files with 30 additions and 31 deletions

View File

@ -48,22 +48,22 @@
#define T_MEAN_VAR float
#define CONVERT_TYPE convert_uchar_sat
#define F_ZERO (0.0f)
float cvt(uchar val)
inline float cvt(uchar val)
{
return val;
}
float sqr(float val)
inline float sqr(float val)
{
return val * val;
}
float sum(float val)
inline float sum(float val)
{
return val;
}
float clamp1(float var, float learningRate, float diff, float minVar)
static float clamp1(float var, float learningRate, float diff, float minVar)
{
return fmax(var + learningRate * (diff * diff - var), minVar);
}
@ -72,7 +72,7 @@ float clamp1(float var, float learningRate, float diff, float minVar)
#define T_MEAN_VAR float4
#define CONVERT_TYPE convert_uchar4_sat
#define F_ZERO (0.0f, 0.0f, 0.0f, 0.0f)
float4 cvt(const uchar4 val)
inline float4 cvt(const uchar4 val)
{
float4 result;
result.x = val.x;
@ -83,17 +83,17 @@ float4 cvt(const uchar4 val)
return result;
}
float sqr(const float4 val)
inline float sqr(const float4 val)
{
return val.x * val.x + val.y * val.y + val.z * val.z;
}
float sum(const float4 val)
inline float sum(const float4 val)
{
return (val.x + val.y + val.z);
}
float4 clamp1(const float4 var, float learningRate, const float4 diff, float minVar)
static float4 clamp1(const float4 var, float learningRate, const float4 diff, float minVar)
{
float4 result;
result.x = fmax(var.x + learningRate * (diff.x * diff.x - var.x), minVar);
@ -116,14 +116,14 @@ typedef struct
uchar c_shadowVal;
}con_srtuct_t;
void swap(__global float* ptr, int x, int y, int k, int rows, int ptr_step)
static void swap(__global float* ptr, int x, int y, int k, int rows, int ptr_step)
{
float val = ptr[(k * rows + y) * ptr_step + x];
ptr[(k * rows + y) * ptr_step + x] = ptr[((k + 1) * rows + y) * ptr_step + x];
ptr[((k + 1) * rows + y) * ptr_step + x] = val;
}
void swap4(__global float4* ptr, int x, int y, int k, int rows, int ptr_step)
static void swap4(__global float4* ptr, int x, int y, int k, int rows, int ptr_step)
{
float4 val = ptr[(k * rows + y) * ptr_step + x];
ptr[(k * rows + y) * ptr_step + x] = ptr[((k + 1) * rows + y) * ptr_step + x];
@ -412,7 +412,7 @@ __kernel void mog2_kernel(__global T_FRAME * frame, __global int* fgmask, __glob
if (_weight < -prune)
{
_weight = 0.0;
_weight = 0.0f;
nmodes--;
}

View File

@ -292,7 +292,6 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa
for(int scalei = 0; scalei <loopcount; scalei++)
{
int4 scaleinfo1= info[scalei];
int width = (scaleinfo1.x & 0xffff0000) >> 16;
int height = scaleinfo1.x & 0xffff;
int grpnumperline =(scaleinfo1.y & 0xffff0000) >> 16;
int totalgrp = scaleinfo1.y & 0xffff;

View File

@ -136,8 +136,6 @@ __kernel void gpuRunHaarClassifierCascade_scaled2(
{
int4 scaleinfo1;
scaleinfo1 = info[scalei];
int width = (scaleinfo1.x & 0xffff0000) >> 16;
int height = scaleinfo1.x & 0xffff;
int grpnumperline = (scaleinfo1.y & 0xffff0000) >> 16;
int totalgrp = scaleinfo1.y & 0xffff;
float factor = as_float(scaleinfo1.w);

View File

@ -125,10 +125,12 @@ __kernel void calcHarris(__global const float *Dx, __global const float *Dy, __g
int indexDx = (dx_startY+i)*(dx_step>>2)+(dx_startX+col);
float dx_s = dx_con ? Dx[indexDx] : 0.0f;
dx_data[i] = dx_s;
bool dy_con = dy_startX+col >= 0 && dy_startX+col < dy_whole_cols && dy_startY+i >= 0 && dy_startY+i < dy_whole_rows;
int indexDy = (dy_startY+i)*(dy_step>>2)+(dy_startX+col);
float dy_s = dx_con ? Dy[indexDy] : 0.0f;
float dy_s = dy_con ? Dy[indexDy] : 0.0f;
dy_data[i] = dy_s;
data[0][i] = dx_data[i] * dx_data[i];
data[1][i] = dx_data[i] * dy_data[i];
data[2][i] = dy_data[i] * dy_data[i];

View File

@ -124,10 +124,12 @@ __kernel void calcMinEigenVal(__global const float *Dx,__global const float *Dy,
int indexDx = (dx_startY+i)*(dx_step>>2)+(dx_startX+col);
float dx_s = dx_con ? Dx[indexDx] : 0.0f;
dx_data[i] = dx_s;
bool dy_con = dy_startX+col >= 0 && dy_startX+col < dy_whole_cols && dy_startY+i >= 0 && dy_startY+i < dy_whole_rows;
int indexDy = (dy_startY+i)*(dy_step>>2)+(dy_startX+col);
float dy_s = dx_con ? Dy[indexDy] : 0.0f;
float dy_s = dy_con ? Dy[indexDy] : 0.0f;
dy_data[i] = dy_s;
data[0][i] = dx_data[i] * dx_data[i];
data[1][i] = dx_data[i] * dy_data[i];
data[2][i] = dy_data[i] * dy_data[i];

View File

@ -69,23 +69,16 @@ __global float* dx, __global float* dy, int dx_step)
}
float bicubicCoeff(float x_)
static float bicubicCoeff(float x_)
{
float x = fabs(x_);
if (x <= 1.0f)
{
return x * x * (1.5f * x - 2.5f) + 1.0f;
}
else if (x < 2.0f)
{
return x * (x * (-0.5f * x + 2.5f) - 4.0f) + 2.0f;
}
else
{
return 0.0f;
}
}
__kernel void warpBackwardKernel(__global const float* I0, int I0_step, int I0_col, int I0_row,
@ -170,12 +163,10 @@ __kernel void warpBackwardKernel(__global const float* I0, int I0_step, int I0_c
}
float readImage(__global const float *image, const int x, const int y, const int rows, const int cols, const int elemCntPerRow)
static float readImage(__global const float *image, const int x, const int y, const int rows, const int cols, const int elemCntPerRow)
{
int i0 = clamp(x, 0, cols - 1);
int j0 = clamp(y, 0, rows - 1);
int i1 = clamp(x + 1, 0, cols - 1);
int j1 = clamp(y + 1, 0, rows - 1);
return image[j0 * elemCntPerRow + i0];
}
@ -303,7 +294,7 @@ __kernel void estimateDualVariablesKernel(__global const float* u1, int u1_col,
}
float divergence(__global const float* v1, __global const float* v2, int y, int x, int v1_step, int v2_step)
static float divergence(__global const float* v1, __global const float* v2, int y, int x, int v1_step, int v2_step)
{
if (x > 0 && y > 0)
@ -407,5 +398,4 @@ __kernel void estimateUKernel(__global const float* I1wx, int I1wx_col, int I1wx
error[y * I1wx_step + x] = n1 + n2;
}
}
}

View File

@ -212,11 +212,19 @@ struct CornerTestBase :
Mat image = readImageType("gpu/stereobm/aloe-L.png", type);
ASSERT_FALSE(image.empty());
bool isFP = CV_MAT_DEPTH(type) >= CV_32F;
float val = 255.0f;
if (isFP)
{
image.convertTo(image, -1, 1.0 / 255);
val /= 255.0f;
}
Size roiSize = image.size();
Border srcBorder = randomBorder(0, useRoi ? MAX_VALUE : 0);
Size wholeSize = Size(roiSize.width + srcBorder.lef + srcBorder.rig, roiSize.height + srcBorder.top + srcBorder.bot);
src = randomMat(wholeSize, type, -255, 255, false);
src = randomMat(wholeSize, type, -val, val, false);
src_roi = src(Rect(srcBorder.lef, srcBorder.top, roiSize.width, roiSize.height));
image.copyTo(src_roi);
@ -527,7 +535,7 @@ INSTANTIATE_TEST_CASE_P(Imgproc, CornerMinEigenVal, Combine(
Bool()));
INSTANTIATE_TEST_CASE_P(Imgproc, CornerHarris, Combine(
Values((MatType)CV_8UC1), // TODO does not work properly with CV_32FC1
Values((MatType)CV_8UC1, CV_32FC1),
Values(3, 5),
Values( (int)BORDER_CONSTANT, (int)BORDER_REPLICATE, (int)BORDER_REFLECT, (int)BORDER_REFLECT_101),
Bool()));