CV_USE_UNROLLED for imgproc

This commit is contained in:
Victoria Zhislina 2012-02-21 11:31:23 +00:00
parent ff2af7d8bb
commit 0630e7010e
10 changed files with 96 additions and 43 deletions

View File

@ -32,7 +32,7 @@ PERF_TEST_P( TestFilter2d, Filter2d,
Mat kernel(kSize, kSize, CV_32FC1);
randu(kernel, -3, 10);
float s = (float)fabs( sum(kernel)[0] );
float s = fabs( sum(kernel)[0] );
if(s > 1e-3) kernel /= s;
declare.in(src, WARMUP_RNG).out(dst).time(20);

View File

@ -345,7 +345,7 @@ void prefix##remove_at_##type(_CVLIST* l, CVPOS pos)\
void prefix##set_##type(CVPOS pos, type* data)\
{\
ELEMENT_##type* element = ((ELEMENT_##type*)(pos.m_pos));\
memcpy(&(element->m_data), data, sizeof(*data));\
memcpy(&(element->m_data), data, sizeof(data));\
}\
type* prefix##get_##type(CVPOS pos)\
{\

View File

@ -53,6 +53,7 @@ acc_( const T* src, AT* dst, const uchar* mask, int len, int cn )
if( !mask )
{
len *= cn;
#if CV_ENABLE_UNROLLED
for( ; i <= len - 4; i += 4 )
{
AT t0, t1;
@ -64,7 +65,7 @@ acc_( const T* src, AT* dst, const uchar* mask, int len, int cn )
t1 = src[i+3] + dst[i+3];
dst[i+2] = t0; dst[i+3] = t1;
}
#endif
for( ; i < len; i++ )
dst[i] += src[i];
}
@ -110,6 +111,7 @@ accSqr_( const T* src, AT* dst, const uchar* mask, int len, int cn )
if( !mask )
{
len *= cn;
#if CV_ENABLE_UNROLLED
for( ; i <= len - 4; i += 4 )
{
AT t0, t1;
@ -121,7 +123,7 @@ accSqr_( const T* src, AT* dst, const uchar* mask, int len, int cn )
t1 = (AT)src[i+3]*src[i+3] + dst[i+3];
dst[i+2] = t0; dst[i+3] = t1;
}
#endif
for( ; i < len; i++ )
dst[i] += (AT)src[i]*src[i];
}
@ -167,6 +169,7 @@ accProd_( const T* src1, const T* src2, AT* dst, const uchar* mask, int len, int
if( !mask )
{
len *= cn;
#if CV_ENABLE_UNROLLED
for( ; i <= len - 4; i += 4 )
{
AT t0, t1;
@ -178,7 +181,7 @@ accProd_( const T* src1, const T* src2, AT* dst, const uchar* mask, int len, int
t1 = (AT)src1[i+3]*src2[i+3] + dst[i+3];
dst[i+2] = t0; dst[i+3] = t1;
}
#endif
for( ; i < len; i++ )
dst[i] += (AT)src1[i]*src2[i];
}
@ -225,6 +228,7 @@ accW_( const T* src, AT* dst, const uchar* mask, int len, int cn, double alpha )
if( !mask )
{
len *= cn;
#if CV_ENABLE_UNROLLED
for( ; i <= len - 4; i += 4 )
{
AT t0, t1;
@ -236,7 +240,7 @@ accW_( const T* src, AT* dst, const uchar* mask, int len, int cn, double alpha )
t1 = src[i+3]*a + dst[i+3]*b;
dst[i+2] = t0; dst[i+3] = t1;
}
#endif
for( ; i < len; i++ )
dst[i] = src[i]*a + dst[i]*b;
}

View File

@ -2227,7 +2227,7 @@ template<typename ST, typename DT, class VecOp> struct RowFilter : public BaseRo
i = vecOp(src, dst, width, cn);
width *= cn;
#if CV_ENABLE_UNROLLED
for( ; i <= width - 4; i += 4 )
{
S = (const ST*)src + i;
@ -2245,7 +2245,7 @@ template<typename ST, typename DT, class VecOp> struct RowFilter : public BaseRo
D[i] = s0; D[i+1] = s1;
D[i+2] = s2; D[i+3] = s3;
}
#endif
for( ; i < width; i++ )
{
S = (const ST*)src + i;
@ -2426,6 +2426,7 @@ template<class CastOp, class VecOp> struct ColumnFilter : public BaseColumnFilte
{
DT* D = (DT*)dst;
i = vecOp(src, dst, width);
#if CV_ENABLE_UNROLLED
for( ; i <= width - 4; i += 4 )
{
ST f = ky[0];
@ -2443,7 +2444,7 @@ template<class CastOp, class VecOp> struct ColumnFilter : public BaseColumnFilte
D[i] = castOp(s0); D[i+1] = castOp(s1);
D[i+2] = castOp(s2); D[i+3] = castOp(s3);
}
#endif
for( ; i < width; i++ )
{
ST s0 = ky[0]*((const ST*)src[0])[i] + _delta;
@ -2492,7 +2493,7 @@ template<class CastOp, class VecOp> struct SymmColumnFilter : public ColumnFilte
{
DT* D = (DT*)dst;
i = (this->vecOp)(src, dst, width);
#if CV_ENABLE_UNROLLED
for( ; i <= width - 4; i += 4 )
{
ST f = ky[0];
@ -2514,7 +2515,7 @@ template<class CastOp, class VecOp> struct SymmColumnFilter : public ColumnFilte
D[i] = castOp(s0); D[i+1] = castOp(s1);
D[i+2] = castOp(s2); D[i+3] = castOp(s3);
}
#endif
for( ; i < width; i++ )
{
ST s0 = ky[0]*((const ST*)src[0])[i] + _delta;
@ -2530,7 +2531,7 @@ template<class CastOp, class VecOp> struct SymmColumnFilter : public ColumnFilte
{
DT* D = (DT*)dst;
i = this->vecOp(src, dst, width);
#if CV_ENABLE_UNROLLED
for( ; i <= width - 4; i += 4 )
{
ST f = ky[0];
@ -2551,7 +2552,7 @@ template<class CastOp, class VecOp> struct SymmColumnFilter : public ColumnFilte
D[i] = castOp(s0); D[i+1] = castOp(s1);
D[i+2] = castOp(s2); D[i+3] = castOp(s3);
}
#endif
for( ; i < width; i++ )
{
ST s0 = _delta;
@ -2608,6 +2609,7 @@ struct SymmColumnSmallFilter : public SymmColumnFilter<CastOp, VecOp>
{
if( is_1_2_1 )
{
#if CV_ENABLE_UNROLLED
for( ; i <= width - 4; i += 4 )
{
ST s0 = S0[i] + S1[i]*2 + S2[i] + _delta;
@ -2620,9 +2622,17 @@ struct SymmColumnSmallFilter : public SymmColumnFilter<CastOp, VecOp>
D[i+2] = castOp(s0);
D[i+3] = castOp(s1);
}
#else
for( ; i < width; i ++ )
{
ST s0 = S0[i] + S1[i]*2 + S2[i] + _delta;
D[i] = castOp(s0);
}
#endif
}
else if( is_1_m2_1 )
{
#if CV_ENABLE_UNROLLED
for( ; i <= width - 4; i += 4 )
{
ST s0 = S0[i] - S1[i]*2 + S2[i] + _delta;
@ -2635,9 +2645,17 @@ struct SymmColumnSmallFilter : public SymmColumnFilter<CastOp, VecOp>
D[i+2] = castOp(s0);
D[i+3] = castOp(s1);
}
#else
for( ; i < width; i ++ )
{
ST s0 = S0[i] - S1[i]*2 + S2[i] + _delta;
D[i] = castOp(s0);
}
#endif
}
else
{
#if CV_ENABLE_UNROLLED
for( ; i <= width - 4; i += 4 )
{
ST s0 = (S0[i] + S2[i])*f1 + S1[i]*f0 + _delta;
@ -2650,8 +2668,14 @@ struct SymmColumnSmallFilter : public SymmColumnFilter<CastOp, VecOp>
D[i+2] = castOp(s0);
D[i+3] = castOp(s1);
}
#else
for( ; i < width; i ++ )
{
ST s0 = (S0[i] + S2[i])*f1 + S1[i]*f0 + _delta;
D[i] = castOp(s0);
}
#endif
}
for( ; i < width; i++ )
D[i] = castOp((S0[i] + S2[i])*f1 + S1[i]*f0 + _delta);
}
@ -2661,7 +2685,7 @@ struct SymmColumnSmallFilter : public SymmColumnFilter<CastOp, VecOp>
{
if( f1 < 0 )
std::swap(S0, S2);
#if CV_ENABLE_UNROLLED
for( ; i <= width - 4; i += 4 )
{
ST s0 = S2[i] - S0[i] + _delta;
@ -2674,12 +2698,19 @@ struct SymmColumnSmallFilter : public SymmColumnFilter<CastOp, VecOp>
D[i+2] = castOp(s0);
D[i+3] = castOp(s1);
}
#else
for( ; i < width; i ++ )
{
ST s0 = S2[i] - S0[i] + _delta;
D[i] = castOp(s0);
}
#endif
if( f1 < 0 )
std::swap(S0, S2);
}
else
{
#if CV_ENABLE_UNROLLED
for( ; i <= width - 4; i += 4 )
{
ST s0 = (S2[i] - S0[i])*f1 + _delta;
@ -2692,6 +2723,7 @@ struct SymmColumnSmallFilter : public SymmColumnFilter<CastOp, VecOp>
D[i+2] = castOp(s0);
D[i+3] = castOp(s1);
}
#endif
}
for( ; i < width; i++ )
@ -3043,7 +3075,7 @@ template<typename ST, class CastOp, class VecOp> struct Filter2D : public BaseFi
kp[k] = (const ST*)src[pt[k].y] + pt[k].x*cn;
i = vecOp((const uchar**)kp, dst, width);
#if CV_ENABLE_UNROLLED
for( ; i <= width - 4; i += 4 )
{
KT s0 = _delta, s1 = _delta, s2 = _delta, s3 = _delta;
@ -3061,7 +3093,7 @@ template<typename ST, class CastOp, class VecOp> struct Filter2D : public BaseFi
D[i] = castOp(s0); D[i+1] = castOp(s1);
D[i+2] = castOp(s2); D[i+3] = castOp(s3);
}
#endif
for( ; i < width; i++ )
{
KT s0 = _delta;

View File

@ -60,12 +60,12 @@ cv::Mat cv::getGaborKernel( Size ksize, double sigma, double theta,
if( ksize.width > 0 )
xmax = ksize.width/2;
else
xmax = (int)std::max(fabs(nstds*sigma_x*c), fabs(nstds*sigma_y*s));
xmax = std::max(fabs(nstds*sigma_x*c), fabs(nstds*sigma_y*s));
if( ksize.height > 0 )
ymax = ksize.height/2;
else
ymax = (int)std::max(fabs(nstds*sigma_x*s), fabs(nstds*sigma_y*c));
ymax = std::max(fabs(nstds*sigma_x*s), fabs(nstds*sigma_y*c));
xmin = -xmax;
ymin = -ymax;

View File

@ -439,8 +439,8 @@ static char segSegInt( Point2f a, Point2f b, Point2f c, Point2f d, Point2f& p, P
(0.0 > t) || (t > 1.0) )
code = '0';
p.x = (float)(a.x + s * ( b.x - a.x ));
p.y = (float)(a.y + s * ( b.y - a.y ));
p.x = a.x + s * ( b.x - a.x );
p.y = a.y + s * ( b.y - a.y );
return code;
}
@ -652,7 +652,7 @@ float cv::intersectConvexConvex( InputArray _p1, InputArray _p2, OutputArray _p1
_p12.release();
return 0.f;
}
area = (float)contourArea(_InputArray(result, nr), false);
area = contourArea(_InputArray(result, nr), false);
}
if( _p12.needed() )

View File

@ -877,6 +877,7 @@ struct VResizeLinear
VecOp vecOp;
int x = vecOp((const uchar**)src, (uchar*)dst, (const uchar*)beta, width);
#if CV_ENABLE_UNROLLED
for( ; x <= width - 4; x += 4 )
{
WT t0, t1;
@ -887,7 +888,7 @@ struct VResizeLinear
t1 = S0[x+3]*b0 + S1[x+3]*b1;
dst[x+2] = castOp(t0); dst[x+3] = castOp(t1);
}
#endif
for( ; x < width; x++ )
dst[x] = castOp(S0[x]*b0 + S1[x]*b1);
}
@ -1033,7 +1034,7 @@ struct VResizeLanczos4
CastOp castOp;
VecOp vecOp;
int k, x = vecOp((const uchar**)src, (uchar*)dst, (const uchar*)beta, width);
#if CV_ENABLE_UNROLLED
for( ; x <= width - 4; x += 4 )
{
WT b = beta[0];
@ -1050,7 +1051,7 @@ struct VResizeLanczos4
dst[x] = castOp(s0); dst[x+1] = castOp(s1);
dst[x+2] = castOp(s2); dst[x+3] = castOp(s3);
}
#endif
for( ; x < width; x++ )
{
dst[x] = castOp(src[0][x]*beta[0] + src[1][x]*beta[1] +
@ -1161,8 +1162,11 @@ static void resizeAreaFast_( const Mat& src, Mat& dst, const int* ofs, const int
{
const T* S = (const T*)(src.data + src.step*sy0) + xofs[dx];
WT sum = 0;
for( k = 0; k <= area - 4; k += 4 )
k=0;
#if CV_ENABLE_UNROLLED
for( ; k <= area - 4; k += 4 )
sum += S[ofs[k]] + S[ofs[k+1]] + S[ofs[k+2]] + S[ofs[k+3]];
#endif
for( ; k < area; k++ )
sum += S[ofs[k]];

View File

@ -700,7 +700,9 @@ template<class Op, class VecOp> struct MorphColumnFilter : public BaseColumnFilt
for( ; _ksize > 1 && count > 1; count -= 2, D += dststep*2, src += 2 )
{
for( i = i0; i <= width - 4; i += 4 )
i = i0;
#if CV_ENABLE_UNROLLED
for( ; i <= width - 4; i += 4 )
{
const T* sptr = src[1] + i;
T s0 = sptr[0], s1 = sptr[1], s2 = sptr[2], s3 = sptr[3];
@ -724,7 +726,7 @@ template<class Op, class VecOp> struct MorphColumnFilter : public BaseColumnFilt
D[i+dststep+2] = op(s2, sptr[2]);
D[i+dststep+3] = op(s3, sptr[3]);
}
#endif
for( ; i < width; i++ )
{
T s0 = src[1][i];
@ -739,7 +741,9 @@ template<class Op, class VecOp> struct MorphColumnFilter : public BaseColumnFilt
for( ; count > 0; count--, D += dststep, src++ )
{
for( i = i0; i <= width - 4; i += 4 )
i = i0;
#if CV_ENABLE_UNROLLED
for( ; i <= width - 4; i += 4 )
{
const T* sptr = src[0] + i;
T s0 = sptr[0], s1 = sptr[1], s2 = sptr[2], s3 = sptr[3];
@ -754,7 +758,7 @@ template<class Op, class VecOp> struct MorphColumnFilter : public BaseColumnFilt
D[i] = s0; D[i+1] = s1;
D[i+2] = s2; D[i+3] = s3;
}
#endif
for( ; i < width; i++ )
{
T s0 = src[0][i];
@ -801,7 +805,7 @@ template<class Op, class VecOp> struct MorphFilter : BaseFilter
kp[k] = (const T*)src[pt[k].y] + pt[k].x*cn;
i = vecOp(&ptrs[0], nz, dst, width);
#if CV_ENABLE_UNROLLED
for( ; i <= width - 4; i += 4 )
{
const T* sptr = kp[0] + i;
@ -817,7 +821,7 @@ template<class Op, class VecOp> struct MorphFilter : BaseFilter
D[i] = s0; D[i+1] = s1;
D[i+2] = s2; D[i+3] = s3;
}
#endif
for( ; i < width; i++ )
{
T s0 = kp[0][i];
@ -1074,8 +1078,10 @@ public:
{
int row0 = min(cvRound(range.begin() * src.rows / nStripes), src.rows);
int row1 = min(cvRound(range.end() * src.rows / nStripes), src.rows);
//printf("Size = (%d, %d), range[%d,%d), row0 = %d, row1 = %d\n", src.rows, src.cols, range.begin(), range.end(), row0, row1);
if(0)
printf("Size = (%d, %d), range[%d,%d), row0 = %d, row1 = %d\n",
src.rows, src.cols, range.begin(), range.end(), row0, row1);
Mat srcStripe = src.rowRange(row0, row1);
Mat dstStripe = dst.rowRange(row0, row1);
@ -1099,7 +1105,7 @@ private:
Point anchor;
int rowBorderType;
int columnBorderType;
Scalar borderValue;
const Scalar& borderValue;
};
static void morphOp( int op, InputArray _src, OutputArray _dst,

View File

@ -454,6 +454,7 @@ cvPyrMeanShiftFiltering( const CvArr* srcarr, CvArr* dstarr,
{
int row_count = 0;
x = minx;
#if CV_ENABLE_UNROLLED
for( ; x + 3 <= maxx; x += 4, ptr += 12 )
{
int t0 = ptr[0], t1 = ptr[1], t2 = ptr[2];
@ -481,7 +482,7 @@ cvPyrMeanShiftFiltering( const CvArr* srcarr, CvArr* dstarr,
sx += x+3; row_count++;
}
}
#endif
for( ; x <= maxx; x++, ptr += 3 )
{
int t0 = ptr[0], t1 = ptr[1], t2 = ptr[2];

View File

@ -248,8 +248,9 @@ thresh_8u( const Mat& _src, Mat& _dst, uchar thresh, uchar maxval, int type )
{
const uchar* src = (const uchar*)(_src.data + _src.step*i);
uchar* dst = (uchar*)(_dst.data + _dst.step*i);
for( j = j_scalar; j <= roi.width - 4; j += 4 )
j = j_scalar;
#if CV_ENABLE_UNROLLED
for( ; j <= roi.width - 4; j += 4 )
{
uchar t0 = tab[src[j]];
uchar t1 = tab[src[j+1]];
@ -263,7 +264,7 @@ thresh_8u( const Mat& _src, Mat& _dst, uchar thresh, uchar maxval, int type )
dst[j+2] = t0;
dst[j+3] = t1;
}
#endif
for( ; j < roi.width; j++ )
dst[j] = tab[src[j]];
}
@ -619,13 +620,16 @@ getThreshVal_Otsu_8u( const Mat& _src )
for( i = 0; i < size.height; i++ )
{
const uchar* src = _src.data + _src.step*i;
for( j = 0; j <= size.width - 4; j += 4 )
j = 0;
#if CV_ENABLE_UNROLLED
for( ; j <= size.width - 4; j += 4 )
{
int v0 = src[j], v1 = src[j+1];
h[v0]++; h[v1]++;
v0 = src[j+2]; v1 = src[j+3];
h[v0]++; h[v1]++;
}
#endif
for( ; j < size.width; j++ )
h[src[j]]++;
}
@ -682,8 +686,10 @@ public:
{
int row0 = std::min(cvRound(range.begin() * src.rows / nStripes), src.rows);
int row1 = std::min(cvRound(range.end() * src.rows / nStripes), src.rows);
//printf("Size = (%d, %d), range[%d,%d), row0 = %d, row1 = %d\n", src.rows, src.cols, range.begin(), range.end(), row0, row1);
if(0)
printf("Size = (%d, %d), range[%d,%d), row0 = %d, row1 = %d\n",
src.rows, src.cols, range.begin(), range.end(), row0, row1);
Mat srcStripe = src.rowRange(row0, row1);
Mat dstStripe = dst.rowRange(row0, row1);