mirror of
https://github.com/opencv/opencv.git
synced 2024-11-29 05:29:54 +08:00
cv::resize area 2x
This commit is contained in:
parent
12001a42f9
commit
c0b702a994
@ -365,8 +365,10 @@ void cv::Canny( InputArray _src, OutputArray _dst,
|
||||
for ( ; j <= width - 8; j += 8)
|
||||
{
|
||||
int16x8_t v_dx = vld1q_s16(_dx + j), v_dy = vld1q_s16(_dy + j);
|
||||
vst1q_s32(_norm + j, vaddq_s32(vmovl_s16(vget_low_s16(v_dx)), vmovl_s16(vget_low_s16(v_dy))));
|
||||
vst1q_s32(_norm + j + 4, vaddq_s32(vmovl_s16(vget_high_s16(v_dx)), vmovl_s16(vget_high_s16(v_dy))));
|
||||
vst1q_s32(_norm + j, vaddq_s32(vabsq_s32(vmovl_s16(vget_low_s16(v_dx))),
|
||||
vabsq_s32(vmovl_s16(vget_low_s16(v_dy)))));
|
||||
vst1q_s32(_norm + j + 4, vaddq_s32(vabsq_s32(vmovl_s16(vget_high_s16(v_dx))),
|
||||
vabsq_s32(vmovl_s16(vget_high_s16(v_dy)))));
|
||||
}
|
||||
#endif
|
||||
for ( ; j < width; ++j)
|
||||
@ -397,13 +399,13 @@ void cv::Canny( InputArray _src, OutputArray _dst,
|
||||
for ( ; j <= width - 8; j += 8)
|
||||
{
|
||||
int16x8_t v_dx = vld1q_s16(_dx + j), v_dy = vld1q_s16(_dy + j);
|
||||
int32x4_t v_dxp = vmovl_s16(vget_low_s16(v_dx)), v_dyp = vmovl_s16(vget_low_s16(v_dy));
|
||||
int32x4_t v_dst = vaddq_s32(vmulq_s32(v_dxp, v_dxp), vmulq_s32(v_dyp, v_dyp));
|
||||
int16x4_t v_dxp = vget_low_s16(v_dx), v_dyp = vget_low_s16(v_dy);
|
||||
int32x4_t v_dst = vmlal_s16(vmull_s16(v_dxp, v_dxp), v_dyp, v_dyp);
|
||||
vst1q_s32(_norm + j, v_dst);
|
||||
|
||||
v_dxp = vmovl_s16(vget_high_s16(v_dx)), v_dyp = vmovl_s16(vget_high_s16(v_dy));
|
||||
v_dst = vaddq_s32(vmulq_s32(v_dxp, v_dxp), vmulq_s32(v_dyp, v_dyp));
|
||||
vst1q_s32(_norm + j, v_dst);
|
||||
v_dxp = vget_high_s16(v_dx), v_dyp = vget_high_s16(v_dy);
|
||||
v_dst = vmlal_s16(vmull_s16(v_dxp, v_dxp), v_dyp, v_dyp);
|
||||
vst1q_s32(_norm + j + 4, v_dst);
|
||||
}
|
||||
#endif
|
||||
for ( ; j < width; ++j)
|
||||
|
@ -147,16 +147,15 @@ static void calcHarris( const Mat& _cov, Mat& _dst, double k )
|
||||
}
|
||||
}
|
||||
#elif CV_NEON
|
||||
float32x4_t v_k = vdupq_n_f32((float)k));
|
||||
float32x4_t v_k = vdupq_n_f32((float)k);
|
||||
|
||||
for( ; j <= size.width - 4; j += 4 )
|
||||
{
|
||||
float32x4x3_t v_src = vld3q_f32(cov + j + 3);
|
||||
float32x4_t v_a = v_src.val[0], v_b = v_src.val[1], v_c = v_src.val[2];
|
||||
float32x4_t v_ac_bb = vsubq_f32(vmulq_f32(v_a, v_c), vmulq_f32(v_b, v_b));
|
||||
float32x4_t v_ac_bb = vmlsq_f32(vmulq_f32(v_a, v_c), v_b, v_b);
|
||||
float32x4_t v_ac = vaddq_f32(v_a, v_c);
|
||||
float32x4_t v_prod = vmulq_f32(v_k, vmulq_f32(v_ac, v_ac));
|
||||
vst1q_f32(dst + j, vsubq_f32(v_ac_bb, v_prod));
|
||||
vst1q_f32(dst + j, vmlsq_f32(v_ac_bb, v_k, vmulq_f32(v_ac, v_ac)));
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -619,10 +618,11 @@ void cv::preCornerDetect( InputArray _src, OutputArray _dst, int ksize, int bord
|
||||
if( src.depth() == CV_8U )
|
||||
factor *= 255;
|
||||
factor = 1./(factor * factor * factor);
|
||||
float factor_f = (float)factor;
|
||||
|
||||
#if CV_SSE2
|
||||
volatile bool haveSSE2 = cv::checkHardwareSupport(CV_CPU_SSE2);
|
||||
__m128 v_factor = _mm_set1_ps((float)factor), v_m2 = _mm_set1_ps(-2.0f);
|
||||
__m128 v_factor = _mm_set1_ps(factor_f), v_m2 = _mm_set1_ps(-2.0f);
|
||||
#endif
|
||||
|
||||
Size size = src.size();
|
||||
@ -657,10 +657,10 @@ void cv::preCornerDetect( InputArray _src, OutputArray _dst, int ksize, int bord
|
||||
for( ; j <= size.width - 4; j += 4 )
|
||||
{
|
||||
float32x4_t v_dx = vld1q_f32(dxdata + j), v_dy = vld1q_f32(dydata + j);
|
||||
float32x4_t v_s1 = vmulq_f32(v_dx, vmulq_f32(v_dx, vld1q_f32(d2ydata + j)));
|
||||
float32x4_t v_s2 = vmulq_f32(v_dy, vmulq_f32(v_dy, vld1q_f32(d2xdata + j)));
|
||||
float32x4_t v_s3 = vmulq_f32(v_dx, vmulq_f32(v_dy, vld1q_f32(dxydata + j)));
|
||||
vst1q_f32(dstdata + j, vaddq_f32(vaddq_f32(v_s1, v_s2), vmulq_n_f32(v_s3, -2.0f)));
|
||||
float32x4_t v_s = vmulq_f32(v_dx, vmulq_f32(v_dx, vld1q_f32(d2ydata + j)));
|
||||
v_s = vmlaq_f32(v_s, vld1q_f32(d2xdata + j), vmulq_f32(v_dy, v_dy));
|
||||
v_s = vmlaq_f32(v_s, vld1q_f32(dxydata + j), vmulq_n_f32(vmulq_f32(v_dy, v_dx), -2));
|
||||
vst1q_f32(dstdata + j, vmulq_n_f32(v_s, factor_f));
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@ -1322,7 +1322,72 @@ struct ResizeAreaFastNoVec
|
||||
{ return 0; }
|
||||
};
|
||||
|
||||
#if CV_SSE2
|
||||
#if CV_NEON
|
||||
|
||||
class ResizeAreaFastVec_SIMD_8u
|
||||
{
|
||||
public:
|
||||
ResizeAreaFastVec_SIMD_8u(int _cn, int _step) :
|
||||
cn(_cn), step(_step)
|
||||
{
|
||||
}
|
||||
|
||||
int operator() (const uchar* S, uchar* D, int w) const
|
||||
{
|
||||
int dx = 0;
|
||||
const uchar* S0 = S, * S1 = S0 + step;
|
||||
|
||||
uint16x8_t v_2 = vdupq_n_u16(2);
|
||||
|
||||
if (cn == 1)
|
||||
{
|
||||
for ( ; dx <= w - 16; dx += 16, S0 += 32, S1 += 32, D += 16)
|
||||
{
|
||||
uint8x16x2_t v_row0 = vld2q_u8(S0), v_row1 = vld2q_u8(S1);
|
||||
|
||||
uint16x8_t v_dst0 = vaddl_u8(vget_low_u8(v_row0.val[0]), vget_low_u8(v_row0.val[1]));
|
||||
v_dst0 = vaddq_u16(v_dst0, vaddl_u8(vget_low_u8(v_row1.val[0]), vget_low_u8(v_row1.val[1])));
|
||||
v_dst0 = vshrq_n_u16(vaddq_u16(v_dst0, v_2), 2);
|
||||
|
||||
uint16x8_t v_dst1 = vaddl_u8(vget_high_u8(v_row0.val[0]), vget_high_u8(v_row0.val[1]));
|
||||
v_dst1 = vaddq_u16(v_dst1, vaddl_u8(vget_high_u8(v_row1.val[0]), vget_high_u8(v_row1.val[1])));
|
||||
v_dst1 = vshrq_n_u16(vaddq_u16(v_dst1, v_2), 2);
|
||||
|
||||
vst1q_u8(D, vcombine_u8(vmovn_u16(v_dst0), vmovn_u16(v_dst1)));
|
||||
}
|
||||
}
|
||||
else if (cn == 4)
|
||||
{
|
||||
for ( ; dx <= w - 8; dx += 8, S0 += 16, S1 += 16, D += 8)
|
||||
{
|
||||
uint8x16_t v_row0 = vld1q_u8(S0), v_row1 = vld1q_u8(S1);
|
||||
|
||||
uint16x8_t v_row00 = vmovl_u8(vget_low_u8(v_row0));
|
||||
uint16x8_t v_row01 = vmovl_u8(vget_high_u8(v_row0));
|
||||
uint16x8_t v_row10 = vmovl_u8(vget_low_u8(v_row1));
|
||||
uint16x8_t v_row11 = vmovl_u8(vget_high_u8(v_row1));
|
||||
|
||||
uint16x4_t v_p0 = vadd_u16(vadd_u16(vget_low_u16(v_row00), vget_high_u16(v_row00)),
|
||||
vadd_u16(vget_low_u16(v_row10), vget_high_u16(v_row10)));
|
||||
uint16x4_t v_p1 = vadd_u16(vadd_u16(vget_low_u16(v_row01), vget_high_u16(v_row01)),
|
||||
vadd_u16(vget_low_u16(v_row11), vget_high_u16(v_row11)));
|
||||
uint16x8_t v_dst = vshrq_n_u16(vaddq_u16(vcombine_u16(v_p0, v_p1), v_2), 2);
|
||||
|
||||
vst1_u8(D, vmovn_u16(v_dst));
|
||||
}
|
||||
}
|
||||
|
||||
return dx;
|
||||
}
|
||||
|
||||
private:
|
||||
int cn, step;
|
||||
};
|
||||
|
||||
typedef ResizeAreaFastNoVec<ushort, ushort> ResizeAreaFastVec_SIMD_16u;
|
||||
|
||||
#elif CV_SSE2
|
||||
|
||||
class ResizeAreaFastVec_SIMD_8u
|
||||
{
|
||||
public:
|
||||
|
@ -1545,4 +1545,52 @@ TEST(Imgproc_InitUndistortMap, accuracy) { CV_UndistortMapTest test; test.safe_r
|
||||
TEST(Imgproc_GetRectSubPix, accuracy) { CV_GetRectSubPixTest test; test.safe_run(); }
|
||||
TEST(Imgproc_GetQuadSubPix, accuracy) { CV_GetQuadSubPixTest test; test.safe_run(); }
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
|
||||
template <typename T, typename WT>
|
||||
void resizeArea(const cv::Mat & src, cv::Mat & dst)
|
||||
{
|
||||
int cn = src.channels();
|
||||
|
||||
for (int y = 0; y < dst.rows; ++y)
|
||||
{
|
||||
const T * sptr0 = src.ptr<T>(y << 1);
|
||||
const T * sptr1 = src.ptr<T>((y << 1) + 1);
|
||||
T * dptr = dst.ptr<T>(y);
|
||||
|
||||
for (int x = 0; x < dst.cols * cn; x += cn)
|
||||
{
|
||||
int x1 = x << 1;
|
||||
|
||||
for (int c = 0; c < cn; ++c)
|
||||
{
|
||||
WT sum = WT(sptr0[x1 + c]) + WT(sptr0[x1 + c + cn]);
|
||||
sum += WT(sptr1[x1 + c]) + WT(sptr1[x1 + c + cn]) + (WT)(2);
|
||||
|
||||
dptr[x + c] = cv::saturate_cast<T>(sum >> 2);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Verifies that cv::resize with INTER_AREA and an exact 2x reduction produces
// the same result, element for element, as the scalar resizeArea reference
// for 8-bit single- and four-channel images.
TEST(Resize, Area_half)
{
    int types[] = { CV_8UC1, CV_8UC4 };

    // Fixed seed so every run exercises the same pixel data.
    cv::RNG rng(17);

    for (int i = 0, size = sizeof(types) / sizeof(types[0]); i < size; ++i)
    {
        int type = types[i];
        cv::Mat src(100, 100, type), dst_actual(50, 50, type), dst_reference(50, 50, type);

        // The sized Mat constructor does not initialize the pixel buffer, so
        // fill the source explicitly instead of reading indeterminate memory.
        rng.fill(src, cv::RNG::UNIFORM, 0, 256);

        if (CV_MAT_DEPTH(type) == CV_8U)
            resizeArea<uchar, ushort>(src, dst_reference);
        else
            CV_Assert(0);

        cv::resize(src, dst_actual, dst_actual.size(), 0, 0, cv::INTER_AREA);

        // Integer averaging must match exactly, hence a zero max-abs difference.
        ASSERT_EQ(0, cvtest::norm(dst_reference, dst_actual, cv::NORM_INF));
    }
}
|
||||
|
||||
/* End of file. */
|
||||
|
Loading…
Reference in New Issue
Block a user