From 09fbc78a97ff23b34bb92f858964468d49c573e8 Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Fri, 26 Sep 2014 10:55:40 +0000 Subject: [PATCH] cv::threshold --- modules/imgproc/src/thresh.cpp | 152 +++++++++++++++++++++++++++++++++ 1 file changed, 152 insertions(+) diff --git a/modules/imgproc/src/thresh.cpp b/modules/imgproc/src/thresh.cpp index 09a1f6eeab..721a596e70 100644 --- a/modules/imgproc/src/thresh.cpp +++ b/modules/imgproc/src/thresh.cpp @@ -264,6 +264,74 @@ thresh_8u( const Mat& _src, Mat& _dst, uchar thresh, uchar maxval, int type ) } } } +#elif CV_NEON + uint8x16_t v_thresh = vdupq_n_u8(thresh), v_maxval = vdupq_n_u8(maxval); + + switch( type ) + { + case THRESH_BINARY: + for( i = 0; i < roi.height; i++ ) + { + const uchar* src = _src.ptr() + src_step*i; + uchar* dst = _dst.ptr() + dst_step*i; + + for ( j_scalar = 0; j_scalar <= roi.width - 16; j_scalar += 16) + vst1q_u8(dst + j_scalar, vandq_u8(vcgtq_u8(vld1q_u8(src + j_scalar), v_thresh), v_maxval)); + } + break; + + case THRESH_BINARY_INV: + for( i = 0; i < roi.height; i++ ) + { + const uchar* src = _src.ptr() + src_step*i; + uchar* dst = _dst.ptr() + dst_step*i; + + for ( j_scalar = 0; j_scalar <= roi.width - 16; j_scalar += 16) + vst1q_u8(dst + j_scalar, vandq_u8(vcleq_u8(vld1q_u8(src + j_scalar), v_thresh), v_maxval)); + } + break; + + case THRESH_TRUNC: + for( i = 0; i < roi.height; i++ ) + { + const uchar* src = _src.ptr() + src_step*i; + uchar* dst = _dst.ptr() + dst_step*i; + + for ( j_scalar = 0; j_scalar <= roi.width - 16; j_scalar += 16) + vst1q_u8(dst + j_scalar, vminq_u8(vld1q_u8(src + j_scalar), v_thresh)); + } + break; + + case THRESH_TOZERO: + for( i = 0; i < roi.height; i++ ) + { + const uchar* src = _src.ptr() + src_step*i; + uchar* dst = _dst.ptr() + dst_step*i; + + for ( j_scalar = 0; j_scalar <= roi.width - 16; j_scalar += 16) + { + uint8x16_t v_src = vld1q_u8(src + j_scalar), v_mask = vcgtq_u8(v_src, v_thresh); + vst1q_u8(dst + j_scalar, vandq_u8(v_mask, v_src)); + } + } + break; + + case THRESH_TOZERO_INV: + for( i = 0; i < roi.height; i++ ) + { + const uchar* src = _src.ptr() + src_step*i; + uchar* dst = _dst.ptr() + dst_step*i; + + for ( j_scalar = 0; j_scalar <= roi.width - 16; j_scalar += 16) + { + uint8x16_t v_src = vld1q_u8(src + j_scalar), v_mask = vcleq_u8(v_src, v_thresh); + vst1q_u8(dst + j_scalar, vandq_u8(v_mask, v_src)); + } + } + break; + default: + return CV_Error( CV_StsBadArg, "" ); + } #endif if( j_scalar < roi.width ) @@ -382,6 +450,14 @@ thresh_16s( const Mat& _src, Mat& _dst, short thresh, short maxval, int type ) _mm_storeu_si128((__m128i*)(dst + j + 8), v1 ); } } + #elif CV_NEON + int16x8_t v_thresh = vdupq_n_s16(thresh), v_maxval = vdupq_n_s16(maxval); + + for( ; j <= roi.width - 8; j += 8 ) + { + uint16x8_t v_mask = vcgtq_s16(vld1q_s16(src + j), v_thresh); + vst1q_s16(dst + j, vandq_s16(vreinterpretq_s16_u16(v_mask), v_maxval)); + } #endif for( ; j < roi.width; j++ ) @@ -410,6 +486,14 @@ thresh_16s( const Mat& _src, Mat& _dst, short thresh, short maxval, int type ) _mm_storeu_si128((__m128i*)(dst + j + 8), v1 ); } } + #elif CV_NEON + int16x8_t v_thresh = vdupq_n_s16(thresh), v_maxval = vdupq_n_s16(maxval); + + for( ; j <= roi.width - 8; j += 8 ) + { + uint16x8_t v_mask = vcleq_s16(vld1q_s16(src + j), v_thresh); + vst1q_s16(dst + j, vandq_s16(vreinterpretq_s16_u16(v_mask), v_maxval)); + } #endif for( ; j < roi.width; j++ ) @@ -436,6 +520,11 @@ thresh_16s( const Mat& _src, Mat& _dst, short thresh, short maxval, int type ) _mm_storeu_si128((__m128i*)(dst + j + 8), v1 ); } } + #elif CV_NEON + int16x8_t v_thresh = vdupq_n_s16(thresh); + + for( ; j <= roi.width - 8; j += 8 ) + vst1q_s16(dst + j, vminq_s16(vld1q_s16(src + j), v_thresh)); #endif for( ; j < roi.width; j++ ) @@ -462,6 +551,15 @@ thresh_16s( const Mat& _src, Mat& _dst, short thresh, short maxval, int type ) _mm_storeu_si128((__m128i*)(dst + j + 8), v1 ); } } + #elif CV_NEON + int16x8_t v_thresh = vdupq_n_s16(thresh); + + for( ; j <= roi.width - 8; j += 8 ) + { + int16x8_t v_src = vld1q_s16(src + j); + uint16x8_t v_mask = vcgtq_s16(v_src, v_thresh); + vst1q_s16(dst + j, vandq_s16(vreinterpretq_s16_u16(v_mask), v_src)); + } #endif for( ; j < roi.width; j++ ) @@ -491,6 +589,15 @@ thresh_16s( const Mat& _src, Mat& _dst, short thresh, short maxval, int type ) _mm_storeu_si128((__m128i*)(dst + j + 8), v1 ); } } + #elif CV_NEON + int16x8_t v_thresh = vdupq_n_s16(thresh); + + for( ; j <= roi.width - 8; j += 8 ) + { + int16x8_t v_src = vld1q_s16(src + j); + uint16x8_t v_mask = vcleq_s16(v_src, v_thresh); + vst1q_s16(dst + j, vandq_s16(vreinterpretq_s16_u16(v_mask), v_src)); + } #endif for( ; j < roi.width; j++ ) { @@ -576,6 +683,16 @@ thresh_32f( const Mat& _src, Mat& _dst, float thresh, float maxval, int type ) _mm_storeu_ps( dst + j + 4, v1 ); } } +#elif CV_NEON + float32x4_t v_thresh = vdupq_n_f32(thresh); + uint32x4_t v_maxval = vreinterpretq_u32_f32(vdupq_n_f32(maxval)); + + for( ; j <= roi.width - 4; j += 4 ) + { + float32x4_t v_src = vld1q_f32(src + j); + uint32x4_t v_dst = vandq_u32(vcgtq_f32(v_src, v_thresh), v_maxval); + vst1q_f32(dst + j, vreinterpretq_f32_u32(v_dst)); + } #endif for( ; j < roi.width; j++ ) @@ -604,6 +721,16 @@ thresh_32f( const Mat& _src, Mat& _dst, float thresh, float maxval, int type ) _mm_storeu_ps( dst + j + 4, v1 ); } } +#elif CV_NEON + float32x4_t v_thresh = vdupq_n_f32(thresh); + uint32x4_t v_maxval = vreinterpretq_u32_f32(vdupq_n_f32(maxval)); + + for( ; j <= roi.width - 4; j += 4 ) + { + float32x4_t v_src = vld1q_f32(src + j); + uint32x4_t v_dst = vandq_u32(vcleq_f32(v_src, v_thresh), v_maxval); + vst1q_f32(dst + j, vreinterpretq_f32_u32(v_dst)); + } #endif for( ; j < roi.width; j++ ) @@ -630,6 +757,11 @@ thresh_32f( const Mat& _src, Mat& _dst, float thresh, float maxval, int type ) _mm_storeu_ps( dst + j + 4, v1 ); } } +#elif CV_NEON + float32x4_t v_thresh = vdupq_n_f32(thresh); + + for( ; j <= roi.width - 4; j += 4 ) + vst1q_f32(dst + j, vminq_f32(vld1q_f32(src + j), v_thresh)); #endif for( ; j < roi.width; j++ ) @@ -656,6 +788,16 @@ thresh_32f( const Mat& _src, Mat& _dst, float thresh, float maxval, int type ) _mm_storeu_ps( dst + j + 4, v1 ); } } +#elif CV_NEON + float32x4_t v_thresh = vdupq_n_f32(thresh); + + for( ; j <= roi.width - 4; j += 4 ) + { + float32x4_t v_src = vld1q_f32(src + j); + uint32x4_t v_dst = vandq_u32(vcgtq_f32(v_src, v_thresh), + vreinterpretq_u32_f32(v_src)); + vst1q_f32(dst + j, vreinterpretq_f32_u32(v_dst)); + } #endif for( ; j < roi.width; j++ ) @@ -685,6 +827,16 @@ thresh_32f( const Mat& _src, Mat& _dst, float thresh, float maxval, int type ) _mm_storeu_ps( dst + j + 4, v1 ); } } +#elif CV_NEON + float32x4_t v_thresh = vdupq_n_f32(thresh); + + for( ; j <= roi.width - 4; j += 4 ) + { + float32x4_t v_src = vld1q_f32(src + j); + uint32x4_t v_dst = vandq_u32(vcleq_f32(v_src, v_thresh), + vreinterpretq_u32_f32(v_src)); + vst1q_f32(dst + j, vreinterpretq_f32_u32(v_dst)); + } #endif for( ; j < roi.width; j++ ) {