cv::threshold

This commit is contained in:
Ilya Lavrenov 2014-09-26 10:55:40 +00:00
parent b4fc87c380
commit 09fbc78a97

View File

@ -264,6 +264,74 @@ thresh_8u( const Mat& _src, Mat& _dst, uchar thresh, uchar maxval, int type )
}
}
}
#elif CV_NEON
uint8x16_t v_thresh = vdupq_n_u8(thresh), v_maxval = vdupq_n_u8(maxval);
switch( type )
{
case THRESH_BINARY:
for( i = 0; i < roi.height; i++ )
{
const uchar* src = _src.ptr() + src_step*i;
uchar* dst = _dst.ptr() + dst_step*i;
for ( j_scalar = 0; j_scalar <= roi.width - 16; j_scalar += 16)
vst1q_u8(dst + j_scalar, vandq_u8(vcgtq_u8(vld1q_u8(src + j_scalar), v_thresh), v_maxval));
}
break;
case THRESH_BINARY_INV:
for( i = 0; i < roi.height; i++ )
{
const uchar* src = _src.ptr() + src_step*i;
uchar* dst = _dst.ptr() + dst_step*i;
for ( j_scalar = 0; j_scalar <= roi.width - 16; j_scalar += 16)
vst1q_u8(dst + j_scalar, vandq_u8(vcleq_u8(vld1q_u8(src + j_scalar), v_thresh), v_maxval));
}
break;
case THRESH_TRUNC:
for( i = 0; i < roi.height; i++ )
{
const uchar* src = _src.ptr() + src_step*i;
uchar* dst = _dst.ptr() + dst_step*i;
for ( j_scalar = 0; j_scalar <= roi.width - 16; j_scalar += 16)
vst1q_u8(dst + j_scalar, vminq_u8(vld1q_u8(src + j_scalar), v_thresh));
}
break;
case THRESH_TOZERO:
for( i = 0; i < roi.height; i++ )
{
const uchar* src = _src.ptr() + src_step*i;
uchar* dst = _dst.ptr() + dst_step*i;
for ( j_scalar = 0; j_scalar <= roi.width - 16; j_scalar += 16)
{
uint8x16_t v_src = vld1q_u8(src + j_scalar), v_mask = vcgtq_u8(v_src, v_thresh);
vst1q_u8(dst + j_scalar, vandq_u8(v_mask, v_src));
}
}
break;
case THRESH_TOZERO_INV:
for( i = 0; i < roi.height; i++ )
{
const uchar* src = _src.ptr() + src_step*i;
uchar* dst = _dst.ptr() + dst_step*i;
for ( j_scalar = 0; j_scalar <= roi.width - 16; j_scalar += 16)
{
uint8x16_t v_src = vld1q_u8(src + j_scalar), v_mask = vcleq_u8(v_src, v_thresh);
vst1q_u8(dst + j_scalar, vandq_u8(v_mask, v_src));
}
}
break;
default:
return CV_Error( CV_StsBadArg, "" );
}
#endif
if( j_scalar < roi.width )
@ -382,6 +450,14 @@ thresh_16s( const Mat& _src, Mat& _dst, short thresh, short maxval, int type )
_mm_storeu_si128((__m128i*)(dst + j + 8), v1 );
}
}
#elif CV_NEON
int16x8_t v_thresh = vdupq_n_s16(thresh), v_maxval = vdupq_n_s16(maxval);
for( ; j <= roi.width - 8; j += 8 )
{
uint16x8_t v_mask = vcgtq_s16(vld1q_s16(src + j), v_thresh);
vst1q_s16(dst + j, vandq_s16(vreinterpretq_s16_u16(v_mask), v_maxval));
}
#endif
for( ; j < roi.width; j++ )
@ -410,6 +486,14 @@ thresh_16s( const Mat& _src, Mat& _dst, short thresh, short maxval, int type )
_mm_storeu_si128((__m128i*)(dst + j + 8), v1 );
}
}
#elif CV_NEON
int16x8_t v_thresh = vdupq_n_s16(thresh), v_maxval = vdupq_n_s16(maxval);
for( ; j <= roi.width - 8; j += 8 )
{
uint16x8_t v_mask = vcleq_s16(vld1q_s16(src + j), v_thresh);
vst1q_s16(dst + j, vandq_s16(vreinterpretq_s16_u16(v_mask), v_maxval));
}
#endif
for( ; j < roi.width; j++ )
@ -436,6 +520,11 @@ thresh_16s( const Mat& _src, Mat& _dst, short thresh, short maxval, int type )
_mm_storeu_si128((__m128i*)(dst + j + 8), v1 );
}
}
#elif CV_NEON
int16x8_t v_thresh = vdupq_n_s16(thresh);
for( ; j <= roi.width - 8; j += 8 )
vst1q_s16(dst + j, vminq_s16(vld1q_s16(src + j), v_thresh));
#endif
for( ; j < roi.width; j++ )
@ -462,6 +551,15 @@ thresh_16s( const Mat& _src, Mat& _dst, short thresh, short maxval, int type )
_mm_storeu_si128((__m128i*)(dst + j + 8), v1 );
}
}
#elif CV_NEON
int16x8_t v_thresh = vdupq_n_s16(thresh);
for( ; j <= roi.width - 8; j += 8 )
{
int16x8_t v_src = vld1q_s16(src + j);
uint16x8_t v_mask = vcgtq_s16(v_src, v_thresh);
vst1q_s16(dst + j, vandq_s16(vreinterpretq_s16_u16(v_mask), v_src));
}
#endif
for( ; j < roi.width; j++ )
@ -491,6 +589,15 @@ thresh_16s( const Mat& _src, Mat& _dst, short thresh, short maxval, int type )
_mm_storeu_si128((__m128i*)(dst + j + 8), v1 );
}
}
#elif CV_NEON
int16x8_t v_thresh = vdupq_n_s16(thresh);
for( ; j <= roi.width - 8; j += 8 )
{
int16x8_t v_src = vld1q_s16(src + j);
uint16x8_t v_mask = vcleq_s16(v_src, v_thresh);
vst1q_s16(dst + j, vandq_s16(vreinterpretq_s16_u16(v_mask), v_src));
}
#endif
for( ; j < roi.width; j++ )
{
@ -576,6 +683,16 @@ thresh_32f( const Mat& _src, Mat& _dst, float thresh, float maxval, int type )
_mm_storeu_ps( dst + j + 4, v1 );
}
}
#elif CV_NEON
float32x4_t v_thresh = vdupq_n_f32(thresh);
uint32x4_t v_maxval = vreinterpretq_u32_f32(vdupq_n_f32(maxval));
for( ; j <= roi.width - 4; j += 4 )
{
float32x4_t v_src = vld1q_f32(src + j);
uint32x4_t v_dst = vandq_u32(vcgtq_f32(v_src, v_thresh), v_maxval);
vst1q_f32(dst + j, vreinterpretq_f32_u32(v_dst));
}
#endif
for( ; j < roi.width; j++ )
@ -604,6 +721,16 @@ thresh_32f( const Mat& _src, Mat& _dst, float thresh, float maxval, int type )
_mm_storeu_ps( dst + j + 4, v1 );
}
}
#elif CV_NEON
float32x4_t v_thresh = vdupq_n_f32(thresh);
uint32x4_t v_maxval = vreinterpretq_u32_f32(vdupq_n_f32(maxval));
for( ; j <= roi.width - 4; j += 4 )
{
float32x4_t v_src = vld1q_f32(src + j);
uint32x4_t v_dst = vandq_u32(vcleq_f32(v_src, v_thresh), v_maxval);
vst1q_f32(dst + j, vreinterpretq_f32_u32(v_dst));
}
#endif
for( ; j < roi.width; j++ )
@ -630,6 +757,11 @@ thresh_32f( const Mat& _src, Mat& _dst, float thresh, float maxval, int type )
_mm_storeu_ps( dst + j + 4, v1 );
}
}
#elif CV_NEON
float32x4_t v_thresh = vdupq_n_f32(thresh);
for( ; j <= roi.width - 4; j += 4 )
vst1q_f32(dst + j, vminq_f32(vld1q_f32(src + j), v_thresh));
#endif
for( ; j < roi.width; j++ )
@ -656,6 +788,16 @@ thresh_32f( const Mat& _src, Mat& _dst, float thresh, float maxval, int type )
_mm_storeu_ps( dst + j + 4, v1 );
}
}
#elif CV_NEON
float32x4_t v_thresh = vdupq_n_f32(thresh);
for( ; j <= roi.width - 4; j += 4 )
{
float32x4_t v_src = vld1q_f32(src + j);
uint32x4_t v_dst = vandq_u32(vcgtq_f32(v_src, v_thresh),
vreinterpretq_u32_f32(v_src));
vst1q_f32(dst + j, vreinterpretq_f32_u32(v_dst));
}
#endif
for( ; j < roi.width; j++ )
@ -685,6 +827,16 @@ thresh_32f( const Mat& _src, Mat& _dst, float thresh, float maxval, int type )
_mm_storeu_ps( dst + j + 4, v1 );
}
}
#elif CV_NEON
float32x4_t v_thresh = vdupq_n_f32(thresh);
for( ; j <= roi.width - 4; j += 4 )
{
float32x4_t v_src = vld1q_f32(src + j);
uint32x4_t v_dst = vandq_u32(vcleq_f32(v_src, v_thresh),
vreinterpretq_u32_f32(v_src));
vst1q_f32(dst + j, vreinterpretq_f32_u32(v_dst));
}
#endif
for( ; j < roi.width; j++ )
{