From 3dafdd6a7969f11dd4288b84cad7b5fbcc335ab8 Mon Sep 17 00:00:00 2001 From: Vadim Pisarevsky Date: Tue, 22 Nov 2011 15:16:45 +0000 Subject: [PATCH] added 16s support to cv::threshold. --- modules/imgproc/src/thresh.cpp | 303 ++++++++++++++++++++++----- modules/imgproc/test/test_thresh.cpp | 151 ++++++++++--- 2 files changed, 372 insertions(+), 82 deletions(-) diff --git a/modules/imgproc/src/thresh.cpp b/modules/imgproc/src/thresh.cpp index 4579c8e583..926f9d4770 100644 --- a/modules/imgproc/src/thresh.cpp +++ b/modules/imgproc/src/thresh.cpp @@ -272,13 +272,13 @@ thresh_8u( const Mat& _src, Mat& _dst, uchar thresh, uchar maxval, int type ) static void -thresh_32f( const Mat& _src, Mat& _dst, float thresh, float maxval, int type ) +thresh_16s( const Mat& _src, Mat& _dst, short thresh, short maxval, int type ) { int i, j; Size roi = _src.size(); roi.width *= _src.channels(); - const float* src = (const float*)_src.data; - float* dst = (float*)_dst.data; + const short* src = (const short*)_src.data; + short* dst = (short*)_dst.data; size_t src_step = _src.step/sizeof(src[0]); size_t dst_step = _dst.step/sizeof(dst[0]); @@ -301,18 +301,18 @@ thresh_32f( const Mat& _src, Mat& _dst, float thresh, float maxval, int type ) #if CV_SSE2 if( useSIMD ) { - __m128 thresh4 = _mm_set1_ps(thresh), maxval4 = _mm_set1_ps(maxval); - for( ; j <= roi.width - 8; j += 8 ) + __m128i thresh8 = _mm_set1_epi16(thresh), maxval8 = _mm_set1_epi16(maxval); + for( ; j <= roi.width - 16; j += 16 ) { - __m128 v0, v1; - v0 = _mm_loadu_ps( src + j ); - v1 = _mm_loadu_ps( src + j + 4 ); - v0 = _mm_cmpgt_ps( v0, thresh4 ); - v1 = _mm_cmpgt_ps( v1, thresh4 ); - v0 = _mm_and_ps( v0, maxval4 ); - v1 = _mm_and_ps( v1, maxval4 ); - _mm_storeu_ps( dst + j, v0 ); - _mm_storeu_ps( dst + j + 4, v1 ); + __m128i v0, v1; + v0 = _mm_loadu_si128( (const __m128i*)(src + j) ); + v1 = _mm_loadu_si128( (const __m128i*)(src + j + 8) ); + v0 = _mm_cmpgt_epi16( v0, thresh8 ); + v1 = _mm_cmpgt_epi16( v1, thresh8 ); + v0 = 
_mm_and_si128( v0, maxval8 ); + v1 = _mm_and_si128( v1, maxval8 ); + _mm_storeu_si128((__m128i*)(dst + j), v0 ); + _mm_storeu_si128((__m128i*)(dst + j + 8), v1 ); } } #endif @@ -329,18 +329,18 @@ thresh_32f( const Mat& _src, Mat& _dst, float thresh, float maxval, int type ) #if CV_SSE2 if( useSIMD ) { - __m128 thresh4 = _mm_set1_ps(thresh), maxval4 = _mm_set1_ps(maxval); - for( ; j <= roi.width - 8; j += 8 ) + __m128i thresh8 = _mm_set1_epi16(thresh), maxval8 = _mm_set1_epi16(maxval); + for( ; j <= roi.width - 16; j += 16 ) { - __m128 v0, v1; - v0 = _mm_loadu_ps( src + j ); - v1 = _mm_loadu_ps( src + j + 4 ); - v0 = _mm_cmple_ps( v0, thresh4 ); - v1 = _mm_cmple_ps( v1, thresh4 ); - v0 = _mm_and_ps( v0, maxval4 ); - v1 = _mm_and_ps( v1, maxval4 ); - _mm_storeu_ps( dst + j, v0 ); - _mm_storeu_ps( dst + j + 4, v1 ); + __m128i v0, v1; + v0 = _mm_loadu_si128( (const __m128i*)(src + j) ); + v1 = _mm_loadu_si128( (const __m128i*)(src + j + 8) ); + v0 = _mm_cmpgt_epi16( v0, thresh8 ); + v1 = _mm_cmpgt_epi16( v1, thresh8 ); + v0 = _mm_andnot_si128( v0, maxval8 ); + v1 = _mm_andnot_si128( v1, maxval8 ); + _mm_storeu_si128((__m128i*)(dst + j), v0 ); + _mm_storeu_si128((__m128i*)(dst + j + 8), v1 ); } } #endif @@ -357,16 +357,16 @@ thresh_32f( const Mat& _src, Mat& _dst, float thresh, float maxval, int type ) #if CV_SSE2 if( useSIMD ) { - __m128 thresh4 = _mm_set1_ps(thresh); - for( ; j <= roi.width - 8; j += 8 ) + __m128i thresh8 = _mm_set1_epi16(thresh); + for( ; j <= roi.width - 16; j += 16 ) { - __m128 v0, v1; - v0 = _mm_loadu_ps( src + j ); - v1 = _mm_loadu_ps( src + j + 4 ); - v0 = _mm_min_ps( v0, thresh4 ); - v1 = _mm_min_ps( v1, thresh4 ); - _mm_storeu_ps( dst + j, v0 ); - _mm_storeu_ps( dst + j + 4, v1 ); + __m128i v0, v1; + v0 = _mm_loadu_si128( (const __m128i*)(src + j) ); + v1 = _mm_loadu_si128( (const __m128i*)(src + j + 8) ); + v0 = _mm_min_epi16( v0, thresh8 ); + v1 = _mm_min_epi16( v1, thresh8 ); + _mm_storeu_si128((__m128i*)(dst + j), v0 ); + 
_mm_storeu_si128((__m128i*)(dst + j + 8), v1 ); } } #endif @@ -383,23 +383,23 @@ thresh_32f( const Mat& _src, Mat& _dst, float thresh, float maxval, int type ) #if CV_SSE2 if( useSIMD ) { - __m128 thresh4 = _mm_set1_ps(thresh); - for( ; j <= roi.width - 8; j += 8 ) + __m128i thresh8 = _mm_set1_epi16(thresh); + for( ; j <= roi.width - 16; j += 16 ) { - __m128 v0, v1; - v0 = _mm_loadu_ps( src + j ); - v1 = _mm_loadu_ps( src + j + 4 ); - v0 = _mm_and_ps(v0, _mm_cmpgt_ps(v0, thresh4)); - v1 = _mm_and_ps(v1, _mm_cmpgt_ps(v1, thresh4)); - _mm_storeu_ps( dst + j, v0 ); - _mm_storeu_ps( dst + j + 4, v1 ); + __m128i v0, v1; + v0 = _mm_loadu_si128( (const __m128i*)(src + j) ); + v1 = _mm_loadu_si128( (const __m128i*)(src + j + 8) ); + v0 = _mm_and_si128(v0, _mm_cmpgt_epi16(v0, thresh8)); + v1 = _mm_and_si128(v1, _mm_cmpgt_epi16(v1, thresh8)); + _mm_storeu_si128((__m128i*)(dst + j), v0 ); + _mm_storeu_si128((__m128i*)(dst + j + 8), v1 ); } } #endif for( ; j < roi.width; j++ ) { - float v = src[j]; + short v = src[j]; dst[j] = v > thresh ? 
v : 0; } } @@ -412,22 +412,22 @@ thresh_32f( const Mat& _src, Mat& _dst, float thresh, float maxval, int type ) #if CV_SSE2 if( useSIMD ) { - __m128 thresh4 = _mm_set1_ps(thresh); - for( ; j <= roi.width - 8; j += 8 ) + __m128i thresh8 = _mm_set1_epi16(thresh); + for( ; j <= roi.width - 16; j += 16 ) { - __m128 v0, v1; - v0 = _mm_loadu_ps( src + j ); - v1 = _mm_loadu_ps( src + j + 4 ); - v0 = _mm_and_ps(v0, _mm_cmple_ps(v0, thresh4)); - v1 = _mm_and_ps(v1, _mm_cmple_ps(v1, thresh4)); - _mm_storeu_ps( dst + j, v0 ); - _mm_storeu_ps( dst + j + 4, v1 ); + __m128i v0, v1; + v0 = _mm_loadu_si128( (const __m128i*)(src + j) ); + v1 = _mm_loadu_si128( (const __m128i*)(src + j + 8) ); + v0 = _mm_andnot_si128(_mm_cmpgt_epi16(v0, thresh8), v0); + v1 = _mm_andnot_si128(_mm_cmpgt_epi16(v1, thresh8), v1); + _mm_storeu_si128((__m128i*)(dst + j), v0 ); + _mm_storeu_si128((__m128i*)(dst + j + 8), v1 ); } } #endif for( ; j < roi.width; j++ ) { - float v = src[j]; + short v = src[j]; dst[j] = v <= thresh ? 
v : 0; } } @@ -437,6 +437,173 @@ thresh_32f( const Mat& _src, Mat& _dst, float thresh, float maxval, int type ) } } + +static void /* Element-wise thresholding that reads _src.data and writes _dst.data through float pointers. type selects one of the five THRESH_* cases handled below; any other value raises CV_StsBadArg. */ +thresh_32f( const Mat& _src, Mat& _dst, float thresh, float maxval, int type ) +{ + int i, j; + Size roi = _src.size(); + roi.width *= _src.channels(); /* channels are walked as extra scalar columns */ + const float* src = (const float*)_src.data; + float* dst = (float*)_dst.data; + size_t src_step = _src.step/sizeof(src[0]); + size_t dst_step = _dst.step/sizeof(dst[0]); /* steps divided by the element size so that src += src_step moves a float pointer by one row - assumes step is a byte count, TODO confirm */ + +#if CV_SSE2 + volatile bool useSIMD = checkHardwareSupport(CV_CPU_SSE); +#endif + + if( _src.isContinuous() && _dst.isContinuous() ) + { + roi.width *= roi.height; + roi.height = 1; /* both matrices report continuous storage, so everything is processed as one long row */ + } + + switch( type ) + { + case THRESH_BINARY: + for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step ) + { + j = 0; +#if CV_SSE2 + if( useSIMD ) + { + __m128 thresh4 = _mm_set1_ps(thresh), maxval4 = _mm_set1_ps(maxval); + for( ; j <= roi.width - 8; j += 8 ) /* each pass handles 8 floats as two 4-float vectors; the scalar loop after the SIMD section continues from j (same layout in every case below) */ + { + __m128 v0, v1; + v0 = _mm_loadu_ps( src + j ); + v1 = _mm_loadu_ps( src + j + 4 ); + v0 = _mm_cmpgt_ps( v0, thresh4 ); + v1 = _mm_cmpgt_ps( v1, thresh4 ); + v0 = _mm_and_ps( v0, maxval4 ); + v1 = _mm_and_ps( v1, maxval4 ); /* comparison mask AND maxval: maxval where src > thresh, zero elsewhere, matching the scalar loop */ + _mm_storeu_ps( dst + j, v0 ); + _mm_storeu_ps( dst + j + 4, v1 ); + } + } +#endif + + for( ; j < roi.width; j++ ) + dst[j] = src[j] > thresh ? maxval : 0; + } + break; + + case THRESH_BINARY_INV: + for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step ) + { + j = 0; +#if CV_SSE2 + if( useSIMD ) + { + __m128 thresh4 = _mm_set1_ps(thresh), maxval4 = _mm_set1_ps(maxval); + for( ; j <= roi.width - 8; j += 8 ) + { + __m128 v0, v1; + v0 = _mm_loadu_ps( src + j ); + v1 = _mm_loadu_ps( src + j + 4 ); + v0 = _mm_cmple_ps( v0, thresh4 ); + v1 = _mm_cmple_ps( v1, thresh4 ); + v0 = _mm_and_ps( v0, maxval4 ); + v1 = _mm_and_ps( v1, maxval4 ); /* inverse of THRESH_BINARY: maxval where src <= thresh */ + _mm_storeu_ps( dst + j, v0 ); + _mm_storeu_ps( dst + j + 4, v1 ); + } + } +#endif + + for( ; j < roi.width; j++ ) + dst[j] = src[j] <= thresh ? 
maxval : 0; + } + break; + + case THRESH_TRUNC: + for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step ) + { + j = 0; +#if CV_SSE2 + if( useSIMD ) + { + __m128 thresh4 = _mm_set1_ps(thresh); + for( ; j <= roi.width - 8; j += 8 ) + { + __m128 v0, v1; + v0 = _mm_loadu_ps( src + j ); + v1 = _mm_loadu_ps( src + j + 4 ); + v0 = _mm_min_ps( v0, thresh4 ); + v1 = _mm_min_ps( v1, thresh4 ); /* min(src, thresh): values above the threshold are capped at it; maxval is not used in this case */ + _mm_storeu_ps( dst + j, v0 ); + _mm_storeu_ps( dst + j + 4, v1 ); + } + } +#endif + + for( ; j < roi.width; j++ ) + dst[j] = std::min(src[j], thresh); + } + break; + + case THRESH_TOZERO: + for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step ) + { + j = 0; +#if CV_SSE2 + if( useSIMD ) + { + __m128 thresh4 = _mm_set1_ps(thresh); + for( ; j <= roi.width - 8; j += 8 ) + { + __m128 v0, v1; + v0 = _mm_loadu_ps( src + j ); + v1 = _mm_loadu_ps( src + j + 4 ); + v0 = _mm_and_ps(v0, _mm_cmpgt_ps(v0, thresh4)); + v1 = _mm_and_ps(v1, _mm_cmpgt_ps(v1, thresh4)); /* source value kept where src > thresh, zero elsewhere */ + _mm_storeu_ps( dst + j, v0 ); + _mm_storeu_ps( dst + j + 4, v1 ); + } + } +#endif + + for( ; j < roi.width; j++ ) + { + float v = src[j]; + dst[j] = v > thresh ? v : 0; + } + } + break; + + case THRESH_TOZERO_INV: + for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step ) + { + j = 0; +#if CV_SSE2 + if( useSIMD ) + { + __m128 thresh4 = _mm_set1_ps(thresh); + for( ; j <= roi.width - 8; j += 8 ) + { + __m128 v0, v1; + v0 = _mm_loadu_ps( src + j ); + v1 = _mm_loadu_ps( src + j + 4 ); + v0 = _mm_and_ps(v0, _mm_cmple_ps(v0, thresh4)); + v1 = _mm_and_ps(v1, _mm_cmple_ps(v1, thresh4)); /* source value kept where src <= thresh, zero elsewhere */ + _mm_storeu_ps( dst + j, v0 ); + _mm_storeu_ps( dst + j + 4, v1 ); + } + } +#endif + for( ; j < roi.width; j++ ) + { + float v = src[j]; + dst[j] = v <= thresh ? 
v : 0; + } + } + break; + default: + return CV_Error( CV_StsBadArg, "" ); /* type is not one of the five cases above */ + } +} + static double getThreshVal_Otsu_8u( const Mat& _src ) @@ -530,7 +697,7 @@ double cv::threshold( InputArray _src, OutputArray _dst, double thresh, double m { int v = type == THRESH_BINARY ? (ithresh >= 255 ? 0 : imaxval) : type == THRESH_BINARY_INV ? (ithresh >= 255 ? imaxval : 0) : - type == THRESH_TRUNC ? imaxval : 0; + /*type == THRESH_TRUNC ? imaxval :*/ 0; dst.setTo(v); } else @@ -539,6 +706,32 @@ double cv::threshold( InputArray _src, OutputArray _dst, double thresh, double m else thresh_8u( src, dst, (uchar)ithresh, (uchar)imaxval, type ); } + else if( src.depth() == CV_16S ) /* 16-bit signed input: integer threshold and maxval are derived first, then the result comes from dst.setTo, src.copyTo or thresh_16s */ + { + int ithresh = cvFloor(thresh); + thresh = ithresh; + int imaxval = cvRound(maxval); + if( type == THRESH_TRUNC ) + imaxval = ithresh; + imaxval = saturate_cast(imaxval); + + if( ithresh < SHRT_MIN || ithresh >= SHRT_MAX ) /* threshold outside [SHRT_MIN, SHRT_MAX): handled here by a constant fill or a plain copy instead of calling thresh_16s */ + { + if( type == THRESH_BINARY || type == THRESH_BINARY_INV || + ((type == THRESH_TRUNC || type == THRESH_TOZERO_INV) && ithresh < SHRT_MIN) || + (type == THRESH_TOZERO && ithresh >= SHRT_MAX) ) + { + int v = type == THRESH_BINARY ? (ithresh >= SHRT_MAX ? 0 : imaxval) : + type == THRESH_BINARY_INV ? (ithresh >= SHRT_MAX ? imaxval : 0) : + /*type == THRESH_TRUNC ? 
imaxval :*/ 0; + dst.setTo(v); /* NOTE(review): for THRESH_TRUNC with ithresh < SHRT_MIN this fills with 0, while imaxval was set above to the saturated threshold - confirm that 0 is intended */ + } + else + src.copyTo(dst); + } + else + thresh_16s( src, dst, (short)ithresh, (short)imaxval, type ); + } else if( src.depth() == CV_32F ) thresh_32f( src, dst, (float)thresh, (float)maxval, type ); else diff --git a/modules/imgproc/test/test_thresh.cpp b/modules/imgproc/test/test_thresh.cpp index 9ef1bf6ad1..7bac700797 100644 --- a/modules/imgproc/test/test_thresh.cpp +++ b/modules/imgproc/test/test_thresh.cpp @@ -75,9 +75,9 @@ void CV_ThreshTest::get_test_array_types_and_sizes( int test_case_idx, vector >& sizes, vector >& types ) { RNG& rng = ts->get_rng(); - int depth = cvtest::randInt(rng) % 2, cn = cvtest::randInt(rng) % 4 + 1; + int depth = cvtest::randInt(rng) % 3, cn = cvtest::randInt(rng) % 4 + 1; cvtest::ArrayTest::get_test_array_types_and_sizes( test_case_idx, sizes, types ); - depth = depth == 0 ? CV_8U : CV_32F; + depth = depth == 0 ? CV_8U : depth == 1 ? CV_16S : CV_32F; types[INPUT][0] = types[OUTPUT][0] = types[REF_OUTPUT][0] = CV_MAKETYPE(depth,cn); thresh_type = cvtest::randInt(rng) % 5; @@ -87,7 +87,15 @@ void CV_ThreshTest::get_test_array_types_and_sizes( int test_case_idx, thresh_val = (float)(cvtest::randReal(rng)*350. - 50.); max_val = (float)(cvtest::randReal(rng)*350. 
- 50.); if( cvtest::randInt(rng)%4 == 0 ) - max_val = 255; + max_val = 255.f; + } + else if( depth == CV_16S ) + { + float min_val = SHRT_MIN-100.f; max_val = SHRT_MAX+100.f; /* only min_val is local; max_val is the same outer variable the CV_8U branch assigns, so the value picked below is kept after this block instead of being written to a shadowing local and lost */ + thresh_val = (float)(cvtest::randReal(rng)*(max_val - min_val) + min_val); + max_val = (float)(cvtest::randReal(rng)*(max_val - min_val) + min_val); + if( cvtest::randInt(rng)%4 == 0 ) + max_val = (float)SHRT_MAX; } else { @@ -117,88 +125,177 @@ static void test_threshold( const Mat& _src, Mat& _dst, int depth = _src.depth(), cn = _src.channels(); int width_n = _src.cols*cn, height = _src.rows; int ithresh = cvFloor(thresh), ithresh2, imaxval = cvRound(maxval); - const uchar* src = _src.data; - uchar* dst = _dst.data; - size_t srcstep = _src.step, dststep = _dst.step; - ithresh2 = saturate_cast(ithresh); - imaxval = saturate_cast(imaxval); + if( depth == CV_8U ) + { + ithresh2 = saturate_cast(ithresh); + imaxval = saturate_cast(imaxval); + } + else if( depth == CV_16S ) + { + ithresh2 = saturate_cast(ithresh); + imaxval = saturate_cast(imaxval); + } - assert( depth == CV_8U || depth == CV_16S || depth == CV_32F ); + assert( depth == CV_8U || depth == CV_16S || depth == CV_32F ); switch( thresh_type ) { case CV_THRESH_BINARY: - for( i = 0; i < height; i++, src += srcstep, dst += dststep ) + for( i = 0; i < height; i++ ) { if( depth == CV_8U ) + { + const uchar* src = _src.ptr(i); + uchar* dst = _dst.ptr(i); for( j = 0; j < width_n; j++ ) dst[j] = (uchar)(src[j] > ithresh ? imaxval : 0); - else + } + else if( depth == CV_16S ) + { + const short* src = _src.ptr(i); + short* dst = _dst.ptr(i); for( j = 0; j < width_n; j++ ) - ((float*)dst)[j] = ((const float*)src)[j] > thresh ? maxval : 0.f; + dst[j] = (short)(src[j] > ithresh ? imaxval : 0); + } + else + { + const float* src = _src.ptr(i); + float* dst = _dst.ptr(i); + for( j = 0; j < width_n; j++ ) + dst[j] = src[j] > thresh ? 
maxval : 0.f; + } } break; case CV_THRESH_BINARY_INV: - for( i = 0; i < height; i++, src += srcstep, dst += dststep ) + for( i = 0; i < height; i++ ) { if( depth == CV_8U ) + { + const uchar* src = _src.ptr(i); + uchar* dst = _dst.ptr(i); for( j = 0; j < width_n; j++ ) dst[j] = (uchar)(src[j] > ithresh ? 0 : imaxval); - else + } + else if( depth == CV_16S ) + { + const short* src = _src.ptr(i); + short* dst = _dst.ptr(i); for( j = 0; j < width_n; j++ ) - ((float*)dst)[j] = ((const float*)src)[j] > thresh ? 0.f : maxval; + dst[j] = (short)(src[j] > ithresh ? 0 : imaxval); + } + else + { + const float* src = _src.ptr(i); + float* dst = _dst.ptr(i); + for( j = 0; j < width_n; j++ ) + dst[j] = src[j] > thresh ? 0.f : maxval; + } } break; case CV_THRESH_TRUNC: - for( i = 0; i < height; i++, src += srcstep, dst += dststep ) + for( i = 0; i < height; i++ ) { if( depth == CV_8U ) + { + const uchar* src = _src.ptr(i); + uchar* dst = _dst.ptr(i); for( j = 0; j < width_n; j++ ) { int s = src[j]; dst[j] = (uchar)(s > ithresh ? ithresh2 : s); } - else + } + else if( depth == CV_16S ) + { + const short* src = _src.ptr(i); + short* dst = _dst.ptr(i); for( j = 0; j < width_n; j++ ) { - float s = ((const float*)src)[j]; - ((float*)dst)[j] = s > thresh ? thresh : s; + int s = src[j]; + dst[j] = (short)(s > ithresh ? ithresh2 : s); } + } + else + { + const float* src = _src.ptr(i); + float* dst = _dst.ptr(i); + for( j = 0; j < width_n; j++ ) + { + float s = src[j]; + dst[j] = s > thresh ? thresh : s; + } + } } break; case CV_THRESH_TOZERO: - for( i = 0; i < height; i++, src += srcstep, dst += dststep ) + for( i = 0; i < height; i++ ) { if( depth == CV_8U ) + { + const uchar* src = _src.ptr(i); + uchar* dst = _dst.ptr(i); for( j = 0; j < width_n; j++ ) { int s = src[j]; dst[j] = (uchar)(s > ithresh ? 
s : 0); } - else + } + else if( depth == CV_16S ) + { + const short* src = _src.ptr(i); + short* dst = _dst.ptr(i); for( j = 0; j < width_n; j++ ) { - float s = ((const float*)src)[j]; - ((float*)dst)[j] = s > thresh ? s : 0.f; + int s = src[j]; + dst[j] = (short)(s > ithresh ? s : 0); } + } + else + { + const float* src = _src.ptr(i); + float* dst = _dst.ptr(i); + for( j = 0; j < width_n; j++ ) + { + float s = src[j]; + dst[j] = s > thresh ? s : 0.f; + } + } } break; case CV_THRESH_TOZERO_INV: - for( i = 0; i < height; i++, src += srcstep, dst += dststep ) + for( i = 0; i < height; i++ ) { if( depth == CV_8U ) + { + const uchar* src = _src.ptr(i); + uchar* dst = _dst.ptr(i); for( j = 0; j < width_n; j++ ) { int s = src[j]; dst[j] = (uchar)(s > ithresh ? 0 : s); } - else + } + else if( depth == CV_16S ) + { + const short* src = _src.ptr(i); + short* dst = _dst.ptr(i); for( j = 0; j < width_n; j++ ) { - float s = ((const float*)src)[j]; - ((float*)dst)[j] = s > thresh ? 0.f : s; + int s = src[j]; + dst[j] = (short)(s > ithresh ? 0 : s); } + } + else + { + const float* src = _src.ptr(i); + float* dst = _dst.ptr(i); + for( j = 0; j < width_n; j++ ) + { + float s = src[j]; + dst[j] = s > thresh ? 0.f : s; + } + } } break; default: