From 3dafdd6a7969f11dd4288b84cad7b5fbcc335ab8 Mon Sep 17 00:00:00 2001
From: Vadim Pisarevsky <no@email>
Date: Tue, 22 Nov 2011 15:16:45 +0000
Subject: [PATCH] added 16s support to cv::threshold.

---
 modules/imgproc/src/thresh.cpp       | 303 ++++++++++++++++++++++-----
 modules/imgproc/test/test_thresh.cpp | 151 ++++++++++---
 2 files changed, 372 insertions(+), 82 deletions(-)

diff --git a/modules/imgproc/src/thresh.cpp b/modules/imgproc/src/thresh.cpp
index 4579c8e583..926f9d4770 100644
--- a/modules/imgproc/src/thresh.cpp
+++ b/modules/imgproc/src/thresh.cpp
@@ -272,13 +272,13 @@ thresh_8u( const Mat& _src, Mat& _dst, uchar thresh, uchar maxval, int type )
 
 
 static void
-thresh_32f( const Mat& _src, Mat& _dst, float thresh, float maxval, int type )
+thresh_16s( const Mat& _src, Mat& _dst, short thresh, short maxval, int type )
 {
     int i, j;
     Size roi = _src.size();
     roi.width *= _src.channels();
-    const float* src = (const float*)_src.data;
-    float* dst = (float*)_dst.data;
+    const short* src = (const short*)_src.data;
+    short* dst = (short*)_dst.data;
     size_t src_step = _src.step/sizeof(src[0]);
     size_t dst_step = _dst.step/sizeof(dst[0]);
     
@@ -301,18 +301,18 @@ thresh_32f( const Mat& _src, Mat& _dst, float thresh, float maxval, int type )
         #if CV_SSE2
             if( useSIMD )
             {
-                __m128 thresh4 = _mm_set1_ps(thresh), maxval4 = _mm_set1_ps(maxval);
-                for( ; j <= roi.width - 8; j += 8 )
+                __m128i thresh8 = _mm_set1_epi16(thresh), maxval8 = _mm_set1_epi16(maxval);
+                for( ; j <= roi.width - 16; j += 16 )
                 {
-                    __m128 v0, v1;
-                    v0 = _mm_loadu_ps( src + j );
-                    v1 = _mm_loadu_ps( src + j + 4 );
-                    v0 = _mm_cmpgt_ps( v0, thresh4 );
-                    v1 = _mm_cmpgt_ps( v1, thresh4 );
-                    v0 = _mm_and_ps( v0, maxval4 );
-                    v1 = _mm_and_ps( v1, maxval4 );
-                    _mm_storeu_ps( dst + j, v0 );
-                    _mm_storeu_ps( dst + j + 4, v1 );
+                    __m128i v0, v1;
+                    v0 = _mm_loadu_si128( (const __m128i*)(src + j) );
+                    v1 = _mm_loadu_si128( (const __m128i*)(src + j + 8) );
+                    v0 = _mm_cmpgt_epi16( v0, thresh8 );
+                    v1 = _mm_cmpgt_epi16( v1, thresh8 );
+                    v0 = _mm_and_si128( v0, maxval8 );
+                    v1 = _mm_and_si128( v1, maxval8 );
+                    _mm_storeu_si128((__m128i*)(dst + j), v0 );
+                    _mm_storeu_si128((__m128i*)(dst + j + 8), v1 );
                 }
             }
         #endif
@@ -329,18 +329,18 @@ thresh_32f( const Mat& _src, Mat& _dst, float thresh, float maxval, int type )
         #if CV_SSE2
             if( useSIMD )
             {
-                __m128 thresh4 = _mm_set1_ps(thresh), maxval4 = _mm_set1_ps(maxval);
-                for( ; j <= roi.width - 8; j += 8 )
+                __m128i thresh8 = _mm_set1_epi16(thresh), maxval8 = _mm_set1_epi16(maxval);
+                for( ; j <= roi.width - 16; j += 16 )
                 {
-                    __m128 v0, v1;
-                    v0 = _mm_loadu_ps( src + j );
-                    v1 = _mm_loadu_ps( src + j + 4 );
-                    v0 = _mm_cmple_ps( v0, thresh4 );
-                    v1 = _mm_cmple_ps( v1, thresh4 );
-                    v0 = _mm_and_ps( v0, maxval4 );
-                    v1 = _mm_and_ps( v1, maxval4 );
-                    _mm_storeu_ps( dst + j, v0 );
-                    _mm_storeu_ps( dst + j + 4, v1 );
+                    __m128i v0, v1;
+                    v0 = _mm_loadu_si128( (const __m128i*)(src + j) );
+                    v1 = _mm_loadu_si128( (const __m128i*)(src + j + 8) );
+                    v0 = _mm_cmpgt_epi16( v0, thresh8 );
+                    v1 = _mm_cmpgt_epi16( v1, thresh8 );
+                    v0 = _mm_andnot_si128( v0, maxval8 );
+                    v1 = _mm_andnot_si128( v1, maxval8 );
+                    _mm_storeu_si128((__m128i*)(dst + j), v0 );
+                    _mm_storeu_si128((__m128i*)(dst + j + 8), v1 );
                 }
             }
         #endif            
@@ -357,16 +357,16 @@ thresh_32f( const Mat& _src, Mat& _dst, float thresh, float maxval, int type )
         #if CV_SSE2
             if( useSIMD )
             {
-                __m128 thresh4 = _mm_set1_ps(thresh);
-                for( ; j <= roi.width - 8; j += 8 )
+                __m128i thresh8 = _mm_set1_epi16(thresh);
+                for( ; j <= roi.width - 16; j += 16 )
                 {
-                    __m128 v0, v1;
-                    v0 = _mm_loadu_ps( src + j );
-                    v1 = _mm_loadu_ps( src + j + 4 );
-                    v0 = _mm_min_ps( v0, thresh4 );
-                    v1 = _mm_min_ps( v1, thresh4 );
-                    _mm_storeu_ps( dst + j, v0 );
-                    _mm_storeu_ps( dst + j + 4, v1 );
+                    __m128i v0, v1;
+                    v0 = _mm_loadu_si128( (const __m128i*)(src + j) );
+                    v1 = _mm_loadu_si128( (const __m128i*)(src + j + 8) );
+                    v0 = _mm_min_epi16( v0, thresh8 );
+                    v1 = _mm_min_epi16( v1, thresh8 );
+                    _mm_storeu_si128((__m128i*)(dst + j), v0 );
+                    _mm_storeu_si128((__m128i*)(dst + j + 8), v1 );
                 }
             }
         #endif            
@@ -383,23 +383,23 @@ thresh_32f( const Mat& _src, Mat& _dst, float thresh, float maxval, int type )
         #if CV_SSE2
             if( useSIMD )
             {
-                __m128 thresh4 = _mm_set1_ps(thresh);
-                for( ; j <= roi.width - 8; j += 8 )
+                __m128i thresh8 = _mm_set1_epi16(thresh);
+                for( ; j <= roi.width - 16; j += 16 )
                 {
-                    __m128 v0, v1;
-                    v0 = _mm_loadu_ps( src + j );
-                    v1 = _mm_loadu_ps( src + j + 4 );
-                    v0 = _mm_and_ps(v0, _mm_cmpgt_ps(v0, thresh4));
-                    v1 = _mm_and_ps(v1, _mm_cmpgt_ps(v1, thresh4));
-                    _mm_storeu_ps( dst + j, v0 );
-                    _mm_storeu_ps( dst + j + 4, v1 );
+                    __m128i v0, v1;
+                    v0 = _mm_loadu_si128( (const __m128i*)(src + j) );
+                    v1 = _mm_loadu_si128( (const __m128i*)(src + j + 8) );
+                    v0 = _mm_and_si128(v0, _mm_cmpgt_epi16(v0, thresh8));
+                    v1 = _mm_and_si128(v1, _mm_cmpgt_epi16(v1, thresh8));
+                    _mm_storeu_si128((__m128i*)(dst + j), v0 );
+                    _mm_storeu_si128((__m128i*)(dst + j + 8), v1 );
                 }
             }
         #endif
             
             for( ; j < roi.width; j++ )
             {
-                float v = src[j];
+                short v = src[j];
                 dst[j] = v > thresh ? v : 0;
             }
         }
@@ -412,22 +412,22 @@ thresh_32f( const Mat& _src, Mat& _dst, float thresh, float maxval, int type )
         #if CV_SSE2
             if( useSIMD )
             {
-                __m128 thresh4 = _mm_set1_ps(thresh);
-                for( ; j <= roi.width - 8; j += 8 )
+                __m128i thresh8 = _mm_set1_epi16(thresh);
+                for( ; j <= roi.width - 16; j += 16 )
                 {
-                    __m128 v0, v1;
-                    v0 = _mm_loadu_ps( src + j );
-                    v1 = _mm_loadu_ps( src + j + 4 );
-                    v0 = _mm_and_ps(v0, _mm_cmple_ps(v0, thresh4));
-                    v1 = _mm_and_ps(v1, _mm_cmple_ps(v1, thresh4));
-                    _mm_storeu_ps( dst + j, v0 );
-                    _mm_storeu_ps( dst + j + 4, v1 );
+                    __m128i v0, v1;
+                    v0 = _mm_loadu_si128( (const __m128i*)(src + j) );
+                    v1 = _mm_loadu_si128( (const __m128i*)(src + j + 8) );
+                    v0 = _mm_andnot_si128(_mm_cmpgt_epi16(v0, thresh8), v0);
+                    v1 = _mm_andnot_si128(_mm_cmpgt_epi16(v1, thresh8), v1);
+                    _mm_storeu_si128((__m128i*)(dst + j), v0 );
+                    _mm_storeu_si128((__m128i*)(dst + j + 8), v1 );
                 }
             }
         #endif
             for( ; j < roi.width; j++ )
             {
-                float v = src[j];
+                short v = src[j];
                 dst[j] = v <= thresh ? v : 0;
             }
         }
@@ -437,6 +437,173 @@ thresh_32f( const Mat& _src, Mat& _dst, float thresh, float maxval, int type )
     }
 }
 
+// Thresholds every float element of _src into _dst according to `type`: an SSE loop (8 floats per iteration) when enabled, then a scalar loop for the rest.
+static void
+thresh_32f( const Mat& _src, Mat& _dst, float thresh, float maxval, int type )
+{
+    int i, j;
+    Size roi = _src.size();
+    roi.width *= _src.channels();  // iterate over individual channel values rather than pixels
+    const float* src = (const float*)_src.data;
+    float* dst = (float*)_dst.data;
+    size_t src_step = _src.step/sizeof(src[0]);
+    size_t dst_step = _dst.step/sizeof(dst[0]);
+    // The steps are divided by sizeof(float) so they can be added directly to the float row pointers.
+#if CV_SSE2
+    volatile bool useSIMD = checkHardwareSupport(CV_CPU_SSE);
+#endif
+    // When both matrices are continuous, fold all rows into one long row so the outer loop runs once.
+    if( _src.isContinuous() && _dst.isContinuous() )
+    {
+        roi.width *= roi.height;
+        roi.height = 1;
+    }
+    // Every case: optional SIMD loop over blocks of 8 floats, then a scalar loop starting at the j where SIMD stopped.
+    switch( type )
+    {
+        case THRESH_BINARY:  // dst = src > thresh ? maxval : 0
+            for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
+            {
+                j = 0;
+#if CV_SSE2
+                if( useSIMD )
+                {
+                    __m128 thresh4 = _mm_set1_ps(thresh), maxval4 = _mm_set1_ps(maxval);
+                    for( ; j <= roi.width - 8; j += 8 )
+                    {
+                        __m128 v0, v1;
+                        v0 = _mm_loadu_ps( src + j );
+                        v1 = _mm_loadu_ps( src + j + 4 );
+                        v0 = _mm_cmpgt_ps( v0, thresh4 );
+                        v1 = _mm_cmpgt_ps( v1, thresh4 );
+                        v0 = _mm_and_ps( v0, maxval4 );  // compare result is a per-lane bit mask; AND keeps maxval or leaves 0
+                        v1 = _mm_and_ps( v1, maxval4 );
+                        _mm_storeu_ps( dst + j, v0 );
+                        _mm_storeu_ps( dst + j + 4, v1 );
+                    }
+                }
+#endif
+                // Scalar path: the remaining elements, or the whole row when the SIMD loop did not run.
+                for( ; j < roi.width; j++ )
+                    dst[j] = src[j] > thresh ? maxval : 0;
+            }
+            break;
+            
+        case THRESH_BINARY_INV:  // dst = src <= thresh ? maxval : 0
+            for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
+            {
+                j = 0;
+#if CV_SSE2
+                if( useSIMD )
+                {
+                    __m128 thresh4 = _mm_set1_ps(thresh), maxval4 = _mm_set1_ps(maxval);
+                    for( ; j <= roi.width - 8; j += 8 )
+                    {
+                        __m128 v0, v1;
+                        v0 = _mm_loadu_ps( src + j );
+                        v1 = _mm_loadu_ps( src + j + 4 );
+                        v0 = _mm_cmple_ps( v0, thresh4 );
+                        v1 = _mm_cmple_ps( v1, thresh4 );
+                        v0 = _mm_and_ps( v0, maxval4 );
+                        v1 = _mm_and_ps( v1, maxval4 );
+                        _mm_storeu_ps( dst + j, v0 );
+                        _mm_storeu_ps( dst + j + 4, v1 );
+                    }
+                }
+#endif            
+                // Scalar path for the elements the SIMD loop did not cover.
+                for( ; j < roi.width; j++ )
+                    dst[j] = src[j] <= thresh ? maxval : 0;
+            }
+            break;
+            
+        case THRESH_TRUNC:  // dst = min(src, thresh); maxval is not used
+            for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
+            {
+                j = 0;
+#if CV_SSE2
+                if( useSIMD )
+                {
+                    __m128 thresh4 = _mm_set1_ps(thresh);
+                    for( ; j <= roi.width - 8; j += 8 )
+                    {
+                        __m128 v0, v1;
+                        v0 = _mm_loadu_ps( src + j );
+                        v1 = _mm_loadu_ps( src + j + 4 );
+                        v0 = _mm_min_ps( v0, thresh4 );
+                        v1 = _mm_min_ps( v1, thresh4 );
+                        _mm_storeu_ps( dst + j, v0 );
+                        _mm_storeu_ps( dst + j + 4, v1 );
+                    }
+                }
+#endif            
+                // NOTE(review): for a NaN source value _mm_min_ps above appears to return thresh while std::min below returns the NaN - confirm this is acceptable.
+                for( ; j < roi.width; j++ )
+                    dst[j] = std::min(src[j], thresh);
+            }
+            break;
+            
+        case THRESH_TOZERO:  // dst = src > thresh ? src : 0
+            for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
+            {
+                j = 0;
+#if CV_SSE2
+                if( useSIMD )
+                {
+                    __m128 thresh4 = _mm_set1_ps(thresh);
+                    for( ; j <= roi.width - 8; j += 8 )
+                    {
+                        __m128 v0, v1;
+                        v0 = _mm_loadu_ps( src + j );
+                        v1 = _mm_loadu_ps( src + j + 4 );
+                        v0 = _mm_and_ps(v0, _mm_cmpgt_ps(v0, thresh4));  // keep the source bits where src > thresh, zero elsewhere
+                        v1 = _mm_and_ps(v1, _mm_cmpgt_ps(v1, thresh4));
+                        _mm_storeu_ps( dst + j, v0 );
+                        _mm_storeu_ps( dst + j + 4, v1 );
+                    }
+                }
+#endif
+                // Scalar path for the elements the SIMD loop did not cover.
+                for( ; j < roi.width; j++ )
+                {
+                    float v = src[j];
+                    dst[j] = v > thresh ? v : 0;
+                }
+            }
+            break;
+            
+        case THRESH_TOZERO_INV:  // dst = src <= thresh ? src : 0
+            for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
+            {
+                j = 0;
+#if CV_SSE2
+                if( useSIMD )
+                {
+                    __m128 thresh4 = _mm_set1_ps(thresh);
+                    for( ; j <= roi.width - 8; j += 8 )
+                    {
+                        __m128 v0, v1;
+                        v0 = _mm_loadu_ps( src + j );
+                        v1 = _mm_loadu_ps( src + j + 4 );
+                        v0 = _mm_and_ps(v0, _mm_cmple_ps(v0, thresh4));
+                        v1 = _mm_and_ps(v1, _mm_cmple_ps(v1, thresh4));
+                        _mm_storeu_ps( dst + j, v0 );
+                        _mm_storeu_ps( dst + j + 4, v1 );
+                    }
+                }
+#endif
+                for( ; j < roi.width; j++ )
+                {
+                    float v = src[j];
+                    dst[j] = v <= thresh ? v : 0;
+                }
+            }
+            break;
+        default:  // any other threshold type is reported through CV_Error with CV_StsBadArg
+            return CV_Error( CV_StsBadArg, "" );
+    }
+}
+    
 
 static double
 getThreshVal_Otsu_8u( const Mat& _src )
@@ -530,7 +697,7 @@ double cv::threshold( InputArray _src, OutputArray _dst, double thresh, double m
             {
                 int v = type == THRESH_BINARY ? (ithresh >= 255 ? 0 : imaxval) :
                         type == THRESH_BINARY_INV ? (ithresh >= 255 ? imaxval : 0) :
-                        type == THRESH_TRUNC ? imaxval : 0;
+                        /*type == THRESH_TRUNC ? imaxval :*/ 0;
                 dst.setTo(v);
             }
             else
@@ -539,6 +706,32 @@ double cv::threshold( InputArray _src, OutputArray _dst, double thresh, double m
         else
             thresh_8u( src, dst, (uchar)ithresh, (uchar)imaxval, type );
     }
+    else if( src.depth() == CV_16S )  // 16-bit signed input: integer threshold, results saturated to short
+    {
+        int ithresh = cvFloor(thresh);
+        thresh = ithresh;
+        int imaxval = cvRound(maxval);
+        if( type == THRESH_TRUNC )
+            imaxval = ithresh;  // TRUNC has no use for maxval; its fill value is the threshold itself
+        imaxval = saturate_cast<short>(imaxval);
+        // Threshold outside the representable range: all elements compare the same way, so the result is a constant fill or a plain copy.
+        if( ithresh < SHRT_MIN || ithresh >= SHRT_MAX )
+        {
+            if( type == THRESH_BINARY || type == THRESH_BINARY_INV ||
+               ((type == THRESH_TRUNC || type == THRESH_TOZERO_INV) && ithresh < SHRT_MIN) ||
+               (type == THRESH_TOZERO && ithresh >= SHRT_MAX) )
+            {
+                int v = type == THRESH_BINARY ? (ithresh >= SHRT_MAX ? 0 : imaxval) :
+                type == THRESH_BINARY_INV ? (ithresh >= SHRT_MAX ? imaxval : 0) :
+                type == THRESH_TRUNC ? imaxval : 0;  // TRUNC below SHRT_MIN: every src > thresh, so dst is the threshold saturated to SHRT_MIN, not 0
+                dst.setTo(v);
+            }
+            else
+                src.copyTo(dst);  // remaining combinations leave every element unchanged
+        }
+        else
+            thresh_16s( src, dst, (short)ithresh, (short)imaxval, type );
+    }
     else if( src.depth() == CV_32F )
         thresh_32f( src, dst, (float)thresh, (float)maxval, type );
     else
diff --git a/modules/imgproc/test/test_thresh.cpp b/modules/imgproc/test/test_thresh.cpp
index 9ef1bf6ad1..7bac700797 100644
--- a/modules/imgproc/test/test_thresh.cpp
+++ b/modules/imgproc/test/test_thresh.cpp
@@ -75,9 +75,9 @@ void CV_ThreshTest::get_test_array_types_and_sizes( int test_case_idx,
                                                 vector<vector<Size> >& sizes, vector<vector<int> >& types )
 {
     RNG& rng = ts->get_rng();
-    int depth = cvtest::randInt(rng) % 2, cn = cvtest::randInt(rng) % 4 + 1;
+    int depth = cvtest::randInt(rng) % 3, cn = cvtest::randInt(rng) % 4 + 1;
     cvtest::ArrayTest::get_test_array_types_and_sizes( test_case_idx, sizes, types );
-    depth = depth == 0 ? CV_8U : CV_32F;
+    depth = depth == 0 ? CV_8U : depth == 1 ? CV_16S : CV_32F;
 
     types[INPUT][0] = types[OUTPUT][0] = types[REF_OUTPUT][0] = CV_MAKETYPE(depth,cn);
     thresh_type = cvtest::randInt(rng) % 5;
@@ -87,7 +87,15 @@ void CV_ThreshTest::get_test_array_types_and_sizes( int test_case_idx,
         thresh_val = (float)(cvtest::randReal(rng)*350. - 50.);
         max_val = (float)(cvtest::randReal(rng)*350. - 50.);
         if( cvtest::randInt(rng)%4 == 0 )
-            max_val = 255;
+            max_val = 255.f;
+    }
+    else if( depth == CV_16S )
+    {
+        const float lo = SHRT_MIN-100.f, hi = SHRT_MAX+100.f;  // sampling bounds; named so they do not shadow the outer max_val assigned below
+        thresh_val = (float)(cvtest::randReal(rng)*(hi - lo) + lo);
+        max_val = (float)(cvtest::randReal(rng)*(hi - lo) + lo);
+        if( cvtest::randInt(rng)%4 == 0 )
+            max_val = (float)SHRT_MAX;
     }
     else
     {
@@ -117,88 +125,177 @@ static void test_threshold( const Mat& _src, Mat& _dst,
     int depth = _src.depth(), cn = _src.channels();
     int width_n = _src.cols*cn, height = _src.rows;
     int ithresh = cvFloor(thresh), ithresh2, imaxval = cvRound(maxval);
-    const uchar* src = _src.data;
-    uchar* dst = _dst.data;
-    size_t srcstep = _src.step, dststep = _dst.step;
     
-    ithresh2 = saturate_cast<uchar>(ithresh);
-    imaxval = saturate_cast<uchar>(imaxval);
+    if( depth == CV_8U )
+    {
+        ithresh2 = saturate_cast<uchar>(ithresh);
+        imaxval = saturate_cast<uchar>(imaxval);
+    }
+    else if( depth == CV_16S )
+    {
+        ithresh2 = saturate_cast<short>(ithresh);
+        imaxval = saturate_cast<short>(imaxval);
+    }
 
-    assert( depth == CV_8U || depth == CV_32F );
+    assert( depth == CV_8U || depth == CV_16S || depth == CV_32F );
     
     switch( thresh_type )
     {
     case CV_THRESH_BINARY:
-        for( i = 0; i < height; i++, src += srcstep, dst += dststep )
+        for( i = 0; i < height; i++ )
         {
             if( depth == CV_8U )
+            {
+                const uchar* src = _src.ptr<uchar>(i);
+                uchar* dst = _dst.ptr<uchar>(i);
                 for( j = 0; j < width_n; j++ )
                     dst[j] = (uchar)(src[j] > ithresh ? imaxval : 0);
-            else
+            }
+            else if( depth == CV_16S )
+            {
+                const short* src = _src.ptr<short>(i);
+                short* dst = _dst.ptr<short>(i);
                 for( j = 0; j < width_n; j++ )
-                    ((float*)dst)[j] = ((const float*)src)[j] > thresh ? maxval : 0.f;
+                    dst[j] = (short)(src[j] > ithresh ? imaxval : 0);
+            }
+            else
+            {
+                const float* src = _src.ptr<float>(i);
+                float* dst = _dst.ptr<float>(i);
+                for( j = 0; j < width_n; j++ )
+                    dst[j] = src[j] > thresh ? maxval : 0.f;
+            }
         }
         break;
     case CV_THRESH_BINARY_INV:
-        for( i = 0; i < height; i++, src += srcstep, dst += dststep )
+        for( i = 0; i < height; i++ )
         {
             if( depth == CV_8U )
+            {
+                const uchar* src = _src.ptr<uchar>(i);
+                uchar* dst = _dst.ptr<uchar>(i);
                 for( j = 0; j < width_n; j++ )
                     dst[j] = (uchar)(src[j] > ithresh ? 0 : imaxval);
-            else
+            }
+            else if( depth == CV_16S )
+            {
+                const short* src = _src.ptr<short>(i);
+                short* dst = _dst.ptr<short>(i);
                 for( j = 0; j < width_n; j++ )
-                    ((float*)dst)[j] = ((const float*)src)[j] > thresh ? 0.f : maxval;
+                    dst[j] = (short)(src[j] > ithresh ? 0 : imaxval);
+            }
+            else
+            {
+                const float* src = _src.ptr<float>(i);
+                float* dst = _dst.ptr<float>(i);
+                for( j = 0; j < width_n; j++ )
+                    dst[j] = src[j] > thresh ? 0.f : maxval;
+            }
         }
         break;
     case CV_THRESH_TRUNC:
-        for( i = 0; i < height; i++, src += srcstep, dst += dststep )
+        for( i = 0; i < height; i++ )
         {
             if( depth == CV_8U )
+            {
+                const uchar* src = _src.ptr<uchar>(i);
+                uchar* dst = _dst.ptr<uchar>(i);
                 for( j = 0; j < width_n; j++ )
                 {
                     int s = src[j];
                     dst[j] = (uchar)(s > ithresh ? ithresh2 : s);
                 }
-            else
+            }
+            else if( depth == CV_16S )
+            {
+                const short* src = _src.ptr<short>(i);
+                short* dst = _dst.ptr<short>(i);
                 for( j = 0; j < width_n; j++ )
                 {
-                    float s = ((const float*)src)[j];
-                    ((float*)dst)[j] = s > thresh ? thresh : s;
+                    int s = src[j];
+                    dst[j] = (short)(s > ithresh ? ithresh2 : s);
                 }
+            }
+            else
+            {
+                const float* src = _src.ptr<float>(i);
+                float* dst = _dst.ptr<float>(i);
+                for( j = 0; j < width_n; j++ )
+                {
+                    float s = src[j];
+                    dst[j] = s > thresh ? thresh : s;
+                }
+            }
         }
         break;
     case CV_THRESH_TOZERO:
-        for( i = 0; i < height; i++, src += srcstep, dst += dststep )
+        for( i = 0; i < height; i++ )
         {
             if( depth == CV_8U )
+            {
+                const uchar* src = _src.ptr<uchar>(i);
+                uchar* dst = _dst.ptr<uchar>(i);
                 for( j = 0; j < width_n; j++ )
                 {
                     int s = src[j];
                     dst[j] = (uchar)(s > ithresh ? s : 0);
                 }
-            else
+            }
+            else if( depth == CV_16S )
+            {
+                const short* src = _src.ptr<short>(i);
+                short* dst = _dst.ptr<short>(i);
                 for( j = 0; j < width_n; j++ )
                 {
-                    float s = ((const float*)src)[j];
-                    ((float*)dst)[j] = s > thresh ? s : 0.f;
+                    int s = src[j];
+                    dst[j] = (short)(s > ithresh ? s : 0);
                 }
+            }
+            else
+            {
+                const float* src = _src.ptr<float>(i);
+                float* dst = _dst.ptr<float>(i);
+                for( j = 0; j < width_n; j++ )
+                {
+                    float s = src[j];
+                    dst[j] = s > thresh ? s : 0.f;
+                }
+            }
         }
         break;
     case CV_THRESH_TOZERO_INV:
-        for( i = 0; i < height; i++, src += srcstep, dst += dststep )
+        for( i = 0; i < height; i++ )
         {
             if( depth == CV_8U )
+            {
+                const uchar* src = _src.ptr<uchar>(i);
+                uchar* dst = _dst.ptr<uchar>(i);
                 for( j = 0; j < width_n; j++ )
                 {
                     int s = src[j];
                     dst[j] = (uchar)(s > ithresh ? 0 : s);
                 }
-            else
+            }
+            else if( depth == CV_16S )
+            {
+                const short* src = _src.ptr<short>(i);
+                short* dst = _dst.ptr<short>(i);
                 for( j = 0; j < width_n; j++ )
                 {
-                    float s = ((const float*)src)[j];
-                    ((float*)dst)[j] = s > thresh ? 0.f : s;
+                    int s = src[j];
+                    dst[j] = (short)(s > ithresh ? 0 : s);
                 }
+            }
+            else
+            {
+                const float* src = _src.ptr<float>(i);
+                float* dst = _dst.ptr<float>(i);
+                for( j = 0; j < width_n; j++ )
+                {
+                    float s = src[j];
+                    dst[j] = s > thresh ? 0.f : s;
+                }
+            }
         }
         break;
     default: