diff --git a/modules/imgproc/src/morph.cpp b/modules/imgproc/src/morph.cpp
index 0c8ca30677..9fa1d598a7 100644
--- a/modules/imgproc/src/morph.cpp
+++ b/modules/imgproc/src/morph.cpp
@@ -1049,6 +1049,60 @@ cv::Mat cv::getStructuringElement(int shape, Size ksize, Point anchor)
 namespace cv
 {
 
+// Loop body for parallel_for: runs a morphological filter over one band of
+// image rows. The rows are divided into nStripes stripes and a range of stripe
+// indices is mapped to the row interval [row0, row1) handled by a single call.
+class MorphologyRunner
+{
+public:
+    // Stores the images, the stripe count, the iteration count and the arguments
+    // that operator() later passes to createMorphologyFilter().
+    MorphologyRunner(Mat _src, Mat _dst, int _nStripes, int _iterations,
+                     int _op, Mat _kernel, Point _anchor,
+                     int _rowBorderType, int _columnBorderType, const Scalar& _borderValue) :
+        src(_src), dst(_dst), nStripes(_nStripes), iterations(_iterations),
+        op(_op), kernel(_kernel), anchor(_anchor),
+        rowBorderType(_rowBorderType), columnBorderType(_columnBorderType),
+        borderValue(_borderValue)
+    {
+    }
+
+    // Filters the rows belonging to stripes [range.begin(), range.end()).
+    void operator () ( const BlockedRange& range ) const
+    {
+        // Stripe index -> row index in integer arithmetic, clamped to src.rows.
+        int row0 = min(range.begin() * src.rows / nStripes, src.rows);
+        int row1 = min(range.end() * src.rows / nStripes, src.rows);
+
+        Mat srcStripe = src.rowRange(row0, row1);
+        Mat dstStripe = dst.rowRange(row0, row1);
+
+        // A filter is created per call rather than shared between calls.
+        Ptr<FilterEngine> f = createMorphologyFilter(op, src.type(), kernel, anchor,
+                                                     rowBorderType, columnBorderType, borderValue );
+
+        // First pass reads the source stripe; further iterations run in place on the result.
+        f->apply( srcStripe, dstStripe );
+        for( int i = 1; i < iterations; i++ )
+            f->apply( dstStripe, dstStripe );
+    }
+
+private:
+    Mat src;
+    Mat dst;
+    int nStripes;
+    int iterations;
+
+    int op;
+    Mat kernel;
+    Point anchor;
+    int rowBorderType;
+    int columnBorderType;
+    // Stored by value: a reference member would dangle in any copy of this
+    // object that outlives the Scalar passed to the constructor.
+    Scalar borderValue;
+};
+
 static void morphOp( int op, InputArray _src, OutputArray _dst,
                      InputArray _kernel,
                      Point anchor, int iterations,
@@ -1085,12 +1139,17 @@ static void morphOp( int op, InputArray _src, OutputArray _dst,
         iterations = 1;
     }
 
-    Ptr<FilterEngine> f = createMorphologyFilter(op, src.type(),
-        kernel, anchor, borderType, borderType, borderValue );
+    // Number of row stripes handed to parallel_for; 1 keeps the whole image in one call.
+    int nStripes = 1;
+#if defined HAVE_TBB && defined HAVE_TEGRA_OPTIMIZATION
+    if (src.data != dst.data && iterations == 1 &&  //NOTE: threads are not used for inplace processing
+        (borderType & BORDER_ISOLATED) == 0 &&      //TODO: check border types
+        src.rows >= 64 )                            //NOTE: just heuristics
+        nStripes = 4;
+#endif
 
-    f->apply( src, dst );
-    for( int i = 1; i < iterations; i++ )
-        f->apply( dst, dst );
+    parallel_for(BlockedRange(0, nStripes),
+                 MorphologyRunner(src, dst, nStripes, iterations, op, kernel, anchor, borderType, borderType, borderValue));
 }
 
 template<> void Ptr<IplConvKernel>::delete_obj()
diff --git a/modules/imgproc/src/smooth.cpp b/modules/imgproc/src/smooth.cpp
index bd66fb23bd..905caae828 100644
--- a/modules/imgproc/src/smooth.cpp
+++ b/modules/imgproc/src/smooth.cpp
@@ -1244,8 +1244,6 @@ void cv::medianBlur( InputArray _src0, OutputArray _dst, int ksize )
         return;
 #endif
 
-    Size size = src0.size();
-    int cn = src0.channels();
     bool useSortNet = ksize == 3 || (ksize == 5
 #if !CV_SSE2
             && src0.depth() > CV_8U
@@ -1259,12 +1257,7 @@ void cv::medianBlur( InputArray _src0, OutputArray _dst, int ksize )
             src = src0;
         else
             src0.copyTo(src);
-    }
-    else
-        cv::copyMakeBorder( src0, src, 0, 0, ksize/2, ksize/2, BORDER_REPLICATE );
 
-    if( useSortNet )
-    {
         if( src.depth() == CV_8U )
             medianBlur_SortNet<MinMax8u, MinMaxVec8u>( src, dst, ksize );
         else if( src.depth() == CV_16U )
@@ -1275,16 +1268,22 @@ void cv::medianBlur( InputArray _src0, OutputArray _dst, int ksize )
             medianBlur_SortNet<MinMax32f, MinMaxVec32f>( src, dst, ksize );
         else
             CV_Error(CV_StsUnsupportedFormat, "");
+        return;
     }
 
-
-    CV_Assert( src.depth() == CV_8U && (cn == 1 || cn == 3 || cn == 4) );
-
-    double img_size_mp = (double)(size.width*size.height)/(1 << 20);
-    if( ksize <= 3 + (img_size_mp < 1 ? 12 : img_size_mp < 4 ? 6 : 2)*(MEDIAN_HAVE_SIMD && checkHardwareSupport(CV_CPU_SSE2) ? 1 : 3))
-        medianBlur_8u_Om( src, dst, ksize );
     else
-        medianBlur_8u_O1( src, dst, ksize );
+    {
+        cv::copyMakeBorder( src0, src, 0, 0, ksize/2, ksize/2, BORDER_REPLICATE );
+
+        int cn = src0.channels();
+        CV_Assert( src.depth() == CV_8U && (cn == 1 || cn == 3 || cn == 4) );
+
+        double img_size_mp = (double)(src0.total())/(1 << 20);
+        if( ksize <= 3 + (img_size_mp < 1 ? 12 : img_size_mp < 4 ? 6 : 2)*(MEDIAN_HAVE_SIMD && checkHardwareSupport(CV_CPU_SSE2) ? 1 : 3))
+            medianBlur_8u_Om( src, dst, ksize );
+        else
+            medianBlur_8u_O1( src, dst, ksize );
+    }
 }
 
 /****************************************************************************************\
diff --git a/modules/imgproc/src/thresh.cpp b/modules/imgproc/src/thresh.cpp
index 926f9d4770..14c5b16e95 100644
--- a/modules/imgproc/src/thresh.cpp
+++ b/modules/imgproc/src/thresh.cpp
@@ -663,6 +663,59 @@ getThreshVal_Otsu_8u( const Mat& _src )
     return max_val;
 }
 
+// Loop body for parallel_for: thresholds one band of image rows. The rows are
+// divided into nStripes stripes and a range of stripe indices is mapped to the
+// row interval [row0, row1) handled by a single call.
+class ThresholdRunner
+{
+public:
+    // _thresh and _maxval are stored as double; operator() casts them to the
+    // element type that matches the depth of the source image.
+    ThresholdRunner(Mat _src, Mat _dst, int _nStripes, double _thresh, double _maxval, int _thresholdType) :
+        src(_src), dst(_dst), nStripes(_nStripes),
+        thresh(_thresh), maxval(_maxval), thresholdType(_thresholdType)
+    {
+    }
+
+    // Thresholds the rows belonging to stripes [range.begin(), range.end()).
+    void operator () ( const BlockedRange& range ) const
+    {
+        // Stripe index -> row index in integer arithmetic, clamped to src.rows.
+        int row0 = std::min(range.begin() * src.rows / nStripes, src.rows);
+        int row1 = std::min(range.end() * src.rows / nStripes, src.rows);
+
+        // When the image has fewer rows than stripes, some stripes are empty.
+        if( row0 >= row1 )
+            return;
+
+        Mat srcStripe = src.rowRange(row0, row1);
+        Mat dstStripe = dst.rowRange(row0, row1);
+
+        // Depths other than the three below are not handled here.
+        if( srcStripe.depth() == CV_8U )
+        {
+            thresh_8u( srcStripe, dstStripe, (uchar)thresh, (uchar)maxval, thresholdType );
+        }
+        else if( srcStripe.depth() == CV_16S )
+        {
+            thresh_16s( srcStripe, dstStripe, (short)thresh, (short)maxval, thresholdType );
+        }
+        else if( srcStripe.depth() == CV_32F )
+        {
+            thresh_32f( srcStripe, dstStripe, (float)thresh, (float)maxval, thresholdType );
+        }
+    }
+
+private:
+    Mat src;
+    Mat dst;
+    int nStripes;
+
+    double thresh;
+    double maxval;
+    int thresholdType;
+};
+
 }
 
 double cv::threshold( InputArray _src, OutputArray _dst, double thresh, double maxval, int type )
@@ -679,7 +732,13 @@ double cv::threshold( InputArray _src, OutputArray _dst, double thresh, double m
 
     _dst.create( src.size(), src.type() );
     Mat dst = _dst.getMat();
-    
+
+    // Number of row stripes handed to parallel_for; 1 keeps the whole image in one call.
+    int nStripes = 1;
+#if defined HAVE_TBB && defined HAVE_TEGRA_OPTIMIZATION
+    nStripes = 4;
+#endif
+
     if( src.depth() == CV_8U )
     {
         int ithresh = cvFloor(thresh);
@@ -704,7 +763,10 @@ double cv::threshold( InputArray _src, OutputArray _dst, double thresh, double m
                 src.copyTo(dst);
         }
         else
-            thresh_8u( src, dst, (uchar)ithresh, (uchar)imaxval, type );
+        {
+            parallel_for(BlockedRange(0, nStripes),
+                         ThresholdRunner(src, dst, nStripes, (uchar)ithresh, (uchar)imaxval, type));
+        }
     }
     else if( src.depth() == CV_16S )
     {
@@ -730,10 +792,16 @@ double cv::threshold( InputArray _src, OutputArray _dst, double thresh, double m
                 src.copyTo(dst);
         }
         else
-            thresh_16s( src, dst, (short)ithresh, (short)imaxval, type );
+        {
+            parallel_for(BlockedRange(0, nStripes),
+                         ThresholdRunner(src, dst, nStripes, (short)ithresh, (short)imaxval, type));
+        }
     }
     else if( src.depth() == CV_32F )
-        thresh_32f( src, dst, (float)thresh, (float)maxval, type );
+    {
+        parallel_for(BlockedRange(0, nStripes),
+                     ThresholdRunner(src, dst, nStripes, (float)thresh, (float)maxval, type));
+    }
     else
         CV_Error( CV_StsUnsupportedFormat, "" );
 