Merge pull request #1414 from lluisgomez:scene_text_detection_computeNMChannels

2024-11-25 19:50:38 +08:00 · 2013-09-13 13:31:14 +04:00 · 2013-09-13 13:31:14 +04:00 · c511d5d649
commit c511d5d649
parent 0033d453f2 ad94628038
2 changed files with 137 additions and 0 deletions
--- a/modules/objdetect/include/opencv2/objdetect/erfilter.hpp
+++ b/modules/objdetect/include/opencv2/objdetect/erfilter.hpp
@ -193,5 +193,28 @@ CV_EXPORTS Ptr<ERFilter> createERFilterNM1(const Ptr<ERFilter::Callback>& cb = P
 CV_EXPORTS Ptr<ERFilter> createERFilterNM2(const Ptr<ERFilter::Callback>& cb = Ptr<ERFilter::Callback>(),
                                                  float minProbability = 0.3);

+
+// computeNMChannels operation modes
+// (unnamed enum: the values are passed as the plain int _mode argument)
+enum { ERFILTER_NM_RGBLGrad = 0, // red, green, blue, lightness and gradient magnitude channels (default mode)
+       ERFILTER_NM_IHSGrad  = 1  // intensity, hue, saturation and gradient magnitude channels
+     };
+
+/*!
+    Compute the different channels to be processed independently in the N&M algorithm
+    Neumann L., Matas J.: Real-Time Scene Text Localization and Recognition, CVPR 2012
+
+    In the N&M algorithm, the combination of intensity (I), hue (H), saturation (S), and gradient
+    magnitude (Grad) channels is used in order to obtain high localization recall.
+    This implementation also provides an alternative combination of red (R), green (G), blue (B),
+    lightness (L), and gradient magnitude (Grad).
+
+    Every output channel is a CV_8UC1 image with the size of _src. ERFILTER_NM_IHSGrad
+    produces 4 channels and ERFILTER_NM_RGBLGrad produces 5; in both modes the gradient
+    magnitude is the last one.
+
+    If _src is empty, _channels is released and nothing is computed. Any other value of
+    _mode, or a non-empty _src that is not CV_8UC3, fails an assertion.
+
+    \param  _src           Source image. Must be RGB CV_8UC3.
+    \param  _channels      Output vector<Mat> where computed channels are stored.
+    \param  _mode          Mode of operation. Currently the only available options are
+                           ERFILTER_NM_RGBLGrad (by default) and ERFILTER_NM_IHSGrad.
+
+*/
+CV_EXPORTS void computeNMChannels(InputArray _src, OutputArrayOfArrays _channels, int _mode = ERFILTER_NM_RGBLGrad);
+
 }
 #endif // _OPENCV_ERFILTER_HPP_
--- a/modules/objdetect/src/erfilter.cpp
+++ b/modules/objdetect/src/erfilter.cpp
@ -1141,4 +1141,118 @@ Ptr<ERFilter> createERFilterNM2(const Ptr<ERFilter::Callback>& cb, float minProb
    filter->setMinProbability(minProbability);
    return (Ptr<ERFilter>)filter;
 }
+
+
+/* ------------------------------------------------------------------------------------*/
+/* -------------------------------- Compute Channels NM -------------------------------*/
+/* ------------------------------------------------------------------------------------*/
+
+
+/*
+    Compute the per-pixel gradient magnitude of a grey-level image.
+
+    The input is converted to float, differentiated with the central-difference kernel
+    [-1 0 1] once along x and once along y, and both derivatives are combined with
+    magnitude().
+
+    File-local helper: it is static (internal linkage), so no separate prototype is
+    needed and the symbol is not exported from this translation unit.
+
+    _grey_img            Input image, only read. Callers in this file pass the
+                         COLOR_RGB2GRAY conversion of the source image.
+    _gradient_magnitude  Output image, filled by magnitude() with float values.
+*/
+static void get_gradient_magnitude(const Mat& _grey_img, Mat& _gradient_magnitude)
+{
+    // Float copy of the input: with ddepth = -1 below, the derivatives keep this
+    // depth, so negative responses are preserved.
+    const Mat grey_f = Mat_<float>(_grey_img);
+
+    // Horizontal derivative (1x3 kernel).
+    const Mat kernel_x = (Mat_<float>(1,3) << -1,0,1);
+    Mat grad_x;
+    filter2D(grey_f, grad_x, -1, kernel_x, Point(-1,-1), 0, BORDER_DEFAULT);
+
+    // Vertical derivative (3x1 kernel).
+    const Mat kernel_y = (Mat_<float>(3,1) << -1,0,1);
+    Mat grad_y;
+    filter2D(grey_f, grad_y, -1, kernel_y, Point(-1,-1), 0, BORDER_DEFAULT);
+
+    magnitude( grad_x, grad_y, _gradient_magnitude);
+}
+
+
+/*!
+    Compute the different channels to be processed independently in the N&M algorithm
+    Neumann L., Matas J.: Real-Time Scene Text Localization and Recognition, CVPR 2012
+
+    In the N&M algorithm, the combination of intensity (I), hue (H), saturation (S), and gradient
+    magnitude (Grad) channels is used in order to obtain high localization recall.
+    This implementation also provides the alternative combination of red (R), green (G), blue (B),
+    lightness (L), and gradient magnitude (Grad).
+
+    Every output channel is a CV_8UC1 image with the size of _src:
+      - ERFILTER_NM_IHSGrad  (4 channels): the three planes of the COLOR_RGB2HSV conversion,
+        followed by the gradient magnitude.
+      - ERFILTER_NM_RGBLGrad (5 channels): the three planes of _src, the plane at index 1 of
+        the COLOR_RGB2HLS conversion (lightness), followed by the gradient magnitude.
+    The gradient magnitude is computed on the COLOR_RGB2GRAY conversion of _src and then
+    converted to 8 bit.
+
+    An empty _src releases _channels and returns without computing anything.
+
+    \param  _src           Source image. Must be RGB CV_8UC3.
+    \param  _channels      Output vector<Mat> where computed channels are stored.
+    \param  _mode          Mode of operation. Currently the only available options are
+                           ERFILTER_NM_RGBLGrad and ERFILTER_NM_IHSGrad.
+
+*/
+void computeNMChannels(InputArray _src, OutputArrayOfArrays _channels, int _mode)
+{
+
+    CV_Assert( ( _mode == ERFILTER_NM_RGBLGrad ) || ( _mode == ERFILTER_NM_IHSGrad ) );
+
+    Mat src = _src.getMat();
+    if( src.empty() )
+    {
+        _channels.release();
+        return;
+    }
+
+    // assert RGB image
+    CV_Assert(src.type() == CV_8UC3);
+
+    // Gather the mode-specific planes first; the gradient plane shared by both
+    // modes is appended afterwards, so it always ends up last.
+    vector<Mat> planes;
+    if (_mode == ERFILTER_NM_IHSGrad)
+    {
+        Mat hsv;
+        cvtColor(src, hsv, COLOR_RGB2HSV);
+        split(hsv, planes);
+    }
+    else // ERFILTER_NM_RGBLGrad, guaranteed by the assertion on _mode above
+    {
+        split(src, planes);
+
+        Mat hls;
+        cvtColor(src, hls, COLOR_RGB2HLS);
+        vector<Mat> channelsHLS;
+        split(hls, channelsHLS);
+        planes.push_back(channelsHLS.at(1)); // lightness plane
+    }
+
+    // Gradient magnitude of the grey-level image, converted to 8 bit like the
+    // other channels.
+    Mat grey;
+    cvtColor(src, grey, COLOR_RGB2GRAY);
+    Mat gradient_magnitude;
+    get_gradient_magnitude( grey, gradient_magnitude);
+    gradient_magnitude.convertTo(gradient_magnitude, CV_8UC1);
+    planes.push_back(gradient_magnitude);
+
+    // Size the output container, then allocate and fill each channel in order.
+    const int channel_count = static_cast<int>(planes.size());
+    _channels.create( channel_count, 1, src.depth());
+    for (int i = 0; i < channel_count; i++)
+    {
+        _channels.create(src.rows, src.cols, CV_8UC1, i);
+        Mat channel = _channels.getMat(i);
+        planes.at(i).copyTo(channel);
+    }
+}
 }