From 8be93a6de7be78495c20afd59bf8bf851eb137c5 Mon Sep 17 00:00:00 2001 From: zihaomu Date: Sun, 30 Apr 2023 22:03:21 +0800 Subject: [PATCH] add scale factor to DB demo. --- modules/dnn/include/opencv2/dnn/dnn.hpp | 2 +- modules/dnn/src/dnn_common.hpp | 15 +++++++++ modules/dnn/src/model.cpp | 41 +++++++++++++++++++------ modules/dnn/test/test_model.cpp | 30 +++++++++++++----- 4 files changed, 70 insertions(+), 18 deletions(-) diff --git a/modules/dnn/include/opencv2/dnn/dnn.hpp b/modules/dnn/include/opencv2/dnn/dnn.hpp index 829d57271d..3233ab3c66 100644 --- a/modules/dnn/include/opencv2/dnn/dnn.hpp +++ b/modules/dnn/include/opencv2/dnn/dnn.hpp @@ -1422,7 +1422,7 @@ CV__DNN_INLINE_NS_BEGIN /** @brief Set scalefactor value for frame. * @param[in] scale Multiplier for frame values. */ - CV_WRAP Model& setInputScale(double scale); + CV_WRAP Model& setInputScale(const Scalar& scale); /** @brief Set flag crop for frame. * @param[in] crop Flag which indicates whether image will be cropped after resize or not. diff --git a/modules/dnn/src/dnn_common.hpp b/modules/dnn/src/dnn_common.hpp index f5c3cce7ca..2561de4a9f 100644 --- a/modules/dnn/src/dnn_common.hpp +++ b/modules/dnn/src/dnn_common.hpp @@ -154,6 +154,21 @@ static inline std::string toString(const Mat& blob, const std::string& name = st return ss.str(); } +// Scalefactor is a common parameter used for data scaling. In OpenCV, it is usually represented as a Scalar. +// A scale factor of 0 is meaningless, so (x, 0, 0, 0) is taken to mean the single value x for every channel. +// The following function broadcasts such a scalefactor to (x, x, x, x); any other value is returned unchanged. 
+static inline Scalar_ broadcastRealScalar(const Scalar_& _scale) +{ + Scalar_ scale = _scale; + if (scale[1] == 0 && scale[2] == 0 && scale[3] == 0) + { + CV_Assert(scale[0] != 0 && "Scalefactor of 0 is meaningless."); + scale = Scalar_::all(scale[0]); + } + + return scale; +} + CV__DNN_INLINE_NS_END diff --git a/modules/dnn/src/model.cpp b/modules/dnn/src/model.cpp index 7444011a64..8d1a788956 100644 --- a/modules/dnn/src/model.cpp +++ b/modules/dnn/src/model.cpp @@ -21,7 +21,7 @@ struct Model::Impl Size size; Scalar mean; - double scale = 1.0; + Scalar scale = Scalar::all(1.0); bool swapRB = false; bool crop = false; Mat blob; @@ -60,7 +60,7 @@ public: { size = size_; mean = mean_; - scale = scale_; + scale = Scalar::all(scale_); crop = crop_; swapRB = swapRB_; } @@ -75,7 +75,7 @@ public: mean = mean_; } /*virtual*/ - void setInputScale(double scale_) + void setInputScale(const Scalar& scale_) { scale = scale_; } @@ -97,7 +97,17 @@ public: if (size.empty()) CV_Error(Error::StsBadSize, "Input size not specified"); - blob = blobFromImage(frame, scale, size, mean, swapRB, crop); + Image2BlobParams param; + param.scalefactor = scale; + param.size = size; + param.mean = mean; + param.swapRB = swapRB; + if (crop) + { + param.paddingmode = DNN_PMODE_CROP_CENTER; + } + Mat blob = dnn::blobFromImageWithParams(frame, param); // [1, 10, 10, 4] + net.setInput(blob); // Faster-RCNN or R-FCN @@ -162,9 +172,11 @@ Model& Model::setInputMean(const Scalar& mean) return *this; } -Model& Model::setInputScale(double scale) +Model& Model::setInputScale(const Scalar& scale_) { CV_DbgAssert(impl); + + Scalar scale = broadcastRealScalar(scale_); impl->setInputScale(scale); return *this; } @@ -1358,7 +1370,7 @@ struct TextDetectionModel_DB_Impl : public TextDetectionModel_Impl { CV_TRACE_FUNCTION(); std::vector< std::vector > results; - + confidences.clear(); std::vector outs; processFrame(frame, outs); CV_Assert(outs.size() == 1); @@ -1385,7 +1397,8 @@ struct TextDetectionModel_DB_Impl 
: public TextDetectionModel_Impl std::vector& contour = contours[i]; // Calculate text contour score - if (contourScore(binary, contour) < polygonThreshold) + float score = contourScore(binary, contour); + if (score < polygonThreshold) continue; // Rescale @@ -1398,6 +1411,11 @@ struct TextDetectionModel_DB_Impl : public TextDetectionModel_Impl // Unclip RotatedRect box = minAreaRect(contourScaled); + float minLen = std::min(box.size.height/scaleWidth, box.size.width/scaleHeight); + + // Filter very small boxes + if (minLen < 3) + continue; // minArea() rect is not normalized, it may return rectangles with angle=-90 or height < width const float angle_threshold = 60; // do not expect vertical text, TODO detection algo property @@ -1422,10 +1440,12 @@ struct TextDetectionModel_DB_Impl : public TextDetectionModel_Impl approx.emplace_back(vertex[j]); std::vector polygon; unclip(approx, polygon, unclipRatio); + if (polygon.empty()) + continue; results.push_back(polygon); + confidences.push_back(score); } - confidences = std::vector(contours.size(), 1.0f); return results; } @@ -1458,7 +1478,10 @@ struct TextDetectionModel_DB_Impl : public TextDetectionModel_Impl { double area = contourArea(inPoly); double length = arcLength(inPoly, true); - CV_Assert(length > FLT_EPSILON); + + if(length == 0.) 
+ return; + double distance = area * unclipRatio / length; size_t numPoints = inPoly.size(); diff --git a/modules/dnn/test/test_model.cpp b/modules/dnn/test/test_model.cpp index d3217a0e49..2d6c4c7ac1 100644 --- a/modules/dnn/test/test_model.cpp +++ b/modules/dnn/test/test_model.cpp @@ -153,8 +153,8 @@ public: const std::string& imgPath, const std::vector>& gt, float binThresh, float polyThresh, uint maxCandidates, double unclipRatio, - const Size& size = {-1, -1}, Scalar mean = Scalar(), - double scale = 1.0, bool swapRB = false, bool crop = false) + const Size& size = {-1, -1}, Scalar mean = Scalar(), Scalar scale = Scalar::all(1.0), + double boxes_iou_diff = 0.05, bool swapRB = false, bool crop = false) { checkBackend(); @@ -197,7 +197,7 @@ public: imshow("result", result); // imwrite("result.png", result); waitKey(0); #endif - normAssertTextDetections(gt, contours, "", 0.05f); + normAssertTextDetections(gt, contours, "", boxes_iou_diff); // 2. Check quadrangle-based API // std::vector< std::vector > contours; @@ -209,7 +209,7 @@ public: imshow("result_contours", result); // imwrite("result_contours.png", result); waitKey(0); #endif - normAssertTextDetections(gt, contours, "", 0.05f); + normAssertTextDetections(gt, contours, "", boxes_iou_diff); } void testTextDetectionModelByEAST( @@ -743,7 +743,8 @@ TEST_P(Test_Model, TextDetectionByDB) applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16); std::string imgPath = _tf("text_det_test1.png"); - std::string weightPath = _tf("onnx/models/DB_TD500_resnet50.onnx", false); + std::string weightPathDB = _tf("onnx/models/DB_TD500_resnet50.onnx", false); + std::string weightPathPPDB = _tf("onnx/models/PP_OCRv3_DB_text_det.onnx", false); // GroundTruth std::vector> gt = { @@ -752,15 +753,28 @@ TEST_P(Test_Model, TextDetectionByDB) }; Size size{736, 736}; - double scale = 1.0 / 255.0; - Scalar mean = Scalar(122.67891434, 116.66876762, 104.00698793); + Scalar scaleDB = Scalar::all(1.0 / 255.0); + Scalar meanDB = Scalar(122.67891434, 
116.66876762, 104.00698793); + + // new mean and stddev + Scalar meanPPDB = Scalar(123.675, 116.28, 103.53); + Scalar stddevPPDB = Scalar(0.229, 0.224, 0.225); + Scalar scalePPDB = scaleDB / stddevPPDB; float binThresh = 0.3; float polyThresh = 0.5; uint maxCandidates = 200; double unclipRatio = 2.0; - testTextDetectionModelByDB(weightPath, "", imgPath, gt, binThresh, polyThresh, maxCandidates, unclipRatio, size, mean, scale); + { + SCOPED_TRACE("Original DB"); + testTextDetectionModelByDB(weightPathDB, "", imgPath, gt, binThresh, polyThresh, maxCandidates, unclipRatio, size, meanDB, scaleDB, 0.05f); + } + + { + SCOPED_TRACE("PP-OCRDBv3"); + testTextDetectionModelByDB(weightPathPPDB, "", imgPath, gt, binThresh, polyThresh, maxCandidates, unclipRatio, size, meanPPDB, scalePPDB, 0.21f); + } } TEST_P(Test_Model, TextDetectionByEAST)