From 5b0b59ecfb1b670fa3a152a8b2b0048d4b128b31 Mon Sep 17 00:00:00 2001
From: Diego
Date: Fri, 13 Dec 2019 16:00:06 +0100
Subject: [PATCH] Merge pull request #15189 from dvd42:keypoints_module

Keypoints module
---
 modules/dnn/include/opencv2/dnn/dnn.hpp | 32 ++++++++++++++
 modules/dnn/src/model.cpp               | 58 +++++++++++++++++++++++++
 modules/dnn/test/test_model.cpp         | 51 ++++++++++++++++++++++
 3 files changed, 141 insertions(+)

diff --git a/modules/dnn/include/opencv2/dnn/dnn.hpp b/modules/dnn/include/opencv2/dnn/dnn.hpp
index eace61d8c7..89f74faea1 100644
--- a/modules/dnn/include/opencv2/dnn/dnn.hpp
+++ b/modules/dnn/include/opencv2/dnn/dnn.hpp
@@ -1135,6 +1135,38 @@ CV__DNN_INLINE_NS_BEGIN
          CV_WRAP void classify(InputArray frame, CV_OUT int& classId, CV_OUT float& conf);
      };
 
+     /** @brief This class represents high-level API for keypoints models
+      *
+      * KeypointsModel allows to set params for preprocessing input image.
+      * KeypointsModel creates net from file with trained weights and config,
+      * sets preprocessing input, runs forward pass and returns the x and y coordinates of each detected keypoint
+      */
+     class CV_EXPORTS_W KeypointsModel: public Model
+     {
+     public:
+         /**
+          * @brief Create keypoints model from network represented in one of the supported formats.
+          * An order of @p model and @p config arguments does not matter.
+          * @param[in] model Binary file contains trained weights.
+          * @param[in] config Text file contains network configuration.
+          */
+         CV_WRAP KeypointsModel(const String& model, const String& config = "");
+
+         /**
+          * @brief Create model from deep learning network.
+          * @param[in] network Net object.
+          */
+         CV_WRAP KeypointsModel(const Net& network);
+
+         /** @brief Given the input @p frame, create input blob, run net
+          * @param[in] frame The input image.
+          * @param thresh minimum confidence threshold to select a keypoint
+          * @returns a vector holding the x and y coordinates of each detected keypoint
+          *
+          */
+         CV_WRAP std::vector<Point2f> estimate(InputArray frame, float thresh=0.5);
+     };
+
      /** @brief This class represents high-level API for segmentation models
       *
       * SegmentationModel allows to set params for preprocessing input image.
diff --git a/modules/dnn/src/model.cpp b/modules/dnn/src/model.cpp
index c903bac687..027c7bea79 100644
--- a/modules/dnn/src/model.cpp
+++ b/modules/dnn/src/model.cpp
@@ -137,6 +137,64 @@ void ClassificationModel::classify(InputArray frame, int& classId, float& conf)
     std::tie(classId, conf) = classify(frame);
 }
 
+KeypointsModel::KeypointsModel(const String& model, const String& config)
+    : Model(model, config) {};
+
+KeypointsModel::KeypointsModel(const Net& network) : Model(network) {};
+
+std::vector<Point2f> KeypointsModel::estimate(InputArray frame, float thresh)
+{
+
+    int frameHeight = frame.getMat().size[0];
+    int frameWidth = frame.getMat().size[1];
+    std::vector<Mat> outs;
+
+    impl->predict(*this, frame.getMat(), outs);
+    CV_Assert(outs.size() == 1);
+    Mat output = outs[0];
+
+    const int nPoints = output.size[1];
+    std::vector<Point2f> points;
+
+    // If output is a map, extract the keypoints
+    if (output.dims == 4)
+    {
+        int height = output.size[2];
+        int width = output.size[3];
+
+        // find the position of the keypoints (ignore the background)
+        for (int n=0; n < nPoints - 1; n++)
+        {
+            // Probability map of corresponding keypoint
+            Mat probMap(height, width, CV_32F, output.ptr(0, n));
+
+            Point2f p(-1, -1);
+            Point maxLoc;
+            double prob;
+            minMaxLoc(probMap, NULL, &prob, NULL, &maxLoc);
+            if (prob > thresh)
+            {
+                p = maxLoc;
+                p.x *= (float)frameWidth / width;
+                p.y *= (float)frameHeight / height;
+                points.push_back(p);
+            }
+        }
+    }
+    // Otherwise the output is a vector of keypoints and we can just return it
+    else
+    {
+        for (int n=0; n < nPoints; n++)
+        {
+            Point2f p;
+            p.x = *output.ptr<float>(0, n, 0);
+            p.y = *output.ptr<float>(0, n, 1);
+            points.push_back(p);
+        }
+    }
+    return points;
+}
+
 SegmentationModel::SegmentationModel(const String& model, const String& config)
     : Model(model, config) {};
 
diff --git a/modules/dnn/test/test_model.cpp b/modules/dnn/test/test_model.cpp
index d079415e4e..2a1c0b72ab 100644
--- a/modules/dnn/test/test_model.cpp
+++ b/modules/dnn/test/test_model.cpp
@@ -70,6 +70,25 @@ public:
         ASSERT_NEAR(prediction.second, ref.second, norm);
     }
 
+    void testKeypointsModel(const std::string& weights, const std::string& cfg,
+                            const Mat& frame, const Mat& exp, float norm,
+                            const Size& size = {-1, -1}, Scalar mean = Scalar(),
+                            double scale = 1.0, bool swapRB = false, bool crop = false)
+    {
+        checkBackend();
+
+        std::vector<Point2f> points;
+
+        KeypointsModel model(weights, cfg);
+        model.setInputSize(size).setInputMean(mean).setInputScale(scale)
+             .setInputSwapRB(swapRB).setInputCrop(crop);
+
+        points = model.estimate(frame, 0.5);
+
+        Mat out = Mat(points).reshape(1);
+        normAssert(exp, out, "", norm, norm);
+    }
+
     void testSegmentationModel(const std::string& weights_file, const std::string& config_file,
                                const std::string& inImgPath, const std::string& outImgPath,
                                float norm, const Size& size = {-1, -1}, Scalar mean = Scalar(),
@@ -221,6 +240,38 @@ TEST_P(Test_Model, DetectionMobilenetSSD)
                     scoreDiff, iouDiff, confThreshold, nmsThreshold, size, mean, scale);
 }
 
+TEST_P(Test_Model, Keypoints_pose)
+{
+    Mat inp = imread(_tf("pose.png"));
+    std::string weights = _tf("lightweight_pose_estimation.onnx");
+    Mat exp = blobFromNPY(_tf("keypoints_exp.npy"));
+
+
+    Size size{256, 256};
+    float norm = 1e-4;
+    double scale = 1.0/255;
+    Scalar mean = Scalar(128, 128, 128);
+    bool swapRB = false;
+
+    testKeypointsModel(weights, "", inp, exp, norm, size, mean, scale, swapRB);
+}
+
+TEST_P(Test_Model, Keypoints_face)
+{
+    Mat inp = imread(_tf("gray_face.png"), 0);
+    std::string weights = _tf("facial_keypoints.onnx");
+    Mat exp = blobFromNPY(_tf("facial_keypoints_exp.npy"));
+
+    Size size{224, 224};
+    float norm = 1e-4;
+    double scale = 1.0/255;
+    Scalar mean = Scalar();
+    bool swapRB = false;
+
+    testKeypointsModel(weights, "", inp, exp, norm, size, mean, scale, swapRB);
+
+}
+
 TEST_P(Test_Model, Detection_normalized)
 {
     std::string img_path = _tf("grace_hopper_227.png");