From 544908d06c7a9788950b3ee5f2f8eb88fe88cd70 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Fri, 30 Jun 2017 18:46:00 +0300 Subject: [PATCH 1/3] dnn: some minor fixes in docs, indentation, unused code --- modules/dnn/include/opencv2/dnn.hpp | 2 +- .../dnn/include/opencv2/dnn/all_layers.hpp | 25 +-- modules/dnn/include/opencv2/dnn/dnn.hpp | 188 +++++++++--------- modules/dnn/src/dnn.cpp | 3 - 4 files changed, 108 insertions(+), 110 deletions(-) diff --git a/modules/dnn/include/opencv2/dnn.hpp b/modules/dnn/include/opencv2/dnn.hpp index 7bad7505a0..690a82ab84 100644 --- a/modules/dnn/include/opencv2/dnn.hpp +++ b/modules/dnn/include/opencv2/dnn.hpp @@ -44,7 +44,7 @@ // This is an umbrealla header to include into you project. // We are free to change headers layout in dnn subfolder, so please include -// this header for future compartibility +// this header for future compatibility /** @defgroup dnn Deep Neural Network module diff --git a/modules/dnn/include/opencv2/dnn/all_layers.hpp b/modules/dnn/include/opencv2/dnn/all_layers.hpp index 3e1fbae811..4f01227573 100644 --- a/modules/dnn/include/opencv2/dnn/all_layers.hpp +++ b/modules/dnn/include/opencv2/dnn/all_layers.hpp @@ -152,7 +152,19 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN int outputNameToIndex(String outputName); }; - //! Classical recurrent layer + /** @brief Classical recurrent layer + + Accepts two inputs @f$x_t@f$ and @f$h_{t-1}@f$ and compute two outputs @f$o_t@f$ and @f$h_t@f$. + + - input: should contain packed input @f$x_t@f$. + - output: should contain output @f$o_t@f$ (and @f$h_t@f$ if setProduceHiddenOutput() is set to true). + + input[0] should have shape [`T`, `N`, `data_dims`] where `T` and `N` is number of timestamps and number of independent samples of @f$x_t@f$ respectively. + + output[0] will have shape [`T`, `N`, @f$N_o@f$], where @f$N_o@f$ is number of rows in @f$ W_{xo} @f$ matrix. + + If setProduceHiddenOutput() is set to true then @p output[1] will contain a Mat with shape [`T`, `N`, @f$N_h@f$], where @f$N_h@f$ is number of rows in @f$ W_{hh} @f$ matrix. + */ class CV_EXPORTS RNNLayer : public Layer { public: @@ -180,17 +192,6 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN */ virtual void setProduceHiddenOutput(bool produce = false) = 0; - /** Accepts two inputs @f$x_t@f$ and @f$h_{t-1}@f$ and compute two outputs @f$o_t@f$ and @f$h_t@f$. - - @param input should contain packed input @f$x_t@f$. - @param output should contain output @f$o_t@f$ (and @f$h_t@f$ if setProduceHiddenOutput() is set to true). - - @p input[0] should have shape [`T`, `N`, `data_dims`] where `T` and `N` is number of timestamps and number of independent samples of @f$x_t@f$ respectively. - - @p output[0] will have shape [`T`, `N`, @f$N_o@f$], where @f$N_o@f$ is number of rows in @f$ W_{xo} @f$ matrix. - - If setProduceHiddenOutput() is set to true then @p output[1] will contain a Mat with shape [`T`, `N`, @f$N_h@f$], where @f$N_h@f$ is number of rows in @f$ W_{hh} @f$ matrix. - */ }; class CV_EXPORTS BaseConvolutionLayer : public Layer diff --git a/modules/dnn/include/opencv2/dnn/dnn.hpp b/modules/dnn/include/opencv2/dnn/dnn.hpp index 432bcf8e5d..f4369eef4e 100644 --- a/modules/dnn/include/opencv2/dnn/dnn.hpp +++ b/modules/dnn/include/opencv2/dnn/dnn.hpp @@ -371,28 +371,28 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN /** @brief Runs forward pass to compute output of layer with name @p outputName. * @param outputName name for layer which output is needed to get * @return blob for first output of specified layer. 
- * @details By default runs forward pass for the whole network. - */ + * @details By default runs forward pass for the whole network. + */ CV_WRAP Mat forward(const String& outputName = String()); /** @brief Runs forward pass to compute output of layer with name @p outputName. * @param outputBlobs contains all output blobs for specified layer. * @param outputName name for layer which output is needed to get - * @details If @p outputName is empty, runs forward pass for the whole network. - */ + * @details If @p outputName is empty, runs forward pass for the whole network. + */ CV_WRAP void forward(std::vector& outputBlobs, const String& outputName = String()); /** @brief Runs forward pass to compute outputs of layers listed in @p outBlobNames. * @param outputBlobs contains blobs for first outputs of specified layers. * @param outBlobNames names for layers which outputs are needed to get - */ + */ CV_WRAP void forward(std::vector& outputBlobs, const std::vector& outBlobNames); /** @brief Runs forward pass to compute outputs of layers listed in @p outBlobNames. * @param outputBlobs contains all output blobs for each layer specified in @p outBlobNames. * @param outBlobNames names for layers which outputs are needed to get - */ + */ CV_WRAP void forward(std::vector >& outputBlobs, const std::vector& outBlobNames); @@ -460,103 +460,103 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN */ CV_WRAP std::vector getUnconnectedOutLayers() const; /** @brief Returns input and output shapes for all layers in loaded model; - * preliminary inferencing isn't necessary. - * @param netInputShapes shapes for all input blobs in net input layer. - * @param layersIds output parameter for layer IDs. - * @param inLayersShapes output parameter for input layers shapes; - * order is the same as in layersIds - * @param outLayersShapes output parameter for output layers shapes; - * order is the same as in layersIds - */ - CV_WRAP void getLayersShapes(const std::vector& netInputShapes, - std::vector* layersIds, - std::vector >* inLayersShapes, - std::vector >* outLayersShapes) const; + * preliminary inferencing isn't necessary. + * @param netInputShapes shapes for all input blobs in net input layer. + * @param layersIds output parameter for layer IDs. + * @param inLayersShapes output parameter for input layers shapes; + * order is the same as in layersIds + * @param outLayersShapes output parameter for output layers shapes; + * order is the same as in layersIds + */ + CV_WRAP void getLayersShapes(const std::vector& netInputShapes, + std::vector* layersIds, + std::vector >* inLayersShapes, + std::vector >* outLayersShapes) const; - /** @overload */ - CV_WRAP void getLayersShapes(const MatShape& netInputShape, - std::vector* layersIds, - std::vector >* inLayersShapes, - std::vector >* outLayersShapes) const; + /** @overload */ + CV_WRAP void getLayersShapes(const MatShape& netInputShape, + std::vector* layersIds, + std::vector >* inLayersShapes, + std::vector >* outLayersShapes) const; - /** @brief Returns input and output shapes for layer with specified - * id in loaded model; preliminary inferencing isn't necessary. - * @param netInputShape shape input blob in net input layer. - * @param layerId id for layer. 
- * @param inLayerShapes output parameter for input layers shapes; - * order is the same as in layersIds - * @param outLayerShapes output parameter for output layers shapes; - * order is the same as in layersIds - */ - CV_WRAP void getLayerShapes(const MatShape& netInputShape, + /** @brief Returns input and output shapes for layer with specified + * id in loaded model; preliminary inferencing isn't necessary. + * @param netInputShape shape input blob in net input layer. + * @param layerId id for layer. + * @param inLayerShapes output parameter for input layers shapes; + * order is the same as in layersIds + * @param outLayerShapes output parameter for output layers shapes; + * order is the same as in layersIds + */ + CV_WRAP void getLayerShapes(const MatShape& netInputShape, + const int layerId, + std::vector* inLayerShapes, + std::vector* outLayerShapes) const; + + /** @overload */ + CV_WRAP void getLayerShapes(const std::vector& netInputShapes, const int layerId, std::vector* inLayerShapes, std::vector* outLayerShapes) const; + /** @brief Computes FLOP for whole loaded model with specified input shapes. + * @param netInputShapes vector of shapes for all net inputs. + * @returns computed FLOP. + */ + CV_WRAP int64 getFLOPS(const std::vector& netInputShapes) const; + /** @overload */ + CV_WRAP int64 getFLOPS(const MatShape& netInputShape) const; + /** @overload */ + CV_WRAP int64 getFLOPS(const int layerId, + const std::vector& netInputShapes) const; + /** @overload */ + CV_WRAP int64 getFLOPS(const int layerId, + const MatShape& netInputShape) const; - /** @overload */ - CV_WRAP void getLayerShapes(const std::vector& netInputShapes, - const int layerId, - std::vector* inLayerShapes, - std::vector* outLayerShapes) const; - /** @brief Computes FLOP for whole loaded model with specified input shapes. - * @param netInputShapes vector of shapes for all net inputs. - * @returns computed FLOP. - */ - CV_WRAP int64 getFLOPS(const std::vector& netInputShapes) const; - /** @overload */ - CV_WRAP int64 getFLOPS(const MatShape& netInputShape) const; - /** @overload */ - CV_WRAP int64 getFLOPS(const int layerId, - const std::vector& netInputShapes) const; - /** @overload */ - CV_WRAP int64 getFLOPS(const int layerId, - const MatShape& netInputShape) const; + /** @brief Returns list of types for layer used in model. + * @param layersTypes output parameter for returning types. + */ + CV_WRAP void getLayerTypes(CV_OUT std::vector& layersTypes) const; - /** @brief Returns list of types for layer used in model. - * @param layersTypes output parameter for returning types. - */ - CV_WRAP void getLayerTypes(CV_OUT std::vector& layersTypes) const; + /** @brief Returns count of layers of specified type. + * @param layerType type. + * @returns count of layers + */ + CV_WRAP int getLayersCount(const String& layerType) const; - /** @brief Returns count of layers of specified type. - * @param layerType type. - * @returns count of layers - */ - CV_WRAP int getLayersCount(const String& layerType) const; + /** @brief Computes bytes number which are requered to store + * all weights and intermediate blobs for model. + * @param netInputShapes vector of shapes for all net inputs. + * @param weights output parameter to store resulting bytes for weights. + * @param blobs output parameter to store resulting bytes for intermediate blobs. 
+ */ + CV_WRAP void getMemoryConsumption(const std::vector& netInputShapes, + CV_OUT size_t& weights, CV_OUT size_t& blobs) const; + /** @overload */ + CV_WRAP void getMemoryConsumption(const MatShape& netInputShape, + CV_OUT size_t& weights, CV_OUT size_t& blobs) const; + /** @overload */ + CV_WRAP void getMemoryConsumption(const int layerId, + const std::vector& netInputShapes, + CV_OUT size_t& weights, CV_OUT size_t& blobs) const; + /** @overload */ + CV_WRAP void getMemoryConsumption(const int layerId, + const MatShape& netInputShape, + CV_OUT size_t& weights, CV_OUT size_t& blobs) const; - /** @brief Computes bytes number which are requered to store - * all weights and intermediate blobs for model. - * @param netInputShapes vector of shapes for all net inputs. - * @param weights output parameter to store resulting bytes for weights. - * @param blobs output parameter to store resulting bytes for intermediate blobs. - */ - CV_WRAP void getMemoryConsumption(const std::vector& netInputShapes, - CV_OUT size_t& weights, CV_OUT size_t& blobs) const; - /** @overload */ - CV_WRAP void getMemoryConsumption(const MatShape& netInputShape, - CV_OUT size_t& weights, CV_OUT size_t& blobs) const; - /** @overload */ - CV_WRAP void getMemoryConsumption(const int layerId, - const std::vector& netInputShapes, - CV_OUT size_t& weights, CV_OUT size_t& blobs) const; - /** @overload */ - CV_WRAP void getMemoryConsumption(const int layerId, - const MatShape& netInputShape, - CV_OUT size_t& weights, CV_OUT size_t& blobs) const; - - /** @brief Computes bytes number which are requered to store - * all weights and intermediate blobs for each layer. - * @param netInputShapes vector of shapes for all net inputs. - * @param layerIds output vector to save layer IDs. - * @param weights output parameter to store resulting bytes for weights. - * @param blobs output parameter to store resulting bytes for intermediate blobs. - */ - CV_WRAP void getMemoryConsumption(const std::vector& netInputShapes, - CV_OUT std::vector& layerIds, CV_OUT std::vector& weights, - CV_OUT std::vector& blobs) const; - /** @overload */ - CV_WRAP void getMemoryConsumption(const MatShape& netInputShape, - CV_OUT std::vector& layerIds, CV_OUT std::vector& weights, - CV_OUT std::vector& blobs) const; + /** @brief Computes bytes number which are requered to store + * all weights and intermediate blobs for each layer. + * @param netInputShapes vector of shapes for all net inputs. + * @param layerIds output vector to save layer IDs. + * @param weights output parameter to store resulting bytes for weights. + * @param blobs output parameter to store resulting bytes for intermediate blobs. 
+ */ + CV_WRAP void getMemoryConsumption(const std::vector& netInputShapes, + CV_OUT std::vector& layerIds, CV_OUT std::vector& weights, + CV_OUT std::vector& blobs) const; + /** @overload */ + CV_WRAP void getMemoryConsumption(const MatShape& netInputShape, + CV_OUT std::vector& layerIds, CV_OUT std::vector& weights, + CV_OUT std::vector& blobs) const; private: struct Impl; diff --git a/modules/dnn/src/dnn.cpp b/modules/dnn/src/dnn.cpp index 200c150c54..a371b18540 100644 --- a/modules/dnn/src/dnn.cpp +++ b/modules/dnn/src/dnn.cpp @@ -969,9 +969,6 @@ struct Net::Impl } } - #define CV_RETHROW_ERROR(err, newmsg)\ - cv::error(err.code, newmsg, err.func.c_str(), err.file.c_str(), err.line) - void allocateLayer(int lid, const LayersShapesMap& layersShapes) { CV_TRACE_FUNCTION(); From c3e6de293fba79c6698362bd6e7ee99d9d63f1f4 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Thu, 13 Jul 2017 16:34:23 +0300 Subject: [PATCH 2/3] dnn: code cleanup, refactor detection output layer --- .../dnn/src/layers/detection_output_layer.cpp | 713 ++++++++---------- 1 file changed, 319 insertions(+), 394 deletions(-) diff --git a/modules/dnn/src/layers/detection_output_layer.cpp b/modules/dnn/src/layers/detection_output_layer.cpp index fba7835147..6da162423f 100644 --- a/modules/dnn/src/layers/detection_output_layer.cpp +++ b/modules/dnn/src/layers/detection_output_layer.cpp @@ -55,29 +55,13 @@ namespace util { template -std::string to_string(T value) -{ - std::ostringstream stream; - stream << value; - return stream.str(); -} - -template -void make_error(const std::string& message1, const T& message2) -{ - std::string error(message1); - error += std::string(util::to_string(message2)); - CV_Error(Error::StsBadArg, error.c_str()); -} - -template -bool SortScorePairDescend(const std::pair& pair1, +static inline bool SortScorePairDescend(const std::pair& pair1, const std::pair& pair2) { return pair1.first > pair2.first; } -} +} // namespace class DetectionOutputLayerImpl : public DetectionOutputLayer { @@ -133,7 +117,7 @@ public: message += " layer parameter does not contain "; message += parameterName; message += " parameter."; - CV_Error(Error::StsBadArg, message); + CV_ErrorNoReturn(Error::StsBadArg, message); } else { @@ -209,180 +193,173 @@ public: CV_TRACE_FUNCTION(); CV_TRACE_ARG_VALUE(name, "name", name.c_str()); - const float* locationData = inputs[0]->ptr(); - const float* confidenceData = inputs[1]->ptr(); - const float* priorData = inputs[2]->ptr(); + std::vector allDecodedBBoxes; + std::vector > > allConfidenceScores; int num = inputs[0]->size[0]; - int numPriors = inputs[2]->size[2] / 4; - // Retrieve all location predictions. - std::vector allLocationPredictions; - GetLocPredictions(locationData, num, numPriors, _numLocClasses, - _shareLocation, &allLocationPredictions); + // extract predictions from input layers + { + int numPriors = inputs[2]->size[2] / 4; - // Retrieve all confidences. - std::vector > > allConfidenceScores; - GetConfidenceScores(confidenceData, num, numPriors, _numClasses, - &allConfidenceScores); + const float* locationData = inputs[0]->ptr(); + const float* confidenceData = inputs[1]->ptr(); + const float* priorData = inputs[2]->ptr(); - // Retrieve all prior bboxes. It is same within a batch since we assume all - // images in a batch are of same dimension. 
- std::vector priorBBoxes; - std::vector > priorVariances; - GetPriorBBoxes(priorData, numPriors, &priorBBoxes, &priorVariances); + // Retrieve all location predictions + std::vector allLocationPredictions; + GetLocPredictions(locationData, num, numPriors, _numLocClasses, + _shareLocation, allLocationPredictions); - const bool clip_bbox = false; - // Decode all loc predictions to bboxes. - std::vector allDecodedBBoxes; - DecodeBBoxesAll(allLocationPredictions, priorBBoxes, priorVariances, num, - _shareLocation, _numLocClasses, _backgroundLabelId, - _codeType, _varianceEncodedInTarget, clip_bbox, &allDecodedBBoxes); + // Retrieve all confidences + GetConfidenceScores(confidenceData, num, numPriors, _numClasses, allConfidenceScores); - int numKept = 0; + // Retrieve all prior bboxes + std::vector priorBBoxes; + std::vector > priorVariances; + GetPriorBBoxes(priorData, numPriors, priorBBoxes, priorVariances); + + // Decode all loc predictions to bboxes + DecodeBBoxesAll(allLocationPredictions, priorBBoxes, priorVariances, num, + _shareLocation, _numLocClasses, _backgroundLabelId, + _codeType, _varianceEncodedInTarget, false, allDecodedBBoxes); + } + + size_t numKept = 0; std::vector > > allIndices; for (int i = 0; i < num; ++i) { - const LabelBBox& decodeBBoxes = allDecodedBBoxes[i]; - const std::vector >& confidenceScores = - allConfidenceScores[i]; - std::map > indices; - int numDetections = 0; - for (int c = 0; c < (int)_numClasses; ++c) - { - if (c == _backgroundLabelId) - { - // Ignore background class. - continue; - } - if (confidenceScores.size() <= c) - { - // Something bad happened if there are no predictions for current label. - util::make_error("Could not find confidence predictions for label ", c); - } - - const std::vector& scores = confidenceScores[c]; - int label = _shareLocation ? -1 : c; - if (decodeBBoxes.find(label) == decodeBBoxes.end()) - { - // Something bad happened if there are no predictions for current label. - util::make_error("Could not find location predictions for label ", label); - continue; - } - const std::vector& bboxes = - decodeBBoxes.find(label)->second; - ApplyNMSFast(bboxes, scores, _confidenceThreshold, _nmsThreshold, 1.0, - _topK, &(indices[c])); - numDetections += indices[c].size(); - } - if (_keepTopK > -1 && numDetections > _keepTopK) - { - std::vector > > scoreIndexPairs; - for (std::map >::iterator it = indices.begin(); - it != indices.end(); ++it) - { - int label = it->first; - const std::vector& labelIndices = it->second; - if (confidenceScores.size() <= label) - { - // Something bad happened for current label. - util::make_error("Could not find location predictions for label ", label); - continue; - } - const std::vector& scores = confidenceScores[label]; - for (size_t j = 0; j < labelIndices.size(); ++j) - { - size_t idx = labelIndices[j]; - CV_Assert(idx < scores.size()); - scoreIndexPairs.push_back( - std::make_pair(scores[idx], std::make_pair(label, idx))); - } - } - // Keep outputs k results per image. - std::sort(scoreIndexPairs.begin(), scoreIndexPairs.end(), - util::SortScorePairDescend >); - scoreIndexPairs.resize(_keepTopK); - // Store the new indices. 
- std::map > newIndices; - for (size_t j = 0; j < scoreIndexPairs.size(); ++j) - { - int label = scoreIndexPairs[j].second.first; - int idx = scoreIndexPairs[j].second.second; - newIndices[label].push_back(idx); - } - allIndices.push_back(newIndices); - numKept += _keepTopK; - } - else - { - allIndices.push_back(indices); - numKept += numDetections; - } + numKept += processDetections_(allDecodedBBoxes[i], allConfidenceScores[i], allIndices); } if (numKept == 0) { CV_ErrorNoReturn(Error::StsError, "Couldn't find any detections"); - return; } - int outputShape[] = {1, 1, numKept, 7}; + int outputShape[] = {1, 1, (int)numKept, 7}; outputs[0].create(4, outputShape, CV_32F); float* outputsData = outputs[0].ptr(); - int count = 0; + size_t count = 0; for (int i = 0; i < num; ++i) { - const std::vector >& confidenceScores = - allConfidenceScores[i]; - const LabelBBox& decodeBBoxes = allDecodedBBoxes[i]; - for (std::map >::iterator it = allIndices[i].begin(); - it != allIndices[i].end(); ++it) + count += outputDetections_(i, &outputsData[count * 7], + allDecodedBBoxes[i], allConfidenceScores[i], + allIndices[i]); + } + CV_Assert(count == numKept); + } + + size_t outputDetections_( + const int i, float* outputsData, + const LabelBBox& decodeBBoxes, const std::vector >& confidenceScores, + const std::map >& indicesMap + ) + { + size_t count = 0; + for (std::map >::const_iterator it = indicesMap.begin(); it != indicesMap.end(); ++it) + { + int label = it->first; + if (confidenceScores.size() <= label) + CV_ErrorNoReturn_(cv::Error::StsError, ("Could not find confidence predictions for label %d", label)); + const std::vector& scores = confidenceScores[label]; + int locLabel = _shareLocation ? -1 : label; + LabelBBox::const_iterator label_bboxes = decodeBBoxes.find(locLabel); + if (label_bboxes == decodeBBoxes.end()) + CV_ErrorNoReturn_(cv::Error::StsError, ("Could not find location predictions for label %d", locLabel)); + const std::vector& indices = it->second; + + for (size_t j = 0; j < indices.size(); ++j, ++count) + { + int idx = indices[j]; + const caffe::NormalizedBBox& decode_bbox = label_bboxes->second[idx]; + outputsData[count * 7] = i; + outputsData[count * 7 + 1] = label; + outputsData[count * 7 + 2] = scores[idx]; + outputsData[count * 7 + 3] = decode_bbox.xmin(); + outputsData[count * 7 + 4] = decode_bbox.ymin(); + outputsData[count * 7 + 5] = decode_bbox.xmax(); + outputsData[count * 7 + 6] = decode_bbox.ymax(); + } + } + return count; + } + + size_t processDetections_( + const LabelBBox& decodeBBoxes, const std::vector >& confidenceScores, + std::vector > >& allIndices + ) + { + std::map > indices; + size_t numDetections = 0; + for (int c = 0; c < (int)_numClasses; ++c) + { + if (c == _backgroundLabelId) + continue; // Ignore background class. + if (c >= confidenceScores.size()) + CV_ErrorNoReturn_(cv::Error::StsError, ("Could not find confidence predictions for label %d", c)); + + const std::vector& scores = confidenceScores[c]; + int label = _shareLocation ? 
-1 : c; + + LabelBBox::const_iterator label_bboxes = decodeBBoxes.find(label); + if (label_bboxes == decodeBBoxes.end()) + CV_ErrorNoReturn_(cv::Error::StsError, ("Could not find location predictions for label %d", label)); + ApplyNMSFast(label_bboxes->second, scores, _confidenceThreshold, _nmsThreshold, 1.0, _topK, indices[c]); + numDetections += indices[c].size(); + } + if (_keepTopK > -1 && numDetections > (size_t)_keepTopK) + { + std::vector > > scoreIndexPairs; + for (std::map >::iterator it = indices.begin(); + it != indices.end(); ++it) { int label = it->first; - if (confidenceScores.size() <= label) - { - // Something bad happened if there are no predictions for current label. - util::make_error("Could not find confidence predictions for label ", label); - continue; - } + const std::vector& labelIndices = it->second; + if (label >= confidenceScores.size()) + CV_ErrorNoReturn_(cv::Error::StsError, ("Could not find location predictions for label %d", label)); const std::vector& scores = confidenceScores[label]; - int locLabel = _shareLocation ? -1 : label; - if (decodeBBoxes.find(locLabel) == decodeBBoxes.end()) + for (size_t j = 0; j < labelIndices.size(); ++j) { - // Something bad happened if there are no predictions for current label. - util::make_error("Could not find location predictions for label ", locLabel); - continue; - } - const std::vector& bboxes = - decodeBBoxes.find(locLabel)->second; - std::vector& indices = it->second; - - for (size_t j = 0; j < indices.size(); ++j) - { - int idx = indices[j]; - outputsData[count * 7] = i; - outputsData[count * 7 + 1] = label; - outputsData[count * 7 + 2] = scores[idx]; - caffe::NormalizedBBox clipBBox = bboxes[idx]; - outputsData[count * 7 + 3] = clipBBox.xmin(); - outputsData[count * 7 + 4] = clipBBox.ymin(); - outputsData[count * 7 + 5] = clipBBox.xmax(); - outputsData[count * 7 + 6] = clipBBox.ymax(); - - ++count; + size_t idx = labelIndices[j]; + CV_Assert(idx < scores.size()); + scoreIndexPairs.push_back(std::make_pair(scores[idx], std::make_pair(label, idx))); } } + // Keep outputs k results per image. + std::sort(scoreIndexPairs.begin(), scoreIndexPairs.end(), + util::SortScorePairDescend >); + scoreIndexPairs.resize(_keepTopK); + + std::map > newIndices; + for (size_t j = 0; j < scoreIndexPairs.size(); ++j) + { + int label = scoreIndexPairs[j].second.first; + int idx = scoreIndexPairs[j].second.second; + newIndices[label].push_back(idx); + } + allIndices.push_back(newIndices); + return (size_t)_keepTopK; + } + else + { + allIndices.push_back(indices); + return numDetections; } } - // Compute bbox size. - float BBoxSize(const caffe::NormalizedBBox& bbox, - const bool normalized=true) + + // ************************************************************** + // Utility functions + // ************************************************************** + + // Compute bbox size + template + static float BBoxSize(const caffe::NormalizedBBox& bbox) { if (bbox.xmax() < bbox.xmin() || bbox.ymax() < bbox.ymin()) { - // If bbox is invalid (e.g. xmax < xmin or ymax < ymin), return 0. - return 0; + return 0; // If bbox is invalid (e.g. xmax < xmin or ymax < ymin), return 0. } else { @@ -407,193 +384,155 @@ public: } } - // Clip the caffe::NormalizedBBox such that the range for each corner is [0, 1]. 
- void ClipBBox(const caffe::NormalizedBBox& bbox, - caffe::NormalizedBBox* clipBBox) - { - clipBBox->set_xmin(std::max(std::min(bbox.xmin(), 1.f), 0.f)); - clipBBox->set_ymin(std::max(std::min(bbox.ymin(), 1.f), 0.f)); - clipBBox->set_xmax(std::max(std::min(bbox.xmax(), 1.f), 0.f)); - clipBBox->set_ymax(std::max(std::min(bbox.ymax(), 1.f), 0.f)); - clipBBox->clear_size(); - clipBBox->set_size(BBoxSize(*clipBBox)); - clipBBox->set_difficult(bbox.difficult()); - } - // Decode a bbox according to a prior bbox. - void DecodeBBox( + // Decode a bbox according to a prior bbox + template + static void DecodeBBox( const caffe::NormalizedBBox& prior_bbox, const std::vector& prior_variance, - const CodeType code_type, const bool variance_encoded_in_target, + const CodeType code_type, const bool clip_bbox, const caffe::NormalizedBBox& bbox, - caffe::NormalizedBBox* decode_bbox) { - if (code_type == caffe::PriorBoxParameter_CodeType_CORNER) { - if (variance_encoded_in_target) { - // variance is encoded in target, we simply need to add the offset - // predictions. - decode_bbox->set_xmin(prior_bbox.xmin() + bbox.xmin()); - decode_bbox->set_ymin(prior_bbox.ymin() + bbox.ymin()); - decode_bbox->set_xmax(prior_bbox.xmax() + bbox.xmax()); - decode_bbox->set_ymax(prior_bbox.ymax() + bbox.ymax()); - } else { - // variance is encoded in bbox, we need to scale the offset accordingly. - decode_bbox->set_xmin( - prior_bbox.xmin() + prior_variance[0] * bbox.xmin()); - decode_bbox->set_ymin( - prior_bbox.ymin() + prior_variance[1] * bbox.ymin()); - decode_bbox->set_xmax( - prior_bbox.xmax() + prior_variance[2] * bbox.xmax()); - decode_bbox->set_ymax( - prior_bbox.ymax() + prior_variance[3] * bbox.ymax()); - } - } else if (code_type == caffe::PriorBoxParameter_CodeType_CENTER_SIZE) { - float prior_width = prior_bbox.xmax() - prior_bbox.xmin(); - CV_Assert(prior_width > 0); - float prior_height = prior_bbox.ymax() - prior_bbox.ymin(); - CV_Assert(prior_height > 0); - float prior_center_x = (prior_bbox.xmin() + prior_bbox.xmax()) / 2.; - float prior_center_y = (prior_bbox.ymin() + prior_bbox.ymax()) / 2.; + caffe::NormalizedBBox& decode_bbox) + { + float bbox_xmin = variance_encoded_in_target ? bbox.xmin() : prior_variance[0] * bbox.xmin(); + float bbox_ymin = variance_encoded_in_target ? bbox.ymin() : prior_variance[1] * bbox.ymin(); + float bbox_xmax = variance_encoded_in_target ? bbox.xmax() : prior_variance[2] * bbox.xmax(); + float bbox_ymax = variance_encoded_in_target ? bbox.ymax() : prior_variance[3] * bbox.ymax(); + switch(code_type) + { + case caffe::PriorBoxParameter_CodeType_CORNER: + decode_bbox.set_xmin(prior_bbox.xmin() + bbox_xmin); + decode_bbox.set_ymin(prior_bbox.ymin() + bbox_ymin); + decode_bbox.set_xmax(prior_bbox.xmax() + bbox_xmax); + decode_bbox.set_ymax(prior_bbox.ymax() + bbox_ymax); + break; + case caffe::PriorBoxParameter_CodeType_CENTER_SIZE: + { + float prior_width = prior_bbox.xmax() - prior_bbox.xmin(); + CV_Assert(prior_width > 0); + float prior_height = prior_bbox.ymax() - prior_bbox.ymin(); + CV_Assert(prior_height > 0); + float prior_center_x = (prior_bbox.xmin() + prior_bbox.xmax()) * .5; + float prior_center_y = (prior_bbox.ymin() + prior_bbox.ymax()) * .5; - float decode_bbox_center_x, decode_bbox_center_y; - float decode_bbox_width, decode_bbox_height; - if (variance_encoded_in_target) { - // variance is encoded in target, we simply need to retore the offset - // predictions. 
- decode_bbox_center_x = bbox.xmin() * prior_width + prior_center_x; - decode_bbox_center_y = bbox.ymin() * prior_height + prior_center_y; - decode_bbox_width = exp(bbox.xmax()) * prior_width; - decode_bbox_height = exp(bbox.ymax()) * prior_height; - } else { - // variance is encoded in bbox, we need to scale the offset accordingly. - decode_bbox_center_x = - prior_variance[0] * bbox.xmin() * prior_width + prior_center_x; - decode_bbox_center_y = - prior_variance[1] * bbox.ymin() * prior_height + prior_center_y; - decode_bbox_width = - exp(prior_variance[2] * bbox.xmax()) * prior_width; - decode_bbox_height = - exp(prior_variance[3] * bbox.ymax()) * prior_height; + float decode_bbox_center_x, decode_bbox_center_y; + float decode_bbox_width, decode_bbox_height; + decode_bbox_center_x = bbox_xmin * prior_width + prior_center_x; + decode_bbox_center_y = bbox_ymin * prior_height + prior_center_y; + decode_bbox_width = exp(bbox_xmax) * prior_width; + decode_bbox_height = exp(bbox_ymax) * prior_height; + decode_bbox.set_xmin(decode_bbox_center_x - decode_bbox_width * .5); + decode_bbox.set_ymin(decode_bbox_center_y - decode_bbox_height * .5); + decode_bbox.set_xmax(decode_bbox_center_x + decode_bbox_width * .5); + decode_bbox.set_ymax(decode_bbox_center_y + decode_bbox_height * .5); + break; + } + default: + CV_ErrorNoReturn(Error::StsBadArg, "Unknown type."); + }; + if (clip_bbox) + { + // Clip the caffe::NormalizedBBox such that the range for each corner is [0, 1] + decode_bbox.set_xmin(std::max(std::min(decode_bbox.xmin(), 1.f), 0.f)); + decode_bbox.set_ymin(std::max(std::min(decode_bbox.ymin(), 1.f), 0.f)); + decode_bbox.set_xmax(std::max(std::min(decode_bbox.xmax(), 1.f), 0.f)); + decode_bbox.set_ymax(std::max(std::min(decode_bbox.ymax(), 1.f), 0.f)); } - - decode_bbox->set_xmin(decode_bbox_center_x - decode_bbox_width / 2.); - decode_bbox->set_ymin(decode_bbox_center_y - decode_bbox_height / 2.); - decode_bbox->set_xmax(decode_bbox_center_x + decode_bbox_width / 2.); - decode_bbox->set_ymax(decode_bbox_center_y + decode_bbox_height / 2.); - } else { - CV_Error(Error::StsBadArg, "Unknown LocLossType."); - } - float bbox_size = BBoxSize(*decode_bbox); - decode_bbox->set_size(bbox_size); - if (clip_bbox) { - ClipBBox(*decode_bbox, decode_bbox); - } + decode_bbox.clear_size(); + decode_bbox.set_size(BBoxSize(decode_bbox)); } - // Decode a set of bboxes according to a set of prior bboxes. 
- void DecodeBBoxes( + // Decode a set of bboxes according to a set of prior bboxes + static void DecodeBBoxes( const std::vector& prior_bboxes, const std::vector >& prior_variances, const CodeType code_type, const bool variance_encoded_in_target, const bool clip_bbox, const std::vector& bboxes, - std::vector* decode_bboxes) { - CV_Assert(prior_bboxes.size() == prior_variances.size()); - CV_Assert(prior_bboxes.size() == bboxes.size()); - int num_bboxes = prior_bboxes.size(); - if (num_bboxes >= 1) { - CV_Assert(prior_variances[0].size() == 4); - } - decode_bboxes->clear(); - for (int i = 0; i < num_bboxes; ++i) { - caffe::NormalizedBBox decode_bbox; - DecodeBBox(prior_bboxes[i], prior_variances[i], code_type, - variance_encoded_in_target, clip_bbox, bboxes[i], &decode_bbox); - decode_bboxes->push_back(decode_bbox); - } + std::vector& decode_bboxes) + { + CV_Assert(prior_bboxes.size() == prior_variances.size()); + CV_Assert(prior_bboxes.size() == bboxes.size()); + size_t num_bboxes = prior_bboxes.size(); + CV_Assert(num_bboxes == 0 || prior_variances[0].size() == 4); + decode_bboxes.clear(); decode_bboxes.resize(num_bboxes); + if(variance_encoded_in_target) + { + for (int i = 0; i < num_bboxes; ++i) + DecodeBBox(prior_bboxes[i], prior_variances[i], code_type, + clip_bbox, bboxes[i], decode_bboxes[i]); + } + else + { + for (int i = 0; i < num_bboxes; ++i) + DecodeBBox(prior_bboxes[i], prior_variances[i], code_type, + clip_bbox, bboxes[i], decode_bboxes[i]); + } } - // Decode all bboxes in a batch. - void DecodeBBoxesAll(const std::vector& all_loc_preds, + // Decode all bboxes in a batch + static void DecodeBBoxesAll(const std::vector& all_loc_preds, const std::vector& prior_bboxes, const std::vector >& prior_variances, const int num, const bool share_location, const int num_loc_classes, const int background_label_id, const CodeType code_type, const bool variance_encoded_in_target, - const bool clip, std::vector* all_decode_bboxes) { - CV_Assert(all_loc_preds.size() == num); - all_decode_bboxes->clear(); - all_decode_bboxes->resize(num); - for (int i = 0; i < num; ++i) { - // Decode predictions into bboxes. - LabelBBox& decode_bboxes = (*all_decode_bboxes)[i]; - for (int c = 0; c < num_loc_classes; ++c) { - int label = share_location ? -1 : c; - if (label == background_label_id) { - // Ignore background class. - continue; - } - if (all_loc_preds[i].find(label) == all_loc_preds[i].end()) { - // Something bad happened if there are no predictions for current label. - util::make_error("Could not find location predictions for label ", label); - } - const std::vector& label_loc_preds = - all_loc_preds[i].find(label)->second; - DecodeBBoxes(prior_bboxes, prior_variances, - code_type, variance_encoded_in_target, clip, - label_loc_preds, &(decode_bboxes[label])); + const bool clip, std::vector& all_decode_bboxes) + { + CV_Assert(all_loc_preds.size() == num); + all_decode_bboxes.clear(); + all_decode_bboxes.resize(num); + for (int i = 0; i < num; ++i) + { + // Decode predictions into bboxes. + const LabelBBox& loc_preds = all_loc_preds[i]; + LabelBBox& decode_bboxes = all_decode_bboxes[i]; + for (int c = 0; c < num_loc_classes; ++c) + { + int label = share_location ? -1 : c; + if (label == background_label_id) + continue; // Ignore background class. 
+ LabelBBox::const_iterator label_loc_preds = loc_preds.find(label); + if (label_loc_preds == loc_preds.end()) + CV_ErrorNoReturn_(cv::Error::StsError, ("Could not find location predictions for label %d", label)); + DecodeBBoxes(prior_bboxes, prior_variances, + code_type, variance_encoded_in_target, clip, + label_loc_preds->second, decode_bboxes[label]); + } } - } } - // Get prior bounding boxes from prior_data. + // Get prior bounding boxes from prior_data // prior_data: 1 x 2 x num_priors * 4 x 1 blob. // num_priors: number of priors. // prior_bboxes: stores all the prior bboxes in the format of caffe::NormalizedBBox. // prior_variances: stores all the variances needed by prior bboxes. - void GetPriorBBoxes(const float* priorData, const int& numPriors, - std::vector* priorBBoxes, - std::vector >* priorVariances) + static void GetPriorBBoxes(const float* priorData, const int& numPriors, + std::vector& priorBBoxes, + std::vector >& priorVariances) { - priorBBoxes->clear(); - priorVariances->clear(); + priorBBoxes.clear(); priorBBoxes.resize(numPriors); + priorVariances.clear(); priorVariances.resize(numPriors); for (int i = 0; i < numPriors; ++i) { int startIdx = i * 4; - caffe::NormalizedBBox bbox; + caffe::NormalizedBBox& bbox = priorBBoxes[i]; bbox.set_xmin(priorData[startIdx]); bbox.set_ymin(priorData[startIdx + 1]); bbox.set_xmax(priorData[startIdx + 2]); bbox.set_ymax(priorData[startIdx + 3]); - float bboxSize = BBoxSize(bbox); - bbox.set_size(bboxSize); - priorBBoxes->push_back(bbox); + bbox.set_size(BBoxSize(bbox)); } for (int i = 0; i < numPriors; ++i) { int startIdx = (numPriors + i) * 4; - std::vector var; + // not needed here: priorVariances[i].clear(); for (int j = 0; j < 4; ++j) { - var.push_back(priorData[startIdx + j]); + priorVariances[i].push_back(priorData[startIdx + j]); } - priorVariances->push_back(var); } } - // Scale the caffe::NormalizedBBox w.r.t. height and width. - void ScaleBBox(const caffe::NormalizedBBox& bbox, - const int height, const int width, - caffe::NormalizedBBox* scaleBBox) - { - scaleBBox->set_xmin(bbox.xmin() * width); - scaleBBox->set_ymin(bbox.ymin() * height); - scaleBBox->set_xmax(bbox.xmax() * width); - scaleBBox->set_ymax(bbox.ymax() * height); - scaleBBox->clear_size(); - bool normalized = !(width > 1 || height > 1); - scaleBBox->set_size(BBoxSize(*scaleBBox, normalized)); - scaleBBox->set_difficult(bbox.difficult()); - } - // Get location predictions from loc_data. // loc_data: num x num_preds_per_class * num_loc_classes * 4 blob. // num: the number of images. @@ -603,19 +542,19 @@ public: // share_location: if true, all classes share the same location prediction. // loc_preds: stores the location prediction, where each item contains // location prediction for an image. 
- void GetLocPredictions(const float* locData, const int num, + static void GetLocPredictions(const float* locData, const int num, const int numPredsPerClass, const int numLocClasses, - const bool shareLocation, std::vector* locPreds) + const bool shareLocation, std::vector& locPreds) { - locPreds->clear(); + locPreds.clear(); if (shareLocation) { CV_Assert(numLocClasses == 1); } - locPreds->resize(num); - for (int i = 0; i < num; ++i) + locPreds.resize(num); + for (int i = 0; i < num; ++i, locData += numPredsPerClass * numLocClasses * 4) { - LabelBBox& labelBBox = (*locPreds)[i]; + LabelBBox& labelBBox = locPreds[i]; for (int p = 0; p < numPredsPerClass; ++p) { int startIdx = p * numLocClasses * 4; @@ -626,13 +565,13 @@ public: { labelBBox[label].resize(numPredsPerClass); } - labelBBox[label][p].set_xmin(locData[startIdx + c * 4]); - labelBBox[label][p].set_ymin(locData[startIdx + c * 4 + 1]); - labelBBox[label][p].set_xmax(locData[startIdx + c * 4 + 2]); - labelBBox[label][p].set_ymax(locData[startIdx + c * 4 + 3]); + caffe::NormalizedBBox& bbox = labelBBox[label][p]; + bbox.set_xmin(locData[startIdx + c * 4]); + bbox.set_ymin(locData[startIdx + c * 4 + 1]); + bbox.set_xmax(locData[startIdx + c * 4 + 2]); + bbox.set_ymax(locData[startIdx + c * 4 + 3]); } } - locData += numPredsPerClass * numLocClasses * 4; } } @@ -643,25 +582,24 @@ public: // num_classes: number of classes. // conf_preds: stores the confidence prediction, where each item contains // confidence prediction for an image. - void GetConfidenceScores(const float* confData, const int num, + static void GetConfidenceScores(const float* confData, const int num, const int numPredsPerClass, const int numClasses, - std::vector > >* confPreds) + std::vector > >& confPreds) { - confPreds->clear(); - confPreds->resize(num); - for (int i = 0; i < num; ++i) + confPreds.clear(); confPreds.resize(num); + for (int i = 0; i < num; ++i, confData += numPredsPerClass * numClasses) { - std::vector >& labelScores = (*confPreds)[i]; + std::vector >& labelScores = confPreds[i]; labelScores.resize(numClasses); - for (int p = 0; p < numPredsPerClass; ++p) + for (int c = 0; c < numClasses; ++c) { - int startIdx = p * numClasses; - for (int c = 0; c < numClasses; ++c) + std::vector& classLabelScores = labelScores[c]; + classLabelScores.resize(numPredsPerClass); + for (int p = 0; p < numPredsPerClass; ++p) { - labelScores[c].push_back(confData[startIdx + c]); + classLabelScores[p] = confData[p * numClasses + c]; } } - confData += numPredsPerClass * numClasses; } } @@ -674,40 +612,35 @@ public: // nms_threshold: a threshold used in non maximum suppression. // top_k: if not -1, keep at most top_k picked indices. // indices: the kept indices of bboxes after nms. - void ApplyNMSFast(const std::vector& bboxes, + static void ApplyNMSFast(const std::vector& bboxes, const std::vector& scores, const float score_threshold, const float nms_threshold, const float eta, const int top_k, - std::vector* indices) { - // Sanity check. - CV_Assert(bboxes.size() == scores.size()); + std::vector& indices) + { + CV_Assert(bboxes.size() == scores.size()); - // Get top_k scores (with corresponding indices). - std::vector > score_index_vec; - GetMaxScoreIndex(scores, score_threshold, top_k, &score_index_vec); + // Get top_k scores (with corresponding indices). + std::vector > score_index_vec; + GetMaxScoreIndex(scores, score_threshold, top_k, score_index_vec); - // Do nms. 
- float adaptive_threshold = nms_threshold; - indices->clear(); - while (score_index_vec.size() != 0) { - const int idx = score_index_vec.front().second; - bool keep = true; - for (int k = 0; k < indices->size(); ++k) { - if (keep) { - const int kept_idx = (*indices)[k]; - float overlap = JaccardOverlap(bboxes[idx], bboxes[kept_idx]); - keep = overlap <= adaptive_threshold; - } else { - break; - } + // Do nms. + float adaptive_threshold = nms_threshold; + indices.clear(); + while (score_index_vec.size() != 0) { + const int idx = score_index_vec.front().second; + bool keep = true; + for (int k = 0; k < (int)indices.size() && keep; ++k) { + const int kept_idx = indices[k]; + float overlap = JaccardOverlap(bboxes[idx], bboxes[kept_idx]); + keep = overlap <= adaptive_threshold; + } + if (keep) + indices.push_back(idx); + score_index_vec.erase(score_index_vec.begin()); + if (keep && eta < 1 && adaptive_threshold > 0.5) { + adaptive_threshold *= eta; + } } - if (keep) { - indices->push_back(idx); - } - score_index_vec.erase(score_index_vec.begin()); - if (keep && eta < 1 && adaptive_threshold > 0.5) { - adaptive_threshold *= eta; - } - } } // Get max scores with corresponding indices. @@ -715,74 +648,66 @@ public: // threshold: only consider scores higher than the threshold. // top_k: if -1, keep all; otherwise, keep at most top_k. // score_index_vec: store the sorted (score, index) pair. - void GetMaxScoreIndex(const std::vector& scores, const float threshold,const int top_k, - std::vector >* score_index_vec) + static void GetMaxScoreIndex(const std::vector& scores, const float threshold, const int top_k, + std::vector >& score_index_vec) { + CV_DbgAssert(score_index_vec.empty()); // Generate index score pairs. for (size_t i = 0; i < scores.size(); ++i) { if (scores[i] > threshold) { - score_index_vec->push_back(std::make_pair(scores[i], i)); + score_index_vec.push_back(std::make_pair(scores[i], i)); } } // Sort the score pair according to the scores in descending order - std::stable_sort(score_index_vec->begin(), score_index_vec->end(), + std::stable_sort(score_index_vec.begin(), score_index_vec.end(), util::SortScorePairDescend); // Keep top_k scores if needed. - if (top_k > -1 && top_k < (int)score_index_vec->size()) + if (top_k > -1 && top_k < (int)score_index_vec.size()) { - score_index_vec->resize(top_k); - } - } - - // Compute the intersection between two bboxes. - void IntersectBBox(const caffe::NormalizedBBox& bbox1, - const caffe::NormalizedBBox& bbox2, - caffe::NormalizedBBox* intersect_bbox) { - if (bbox2.xmin() > bbox1.xmax() || bbox2.xmax() < bbox1.xmin() || - bbox2.ymin() > bbox1.ymax() || bbox2.ymax() < bbox1.ymin()) - { - // Return [0, 0, 0, 0] if there is no intersection. - intersect_bbox->set_xmin(0); - intersect_bbox->set_ymin(0); - intersect_bbox->set_xmax(0); - intersect_bbox->set_ymax(0); - } - else - { - intersect_bbox->set_xmin(std::max(bbox1.xmin(), bbox2.xmin())); - intersect_bbox->set_ymin(std::max(bbox1.ymin(), bbox2.ymin())); - intersect_bbox->set_xmax(std::min(bbox1.xmax(), bbox2.xmax())); - intersect_bbox->set_ymax(std::min(bbox1.ymax(), bbox2.ymax())); + score_index_vec.resize(top_k); } } // Compute the jaccard (intersection over union IoU) overlap between two bboxes. 
- float JaccardOverlap(const caffe::NormalizedBBox& bbox1, - const caffe::NormalizedBBox& bbox2, - const bool normalized=true) + template + static float JaccardOverlap(const caffe::NormalizedBBox& bbox1, + const caffe::NormalizedBBox& bbox2) { caffe::NormalizedBBox intersect_bbox; - IntersectBBox(bbox1, bbox2, &intersect_bbox); - float intersect_width, intersect_height; - if (normalized) + if (bbox2.xmin() > bbox1.xmax() || bbox2.xmax() < bbox1.xmin() || + bbox2.ymin() > bbox1.ymax() || bbox2.ymax() < bbox1.ymin()) { - intersect_width = intersect_bbox.xmax() - intersect_bbox.xmin(); - intersect_height = intersect_bbox.ymax() - intersect_bbox.ymin(); + // Return [0, 0, 0, 0] if there is no intersection. + intersect_bbox.set_xmin(0); + intersect_bbox.set_ymin(0); + intersect_bbox.set_xmax(0); + intersect_bbox.set_ymax(0); } else { - intersect_width = intersect_bbox.xmax() - intersect_bbox.xmin() + 1; - intersect_height = intersect_bbox.ymax() - intersect_bbox.ymin() + 1; + intersect_bbox.set_xmin(std::max(bbox1.xmin(), bbox2.xmin())); + intersect_bbox.set_ymin(std::max(bbox1.ymin(), bbox2.ymin())); + intersect_bbox.set_xmax(std::min(bbox1.xmax(), bbox2.xmax())); + intersect_bbox.set_ymax(std::min(bbox1.ymax(), bbox2.ymax())); } + + float intersect_width, intersect_height; + intersect_width = intersect_bbox.xmax() - intersect_bbox.xmin(); + intersect_height = intersect_bbox.ymax() - intersect_bbox.ymin(); if (intersect_width > 0 && intersect_height > 0) { + if (!normalized) + { + intersect_width++; + intersect_height++; + } float intersect_size = intersect_width * intersect_height; - float bbox1_size = BBoxSize(bbox1); - float bbox2_size = BBoxSize(bbox2); + float bbox1_size = BBoxSize(bbox1); + float bbox2_size = BBoxSize(bbox2); return intersect_size / (bbox1_size + bbox2_size - intersect_size); } else From 4784c7be5fc313e173f7fff83b85e62a002768ed Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Thu, 13 Jul 2017 18:42:36 +0300 Subject: [PATCH 3/3] dnn: cleanup dispatched code, fix SIMD128 types --- modules/dnn/CMakeLists.txt | 2 + modules/dnn/src/layers/convolution_layer.cpp | 8 +-- .../dnn/src/layers/fully_connected_layer.cpp | 12 ++--- modules/dnn/src/layers/layers_common.avx.cpp | 54 ------------------- modules/dnn/src/layers/layers_common.avx2.cpp | 51 ------------------ modules/dnn/src/layers/layers_common.hpp | 30 ++--------- modules/dnn/src/layers/layers_common.simd.hpp | 48 +++++++++++------ 7 files changed, 49 insertions(+), 156 deletions(-) delete mode 100644 modules/dnn/src/layers/layers_common.avx.cpp delete mode 100644 modules/dnn/src/layers/layers_common.avx2.cpp diff --git a/modules/dnn/CMakeLists.txt b/modules/dnn/CMakeLists.txt index e0d3b959d0..8b335ad67c 100644 --- a/modules/dnn/CMakeLists.txt +++ b/modules/dnn/CMakeLists.txt @@ -9,6 +9,8 @@ endif() set(the_description "Deep neural network module. 
It allows to load models from different frameworks and to make forward pass") +ocv_add_dispatched_file("layers/layers_common" AVX AVX2) + ocv_add_module(dnn opencv_core opencv_imgproc WRAP python matlab java) ocv_warnings_disable(CMAKE_CXX_FLAGS -Wno-shadow -Wno-parentheses -Wmaybe-uninitialized -Wsign-promo -Wmissing-declarations -Wmissing-prototypes diff --git a/modules/dnn/src/layers/convolution_layer.cpp b/modules/dnn/src/layers/convolution_layer.cpp index 12e38c576b..6e09c8ca98 100644 --- a/modules/dnn/src/layers/convolution_layer.cpp +++ b/modules/dnn/src/layers/convolution_layer.cpp @@ -506,13 +506,13 @@ public: int bsz = ofs1 - ofs0; #if CV_TRY_AVX2 if(useAVX2) - fastConv_avx2(wptr, wstep, biasptr, rowbuf0, data_out0 + ofs0, + opt_AVX2::fastConv(wptr, wstep, biasptr, rowbuf0, data_out0 + ofs0, outShape, bsz, vsz, vsz_a, relu, cn0 == 0); else #endif #if CV_TRY_AVX if(useAVX) - fastConv_avx(wptr, wstep, biasptr, rowbuf0, data_out0 + ofs0, + opt_AVX::fastConv(wptr, wstep, biasptr, rowbuf0, data_out0 + ofs0, outShape, bsz, vsz, vsz_a, relu, cn0 == 0); else #endif @@ -824,12 +824,12 @@ public: #if CV_TRY_AVX2 if( useAVX2 ) - fastGEMM_avx2( aptr, astep, bptr, bstep, cptr, cstep, mmax, kmax, nmax ); + opt_AVX2::fastGEMM( aptr, astep, bptr, bstep, cptr, cstep, mmax, kmax, nmax ); else #endif #if CV_TRY_AVX if( useAVX ) - fastGEMM_avx( aptr, astep, bptr, bstep, cptr, cstep, mmax, kmax, nmax ); + opt_AVX::fastGEMM( aptr, astep, bptr, bstep, cptr, cstep, mmax, kmax, nmax ); else #endif for( m = 0; m < mmax; m += 2 ) diff --git a/modules/dnn/src/layers/fully_connected_layer.cpp b/modules/dnn/src/layers/fully_connected_layer.cpp index f27f39c660..9bec3b086f 100644 --- a/modules/dnn/src/layers/fully_connected_layer.cpp +++ b/modules/dnn/src/layers/fully_connected_layer.cpp @@ -177,12 +177,12 @@ public: #if CV_TRY_AVX2 if( useAVX2 ) - fastGEMM1T_avx2( sptr, wptr, wstep, biasptr, dptr, nw, vecsize); + opt_AVX2::fastGEMM1T( sptr, wptr, wstep, biasptr, dptr, nw, vecsize); else #endif #if CV_TRY_AVX if( useAVX ) - fastGEMM1T_avx( sptr, wptr, wstep, biasptr, dptr, nw, vecsize); + opt_AVX::fastGEMM1T( sptr, wptr, wstep, biasptr, dptr, nw, vecsize); else #endif { @@ -191,19 +191,19 @@ public: #if CV_SIMD128 for( ; i <= nw - 4; i += 4, wptr += 4*wstep ) { - vfloat32x4 vs0 = v_setall_f32(0.f), vs1 = v_setall_f32(0.f); - vfloat32x4 vs2 = v_setall_f32(0.f), vs3 = v_setall_f32(0.f); + v_float32x4 vs0 = v_setall_f32(0.f), vs1 = v_setall_f32(0.f); + v_float32x4 vs2 = v_setall_f32(0.f), vs3 = v_setall_f32(0.f); for( k = 0; k < vecsize; k += 4 ) { - vfloat32x4 v = v_load_aligned(sptr + k); + v_float32x4 v = v_load_aligned(sptr + k); vs0 += v*v_load_aligned(wptr + k); vs1 += v*v_load_aligned(wptr + wstep + k); vs2 += v*v_load_aligned(wptr + wstep*2 + k); vs3 += v*v_load_aligned(wptr + wstep*3 + k); } - vfloat32x4 s = v_reduce_sum4(vs0, vs1, vs2, vs3); + v_float32x4 s = v_reduce_sum4(vs0, vs1, vs2, vs3); s += v_load(biasptr + i); v_store(dptr + i, s); } diff --git a/modules/dnn/src/layers/layers_common.avx.cpp b/modules/dnn/src/layers/layers_common.avx.cpp deleted file mode 100644 index 4e0c034eae..0000000000 --- a/modules/dnn/src/layers/layers_common.avx.cpp +++ /dev/null @@ -1,54 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. 
-// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2013, OpenCV Foundation, all rights reserved. -// Copyright (C) 2017, Intel Corporation, all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - -#include "precomp.hpp" -#include "layers_common.hpp" -#include "opencv2/core/hal/intrin.hpp" - -#define fastConv_some_avx fastConv_avx -#define fastGEMM1T_some_avx fastGEMM1T_avx -#define fastGEMM_some_avx fastGEMM_avx - -#undef _mm256_fmadd_ps -#define _mm256_fmadd_ps(a, b, c) _mm256_add_ps(c, _mm256_mul_ps(a, b)) - -#include "layers_common.simd.hpp" diff --git a/modules/dnn/src/layers/layers_common.avx2.cpp b/modules/dnn/src/layers/layers_common.avx2.cpp deleted file mode 100644 index ef8108cc25..0000000000 --- a/modules/dnn/src/layers/layers_common.avx2.cpp +++ /dev/null @@ -1,51 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2013, OpenCV Foundation, all rights reserved. -// Copyright (C) 2017, Intel Corporation, all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. 
-// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - -#include "precomp.hpp" -#include "layers_common.hpp" -#include "opencv2/core/hal/intrin.hpp" - -#define fastConv_some_avx fastConv_avx2 -#define fastGEMM1T_some_avx fastGEMM1T_avx2 -#define fastGEMM_some_avx fastGEMM_avx2 - -#include "layers_common.simd.hpp" diff --git a/modules/dnn/src/layers/layers_common.hpp b/modules/dnn/src/layers/layers_common.hpp index bbab2756f5..f34646af14 100644 --- a/modules/dnn/src/layers/layers_common.hpp +++ b/modules/dnn/src/layers/layers_common.hpp @@ -45,6 +45,10 @@ #include #include +// dispatched AVX/AVX2 optimizations +#include "layers/layers_common.simd.hpp" +#include "layers/layers_common.simd_declarations.hpp" + namespace cv { namespace dnn @@ -64,32 +68,6 @@ void getConvPoolPaddings(const Size& inp, const Size& out, const Size &kernel, const Size &stride, const String &padMode, Size &pad); -#if CV_TRY_AVX -void fastConv_avx(const float* weights, size_t wstep, const float* bias, - const float* rowbuf, float* output, const int* outShape, - int blockSize, int vecsize, int vecsize_aligned, - const float* relu, bool initOutput); -void fastGEMM1T_avx( const float* vec, const float* weights, - size_t wstep, const float* bias, - float* dst, int nvecs, int vecsize ); -void fastGEMM_avx( const float* aptr, size_t astep, const float* bptr0, - size_t bstep, float* cptr, size_t cstep, - int ma, int na, int nb ); -#endif - -#if CV_TRY_AVX2 -void fastConv_avx2(const float* weights, size_t wstep, const float* bias, - const float* rowbuf, float* output, const int* outShape, - int blockSize, int vecsize, int vecsize_aligned, - const float* relu, bool initOutput); -void fastGEMM1T_avx2( const float* vec, const float* weights, - size_t wstep, const float* bias, - float* dst, int nvecs, int vecsize ); -void fastGEMM_avx2( const float* aptr, size_t astep, const float* bptr0, - size_t bstep, float* cptr, size_t cstep, - int ma, int na, int nb ); -#endif - } } diff --git a/modules/dnn/src/layers/layers_common.simd.hpp b/modules/dnn/src/layers/layers_common.simd.hpp index 1110ed0933..9890587fde 100644 --- a/modules/dnn/src/layers/layers_common.simd.hpp +++ b/modules/dnn/src/layers/layers_common.simd.hpp @@ -40,16 +40,34 @@ // //M*/ -#ifndef __DNN_LAYERS_COMMON_SIMD_HPP__ -#define __DNN_LAYERS_COMMON_SIMD_HPP__ +#include "opencv2/core/hal/intrin.hpp" namespace cv { namespace dnn { 
+CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN -void fastConv_some_avx( const float* weights, size_t wstep, const float* bias, - const float* rowbuf, float* output, const int* outShape, - int blockSize, int vecsize, int vecsize_aligned, - const float* relu, bool initOutput ) +void fastConv( const float* weights, size_t wstep, const float* bias, + const float* rowbuf, float* output, const int* outShape, + int blockSize, int vecsize, int vecsize_aligned, + const float* relu, bool initOutput ); +void fastGEMM1T( const float* vec, const float* weights, + size_t wstep, const float* bias, + float* dst, int nvecs, int vecsize ); +void fastGEMM( const float* aptr, size_t astep, const float* bptr, + size_t bstep, float* cptr, size_t cstep, + int ma, int na, int nb ); + +#if !defined(CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY) && CV_AVX + +#if !CV_FMA // AVX workaround +#undef _mm256_fmadd_ps +#define _mm256_fmadd_ps(a, b, c) _mm256_add_ps(c, _mm256_mul_ps(a, b)) +#endif + +void fastConv( const float* weights, size_t wstep, const float* bias, + const float* rowbuf, float* output, const int* outShape, + int blockSize, int vecsize, int vecsize_aligned, + const float* relu, bool initOutput ) { int outCn = outShape[1]; size_t outPlaneSize = outShape[2]*outShape[3]; @@ -214,9 +232,9 @@ void fastConv_some_avx( const float* weights, size_t wstep, const float* bias, } // dst = vec * weights^t + bias -void fastGEMM1T_some_avx( const float* vec, const float* weights, - size_t wstep, const float* bias, - float* dst, int nvecs, int vecsize ) +void fastGEMM1T( const float* vec, const float* weights, + size_t wstep, const float* bias, + float* dst, int nvecs, int vecsize ) { int i = 0; @@ -276,9 +294,9 @@ void fastGEMM1T_some_avx( const float* vec, const float* weights, _mm256_zeroupper(); } -void fastGEMM_some_avx( const float* aptr, size_t astep, const float* bptr, - size_t bstep, float* cptr, size_t cstep, - int ma, int na, int nb ) +void fastGEMM( const float* aptr, size_t astep, const float* bptr, + size_t bstep, float* cptr, size_t cstep, + int ma, int na, int nb ) { int n = 0; for( ; n <= nb - 16; n += 16 ) @@ -346,7 +364,7 @@ void fastGEMM_some_avx( const float* aptr, size_t astep, const float* bptr, _mm256_zeroupper(); } -} -} +#endif // CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY -#endif +CV_CPU_OPTIMIZATION_NAMESPACE_END +}} // namespace
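
For reference, the greedy non-maximum suppression scheme that the refactored ApplyNMSFast() in detection_output_layer.cpp implements (filter by score, stable-sort descending, keep a candidate only if its IoU with every already-kept box stays below an adaptive threshold that shrinks by eta) can be sketched in standalone form as below. This is a simplified illustration under assumed types: Box, iou() and nmsFast() are stand-ins invented for the sketch, not OpenCV or Caffe API; the real layer operates on caffe::NormalizedBBox and the layer members shown in the patch.

    #include <algorithm>
    #include <utility>
    #include <vector>

    struct Box { float xmin, ymin, xmax, ymax; };  // assumed minimal bbox type

    // Intersection-over-union of two axis-aligned boxes (normalized coordinates).
    static float iou(const Box& a, const Box& b)
    {
        float w = std::min(a.xmax, b.xmax) - std::max(a.xmin, b.xmin);
        float h = std::min(a.ymax, b.ymax) - std::max(a.ymin, b.ymin);
        if (w <= 0.f || h <= 0.f)
            return 0.f;
        float inter = w * h;
        float areaA = (a.xmax - a.xmin) * (a.ymax - a.ymin);
        float areaB = (b.xmax - b.xmin) * (b.ymax - b.ymin);
        return inter / (areaA + areaB - inter);
    }

    // Greedy NMS: returns indices of the kept boxes, highest scores first.
    static std::vector<int> nmsFast(const std::vector<Box>& boxes,
                                    const std::vector<float>& scores,
                                    float scoreThreshold, float nmsThreshold,
                                    float eta, int topK)
    {
        // Collect (score, index) pairs above the score threshold, sort descending.
        std::vector<std::pair<float, int> > order;
        for (size_t i = 0; i < scores.size(); ++i)
            if (scores[i] > scoreThreshold)
                order.push_back(std::make_pair(scores[i], (int)i));
        std::stable_sort(order.begin(), order.end(),
                         [](const std::pair<float, int>& p1, const std::pair<float, int>& p2)
                         { return p1.first > p2.first; });
        if (topK > -1 && topK < (int)order.size())
            order.resize(topK);

        std::vector<int> kept;
        float adaptiveThreshold = nmsThreshold;
        for (size_t i = 0; i < order.size(); ++i)
        {
            int idx = order[i].second;
            bool keep = true;
            for (size_t k = 0; k < kept.size() && keep; ++k)
                keep = iou(boxes[idx], boxes[kept[k]]) <= adaptiveThreshold;
            if (keep)
            {
                kept.push_back(idx);
                // As in the layer code, the overlap threshold decays only while it stays above 0.5.
                if (eta < 1.f && adaptiveThreshold > 0.5f)
                    adaptiveThreshold *= eta;
            }
        }
        return kept;
    }

The per-class loop in processDetections_() then simply calls this routine once per non-background class and counts the surviving indices, which is what feeds the keep-top-K selection shown in the second patch.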