Merge remote-tracking branch 'upstream/3.4' into merge-3.4

Alexander Alekhin, 2020-05-28 23:35:11 +00:00
commit c3e8a82c9c
25 changed files with 837 additions and 73 deletions


@@ -28,7 +28,7 @@
#3 & \mbox{#4}\\
#5 & \mbox{#6}\\
\end{array} \right.}
-\newcommand{\forkthree}[8]{
+\newcommand{\forkfour}[8]{
\left\{
\begin{array}{l l}
#1 & \mbox{#2}\\


@@ -47,6 +47,11 @@
#include "opencv2/core.hpp"

#if EIGEN_WORLD_VERSION == 3 && EIGEN_MAJOR_VERSION >= 3
#include <unsupported/Eigen/CXX11/Tensor>
#define OPENCV_EIGEN_TENSOR_SUPPORT
#endif // EIGEN_WORLD_VERSION == 3 && EIGEN_MAJOR_VERSION >= 3

#if defined _MSC_VER && _MSC_VER >= 1200
#pragma warning( disable: 4714 ) //__forceinline is not inlined
#pragma warning( disable: 4127 ) //conditional expression is constant
@@ -59,6 +64,107 @@ namespace cv
//! @addtogroup core_eigen
//! @{

#ifdef OPENCV_EIGEN_TENSOR_SUPPORT
/** @brief Converts an Eigen::Tensor to a cv::Mat.
The method converts an Eigen::Tensor with shape (H x W x C) to a cv::Mat where:
H = number of rows
W = number of columns
C = number of channels
Usage:
\code
Eigen::Tensor<float, 3, Eigen::RowMajor> a_tensor(...);
// populate tensor with values
Mat a_mat;
eigen2cv(a_tensor, a_mat);
\endcode
*/
template <typename _Tp, int _layout> static inline
void eigen2cv( const Eigen::Tensor<_Tp, 3, _layout> &src, OutputArray dst )
{
if( !(_layout & Eigen::RowMajorBit) )
{
const std::array<int, 3> shuffle{2, 1, 0};
Eigen::Tensor<_Tp, 3, !_layout> row_major_tensor = src.swap_layout().shuffle(shuffle);
Mat _src(src.dimension(0), src.dimension(1), CV_MAKETYPE(DataType<_Tp>::type, src.dimension(2)), row_major_tensor.data());
_src.copyTo(dst);
}
else
{
Mat _src(src.dimension(0), src.dimension(1), CV_MAKETYPE(DataType<_Tp>::type, src.dimension(2)), (void *)src.data());
_src.copyTo(dst);
}
}
/** @brief Converts a cv::Mat to an Eigen::Tensor.
The method converts a cv::Mat to an Eigen Tensor with shape (H x W x C) where:
H = number of rows
W = number of columns
C = number of channels
Usage:
\code
Mat a_mat(...);
// populate Mat with values
Eigen::Tensor<float, 3, Eigen::RowMajor> a_tensor(...);
cv2eigen(a_mat, a_tensor);
\endcode
*/
template <typename _Tp, int _layout> static inline
void cv2eigen( const Mat &src, Eigen::Tensor<_Tp, 3, _layout> &dst )
{
if( !(_layout & Eigen::RowMajorBit) )
{
Eigen::Tensor<_Tp, 3, !_layout> row_major_tensor(src.rows, src.cols, src.channels());
Mat _dst(src.rows, src.cols, CV_MAKETYPE(DataType<_Tp>::type, src.channels()), row_major_tensor.data());
if (src.type() == _dst.type())
src.copyTo(_dst);
else
src.convertTo(_dst, _dst.type());
const std::array<int, 3> shuffle{2, 1, 0};
dst = row_major_tensor.swap_layout().shuffle(shuffle);
}
else
{
dst.resize(src.rows, src.cols, src.channels());
Mat _dst(src.rows, src.cols, CV_MAKETYPE(DataType<_Tp>::type, src.channels()), dst.data());
if (src.type() == _dst.type())
src.copyTo(_dst);
else
src.convertTo(_dst, _dst.type());
}
}
/** @brief Maps cv::Mat data to an Eigen::TensorMap.
The method wraps an existing Mat data array with an Eigen TensorMap of shape (H x W x C) where:
H = number of rows
W = number of columns
C = number of channels
Explicit instantiation of the return type is required.
@note Caller should be aware of the lifetime of the cv::Mat instance and take appropriate safety measures.
The cv::Mat instance will retain ownership of the data and the Eigen::TensorMap will lose access when the cv::Mat data is deallocated.
The example below initializes a cv::Mat and produces an Eigen::TensorMap:
\code
float arr[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11};
Mat a_mat(2, 2, CV_32FC3, arr);
Eigen::TensorMap<Eigen::Tensor<float, 3, Eigen::RowMajor>> a_tensormap = cv2eigen_tensormap<float>(a_mat);
\endcode
*/
template <typename _Tp> static inline
Eigen::TensorMap<Eigen::Tensor<_Tp, 3, Eigen::RowMajor>> cv2eigen_tensormap(const cv::InputArray &src)
{
Mat mat = src.getMat();
CV_CheckTypeEQ(mat.type(), CV_MAKETYPE(traits::Type<_Tp>::value, mat.channels()), "");
return Eigen::TensorMap<Eigen::Tensor<_Tp, 3, Eigen::RowMajor>>((_Tp *)mat.data, mat.rows, mat.cols, mat.channels());
}
#endif // OPENCV_EIGEN_TENSOR_SUPPORT
template<typename _Tp, int _rows, int _cols, int _options, int _maxRows, int _maxCols> static inline
void eigen2cv( const Eigen::Matrix<_Tp, _rows, _cols, _options, _maxRows, _maxCols>& src, OutputArray dst )
{
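For illustration (not part of this commit), a minimal round-trip sketch of the new tensor conversions; it assumes OpenCV was built against Eigen >= 3.3 so that OPENCV_EIGEN_TENSOR_SUPPORT is defined, and that an Eigen header is included before opencv2/core/eigen.hpp:

    #include <Eigen/Dense>
    #include <unsupported/Eigen/CXX11/Tensor>
    #include <opencv2/core.hpp>
    #include <opencv2/core/eigen.hpp>

    int main()
    {
        cv::Mat mat(2, 2, CV_32FC3, cv::Scalar(1, 2, 3));

        // Mat (H x W x C) -> owning Eigen tensor, and back again.
        Eigen::Tensor<float, 3, Eigen::RowMajor> tensor;
        cv::cv2eigen(mat, tensor);
        cv::Mat back;
        cv::eigen2cv(tensor, back);

        // Zero-copy view over the same data; valid only while `mat` owns it.
        Eigen::TensorMap<Eigen::Tensor<float, 3, Eigen::RowMajor>> view =
            cv::cv2eigen_tensormap<float>(mat);
        CV_Assert(view(0, 0, 2) == mat.at<cv::Vec3f>(0, 0)[2]);
        return 0;
    }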


@@ -1888,7 +1888,7 @@ inline size_t parseOption(const std::string &value)
}
cv::String valueStr = value.substr(0, pos);
cv::String suffixStr = value.substr(pos, value.length() - pos);
-int v = atoi(valueStr.c_str());
+size_t v = (size_t)std::stoull(valueStr);
if (suffixStr.length() == 0)
return v;
else if (suffixStr == "MB" || suffixStr == "Mb" || suffixStr == "mb")
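The atoi/stoull change above matters for option values of 2 GiB and larger, where int overflows. A standalone sketch (not part of this commit) of the failure mode:

    #include <cstdlib>
    #include <string>
    #include <iostream>

    int main()
    {
        std::string value = "3221225472";           // 3 GiB, larger than INT_MAX
        int narrow = atoi(value.c_str());           // overflows int: garbage result
        size_t wide = (size_t)std::stoull(value);   // 3221225472, as intended
        std::cout << narrow << " vs " << wide << std::endl;
        return 0;
    }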


@@ -2074,6 +2074,86 @@ TEST(Core_Eigen, eigen2cv_check_Mat_type)
}
#endif // HAVE_EIGEN

#ifdef OPENCV_EIGEN_TENSOR_SUPPORT
TEST(Core_Eigen, cv2eigen_check_tensor_conversion)
{
Mat A(2, 3, CV_32FC3);
float value = 0;
for(int row=0; row<A.rows; row++)
for(int col=0; col<A.cols; col++)
for(int ch=0; ch<A.channels(); ch++)
A.at<Vec3f>(row,col)[ch] = value++;
Eigen::Tensor<float, 3, Eigen::RowMajor> row_tensor;
cv2eigen(A, row_tensor);
float* mat_ptr = (float*)A.data;
float* tensor_ptr = row_tensor.data();
for (int i=0; i< row_tensor.size(); i++)
ASSERT_FLOAT_EQ(mat_ptr[i], tensor_ptr[i]);
Eigen::Tensor<float, 3, Eigen::ColMajor> col_tensor;
cv2eigen(A, col_tensor);
value = 0;
for(int row=0; row<A.rows; row++)
for(int col=0; col<A.cols; col++)
for(int ch=0; ch<A.channels(); ch++)
ASSERT_FLOAT_EQ(value++, col_tensor(row,col,ch));
}
#endif // OPENCV_EIGEN_TENSOR_SUPPORT
#ifdef OPENCV_EIGEN_TENSOR_SUPPORT
TEST(Core_Eigen, eigen2cv_check_tensor_conversion)
{
Eigen::Tensor<float, 3, Eigen::RowMajor> row_tensor(2,3,3);
Eigen::Tensor<float, 3, Eigen::ColMajor> col_tensor(2,3,3);
float value = 0;
for(int row=0; row<row_tensor.dimension(0); row++)
for(int col=0; col<row_tensor.dimension(1); col++)
for(int ch=0; ch<row_tensor.dimension(2); ch++)
{
row_tensor(row,col,ch) = value;
col_tensor(row,col,ch) = value;
value++;
}
Mat A;
eigen2cv(row_tensor, A);
float* tensor_ptr = row_tensor.data();
float* mat_ptr = (float*)A.data;
for (int i=0; i< row_tensor.size(); i++)
ASSERT_FLOAT_EQ(tensor_ptr[i], mat_ptr[i]);
Mat B;
eigen2cv(col_tensor, B);
value = 0;
for(int row=0; row<B.rows; row++)
for(int col=0; col<B.cols; col++)
for(int ch=0; ch<B.channels(); ch++)
ASSERT_FLOAT_EQ(value++, B.at<Vec3f>(row,col)[ch]);
}
#endif // OPENCV_EIGEN_TENSOR_SUPPORT
#ifdef OPENCV_EIGEN_TENSOR_SUPPORT
TEST(Core_Eigen, cv2eigen_tensormap_check_tensormap_access)
{
float arr[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11};
Mat a_mat(2, 2, CV_32FC3, arr);
Eigen::TensorMap<Eigen::Tensor<float, 3, Eigen::RowMajor>> a_tensor = cv2eigen_tensormap<float>(a_mat);
for(int i=0; i<a_mat.rows; i++) {
for (int j=0; j<a_mat.cols; j++) {
for (int ch=0; ch<a_mat.channels(); ch++) {
ASSERT_FLOAT_EQ(a_mat.at<Vec3f>(i,j)[ch], a_tensor(i,j,ch));
ASSERT_EQ(&a_mat.at<Vec3f>(i,j)[ch], &a_tensor(i,j,ch));
}
}
}
}
#endif // OPENCV_EIGEN_TENSOR_SUPPORT
TEST(Mat, regression_12943) // memory usage: ~4.5 Gb
{
applyTestTag(CV_TEST_TAG_MEMORY_6GB);


@@ -197,9 +197,23 @@ PERF_TEST_P_(DNNTestNetwork, YOLOv3)
if (backend == DNN_BACKEND_HALIDE)
throw SkipTestException("");
Mat sample = imread(findDataFile("dnn/dog416.png"));
cvtColor(sample, sample, COLOR_BGR2RGB);
Mat inp;
-sample.convertTo(inp, CV_32FC3);
+sample.convertTo(inp, CV_32FC3, 1.0f / 255, 0);
-processNet("dnn/yolov3.weights", "dnn/yolov3.cfg", "", inp / 255);
+processNet("dnn/yolov3.weights", "dnn/yolov3.cfg", "", inp);
}
PERF_TEST_P_(DNNTestNetwork, YOLOv4)
{
if (backend == DNN_BACKEND_HALIDE)
throw SkipTestException("");
if (target == DNN_TARGET_MYRIAD)
throw SkipTestException("");
Mat sample = imread(findDataFile("dnn/dog416.png"));
cvtColor(sample, sample, COLOR_BGR2RGB);
Mat inp;
sample.convertTo(inp, CV_32FC3, 1.0f / 255, 0);
processNet("dnn/yolov4.weights", "dnn/yolov4.cfg", "", inp);
}

PERF_TEST_P_(DNNTestNetwork, EAST_text_detection)
@@ -235,6 +249,17 @@ PERF_TEST_P_(DNNTestNetwork, Inception_v2_Faster_RCNN)
Mat(cv::Size(800, 600), CV_32FC3));
}

PERF_TEST_P_(DNNTestNetwork, EfficientDet)
{
if (backend == DNN_BACKEND_HALIDE || target != DNN_TARGET_CPU)
throw SkipTestException("");
Mat sample = imread(findDataFile("dnn/dog416.png"));
resize(sample, sample, Size(512, 512));
Mat inp;
sample.convertTo(inp, CV_32FC3, 1.0/255);
processNet("dnn/efficientdet-d0.pb", "dnn/efficientdet-d0.pbtxt", "", inp);
}
INSTANTIATE_TEST_CASE_P(/*nothing*/, DNNTestNetwork, dnnBackendsAndTargets());

} // namespace


@@ -1141,17 +1141,26 @@ static Ptr<BackendWrapper> wrapMat(int backendId, int targetId, cv::Mat& m)
static int g_networkId = 0;

-struct Net::Impl
+detail::NetImplBase::NetImplBase()
: networkId(CV_XADD(&g_networkId, 1))
, networkDumpCounter(0)
, dumpLevel(DNN_NETWORK_DUMP)
{
// nothing
}
std::string detail::NetImplBase::getDumpFileNameBase()
{
std::string dumpFileNameBase = cv::format("ocv_dnn_net_%05d_%02d", networkId, networkDumpCounter++);
return dumpFileNameBase;
}
struct Net::Impl : public detail::NetImplBase
{
typedef std::map<int, LayerShapes> LayersShapesMap;
typedef std::map<int, LayerData> MapIdToLayerData;

-const int networkId; // network global identifier
-int networkDumpCounter; // dump counter

Impl()
-: networkId(CV_XADD(&g_networkId, 1))
-, networkDumpCounter(0)
{
//allocate fake net input layer
netInputLayer = Ptr<DataLayer>(new DataLayer());
@@ -1366,7 +1375,7 @@ struct Net::Impl
{
CV_TRACE_FUNCTION();

-if (DNN_NETWORK_DUMP > 0 && networkDumpCounter == 0)
+if (dumpLevel && networkDumpCounter == 0)
{
dumpNetworkToFile();
}
@@ -1470,7 +1479,7 @@ struct Net::Impl
netWasAllocated = true;

-if (DNN_NETWORK_DUMP > 0)
+if (dumpLevel)
{
dumpNetworkToFile();
}
@@ -2178,7 +2187,7 @@ struct Net::Impl
}

if (net.empty()) {
-net = Ptr<InfEngineNgraphNet>(new InfEngineNgraphNet());
+net = Ptr<InfEngineNgraphNet>(new InfEngineNgraphNet(*this));
}

if (!fused) {
@@ -2222,7 +2231,7 @@ struct Net::Impl
}
}
else {
-net = Ptr<InfEngineNgraphNet>(new InfEngineNgraphNet());
+net = Ptr<InfEngineNgraphNet>(new InfEngineNgraphNet(*this));
}

if (!fused)
@@ -3406,7 +3415,8 @@ struct Net::Impl
void dumpNetworkToFile()
{
#ifndef OPENCV_DNN_DISABLE_NETWORK_AUTO_DUMP
-String dumpFileName = cv::format("ocv_dnn_net_%05d_%02d.dot", networkId, networkDumpCounter++);
+string dumpFileNameBase = getDumpFileNameBase();
+string dumpFileName = dumpFileNameBase + ".dot";
try
{
string dumpStr = dump();
@@ -3465,7 +3475,7 @@ Net Net::Impl::createNetworkFromModelOptimizer(InferenceEngine::CNNNetwork& ieNet)
{
auto fake_node = std::make_shared<ngraph::op::Parameter>(ngraph::element::f32, ngraph::Shape{});
Ptr<InfEngineNgraphNode> backendNodeNGraph(new InfEngineNgraphNode(fake_node));
-backendNodeNGraph->net = Ptr<InfEngineNgraphNet>(new InfEngineNgraphNet(ieNet));
+backendNodeNGraph->net = Ptr<InfEngineNgraphNet>(new InfEngineNgraphNet(*(cvNet.impl), ieNet));
backendNode = backendNodeNGraph;
}
else


@@ -0,0 +1,34 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#ifndef __OPENCV_DNN_COMMON_HPP__
#define __OPENCV_DNN_COMMON_HPP__
#include <opencv2/dnn.hpp>
namespace cv { namespace dnn {
CV__DNN_INLINE_NS_BEGIN
#define IS_DNN_OPENCL_TARGET(id) (id == DNN_TARGET_OPENCL || id == DNN_TARGET_OPENCL_FP16)
Mutex& getInitializationMutex();
void initializeLayerFactory();
namespace detail {
struct NetImplBase
{
const int networkId; // network global identifier
int networkDumpCounter; // dump counter
int dumpLevel; // level of information dumps (initialized through OPENCV_DNN_NETWORK_DUMP parameter)
NetImplBase();
std::string getDumpFileNameBase();
};
} // namespace detail
CV__DNN_INLINE_NS_END
}} // namespace
#endif // __OPENCV_DNN_COMMON_HPP__


@@ -6,6 +6,9 @@
// Third party copyrights are property of their respective owners.

#include "precomp.hpp"
#include <fstream>
#include "ie_ngraph.hpp"
#include <opencv2/dnn/shape_utils.hpp>
@@ -22,6 +25,8 @@ namespace cv { namespace dnn {
#ifdef HAVE_DNN_NGRAPH

static bool DNN_IE_SERIALIZE = utils::getConfigurationParameterBool("OPENCV_DNN_IE_SERIALIZE", false);

// For networks with input layer which has an empty name, IE generates a name id[some_number].
// OpenCV lets users use an empty input name and to prevent unexpected naming,
// we can use some predefined name.
@@ -295,13 +300,16 @@ void InfEngineNgraphNode::setName(const std::string& name) {
node->set_friendly_name(name);
}

-InfEngineNgraphNet::InfEngineNgraphNet()
+InfEngineNgraphNet::InfEngineNgraphNet(detail::NetImplBase& netImpl)
+: netImpl_(netImpl)
{
hasNetOwner = false;
device_name = "CPU";
}

-InfEngineNgraphNet::InfEngineNgraphNet(InferenceEngine::CNNNetwork& net) : cnn(net)
+InfEngineNgraphNet::InfEngineNgraphNet(detail::NetImplBase& netImpl, InferenceEngine::CNNNetwork& net)
+: netImpl_(netImpl)
+, cnn(net)
{
hasNetOwner = true;
device_name = "CPU";
@@ -440,9 +448,27 @@ void InfEngineNgraphNet::init(Target targetId)
ngraph_function->validate_nodes_and_infer_types();
}
cnn = InferenceEngine::CNNNetwork(ngraph_function);
-#ifdef _DEBUG // TODO
-//cnn.serialize("/tmp/cnn.xml", "/tmp/cnn.bin");
+if (DNN_IE_SERIALIZE)
+{
+#ifndef OPENCV_DNN_DISABLE_NETWORK_AUTO_DUMP
+std::string dumpFileNameBase = netImpl_.getDumpFileNameBase();
+try
+{
+cnn.serialize(dumpFileNameBase + "_ngraph.xml", dumpFileNameBase + "_ngraph.bin");
+}
+catch (const std::exception& e)
+{
+std::ofstream out((dumpFileNameBase + "_ngraph.error").c_str(), std::ios::out);
+out << "Exception: " << e.what() << std::endl;
+}
+catch (...)
+{
+std::ofstream out((dumpFileNameBase + "_ngraph.error").c_str(), std::ios::out);
+out << "Can't dump: unknown exception" << std::endl;
+}
#endif
+}
}

switch (targetId)


@@ -34,8 +34,8 @@ class InfEngineNgraphNode;
class InfEngineNgraphNet
{
public:
-InfEngineNgraphNet();
+InfEngineNgraphNet(detail::NetImplBase& netImpl);
-InfEngineNgraphNet(InferenceEngine::CNNNetwork& net);
+InfEngineNgraphNet(detail::NetImplBase& netImpl, InferenceEngine::CNNNetwork& net);

void addOutput(const std::string& name);
@@ -55,6 +55,8 @@ public:
void reset();

private:
detail::NetImplBase& netImpl_;

void release();
int getNumComponents();
void dfs(std::shared_ptr<ngraph::Node>& node, std::vector<std::shared_ptr<ngraph::Node>>& comp,


@@ -354,7 +354,7 @@ public:
weight = std::make_shared<ngraph::op::Constant>(
ngraph::element::f32, ngraph::Shape(shape), blobs[0].data);
}
-auto mul = std::make_shared<ngraph::op::v1::Multiply>(norm, weight, ngraph::op::AutoBroadcastType::NUMPY);
+auto mul = std::make_shared<ngraph::op::v0::Multiply>(norm, weight, ngraph::op::AutoBroadcastType::NUMPY);
return Ptr<BackendNode>(new InfEngineNgraphNode(mul));
}
#endif // HAVE_DNN_NGRAPH


@@ -73,11 +73,4 @@
#include <opencv2/dnn.hpp>
#include <opencv2/dnn/all_layers.hpp>
+#include "dnn_common.hpp"

-namespace cv { namespace dnn {
-CV__DNN_INLINE_NS_BEGIN
-#define IS_DNN_OPENCL_TARGET(id) (id == DNN_TARGET_OPENCL || id == DNN_TARGET_OPENCL_FP16)
-Mutex& getInitializationMutex();
-void initializeLayerFactory();
-CV__DNN_INLINE_NS_END
-}} // namespace


@@ -725,6 +725,21 @@ private:
bool negativeScales;
};

class ClipByValueSubgraph : public TFSubgraph
{
public:
ClipByValueSubgraph()
{
int input = addNodeToMatch("");
int maxValue = addNodeToMatch("Const");
int minimum = addNodeToMatch("Minimum", input, maxValue);
int minValue = addNodeToMatch("Const");
addNodeToMatch("Maximum", minimum, minValue);
setFusedNode("ClipByValue", input, minValue, maxValue);
}
};
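The pattern matched above is TensorFlow's clip_by_value lowered to Minimum followed by Maximum. For reference (not part of this commit), a one-line restatement of the fused node's semantics:

    #include <algorithm>

    // ClipByValue(x, lo, hi) == Maximum(Minimum(x, hi), lo)
    static inline float clipByValue(float x, float lo, float hi)
    {
        return std::max(std::min(x, hi), lo);
    }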
void simplifySubgraphs(tensorflow::GraphDef& net)
{
std::vector<Ptr<Subgraph> > subgraphs;
@@ -749,6 +764,7 @@ void simplifySubgraphs(tensorflow::GraphDef& net)
subgraphs.push_back(Ptr<Subgraph>(new PReLUSubgraph(false)));
subgraphs.push_back(Ptr<Subgraph>(new FlattenProdSubgraph()));
subgraphs.push_back(Ptr<Subgraph>(new ResizeBilinearSubgraphDown()));
subgraphs.push_back(Ptr<Subgraph>(new ClipByValueSubgraph()));

for (int i = 0; i < net.node_size(); ++i)
{


@@ -1542,22 +1542,32 @@ void TFImporter::populateNet(Net dstNet)
connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
}
-else if (type == "Mul")
+else if (type == "Mul" || type == "RealDiv")
{
-bool haveConst = false;
+int constId = -1;
-for(int ii = 0; !haveConst && ii < layer.input_size(); ++ii)
+for(int ii = 0; ii < layer.input_size(); ++ii)
{
Pin input = parsePin(layer.input(ii));
-haveConst = value_id.find(input.name) != value_id.end();
+if (value_id.find(input.name) != value_id.end())
+{
+constId = ii;
+break;
+}
}
-CV_Assert(!haveConst || layer.input_size() == 2);
+CV_Assert((constId != -1) || (layer.input_size() == 2));
-if (haveConst)
+if (constId != -1)
{
// Multiplication by constant.
CV_Assert(layer.input_size() == 2);
Mat scaleMat = getTensorContent(getConstBlob(layer, value_id));
CV_Assert(scaleMat.type() == CV_32FC1);
+if (type == "RealDiv")
+{
+if (constId == 0)
+CV_Error(Error::StsNotImplemented, "Division of constant over variable");
+scaleMat = 1.0f / scaleMat;
+}

int id;
if (scaleMat.total() == 1) // is a scalar.
@@ -1659,11 +1669,15 @@ void TFImporter::populateNet(Net dstNet)
int id;
if (equalInpShapes || netInputShapes.empty())
{
-layerParams.set("operation", "prod");
+layerParams.set("operation", type == "RealDiv" ? "div" : "prod");
id = dstNet.addLayer(name, "Eltwise", layerParams);
}
else
+{
+if (type == "RealDiv")
+CV_Error(Error::StsNotImplemented, "Division of non equal tensors");
id = dstNet.addLayer(name, "Scale", layerParams);
+}

layer_id[name] = id;
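The RealDiv path above turns division by a constant into multiplication by its reciprocal; dividing a scalar by a cv::Mat is a per-element operation. A standalone sketch (not part of this commit):

    #include <opencv2/core.hpp>
    #include <iostream>

    int main()
    {
        cv::Mat c = (cv::Mat_<float>(1, 3) << 2.f, 4.f, 8.f);
        cv::Mat inv = 1.0f / c;         // per-element reciprocal: [0.5, 0.25, 0.125]
        std::cout << inv << std::endl;  // x / c can then be computed as x.mul(inv)
        return 0;
    }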


@@ -245,6 +245,13 @@ public:
nms_boxes.push_back(box);
nms_confidences.push_back(conf);
nms_classIds.push_back(class_id);
#if 0 // use to update test reference data
std::cout << b << ", " << class_id << ", " << conf << "f, "
<< box.x << "f, " << box.y << "f, "
<< box.x + box.width << "f, " << box.y + box.height << "f,"
<< std::endl;
#endif
}

normAssertDetections(refClassIds[b], refConfidences[b], refBoxes[b], nms_classIds,
@@ -413,6 +420,9 @@ TEST_P(Test_Darknet_nets_async, Accuracy)
std::string prefix = get<0>(GetParam());

if (targetId == DNN_TARGET_MYRIAD && prefix == "yolov4") // NC_OUT_OF_MEMORY
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_VERSION);

if (backendId != DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && backendId != DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
throw SkipTestException("No support for async forward");
@@ -457,7 +467,7 @@ TEST_P(Test_Darknet_nets_async, Accuracy)
}

INSTANTIATE_TEST_CASE_P(/**/, Test_Darknet_nets_async, Combine(
-Values("yolo-voc", "tiny-yolo-voc", "yolov3"),
+Values("yolo-voc", "tiny-yolo-voc", "yolov3", "yolov4"),
dnnBackendsAndTargets()
));
@@ -471,15 +481,21 @@ TEST_P(Test_Darknet_nets, YOLOv3)
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);

// batchId, classId, confidence, left, top, right, bottom
-Mat ref = (Mat_<float>(9, 7) << 0, 7, 0.952983f, 0.614622f, 0.150257f, 0.901369f, 0.289251f, // a truck
-0, 1, 0.987908f, 0.150913f, 0.221933f, 0.742255f, 0.74626f, // a bicycle
-0, 16, 0.998836f, 0.160024f, 0.389964f, 0.417885f, 0.943716f, // a dog (COCO)
-1, 9, 0.384801f, 0.659824f, 0.372389f, 0.673926f, 0.429412f, // a traffic light
-1, 9, 0.733283f, 0.376029f, 0.315694f, 0.401776f, 0.395165f, // a traffic light
-1, 9, 0.785352f, 0.665503f, 0.373543f, 0.688893f, 0.439245f, // a traffic light
-1, 0, 0.980052f, 0.195856f, 0.378454f, 0.258626f, 0.629258f, // a person
-1, 2, 0.989633f, 0.450719f, 0.463353f, 0.496305f, 0.522258f, // a car
-1, 2, 0.997412f, 0.647584f, 0.459939f, 0.821038f, 0.663947f); // a car
+const int N0 = 3;
+const int N1 = 6;
+static const float ref_[/* (N0 + N1) * 7 */] = {
+0, 16, 0.998836f, 0.160024f, 0.389964f, 0.417885f, 0.943716f,
+0, 1, 0.987908f, 0.150913f, 0.221933f, 0.742255f, 0.746261f,
+0, 7, 0.952983f, 0.614621f, 0.150257f, 0.901368f, 0.289251f,
+
+1, 2, 0.997412f, 0.647584f, 0.459939f, 0.821037f, 0.663947f,
+1, 2, 0.989633f, 0.450719f, 0.463353f, 0.496306f, 0.522258f,
+1, 0, 0.980053f, 0.195856f, 0.378454f, 0.258626f, 0.629257f,
+1, 9, 0.785341f, 0.665503f, 0.373543f, 0.688893f, 0.439244f,
+1, 9, 0.733275f, 0.376029f, 0.315694f, 0.401776f, 0.395165f,
+1, 9, 0.384815f, 0.659824f, 0.372389f, 0.673927f, 0.429412f,
+};
+Mat ref(N0 + N1, 7, CV_32FC1, (void*)ref_);

double scoreDiff = 8e-5, iouDiff = 3e-4;
if (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD)
@@ -506,8 +522,8 @@ TEST_P(Test_Darknet_nets, YOLOv3)
#endif
{
SCOPED_TRACE("batch size 1");
-testDarknetModel(config_file, weights_file, ref.rowRange(0, 3), scoreDiff, iouDiff);
+testDarknetModel(config_file, weights_file, ref.rowRange(0, N0), scoreDiff, iouDiff);
}

#if defined(INF_ENGINE_RELEASE)
@@ -529,6 +545,75 @@ TEST_P(Test_Darknet_nets, YOLOv3)
}
}

TEST_P(Test_Darknet_nets, YOLOv4)
{
applyTestTag(CV_TEST_TAG_LONG, (target == DNN_TARGET_CPU ? CV_TEST_TAG_MEMORY_1GB : CV_TEST_TAG_MEMORY_2GB));
#if defined(INF_ENGINE_RELEASE)
if (target == DNN_TARGET_MYRIAD) // NC_OUT_OF_MEMORY
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
#endif
// batchId, classId, confidence, left, top, right, bottom
const int N0 = 3;
const int N1 = 7;
static const float ref_[/* (N0 + N1) * 7 */] = {
0, 16, 0.992194f, 0.172375f, 0.402458f, 0.403918f, 0.932801f,
0, 1, 0.988326f, 0.166708f, 0.228236f, 0.737208f, 0.735803f,
0, 7, 0.94639f, 0.602523f, 0.130399f, 0.901623f, 0.298452f,
1, 2, 0.99761f, 0.646556f, 0.45985f, 0.816041f, 0.659067f,
1, 0, 0.988913f, 0.201726f, 0.360282f, 0.266181f, 0.631728f,
1, 2, 0.98233f, 0.452007f, 0.462217f, 0.495612f, 0.521687f,
1, 9, 0.919195f, 0.374642f, 0.316524f, 0.398126f, 0.393714f,
1, 9, 0.856303f, 0.666842f, 0.372215f, 0.685539f, 0.44141f,
1, 9, 0.313516f, 0.656791f, 0.374734f, 0.671959f, 0.438371f,
1, 9, 0.256625f, 0.940232f, 0.326931f, 0.967586f, 0.374002f,
};
Mat ref(N0 + N1, 7, CV_32FC1, (void*)ref_);
double scoreDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.006 : 8e-5;
double iouDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.042 : 3e-4;
std::string config_file = "yolov4.cfg";
std::string weights_file = "yolov4.weights";
#if defined(INF_ENGINE_RELEASE)
if ((backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 ||
backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && target == DNN_TARGET_MYRIAD &&
getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X)
{
scoreDiff = 0.04;
iouDiff = 0.2;
}
#endif
{
SCOPED_TRACE("batch size 1");
testDarknetModel(config_file, weights_file, ref.rowRange(0, N0), scoreDiff, iouDiff);
}
{
SCOPED_TRACE("batch size 2");
#if defined(INF_ENGINE_RELEASE)
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
{
if (target == DNN_TARGET_OPENCL)
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
else if (target == DNN_TARGET_OPENCL_FP16 && INF_ENGINE_VER_MAJOR_LE(202010000))
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
else if (target == DNN_TARGET_MYRIAD &&
getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X)
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X);
}
#endif
testDarknetModel(config_file, weights_file, ref, scoreDiff, iouDiff);
}
}
INSTANTIATE_TEST_CASE_P(/**/, Test_Darknet_nets, dnnBackendsAndTargets());

TEST_P(Test_Darknet_layers, shortcut)


@@ -1056,6 +1056,11 @@ TEST_P(Test_TensorFlow_layers, tf2_dense)
runTensorFlowNet("tf2_dense");
}

TEST_P(Test_TensorFlow_layers, clip_by_value)
{
runTensorFlowNet("clip_by_value");
}

TEST_P(Test_TensorFlow_layers, tf2_prelu)
{
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
@@ -1205,4 +1210,37 @@ TEST_P(Test_TensorFlow_nets, Mask_RCNN)
expectNoFallbacks(net);
}

TEST_P(Test_TensorFlow_nets, EfficientDet)
{
if (target != DNN_TARGET_CPU)
{
if (target == DNN_TARGET_OPENCL_FP16) applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16);
if (target == DNN_TARGET_OPENCL) applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL);
if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD);
}
checkBackend();
std::string proto = findDataFile("dnn/efficientdet-d0.pbtxt");
std::string model = findDataFile("dnn/efficientdet-d0.pb");
Net net = readNetFromTensorflow(model, proto);
Mat img = imread(findDataFile("dnn/dog416.png"));
Mat blob = blobFromImage(img, 1.0/255, Size(512, 512), Scalar(123.675, 116.28, 103.53));
net.setPreferableBackend(backend);
net.setPreferableTarget(target);
net.setInput(blob);
// Output has shape 1x1xNx7 where N is the number of detections.
// Each detection is a vector of values [id, classId, confidence, left, top, right, bottom]
Mat out = net.forward();
// References are from test for TensorFlow model.
Mat ref = (Mat_<float>(3, 7) << 0, 1, 0.8437444, 0.153996080160141, 0.20534580945968628, 0.7463544607162476, 0.7414066195487976,
0, 17, 0.8245924, 0.16657517850399017, 0.3996818959712982, 0.4111558794975281, 0.9306337833404541,
0, 7, 0.8039304, 0.6118435263633728, 0.13175517320632935, 0.9065558314323425, 0.2943994700908661);
double scoreDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 4e-3 : 1e-5;
double iouDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 2e-3 : 1e-4;
normAssertDetections(ref, out, "", 0.5, scoreDiff, iouDiff);
expectNoFallbacksFromIE(net);
}
}


@@ -1545,7 +1545,7 @@ The function smooths an image using the kernel:
\f[\texttt{K} = \frac{1}{\texttt{ksize.width*ksize.height}} \begin{bmatrix} 1 & 1 & 1 & \cdots & 1 & 1 \\ 1 & 1 & 1 & \cdots & 1 & 1 \\ \hdotsfor{6} \\ 1 & 1 & 1 & \cdots & 1 & 1 \\ \end{bmatrix}\f]

-The call `blur(src, dst, ksize, anchor, borderType)` is equivalent to `boxFilter(src, dst, src.type(),
+The call `blur(src, dst, ksize, anchor, borderType)` is equivalent to `boxFilter(src, dst, src.type(), ksize,
anchor, true, borderType)`.

@param src input image; it can have any number of channels, which are processed independently, but


@@ -949,6 +949,7 @@ void ellipse2Poly( Point2d center, Size2d axes, int angle,
int delta, std::vector<Point2d>& pts )
{
CV_INSTRUMENT_REGION();
CV_Assert(0 < delta && delta <= 180);

float alpha, beta;
int i;
@@ -2378,7 +2379,9 @@ void cv::fillPoly(InputOutputArray img, InputArrayOfArrays pts,
{
CV_INSTRUMENT_REGION();

-int i, ncontours = (int)pts.total();
+bool manyContours = pts.kind() == _InputArray::STD_VECTOR_VECTOR ||
+pts.kind() == _InputArray::STD_VECTOR_MAT;
+int i, ncontours = manyContours ? (int)pts.total() : 1;
if( ncontours == 0 )
return;

AutoBuffer<Point*> _ptsptr(ncontours);
@@ -2388,7 +2391,7 @@ void cv::fillPoly(InputOutputArray img, InputArrayOfArrays pts,
for( i = 0; i < ncontours; i++ )
{
-Mat p = pts.getMat(i);
+Mat p = pts.getMat(manyContours ? i : -1);
CV_Assert(p.checkVector(2, CV_32S) >= 0);
ptsptr[i] = p.ptr<Point>();
npts[i] = p.rows*p.cols*p.channels()/2;


@@ -641,4 +641,42 @@ TEST(Drawing, regression_16308)
EXPECT_NE(0, (int)img.at<uchar>(99, 50));
}

TEST(Drawing, fillpoly_circle)
{
Mat img_c(640, 480, CV_8UC3, Scalar::all(0));
Mat img_fp = img_c.clone(), img_fcp = img_c.clone(), img_fp3 = img_c.clone();
Point center1(img_c.cols/2, img_c.rows/2);
Point center2(img_c.cols/10, img_c.rows*3/4);
Point center3 = Point(img_c.cols, img_c.rows) - center2;
int radius = img_c.rows/4;
int radius_small = img_c.cols/15;
Scalar color(0, 0, 255);
circle(img_c, center1, radius, color, -1);
// check that circle, fillConvexPoly and fillPoly
// give almost the same result when asked to draw a single circle
vector<Point> vtx;
ellipse2Poly(center1, Size(radius, radius), 0, 0, 360, 1, vtx);
fillConvexPoly(img_fcp, vtx, color);
fillPoly(img_fp, vtx, color);
double diff_fp = cv::norm(img_c, img_fp, NORM_L1)/(255*radius*2*CV_PI);
double diff_fcp = cv::norm(img_c, img_fcp, NORM_L1)/(255*radius*2*CV_PI);
EXPECT_LT(diff_fp, 1.);
EXPECT_LT(diff_fcp, 1.);
// check that fillPoly can draw 3 disjoint circles at once
circle(img_c, center2, radius_small, color, -1);
circle(img_c, center3, radius_small, color, -1);
vector<vector<Point> > vtx3(3);
vtx3[0] = vtx;
ellipse2Poly(center2, Size(radius_small, radius_small), 0, 0, 360, 1, vtx3[1]);
ellipse2Poly(center3, Size(radius_small, radius_small), 0, 0, 360, 1, vtx3[2]);
fillPoly(img_fp3, vtx3, color);
double diff_fp3 = cv::norm(img_c, img_fp3, NORM_L1)/(255*(radius+radius_small*2)*2*CV_PI);
EXPECT_LT(diff_fp3, 1.);
}
}} // namespace


@@ -492,12 +492,14 @@ public:
}
}
}
-std::pair<MediaID, MediaType> findBest(const MediaType& newType)
+std::pair<MediaID, MediaType> findBestVideoFormat(const MediaType& newType)
{
std::pair<MediaID, MediaType> best;
std::map<MediaID, MediaType>::const_iterator i = formats.begin();
for (; i != formats.end(); ++i)
{
if (i->second.majorType != MFMediaType_Video)
continue;
if (newType.isEmpty()) // file input - choose first returned media type
{
best = *i;
@@ -775,7 +777,12 @@ bool CvCapture_MSMF::configureOutput(MediaType newType, cv::uint32_t outFormat)
{
FormatStorage formats;
formats.read(videoFileSource.Get());
-std::pair<FormatStorage::MediaID, MediaType> bestMatch = formats.findBest(newType);
+std::pair<FormatStorage::MediaID, MediaType> bestMatch = formats.findBestVideoFormat(newType);
if (bestMatch.second.isEmpty())
{
CV_LOG_DEBUG(NULL, "Can not find video stream with requested parameters");
return false;
}
dwStreamIndex = bestMatch.first.stream;
nativeFormat = bestMatch.second;
MediaType newFormat = nativeFormat;


@@ -43,7 +43,7 @@ def showLegend(classes):
    for i in range(len(classes)):
        block = legend[i * blockHeight:(i + 1) * blockHeight]
        block[:,:] = colors[i]
-        cv.putText(block, classes[i], (0, blockHeight/2), cv.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255))
+        cv.putText(block, classes[i], (0, blockHeight//2), cv.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255))
    cv.namedWindow('Legend', cv.WINDOW_NORMAL)
    cv.imshow('Legend', legend)


@@ -45,7 +45,7 @@ std::vector<std::string> classes;
inline void preprocess(const Mat& frame, Net& net, Size inpSize, float scale,
const Scalar& mean, bool swapRB);

-void postprocess(Mat& frame, const std::vector<Mat>& out, Net& net);
+void postprocess(Mat& frame, const std::vector<Mat>& out, Net& net, int backend);

void drawPred(int classId, float conf, int left, int top, int right, int bottom, Mat& frame);
@@ -148,7 +148,8 @@ int main(int argc, char** argv)
// Load a model.
Net net = readNet(modelPath, configPath, parser.get<String>("framework"));
-net.setPreferableBackend(parser.get<int>("backend"));
+int backend = parser.get<int>("backend");
+net.setPreferableBackend(backend);
net.setPreferableTarget(parser.get<int>("target"));
std::vector<String> outNames = net.getUnconnectedOutLayersNames();
@@ -245,7 +246,7 @@ int main(int argc, char** argv)
std::vector<Mat> outs = predictionsQueue.get();
Mat frame = processedFramesQueue.get();

-postprocess(frame, outs, net);
+postprocess(frame, outs, net, backend);

if (predictionsQueue.counter > 1)
{
@@ -285,7 +286,7 @@ int main(int argc, char** argv)
std::vector<Mat> outs;
net.forward(outs, outNames);

-postprocess(frame, outs, net);
+postprocess(frame, outs, net, backend);

// Put efficiency information.
std::vector<double> layersTimes;
@@ -319,7 +320,7 @@ inline void preprocess(const Mat& frame, Net& net, Size inpSize, float scale,
}
}

-void postprocess(Mat& frame, const std::vector<Mat>& outs, Net& net)
+void postprocess(Mat& frame, const std::vector<Mat>& outs, Net& net, int backend)
{
static std::vector<int> outLayers = net.getUnconnectedOutLayers();
static std::string outLayerType = net.getLayer(outLayers[0])->type;
@@ -396,11 +397,48 @@ void postprocess(Mat& frame, const std::vector<Mat>& outs, Net& net)
else
CV_Error(Error::StsNotImplemented, "Unknown output layer type: " + outLayerType);

-std::vector<int> indices;
-NMSBoxes(boxes, confidences, confThreshold, nmsThreshold, indices);
-for (size_t i = 0; i < indices.size(); ++i)
+// NMS is used inside the Region layer only on DNN_BACKEND_OPENCV; for other backends we need NMS in the sample.
+// NMS is also required if the number of outputs > 1.
+if (outLayers.size() > 1 || (outLayerType == "Region" && backend != DNN_BACKEND_OPENCV))
+{
+std::map<int, std::vector<size_t> > class2indices;
+for (size_t i = 0; i < classIds.size(); i++)
+{
+if (confidences[i] >= confThreshold)
+{
+class2indices[classIds[i]].push_back(i);
+}
+}
+std::vector<Rect> nmsBoxes;
+std::vector<float> nmsConfidences;
+std::vector<int> nmsClassIds;
+for (std::map<int, std::vector<size_t> >::iterator it = class2indices.begin(); it != class2indices.end(); ++it)
+{
+std::vector<Rect> localBoxes;
+std::vector<float> localConfidences;
+std::vector<size_t> classIndices = it->second;
+for (size_t i = 0; i < classIndices.size(); i++)
+{
+localBoxes.push_back(boxes[classIndices[i]]);
+localConfidences.push_back(confidences[classIndices[i]]);
+}
+std::vector<int> nmsIndices;
+NMSBoxes(localBoxes, localConfidences, confThreshold, nmsThreshold, nmsIndices);
+for (size_t i = 0; i < nmsIndices.size(); i++)
+{
+size_t idx = nmsIndices[i];
+nmsBoxes.push_back(localBoxes[idx]);
+nmsConfidences.push_back(localConfidences[idx]);
+nmsClassIds.push_back(it->first);
+}
+}
+boxes = nmsBoxes;
+classIds = nmsClassIds;
+confidences = nmsConfidences;
+}
+for (size_t idx = 0; idx < boxes.size(); ++idx)
{
-int idx = indices[i];
Rect box = boxes[idx];
drawPred(classIds[idx], confidences[idx], box.x, box.y,
box.x + box.width, box.y + box.height, frame);


@@ -141,9 +141,6 @@ def postprocess(frame, outs):
        # Network produces output blob with a shape NxC where N is a number of
        # detected objects and C is a number of classes + 4 where the first 4
        # numbers are [center_x, center_y, width, height]
-        classIds = []
-        confidences = []
-        boxes = []
        for out in outs:
            for detection in out:
                scores = detection[5:]
@@ -163,9 +160,25 @@ def postprocess(frame, outs):
        print('Unknown output layer type: ' + lastLayer.type)
        exit()

-    indices = cv.dnn.NMSBoxes(boxes, confidences, confThreshold, nmsThreshold)
+    # NMS is used inside the Region layer only on DNN_BACKEND_OPENCV; for other backends we need NMS in the sample.
+    # NMS is also required if the number of outputs > 1.
+    if len(outNames) > 1 or lastLayer.type == 'Region' and args.backend != cv.dnn.DNN_BACKEND_OPENCV:
+        indices = []
+        classIds = np.array(classIds)
+        boxes = np.array(boxes)
+        confidences = np.array(confidences)
+        unique_classes = set(classIds)
+        for cl in unique_classes:
+            class_indices = np.where(classIds == cl)[0]
+            conf = confidences[class_indices]
+            box = boxes[class_indices].tolist()
+            nms_indices = cv.dnn.NMSBoxes(box, conf, confThreshold, nmsThreshold)
+            nms_indices = nms_indices[:, 0] if len(nms_indices) else []
+            indices.extend(class_indices[nms_indices])
+    else:
+        indices = np.arange(0, len(classIds))

    for i in indices:
-        i = i[0]
        box = boxes[i]
        left = box[0]
        top = box[1]


@@ -65,7 +65,7 @@ def showLegend(classes):
    for i in range(len(classes)):
        block = legend[i * blockHeight:(i + 1) * blockHeight]
        block[:,:] = colors[i]
-        cv.putText(block, classes[i], (0, blockHeight/2), cv.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255))
+        cv.putText(block, classes[i], (0, blockHeight//2), cv.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255))
    cv.namedWindow('Legend', cv.WINDOW_NORMAL)
    cv.imshow('Legend', legend)
@@ -76,7 +76,7 @@ net = cv.dnn.readNet(args.model, args.config, args.framework)
net.setPreferableBackend(args.backend)
net.setPreferableTarget(args.target)

-winName = 'Deep learning image classification in OpenCV'
+winName = 'Deep learning semantic segmentation in OpenCV'
cv.namedWindow(winName, cv.WINDOW_NORMAL)

cap = cv.VideoCapture(args.input if args.input else 0)


@@ -269,7 +269,7 @@ def parseTextGraph(filePath):
def removeIdentity(graph_def):
    identities = {}
    for node in graph_def.node:
-        if node.op == 'Identity':
+        if node.op == 'Identity' or node.op == 'IdentityN':
            identities[node.name] = node.input[0]
            graph_def.node.remove(node)


@@ -0,0 +1,236 @@
# This file is a part of OpenCV project.
# It is subject to the license terms in the LICENSE file found in the top-level directory
# of this distribution and at http://opencv.org/license.html.
#
# Copyright (C) 2020, Intel Corporation, all rights reserved.
# Third party copyrights are property of their respective owners.
#
# Use this script to get the text graph representation (.pbtxt) of EfficientDet
# deep learning network trained in https://github.com/google/automl.
# Then you can import it with a binary frozen graph (.pb) using readNetFromTensorflow() function.
# See details and examples on the following wiki page: https://github.com/opencv/opencv/wiki/TensorFlow-Object-Detection-API
import argparse
import re
from math import sqrt
from tf_text_graph_common import *

class AnchorGenerator:
    def __init__(self, min_level, aspect_ratios, num_scales, anchor_scale):
        self.min_level = min_level
        self.aspect_ratios = aspect_ratios
        self.anchor_scale = anchor_scale
        self.scales = [2**(float(s) / num_scales) for s in range(num_scales)]

    def get(self, layer_id):
        widths = []
        heights = []
        for s in self.scales:
            for a in self.aspect_ratios:
                base_anchor_size = 2**(self.min_level + layer_id) * self.anchor_scale
                heights.append(base_anchor_size * s * a[1])
                widths.append(base_anchor_size * s * a[0])
        return widths, heights
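For the default configuration (min_level=3, num_scales=3, anchor_scale=4.0, aspect ratio 1:1), layer 0 gets a base anchor size of 2^3 * 4 = 32, scaled by 2^(s/3). A quick check (not part of this commit), written in C++ for consistency with the other sketches in this note:

    #include <cmath>
    #include <cstdio>

    int main()
    {
        const double base = std::pow(2.0, 3) * 4.0;  // 2^(min_level + layer_id) * anchor_scale = 32
        for (int s = 0; s < 3; ++s)
            std::printf("%.2f\n", base * std::pow(2.0, s / 3.0));  // 32.00, 40.32, 50.80
        return 0;
    }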
def createGraph(modelPath, outputPath, min_level, aspect_ratios, num_scales,
                anchor_scale, num_classes, image_width, image_height):
    print('Min level: %d' % min_level)
    print('Anchor scale: %f' % anchor_scale)
    print('Num scales: %d' % num_scales)
    print('Aspect ratios: %s' % str(aspect_ratios))
    print('Number of classes: %d' % num_classes)
    print('Input image size: %dx%d' % (image_width, image_height))

    # Read the graph.
    _inpNames = ['image_arrays']
    outNames = ['detections']
    writeTextGraph(modelPath, outputPath, outNames)
    graph_def = parseTextGraph(outputPath)

    def getUnconnectedNodes():
        unconnected = []
        for node in graph_def.node:
            if node.op == 'Const':
                continue
            unconnected.append(node.name)
            for inp in node.input:
                if inp in unconnected:
                    unconnected.remove(inp)
        return unconnected

    nodesToKeep = ['truediv']  # Keep preprocessing nodes

    removeIdentity(graph_def)

    scopesToKeep = ('image_arrays', 'efficientnet', 'resample_p6', 'resample_p7',
                    'fpn_cells', 'class_net', 'box_net', 'Reshape', 'concat')

    addConstNode('scale_w', [2.0], graph_def)
    addConstNode('scale_h', [2.0], graph_def)
    nodesToKeep += ['scale_w', 'scale_h']

    for node in graph_def.node:
        if re.match('efficientnet-(.*)/blocks_\d+/se/mul_1', node.name):
            node.input[0], node.input[1] = node.input[1], node.input[0]

        if re.match('fpn_cells/cell_\d+/fnode\d+/resample(.*)/nearest_upsampling/Reshape_1$', node.name):
            node.op = 'ResizeNearestNeighbor'
            node.input[1] = 'scale_w'
            node.input.append('scale_h')

            for inpNode in graph_def.node:
                if inpNode.name == node.name[:node.name.rfind('_')]:
                    node.input[0] = inpNode.input[0]

        if re.match('box_net/box-predict(_\d)*/separable_conv2d$', node.name):
            node.addAttr('loc_pred_transposed', True)

        # Replace RealDiv to Mul with inversed scale for compatibility
        if node.op == 'RealDiv':
            for inpNode in graph_def.node:
                if inpNode.name != node.input[1] or not 'value' in inpNode.attr:
                    continue
                tensor = inpNode.attr['value']['tensor'][0]
                if not 'float_val' in tensor:
                    continue
                scale = float(inpNode.attr['value']['tensor'][0]['float_val'][0])

                addConstNode(inpNode.name + '/inv', [1.0 / scale], graph_def)
                nodesToKeep.append(inpNode.name + '/inv')
                node.input[1] = inpNode.name + '/inv'
                node.op = 'Mul'
                break

    def to_remove(name, op):
        if name in nodesToKeep:
            return False
        return op == 'Const' or not name.startswith(scopesToKeep)

    removeUnusedNodesAndAttrs(to_remove, graph_def)

    # Attach unconnected preprocessing
    assert(graph_def.node[1].name == 'truediv' and graph_def.node[1].op == 'RealDiv')
    graph_def.node[1].input.insert(0, 'image_arrays')
    graph_def.node[2].input.insert(0, 'truediv')

    priors_generator = AnchorGenerator(min_level, aspect_ratios, num_scales, anchor_scale)
    priorBoxes = []
    for i in range(5):
        inpName = ''
        for node in graph_def.node:
            if node.name == 'Reshape_%d' % (i * 2 + 1):
                inpName = node.input[0]
                break

        priorBox = NodeDef()
        priorBox.name = 'PriorBox_%d' % i
        priorBox.op = 'PriorBox'
        priorBox.input.append(inpName)
        priorBox.input.append(graph_def.node[0].name)  # image_tensor

        priorBox.addAttr('flip', False)
        priorBox.addAttr('clip', False)

        widths, heights = priors_generator.get(i)

        priorBox.addAttr('width', widths)
        priorBox.addAttr('height', heights)
        priorBox.addAttr('variance', [1.0, 1.0, 1.0, 1.0])

        graph_def.node.extend([priorBox])
        priorBoxes.append(priorBox.name)

    addConstNode('concat/axis_flatten', [-1], graph_def)

    def addConcatNode(name, inputs, axisNodeName):
        concat = NodeDef()
        concat.name = name
        concat.op = 'ConcatV2'
        for inp in inputs:
            concat.input.append(inp)
        concat.input.append(axisNodeName)
        graph_def.node.extend([concat])

    addConcatNode('PriorBox/concat', priorBoxes, 'concat/axis_flatten')

    sigmoid = NodeDef()
    sigmoid.name = 'concat/sigmoid'
    sigmoid.op = 'Sigmoid'
    sigmoid.input.append('concat')
    graph_def.node.extend([sigmoid])

    addFlatten(sigmoid.name, sigmoid.name + '/Flatten', graph_def)
    addFlatten('concat_1', 'concat_1/Flatten', graph_def)

    detectionOut = NodeDef()
    detectionOut.name = 'detection_out'
    detectionOut.op = 'DetectionOutput'

    detectionOut.input.append('concat_1/Flatten')
    detectionOut.input.append(sigmoid.name + '/Flatten')
    detectionOut.input.append('PriorBox/concat')

    detectionOut.addAttr('num_classes', num_classes)
    detectionOut.addAttr('share_location', True)
    detectionOut.addAttr('background_label_id', num_classes + 1)
    detectionOut.addAttr('nms_threshold', 0.6)
    detectionOut.addAttr('confidence_threshold', 0.2)
    detectionOut.addAttr('top_k', 100)
    detectionOut.addAttr('keep_top_k', 100)
    detectionOut.addAttr('code_type', "CENTER_SIZE")
    graph_def.node.extend([detectionOut])

    graph_def.node[0].attr['shape'] = {
        'shape': {
            'dim': [
                {'size': -1},
                {'size': image_height},
                {'size': image_width},
                {'size': 3}
            ]
        }
    }

    while True:
        unconnectedNodes = getUnconnectedNodes()
        unconnectedNodes.remove(detectionOut.name)
        if not unconnectedNodes:
            break

        for name in unconnectedNodes:
            for i in range(len(graph_def.node)):
                if graph_def.node[i].name == name:
                    del graph_def.node[i]
                    break

    # Save as text
    graph_def.save(outputPath)

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='Run this script to get a text graph of '
                                                 'SSD model from TensorFlow Object Detection API. '
                                                 'Then pass it with .pb file to cv::dnn::readNetFromTensorflow function.')
    parser.add_argument('--input', required=True, help='Path to frozen TensorFlow graph.')
    parser.add_argument('--output', required=True, help='Path to output text graph.')
    parser.add_argument('--min_level', default=3, type=int, help='Parameter from training config')
    parser.add_argument('--num_scales', default=3, type=int, help='Parameter from training config')
    parser.add_argument('--anchor_scale', default=4.0, type=float, help='Parameter from training config')
    parser.add_argument('--aspect_ratios', default=[1.0, 1.0, 1.4, 0.7, 0.7, 1.4],
                        nargs='+', type=float, help='Parameter from training config')
    parser.add_argument('--num_classes', default=90, type=int, help='Number of classes to detect')
    parser.add_argument('--width', default=512, type=int, help='Network input width')
    parser.add_argument('--height', default=512, type=int, help='Network input height')
    args = parser.parse_args()

    ar = args.aspect_ratios
    assert(len(ar) % 2 == 0)
    ar = list(zip(ar[::2], ar[1::2]))

    createGraph(args.input, args.output, args.min_level, ar, args.num_scales,
                args.anchor_scale, args.num_classes, args.width, args.height)
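A usage sketch (not part of this commit): generate the text graph with this script, then load the pb/pbtxt pair with the DNN module, mirroring the EfficientDet test added above. File paths are placeholders.

    // After: python tf_text_graph_efficientdet.py --input efficientdet-d0.pb --output efficientdet-d0.pbtxt
    #include <opencv2/dnn.hpp>
    #include <opencv2/imgcodecs.hpp>

    int main()
    {
        cv::dnn::Net net = cv::dnn::readNetFromTensorflow("efficientdet-d0.pb", "efficientdet-d0.pbtxt");
        cv::Mat img = cv::imread("dog416.png");
        cv::Mat blob = cv::dnn::blobFromImage(img, 1.0 / 255, cv::Size(512, 512),
                                              cv::Scalar(123.675, 116.28, 103.53));
        net.setInput(blob);
        // Output has shape 1x1xNx7: [batchId, classId, confidence, left, top, right, bottom]
        cv::Mat out = net.forward();
        return 0;
    }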