EAST: An Efficient and Accurate Scene Text Detector (https://arxiv.org/abs/1704.03155v2)

This commit is contained in:
Dmitry Kurtaev 2018-04-24 18:25:43 +03:00
parent d1d7408a20
commit 8488f2e265
8 changed files with 412 additions and 76 deletions

View File

@ -32,11 +32,11 @@ Unspecified error: Can't create layer "layer_name" of type "MyType" in function
To import the model correctly you have to derive a class from cv::dnn::Layer with
the following methods:
@snippet dnn/custom_layers.cpp A custom layer interface
@snippet dnn/custom_layers.hpp A custom layer interface
And register it before the import:
@snippet dnn/custom_layers.cpp Register a custom layer
@snippet dnn/custom_layers.hpp Register a custom layer
@note `MyType` is a type of unimplemented layer from the thrown exception.
@ -44,27 +44,27 @@ Let's see what all the methods do:
- Constructor
@snippet dnn/custom_layers.cpp MyLayer::MyLayer
@snippet dnn/custom_layers.hpp MyLayer::MyLayer
Retrieves hyper-parameters from cv::dnn::LayerParams. If your layer has trainable
weights they will be already stored in the Layer's member cv::dnn::Layer::blobs.
- A static method `create`
@snippet dnn/custom_layers.cpp MyLayer::create
@snippet dnn/custom_layers.hpp MyLayer::create
This method should create an instance of your layer and return a cv::Ptr to it.
- Output blobs' shape computation
@snippet dnn/custom_layers.cpp MyLayer::getMemoryShapes
@snippet dnn/custom_layers.hpp MyLayer::getMemoryShapes
Returns the layer's output shapes, which depend on the input shapes. You may request
extra memory using `internals`.
- Run a layer
@snippet dnn/custom_layers.cpp MyLayer::forward
@snippet dnn/custom_layers.hpp MyLayer::forward
Implement a layer's logic here. Compute outputs for given inputs.
@ -74,7 +74,7 @@ the second invocation of `forward` will has the same data at `outputs` and `inte
- Optional `finalize` method
@snippet dnn/custom_layers.cpp MyLayer::finalize
@snippet dnn/custom_layers.hpp MyLayer::finalize
The chain of method calls is the following: the OpenCV deep learning engine calls the `create`
method once, then it calls `getMemoryShapes` for every created layer, then you
@ -108,11 +108,11 @@ layer {
This way our implementation can look like:
@snippet dnn/custom_layers.cpp InterpLayer
@snippet dnn/custom_layers.hpp InterpLayer
Next we need to register a new layer type and try to import the model.
@snippet dnn/custom_layers.cpp Register InterpLayer
@snippet dnn/custom_layers.hpp Register InterpLayer
## Example: custom layer from TensorFlow
This is an example of how to import a network with [tf.image.resize_bilinear](https://www.tensorflow.org/versions/master/api_docs/python/tf/image/resize_bilinear)
@ -185,11 +185,11 @@ Custom layers import from TensorFlow is designed to put all layer's `attr` into
cv::dnn::LayerParams but input `Const` blobs into cv::dnn::Layer::blobs.
In our case resize's output shape will be stored in layer's `blobs[0]`.
@snippet dnn/custom_layers.cpp ResizeBilinearLayer
@snippet dnn/custom_layers.hpp ResizeBilinearLayer
Next we register a layer and try to import the model.
@snippet dnn/custom_layers.cpp Register ResizeBilinearLayer
@snippet dnn/custom_layers.hpp Register ResizeBilinearLayer
## Define a custom layer in Python
The following example shows how to customize OpenCV's layers in Python.

View File

@ -826,6 +826,10 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
CV_OUT std::vector<int>& indices,
const float eta = 1.f, const int top_k = 0);
CV_EXPORTS void NMSBoxes(const std::vector<RotatedRect>& bboxes, const std::vector<float>& scores,
const float score_threshold, const float nms_threshold,
CV_OUT std::vector<int>& indices,
const float eta = 1.f, const int top_k = 0);
//! @}
CV__DNN_EXPERIMENTAL_NS_END

View File

@ -8,6 +8,8 @@
#include "precomp.hpp"
#include "nms.inl.hpp"
#include <opencv2/imgproc.hpp>
namespace cv
{
namespace dnn
@ -28,6 +30,28 @@ void NMSBoxes(const std::vector<Rect>& bboxes, const std::vector<float>& scores,
NMSFast_(bboxes, scores, score_threshold, nms_threshold, eta, top_k, indices, rectOverlap);
}
// Intersection-over-union of two rotated rectangles.
//
// Returns 0 when the rectangles do not intersect (or when the union area is
// not positive, e.g. for degenerate zero-sized rectangles); otherwise returns
// intersection_area / (area(a) + area(b) - intersection_area).
static inline float rotatedRectIOU(const RotatedRect& a, const RotatedRect& b)
{
    std::vector<Point2f> inter;
    const int res = rotatedRectangleIntersection(a, b, inter);
    if (inter.empty() || res == INTERSECT_NONE)
        return 0.0f;

    // INTERSECT_FULL is deliberately NOT special-cased: it only reports that
    // one rectangle is fully enclosed in the other, which is not the same as
    // the rectangles being equal. The intersecting region is then the enclosed
    // rectangle, so the general formula below yields inner_area / outer_area
    // (and ~1 for coinciding rectangles) instead of a hard-coded 1.
    std::vector<Point2f> hull;
    convexHull(inter, hull);
    const float interArea = static_cast<float>(contourArea(hull));
    const float unionArea = a.size.area() + b.size.area() - interArea;

    // Avoid 0/0 -> NaN, which would compare as "overlapping" in the NMS loop.
    if (unionArea <= 0.0f)
        return 0.0f;
    return interArea / unionArea;
}
// Non-maximum suppression for rotated rectangles.
//
// Validates the arguments and forwards to NMSFast_, using rotatedRectIOU as
// the overlap measure between two boxes. The indices of the boxes that are
// kept are written to `indices`.
void NMSBoxes(const std::vector<RotatedRect>& bboxes, const std::vector<float>& scores,
              const float score_threshold, const float nms_threshold,
              std::vector<int>& indices, const float eta, const int top_k)
{
    // One score per box is required.
    CV_Assert(bboxes.size() == scores.size());
    CV_Assert(score_threshold >= 0);
    CV_Assert(nms_threshold >= 0);
    CV_Assert(eta > 0);

    NMSFast_(bboxes, scores, score_threshold, nms_threshold, eta, top_k, indices, rotatedRectIOU);
}
CV__DNN_EXPERIMENTAL_NS_END
}// dnn
}// cv

View File

@ -538,6 +538,37 @@ public:
}
};
// Subgraph pattern for ResizeBilinear whose target size is computed at run
// time as (input shape * constant factor) rather than given as a constant.
//
// Matched pattern:
//   Pack( Mul(StridedSlice(Shape(input), c, c, c), factorY),
//         Mul(StridedSlice(Shape(input), c, c, c), factorX) ) -> ResizeBilinear(input, Pack)
// It is fused into a single "ResizeBilinear" node with inputs
// (input, factorY, factorX).
class ResizeBilinearSubgraph : public Subgraph
{
public:
    ResizeBilinearSubgraph()
    {
        // NOTE: nodes must be registered in exactly this order.
        const int input = addNodeToMatch("");

        // First Shape -> StridedSlice -> Mul chain (multiplied by factorY).
        const int shapeY = addNodeToMatch("Shape", input);
        const int sliceArgY0 = addNodeToMatch("Const");
        const int sliceArgY1 = addNodeToMatch("Const");
        const int sliceArgY2 = addNodeToMatch("Const");
        const int sliceY = addNodeToMatch("StridedSlice", shapeY, sliceArgY0, sliceArgY1, sliceArgY2);
        const int factorY = addNodeToMatch("Const");
        const int scaledY = addNodeToMatch("Mul", sliceY, factorY);

        // Second Shape -> StridedSlice -> Mul chain (multiplied by factorX).
        const int shapeX = addNodeToMatch("Shape", input);
        const int sliceArgX0 = addNodeToMatch("Const");
        const int sliceArgX1 = addNodeToMatch("Const");
        const int sliceArgX2 = addNodeToMatch("Const");
        const int sliceX = addNodeToMatch("StridedSlice", shapeX, sliceArgX0, sliceArgX1, sliceArgX2);
        const int factorX = addNodeToMatch("Const");
        const int scaledX = addNodeToMatch("Mul", sliceX, factorX);

        // Both scaled values are packed together and fed to ResizeBilinear.
        const int packedSize = addNodeToMatch("Pack", scaledY, scaledX);
        addNodeToMatch("ResizeBilinear", input, packedSize);

        setFusedNode("ResizeBilinear", input, factorY, factorX);
    }
};
void simplifySubgraphs(tensorflow::GraphDef& net)
{
std::vector<Ptr<Subgraph> > subgraphs;
@ -551,6 +582,7 @@ void simplifySubgraphs(tensorflow::GraphDef& net)
subgraphs.push_back(Ptr<Subgraph>(new L2NormalizeSubgraph()));
subgraphs.push_back(Ptr<Subgraph>(new DeconvolutionValidKerasSubgraph()));
subgraphs.push_back(Ptr<Subgraph>(new DeconvolutionSameKerasSubgraph()));
subgraphs.push_back(Ptr<Subgraph>(new ResizeBilinearSubgraph()));
int numNodes = net.node_size();
std::vector<int> matchedNodesIds;

View File

@ -767,6 +767,26 @@ void TFImporter::populateNet(Net dstNet)
}
}
}
else if (type == "Sub")
{
bool haveConst = false;
for(int ii = 0; !haveConst && ii < layer.input_size(); ++ii)
{
Pin input = parsePin(layer.input(ii));
haveConst = value_id.find(input.name) != value_id.end();
}
CV_Assert(haveConst);
layerParams.blobs.resize(1);
blobFromTensor(getConstBlob(layer, value_id), layerParams.blobs[0]);
layerParams.blobs[0] *= -1;
int id = dstNet.addLayer(name, "Shift", layerParams);
layer_id[name] = id;
// one input only
connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
}
else if (type == "MatMul")
{
CV_Assert(layer.input_size() == 2);

View File

@ -373,9 +373,24 @@ public:
ResizeBilinearLayer(const LayerParams &params) : Layer(params)
{
CV_Assert(!params.get<bool>("align_corners", false));
CV_Assert(blobs.size() == 1, blobs[0].type() == CV_32SC1);
outHeight = blobs[0].at<int>(0, 0);
outWidth = blobs[0].at<int>(0, 1);
CV_Assert(!blobs.empty());
for (size_t i = 0; i < blobs.size(); ++i)
CV_Assert(blobs[i].type() == CV_32SC1);
if (blobs.size() == 1)
{
CV_Assert(blobs[0].total() == 2);
outHeight = blobs[0].at<int>(0, 0);
outWidth = blobs[0].at<int>(0, 1);
}
else
{
CV_Assert(blobs.size() == 2, blobs[0].total() == 1, blobs[1].total() == 1);
factorHeight = blobs[0].at<int>(0, 0);
factorWidth = blobs[1].at<int>(0, 0);
outHeight = outWidth = 0;
}
}
static Ptr<Layer> create(LayerParams& params)
@ -391,12 +406,21 @@ public:
std::vector<int> outShape(4);
outShape[0] = inputs[0][0]; // batch size
outShape[1] = inputs[0][1]; // number of channels
outShape[2] = outHeight;
outShape[3] = outWidth;
outShape[2] = outHeight != 0 ? outHeight : (inputs[0][2] * factorHeight);
outShape[3] = outWidth != 0 ? outWidth : (inputs[0][3] * factorWidth);
outputs.assign(1, outShape);
return false;
}
// When no explicit output size is stored (both outWidth and outHeight are
// zero), take the spatial size from the already allocated output blob
// (dimensions 2 and 3 of outputs[0]).
virtual void finalize(const std::vector<Mat*>& inputs, std::vector<Mat> &outputs) CV_OVERRIDE
{
    const bool sizeKnown = (outWidth != 0) || (outHeight != 0);
    if (sizeKnown)
        return;

    const Mat& out = outputs[0];
    outHeight = out.size[2];
    outWidth = out.size[3];
}
// This implementation is based on a reference implementation from
// https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h
virtual void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals) CV_OVERRIDE
@ -447,13 +471,51 @@ private:
return x + size[3] * (y + size[2] * (c + size[1] * b));
}
int outWidth, outHeight;
int outWidth, outHeight, factorWidth, factorHeight;
};
// Runs two TensorFlow test nets through runTensorFlowNet with the custom
// ResizeBilinearLayer registered under the "ResizeBilinear" layer type.
TEST(Test_TensorFlow, resize_bilinear)
{
// Register the custom layer so the importer can create "ResizeBilinear" layers.
CV_DNN_REGISTER_LAYER_CLASS(ResizeBilinear, ResizeBilinearLayer);
runTensorFlowNet("resize_bilinear");
// Presumably the variant where the output size is given by a scale factor — confirm against the test data.
runTensorFlowNet("resize_bilinear_factor");
// Remove the registration again so it does not stay in the layer factory.
// NOTE(review): not reached if one of the calls above throws.
LayerFactory::unregisterLayer("ResizeBilinear");
}
// inp = cv.imread('opencv_extra/testdata/cv/ximgproc/sources/08.png')
// inp = inp[:,:,[2, 1, 0]].astype(np.float32).reshape(1, 512, 512, 3)
// outs = sess.run([sess.graph.get_tensor_by_name('feature_fusion/Conv_7/Sigmoid:0'),
// sess.graph.get_tensor_by_name('feature_fusion/concat_3:0')],
// feed_dict={'input_images:0': inp})
// scores = np.ascontiguousarray(outs[0].transpose(0, 3, 1, 2))
// geometry = np.ascontiguousarray(outs[1].transpose(0, 3, 1, 2))
// np.save('east_text_detection.scores.npy', scores)
// np.save('east_text_detection.geometry.npy', geometry)
// Imports the frozen EAST text detection graph, runs it on a test image and
// compares the two requested outputs (scores and geometry) with reference
// blobs stored as .npy files.
TEST(Test_TensorFlow, EAST_text_detection)
{
    // The model needs the custom ResizeBilinear layer.
    CV_DNN_REGISTER_LAYER_CLASS(ResizeBilinear, ResizeBilinearLayer);

    const std::string netPath = findDataFile("dnn/frozen_east_text_detection.pb", false);
    const std::string imgPath = findDataFile("cv/ximgproc/sources/08.png", false);
    const std::string refScoresPath = findDataFile("dnn/east_text_detection.scores.npy", false);
    const std::string refGeometryPath = findDataFile("dnn/east_text_detection.geometry.npy", false);

    // Reuse the already resolved model path instead of looking it up twice.
    Net net = readNet(netPath);

    Mat img = imread(imgPath);
    // Keep the original image size (empty Size), subtract the per-channel mean
    // and swap R and B channels; no cropping.
    Mat inp = blobFromImage(img, 1.0, Size(), Scalar(123.68, 116.78, 103.94), true, false);
    net.setInput(inp);

    std::vector<Mat> outs;
    std::vector<String> outNames(2);
    outNames[0] = "feature_fusion/Conv_7/Sigmoid";
    outNames[1] = "feature_fusion/concat_3";
    net.forward(outs, outNames);

    Mat scores = outs[0];
    Mat geometry = outs[1];

    normAssert(scores, blobFromNPY(refScoresPath), "scores");
    // Geometry is compared with explicitly relaxed tolerances.
    normAssert(geometry, blobFromNPY(refGeometryPath), "geometry", 5e-5, 1e-3);

    LayerFactory::unregisterLayer("ResizeBilinear");
}

View File

@ -1,35 +1,8 @@
#ifndef __OPENCV_SAMPLES_DNN_CUSTOM_LAYERS__
#define __OPENCV_SAMPLES_DNN_CUSTOM_LAYERS__
#include <opencv2/dnn.hpp>
//! [A custom layer interface]
class MyLayer : public cv::dnn::Layer
{
public:
//! [MyLayer::MyLayer]
MyLayer(const cv::dnn::LayerParams &params);
//! [MyLayer::MyLayer]
//! [MyLayer::create]
static cv::Ptr<cv::dnn::Layer> create(cv::dnn::LayerParams& params);
//! [MyLayer::create]
//! [MyLayer::getMemoryShapes]
virtual bool getMemoryShapes(const std::vector<std::vector<int> > &inputs,
const int requiredOutputs,
std::vector<std::vector<int> > &outputs,
std::vector<std::vector<int> > &internals) const CV_OVERRIDE;
//! [MyLayer::getMemoryShapes]
//! [MyLayer::forward]
virtual void forward(std::vector<cv::Mat*> &inputs, std::vector<cv::Mat> &outputs, std::vector<cv::Mat> &internals) CV_OVERRIDE;
//! [MyLayer::forward]
//! [MyLayer::finalize]
virtual void finalize(const std::vector<cv::Mat*> &inputs, std::vector<cv::Mat> &outputs) CV_OVERRIDE;
//! [MyLayer::finalize]
virtual void forward(cv::InputArrayOfArrays inputs, cv::OutputArrayOfArrays outputs, cv::OutputArrayOfArrays internals) CV_OVERRIDE;
};
//! [A custom layer interface]
#include <opencv2/dnn/shape_utils.hpp> // getPlane
//! [InterpLayer]
class InterpLayer : public cv::dnn::Layer
@ -113,15 +86,33 @@ private:
//! [InterpLayer]
//! [ResizeBilinearLayer]
class ResizeBilinearLayer : public cv::dnn::Layer
class ResizeBilinearLayer CV_FINAL : public cv::dnn::Layer
{
public:
ResizeBilinearLayer(const cv::dnn::LayerParams &params) : Layer(params)
{
CV_Assert(!params.get<bool>("align_corners", false));
CV_Assert(blobs.size() == 1, blobs[0].type() == CV_32SC1);
outHeight = blobs[0].at<int>(0, 0);
outWidth = blobs[0].at<int>(0, 1);
CV_Assert(!blobs.empty());
for (size_t i = 0; i < blobs.size(); ++i)
CV_Assert(blobs[i].type() == CV_32SC1);
// There are two cases of input blob: a single blob which contains output
// shape and two blobs with scaling factors.
if (blobs.size() == 1)
{
CV_Assert(blobs[0].total() == 2);
outHeight = blobs[0].at<int>(0, 0);
outWidth = blobs[0].at<int>(0, 1);
factorHeight = factorWidth = 0;
}
else
{
CV_Assert(blobs.size() == 2, blobs[0].total() == 1, blobs[1].total() == 1);
factorHeight = blobs[0].at<int>(0, 0);
factorWidth = blobs[1].at<int>(0, 0);
outHeight = outWidth = 0;
}
}
static cv::Ptr<cv::dnn::Layer> create(cv::dnn::LayerParams& params)
@ -130,25 +121,32 @@ public:
}
virtual bool getMemoryShapes(const std::vector<std::vector<int> > &inputs,
const int requiredOutputs,
const int,
std::vector<std::vector<int> > &outputs,
std::vector<std::vector<int> > &internals) const CV_OVERRIDE
std::vector<std::vector<int> > &) const CV_OVERRIDE
{
CV_UNUSED(requiredOutputs); CV_UNUSED(internals);
std::vector<int> outShape(4);
outShape[0] = inputs[0][0]; // batch size
outShape[1] = inputs[0][1]; // number of channels
outShape[2] = outHeight;
outShape[3] = outWidth;
outShape[2] = outHeight != 0 ? outHeight : (inputs[0][2] * factorHeight);
outShape[3] = outWidth != 0 ? outWidth : (inputs[0][3] * factorWidth);
outputs.assign(1, outShape);
return false;
}
// If the layer was configured without an explicit output size (outWidth and
// outHeight are both zero), read the spatial size back from the allocated
// output blob: dimension 2 is used as the height and dimension 3 as the width.
virtual void finalize(const std::vector<cv::Mat*>&, std::vector<cv::Mat> &outputs) CV_OVERRIDE
{
    if (outWidth != 0 || outHeight != 0)
        return;  // At least one of the sizes is already set.

    const cv::Mat& allocated = outputs[0];
    outHeight = allocated.size[2];
    outWidth = allocated.size[3];
}
// This implementation is based on a reference implementation from
// https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h
virtual void forward(std::vector<cv::Mat*> &inputs, std::vector<cv::Mat> &outputs, std::vector<cv::Mat> &internals) CV_OVERRIDE
virtual void forward(std::vector<cv::Mat*> &inputs, std::vector<cv::Mat> &outputs, std::vector<cv::Mat> &) CV_OVERRIDE
{
CV_UNUSED(internals);
cv::Mat& inp = *inputs[0];
cv::Mat& out = outputs[0];
const float* inpData = (float*)inp.data;
@ -195,19 +193,54 @@ private:
return x + size[3] * (y + size[2] * (c + size[1] * b));
}
int outWidth, outHeight;
int outWidth, outHeight, factorWidth, factorHeight;
};
//! [ResizeBilinearLayer]
//! [Register a custom layer]
#include <opencv2/dnn/layer.details.hpp> // CV_DNN_REGISTER_LAYER_CLASS macro
//
// The following code is used only to generate the tutorial documentation.
//
int main(int argc, char** argv)
//! [A custom layer interface]
class MyLayer : public cv::dnn::Layer
{
CV_DNN_REGISTER_LAYER_CLASS(MyType, MyLayer);
public:
//! [MyLayer::MyLayer]
MyLayer(const cv::dnn::LayerParams &params);
//! [MyLayer::MyLayer]
//! [MyLayer::create]
static cv::Ptr<cv::dnn::Layer> create(cv::dnn::LayerParams& params);
//! [MyLayer::create]
//! [MyLayer::getMemoryShapes]
virtual bool getMemoryShapes(const std::vector<std::vector<int> > &inputs,
const int requiredOutputs,
std::vector<std::vector<int> > &outputs,
std::vector<std::vector<int> > &internals) const CV_OVERRIDE;
//! [MyLayer::getMemoryShapes]
//! [MyLayer::forward]
virtual void forward(std::vector<cv::Mat*> &inputs, std::vector<cv::Mat> &outputs, std::vector<cv::Mat> &internals) CV_OVERRIDE;
//! [MyLayer::forward]
//! [MyLayer::finalize]
virtual void finalize(const std::vector<cv::Mat*> &inputs, std::vector<cv::Mat> &outputs) CV_OVERRIDE;
//! [MyLayer::finalize]
virtual void forward(cv::InputArrayOfArrays inputs, cv::OutputArrayOfArrays outputs, cv::OutputArrayOfArrays internals) CV_OVERRIDE;
};
//! [A custom layer interface]
//! [Register a custom layer]
#include <opencv2/dnn/layer.details.hpp> // CV_DNN_REGISTER_LAYER_CLASS
static inline void loadNet()
{
CV_DNN_REGISTER_LAYER_CLASS(Interp, InterpLayer);
// ...
//! [Register a custom layer]
CV_UNUSED(argc); CV_UNUSED(argv);
//! [Register InterpLayer]
CV_DNN_REGISTER_LAYER_CLASS(Interp, InterpLayer);
cv::dnn::Net caffeNet = cv::dnn::readNet("/path/to/config.prototxt", "/path/to/weights.caffemodel");
@ -217,16 +250,8 @@ int main(int argc, char** argv)
CV_DNN_REGISTER_LAYER_CLASS(ResizeBilinear, ResizeBilinearLayer);
cv::dnn::Net tfNet = cv::dnn::readNet("/path/to/graph.pb");
//! [Register ResizeBilinearLayer]
if (false) loadNet(); // To prevent unused function warning.
}
cv::Ptr<cv::dnn::Layer> MyLayer::create(cv::dnn::LayerParams& params)
{
return cv::Ptr<cv::dnn::Layer>(new MyLayer(params));
}
MyLayer::MyLayer(const cv::dnn::LayerParams&) {}
bool MyLayer::getMemoryShapes(const std::vector<std::vector<int> >&, const int,
std::vector<std::vector<int> >&,
std::vector<std::vector<int> >&) const { return false; }
void MyLayer::forward(std::vector<cv::Mat*>&, std::vector<cv::Mat>&, std::vector<cv::Mat>&) {}
void MyLayer::finalize(const std::vector<cv::Mat*>&, std::vector<cv::Mat>&) {}
void MyLayer::forward(cv::InputArrayOfArrays, cv::OutputArrayOfArrays, cv::OutputArrayOfArrays) {}
#endif // __OPENCV_SAMPLES_DNN_CUSTOM_LAYERS__

View File

@ -0,0 +1,169 @@
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/dnn.hpp>
#include "custom_layers.hpp"
using namespace cv;
using namespace cv::dnn;
// Command line options of the sample, passed to cv::CommandLineParser in main().
// Each entry has the form "{ name [alias] | default value | help text }".
const char* keys =
"{ help h | | Print help message. }"
"{ input i | | Path to input image or video file. Skip this argument to capture frames from a camera.}"
"{ model m | | Path to a binary .pb file contains trained network.}"
"{ width | 320 | Preprocess input image by resizing to a specific width. It should be multiple by 32. }"
"{ height | 320 | Preprocess input image by resizing to a specific height. It should be multiple by 32. }"
"{ thr | 0.5 | Confidence threshold. }"
"{ nms | 0.4 | Non-maximum suppression threshold. }";
// Forward declaration; the definition follows main().
// Converts the network outputs (scores and geometry) into rotated boxes and
// their confidences, keeping only predictions with a score of at least scoreThresh.
void decode(const Mat& scores, const Mat& geometry, float scoreThresh,
std::vector<RotatedRect>& detections, std::vector<float>& confidences);
int main(int argc, char** argv)
{
// Parse command line arguments.
CommandLineParser parser(argc, argv, keys);
parser.about("Use this script to run TensorFlow implementation (https://github.com/argman/EAST) of "
"EAST: An Efficient and Accurate Scene Text Detector (https://arxiv.org/abs/1704.03155v2)");
if (argc == 1 || parser.has("help"))
{
parser.printMessage();
return 0;
}
float confThreshold = parser.get<float>("thr");
float nmsThreshold = parser.get<float>("nms");
int inpWidth = parser.get<int>("width");
int inpHeight = parser.get<int>("height");
CV_Assert(parser.has("model"));
String model = parser.get<String>("model");
// Register a custom layer.
CV_DNN_REGISTER_LAYER_CLASS(ResizeBilinear, ResizeBilinearLayer);
// Load network.
Net net = readNet(model);
// Open a video file or an image file or a camera stream.
VideoCapture cap;
if (parser.has("input"))
cap.open(parser.get<String>("input"));
else
cap.open(0);
static const std::string kWinName = "EAST: An Efficient and Accurate Scene Text Detector";
namedWindow(kWinName, WINDOW_NORMAL);
std::vector<Mat> outs;
std::vector<String> outNames(2);
outNames[0] = "feature_fusion/Conv_7/Sigmoid";
outNames[1] = "feature_fusion/concat_3";
Mat frame, blob;
while (waitKey(1) < 0)
{
cap >> frame;
if (frame.empty())
{
waitKey();
break;
}
blobFromImage(frame, blob, 1.0, Size(inpWidth, inpHeight), Scalar(123.68, 116.78, 103.94), true, false);
net.setInput(blob);
net.forward(outs, outNames);
Mat scores = outs[0];
Mat geometry = outs[1];
// Decode predicted bounding boxes.
std::vector<RotatedRect> boxes;
std::vector<float> confidences;
decode(scores, geometry, confThreshold, boxes, confidences);
// Apply non-maximum suppression procedure.
std::vector<int> indices;
NMSBoxes(boxes, confidences, confThreshold, nmsThreshold, indices);
// Render detections.
Point2f ratio((float)frame.cols / inpWidth, (float)frame.rows / inpHeight);
for (size_t i = 0; i < indices.size(); ++i)
{
RotatedRect& box = boxes[indices[i]];
Point2f vertices[4];
box.points(vertices);
for (int j = 0; j < 4; ++j)
{
vertices[j].x *= ratio.x;
vertices[j].y *= ratio.y;
}
for (int j = 0; j < 4; ++j)
line(frame, vertices[j], vertices[(j + 1) % 4], Scalar(0, 255, 0), 1);
}
// Put efficiency information.
std::vector<double> layersTimes;
double freq = getTickFrequency() / 1000;
double t = net.getPerfProfile(layersTimes) / freq;
std::string label = format("Inference time: %.2f ms", t);
putText(frame, label, Point(0, 15), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 255, 0));
imshow(kWinName, frame);
}
return 0;
}
void decode(const Mat& scores, const Mat& geometry, float scoreThresh,
std::vector<RotatedRect>& detections, std::vector<float>& confidences)
{
detections.clear();
CV_Assert(scores.dims == 4, geometry.dims == 4, scores.size[0] == 1,
geometry.size[0] == 1, scores.size[1] == 1, geometry.size[1] == 5,
scores.size[2] == geometry.size[2], scores.size[3] == geometry.size[3]);
const int height = scores.size[2];
const int width = scores.size[3];
const int planeSize = height * width;
float* scoresData = (float*)scores.data;
float* geometryData = (float*)geometry.data;
float* x0_data = geometryData;
float* x1_data = geometryData + planeSize;
float* x2_data = geometryData + planeSize * 2;
float* x3_data = geometryData + planeSize * 3;
float* anglesData = geometryData + planeSize * 4;
for (int y = 0; y < height; ++y)
{
for (int x = 0; x < width; ++x)
{
float score = scoresData[x];
if (score < scoreThresh)
continue;
// Decode a prediction.
// Multiple by 4 because feature maps are 4 time less than input image.
float offsetX = x * 4.0f, offsetY = y * 4.0f;
float angle = anglesData[x];
float cosA = std::cos(angle);
float sinA = std::sin(angle);
float h = x0_data[x] + x2_data[x];
float w = x1_data[x] + x3_data[x];
Point2f offset(offsetX + cosA * x1_data[x] + sinA * x2_data[x],
offsetY - sinA * x1_data[x] + cosA * x2_data[x]);
Point2f p1 = Point2f(-sinA * h, -cosA * h) + offset;
Point2f p3 = Point2f(-cosA * w, sinA * w) + offset;
RotatedRect r(0.5f * (p1 + p3), Size2f(w, h), -angle * 180.0f / (float)CV_PI);
detections.push_back(r);
confidences.push_back(score);
}
scoresData += width;
x0_data += width;
x1_data += width;
x2_data += width;
x3_data += width;
anglesData += width;
}
}