Mirror of https://github.com/opencv/opencv.git

Merge remote-tracking branch 'upstream/3.4' into merge-3.4

commit f4d6a3ec4e
@@ -1481,8 +1481,8 @@ function(ocv_target_link_libraries target)
       if(NOT LINK_PENDING STREQUAL "")
         __ocv_push_target_link_libraries(${LINK_MODE} ${LINK_PENDING})
         set(LINK_PENDING "")
-        set(LINK_MODE "${dep}")
       endif()
+      set(LINK_MODE "${dep}")
     else()
       if(BUILD_opencv_world)
         if(OPENCV_MODULE_${dep}_IS_PART_OF_WORLD)
@@ -74,7 +74,7 @@ Canny Edge Detection in OpenCV
 
 OpenCV puts all the above in a single function, **cv.Canny()**. We will see how to use it. First
 argument is our input image. Second and third arguments are our minVal and maxVal respectively.
-Third argument is aperture_size. It is the size of the Sobel kernel used to find image gradients. By
+Fourth argument is aperture_size. It is the size of the Sobel kernel used to find image gradients. By
 default it is 3. Last argument is L2gradient which specifies the equation for finding gradient
 magnitude. If it is True, it uses the equation mentioned above which is more accurate, otherwise it
 uses this function: \f$Edge\_Gradient \; (G) = |G_x| + |G_y|\f$. By default, it is False.
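For readers following the tutorial hunk above, a minimal C++ usage sketch matching the described parameters (not part of the patch; the image file name is a placeholder):

```cpp
#include <opencv2/imgcodecs.hpp>
#include <opencv2/imgproc.hpp>

int main()
{
    // Placeholder input; Canny expects a single-channel 8-bit image.
    cv::Mat img = cv::imread("input.png", cv::IMREAD_GRAYSCALE);
    cv::Mat edges;
    // minVal = 100, maxVal = 200, apertureSize = 3 (Sobel kernel size),
    // L2gradient = false selects |G_x| + |G_y| as the gradient magnitude.
    cv::Canny(img, edges, 100, 200, 3, false);
    cv::imwrite("edges.png", edges);
    return 0;
}
```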
@@ -91,8 +91,8 @@ a new header with the new boundaries:
 Mat D (A, Rect(10, 10, 100, 100) ); // using a rectangle
 Mat E = A(Range::all(), Range(1,3)); // using row and column boundaries
 @endcode
-Now you may ask -- if the matrix itself may belong to multiple *Mat* objects who takes responsibility
-for cleaning it up when it's no longer needed. The short answer is: the last object that used it.
+Now you may ask -- if the matrix itself may belong to multiple *Mat* objects, who takes responsibility
+for cleaning it up when it's no longer needed? The short answer is: the last object that used it.
 This is handled by using a reference counting mechanism. Whenever somebody copies a header of a
 *Mat* object, a counter is increased for the matrix. Whenever a header is cleaned, this counter
 is decreased. When the counter reaches zero the matrix is freed. Sometimes you will want to copy
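As an aside to the reference-counting discussion in this hunk, a small sketch (plain core API, nothing patch-specific) showing that a ROI header shares the parent's data:

```cpp
#include <opencv2/core.hpp>
#include <iostream>

int main()
{
    cv::Mat A = cv::Mat::zeros(4, 4, CV_8U);
    cv::Mat D(A, cv::Rect(1, 1, 2, 2)); // new header, same underlying matrix
    D.setTo(255);                       // writes through D are visible via A
    std::cout << A << std::endl;        // the inner 2x2 block of A is now 255
    return 0;                           // data freed when the last header dies
}
```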
@@ -102,12 +102,12 @@ Mat F = A.clone();
 Mat G;
 A.copyTo(G);
 @endcode
-Now modifying *F* or *G* will not affect the matrix pointed by the *A*'s header. What you need to
+Now modifying *F* or *G* will not affect the matrix pointed to by *A*'s header. What you need to
 remember from all this is that:
 
 - Output image allocation for OpenCV functions is automatic (unless specified otherwise).
 - You do not need to think about memory management with OpenCV's C++ interface.
-- The assignment operator and the copy constructor only copies the header.
+- The assignment operator and the copy constructor only copy the header.
 - The underlying matrix of an image may be copied using the @ref cv::Mat::clone() and @ref cv::Mat::copyTo()
   functions.
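And the complementary deep-copy behavior described by the bullet list (again a sketch, not patch code):

```cpp
#include <opencv2/core.hpp>
#include <iostream>

int main()
{
    cv::Mat A = cv::Mat::ones(2, 2, CV_8U);
    cv::Mat F = A.clone(); // deep copy: F owns its own matrix data
    cv::Mat G;
    A.copyTo(G);           // deep copy as well
    F.setTo(7);
    G.setTo(9);
    std::cout << A << std::endl; // A is unaffected, still all ones
    return 0;
}
```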
@@ -122,10 +122,10 @@ of these allows us to create many shades of gray.
 For *colorful* ways we have a lot more methods to choose from. Each of them breaks it down to three
 or four basic components and we can use the combination of these to create the others. The most
 popular one is RGB, mainly because this is also how our eye builds up colors. Its base colors are
-red, green and blue. To code the transparency of a color sometimes a fourth element: alpha (A) is
+red, green and blue. To code the transparency of a color sometimes a fourth element, alpha (A), is
 added.
 
-There are, however, many other color systems each with their own advantages:
+There are, however, many other color systems, each with their own advantages:
 
 - RGB is the most common as our eyes use something similar, however keep in mind that OpenCV standard display
   system composes colors using the BGR color space (the red and blue channels are swapped).
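Since, as the bullet above notes, OpenCV's default display pipeline is BGR, converting before handing pixels to an RGB-based API avoids swapped colors; a short sketch (the file name is a placeholder):

```cpp
#include <opencv2/imgcodecs.hpp>
#include <opencv2/imgproc.hpp>

int main()
{
    cv::Mat bgr = cv::imread("input.png", cv::IMREAD_COLOR); // B, G, R order
    cv::Mat rgb, rgba;
    cv::cvtColor(bgr, rgb, cv::COLOR_BGR2RGB);   // swap red and blue
    cv::cvtColor(bgr, rgba, cv::COLOR_BGR2RGBA); // also add an alpha channel
    return 0;
}
```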
@@ -139,11 +139,11 @@ There are, however, many other color systems each with their own advantages:
 Each of the building components has its own valid domains. This leads to the data type used. How
 we store a component defines the control we have over its domain. The smallest data type possible is
 *char*, which means one byte or 8 bits. This may be unsigned (so can store values from 0 to 255) or
-signed (values from -127 to +127). Although in case of three components this already gives 16
-million possible colors to represent (like in case of RGB) we may acquire an even finer control by
+signed (values from -127 to +127). Although this width, in the case of three components (like RGB), already gives 16
+million possible colors to represent, we may acquire an even finer control by
 using the float (4 byte = 32 bit) or double (8 byte = 64 bit) data types for each component.
 Nevertheless, remember that increasing the size of a component also increases the size of the whole
-picture in the memory.
+picture in memory.
 
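To make the storage trade-off concrete, a sketch comparing an 8-bit and a 32-bit float three-channel image (the sizes are illustrative):

```cpp
#include <opencv2/core.hpp>
#include <iostream>

int main()
{
    cv::Mat img8u(480, 640, CV_8UC3);   // 1 byte per component, 3 channels
    cv::Mat img32f(480, 640, CV_32FC3); // 4 bytes per component, 3 channels
    // elemSize() is bytes per pixel: 3 vs. 12, so the float image occupies
    // four times as much memory at the same resolution.
    std::cout << img8u.elemSize() << " vs " << img32f.elemSize() << std::endl;
    return 0;
}
```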
 Creating a Mat object explicitly
 ----------------------------------
 
@@ -112,19 +112,24 @@ public:
         const Mat& Wh = blobs[0];
         const Mat& Wx = blobs[1];
         const Mat& bias = blobs[2];
+        const Mat& hInternal = blobs[3];
+        const Mat& cInternal = blobs[4];
         CV_CheckEQ(Wh.dims, 2, "");
         CV_CheckEQ(Wx.dims, 2, "");
         CV_CheckEQ(Wh.rows, Wx.rows, "");
         CV_CheckEQ(Wh.rows, (1 + static_cast<int>(bidirectional))*4*Wh.cols, "");
         CV_CheckEQ(Wh.rows, (int)bias.total(), "");
+        CV_CheckEQ(hInternal.cols, Wh.cols, "");
+        CV_CheckEQ(hInternal.cols, cInternal.cols, "");
+        CV_CheckEQ(hInternal.rows, cInternal.rows, "");
         CV_Assert(Wh.type() == Wx.type() && Wx.type() == bias.type());
 
         // Peephole weights.
-        if (blobs.size() > 3)
+        if (blobs.size() > 5)
         {
-            CV_Assert(blobs.size() == 6);
+            CV_Assert(blobs.size() == 8);
             const int N = Wh.cols;
-            for (int i = 3; i < 6; ++i)
+            for (int i = 5; i < 8; ++i)
             {
                 CV_Assert(blobs[i].rows == N && blobs[i].cols == N);
                 CV_Assert(blobs[i].type() == bias.type());
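After this patch the layer expects five mandatory blobs -- Wh, Wx, bias, initial hidden state, initial cell state -- with optional peephole weights at indices 5..7. A hedged construction sketch consistent with the checks above (the shapes follow the asserts; the concrete numbers are illustrative and `makeLstm` is our own helper name):

```cpp
#include <opencv2/dnn.hpp>
using namespace cv;
using namespace cv::dnn;

Ptr<LSTMLayer> makeLstm(int numInp, int numOut)
{
    LayerParams lp;
    lp.blobs.resize(5);
    lp.blobs[0] = Mat::zeros(4 * numOut, numOut, CV_32F); // Wh (recurrent weights)
    lp.blobs[1] = Mat::zeros(4 * numOut, numInp, CV_32F); // Wx (input weights)
    lp.blobs[2] = Mat::zeros(4 * numOut, 1, CV_32F);      // bias, 4*numOut total
    lp.blobs[3] = Mat::zeros(1, numOut, CV_32F);          // initial h (hInternal)
    lp.blobs[4] = Mat::zeros(1, numOut, CV_32F);          // initial c (cInternal)
    return LSTMLayer::create(lp);
}
```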
@@ -181,7 +186,7 @@ public:
                              std::vector<MatShape> &outputs,
                              std::vector<MatShape> &internals) const CV_OVERRIDE
     {
-        CV_Assert((!usePeephole && blobs.size() == 3) || (usePeephole && blobs.size() == 6));
+        CV_Assert((!usePeephole && blobs.size() == 5) || (usePeephole && blobs.size() == 8));
         CV_Assert(inputs.size() == 1);
         const MatShape& inp0 = inputs[0];
 
@@ -228,7 +233,7 @@ public:
         std::vector<Mat> input;
         inputs_arr.getMatVector(input);
 
-        CV_Assert((!usePeephole && blobs.size() == 3) || (usePeephole && blobs.size() == 6));
+        CV_Assert((!usePeephole && blobs.size() == 5) || (usePeephole && blobs.size() == 8));
         CV_Assert(input.size() == 1);
         const Mat& inp0 = input[0];
 
@@ -284,13 +289,14 @@ public:
         const Mat &Wh = blobs[0].rowRange(i * blobs[0].rows / numDirs, (i + 1) * blobs[0].rows / numDirs);
         const Mat &Wx = blobs[1].rowRange(i * blobs[1].rows / numDirs, (i + 1) * blobs[1].rows / numDirs);
         const Mat &bias = blobs[2].colRange(i * blobs[2].cols / numDirs, (i + 1) * blobs[2].cols / numDirs);
+        const Mat &h_0 = blobs[3].rowRange(i * blobs[3].rows / numDirs, (i + 1) * blobs[3].rows / numDirs);
+        const Mat &c_0 = blobs[4].rowRange(i * blobs[4].rows / numDirs, (i + 1) * blobs[4].rows / numDirs);
 
         int numOut = Wh.size[1];
 
         Mat hInternal = internals[0], cInternal = internals[1],
                 dummyOnes = internals[2], gates = internals[3];
-        hInternal.setTo(0.);
-        cInternal.setTo(0.);
+        h_0.copyTo(hInternal);
+        c_0.copyTo(cInternal);
         dummyOnes.setTo(1.);
 
         int numSamplesTotal = numTimeStamps*numSamples;
@@ -331,8 +337,8 @@ public:
             if (usePeephole)
             {
                 Mat gatesIF = gates.colRange(0, 2*numOut);
-                gemm(cInternal, blobs[3], 1, gateI, 1, gateI);
-                gemm(cInternal, blobs[4], 1, gateF, 1, gateF);
+                gemm(cInternal, blobs[5], 1, gateI, 1, gateI);
+                gemm(cInternal, blobs[6], 1, gateF, 1, gateF);
                 sigmoid(gatesIF, gatesIF);
             }
             else
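For reference, the gemm calls above implement standard peephole connections; in our notation (a sketch, not lifted from the code):

\f[
\begin{aligned}
i_t &= \sigma(W_{xi} x_t + W_{hi} h_{t-1} + w_{ci} \odot c_{t-1} + b_i) \\
f_t &= \sigma(W_{xf} x_t + W_{hf} h_{t-1} + w_{cf} \odot c_{t-1} + b_f) \\
o_t &= \sigma(W_{xo} x_t + W_{ho} h_{t-1} + w_{co} \odot c_t + b_o)
\end{aligned}
\f]

blobs[5..7] hold \f$w_{ci}, w_{cf}, w_{co}\f$ as diagonal matrices (see the TF importer below), so the matrix products act elementwise; note the output gate peeks at the updated cell state \f$c_t\f$, which is why its gemm runs after the cell update.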
@@ -355,7 +361,7 @@ public:
             }
             if (usePeephole)
             {
-                gemm(cInternal, blobs[5], 1, gateO, 1, gateO);
+                gemm(cInternal, blobs[7], 1, gateO, 1, gateO);
                 sigmoid(gateO, gateO);
             }
 
@@ -1048,8 +1048,9 @@ void ONNXImporter::handleNode(const opencv_onnx::NodeProto& node_proto_)
         Mat Wx = getBlob(node_proto, 1);
         Mat Wh = getBlob(node_proto, 2);
         Mat b = getBlob(node_proto, 3);
-        CV_CheckEQ(countNonZero(getBlob(node_proto, 5)), 0, "Unsupported non zero initial_h");
-        CV_CheckEQ(countNonZero(getBlob(node_proto, 6)), 0, "Unsupported non zero initial_c");
+        Mat h0 = getBlob(node_proto, 5);
+        Mat c0 = getBlob(node_proto, 6);
 
         b = b.reshape(1, b.size[0]);
 
         const int numHidden = lstmParams.get<int>("hidden_size");
@@ -1082,11 +1083,15 @@ void ONNXImporter::handleNode(const opencv_onnx::NodeProto& node_proto_)
         }
         Wx = Wx.reshape(1, Wx.size[0] * Wx.size[1]);
         Wh = Wh.reshape(1, Wh.size[0] * Wh.size[1]);
+        h0 = h0.reshape(1, h0.size[0] * h0.size[1]);
+        c0 = c0.reshape(1, c0.size[0] * c0.size[1]);
 
-        lstmParams.blobs.resize(3);
+        lstmParams.blobs.resize(5);
         lstmParams.blobs[0] = Wh;
         lstmParams.blobs[1] = Wx;
         lstmParams.blobs[2] = b;
+        lstmParams.blobs[3] = h0;
+        lstmParams.blobs[4] = c0;
         lstmParams.set("bidirectional", lstmParams.get<String>("direction", "") == "bidirectional");
 
         node_proto.set_output(0, lstmParams.name); // set different name so output shapes will be registered on that name
@@ -406,12 +406,53 @@ void setKSize(LayerParams &layerParams, const tensorflow::NodeDef &layer)
     }
 }
 
-void setPadding(LayerParams &layerParams, const tensorflow::NodeDef &layer)
+void setPadMode(LayerParams &layerParams, const tensorflow::NodeDef &layer)
 {
     if (hasLayerAttr(layer, "padding"))
         layerParams.set("pad_mode", getLayerAttr(layer, "padding").s());
 }
 
+bool getExplicitPadding(LayerParams &layerParams, const tensorflow::NodeDef &layer, int64_t (&pads)[8])
+{
+    if (!layerParams.has("pad_mode") ||
+        layerParams.get("pad_mode").getStringValue() != "EXPLICIT")
+    {
+        return false;
+    }
+
+    CV_Assert(hasLayerAttr(layer, "explicit_paddings"));
+
+    const tensorflow::AttrValue& protoPads = getLayerAttr(layer, "explicit_paddings");
+    if (protoPads.list().i_size() != 8)
+    {
+        CV_Error(Error::StsNotImplemented, "Unsupported asymmetric padding configuration.");
+    }
+
+    int n = sizeof(pads) / sizeof(pads[0]);
+    for (int i = 0; i < n; ++i)
+    {
+        pads[i] = protoPads.list().i(i);
+    }
+
+    if (getDataLayout(layer) != DATA_LAYOUT_NCHW)
+    {
+        CV_LOG_DEBUG(NULL, "DNN/TF: Data format " << getLayerAttr(layer, "data_format").s() << ", assuming NHWC.");
+        // Perhaps, we have NHWC padding dimensions order.
+        //  N    H    W    C
+        // 0 1  2 3  4 5  6 7
+        std::swap(pads[2], pads[6]);
+        std::swap(pads[3], pads[7]);
+        //  N    C    W    H
+        // 0 1  2 3  4 5  6 7
+        std::swap(pads[4], pads[6]);
+        std::swap(pads[5], pads[7]);
+        //  N    C    H    W
+        // 0 1  2 3  4 5  6 7
+    }
+
+    return true;
+}
+
 Pin parsePin(const std::string &name)
 {
     Pin pin(name);
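To see what the two swap pairs accomplish, a standalone trace with hypothetical pad values (NHWC in, NCHW out):

```cpp
#include <cstdint>
#include <cstdio>
#include <utility>

int main()
{
    // Hypothetical explicit_paddings in NHWC order,
    // as (before, after) pairs: N:0,0  H:1,2  W:3,4  C:0,0
    int64_t pads[8] = {0, 0, 1, 2, 3, 4, 0, 0};
    std::swap(pads[2], pads[6]); // N H W C -> N C W H
    std::swap(pads[3], pads[7]);
    std::swap(pads[4], pads[6]); // N C W H -> N C H W
    std::swap(pads[5], pads[7]);
    for (int i = 0; i < 8; ++i)
        std::printf("%lld ", (long long)pads[i]); // prints 0 0 0 0 1 2 3 4
    return 0;
}
```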
@@ -516,6 +557,7 @@ protected:
 
 private:
     void addPermuteLayer(const int* order, const std::string& permName, Pin& inpId);
+    void setPadding(LayerParams &layerParams, const tensorflow::NodeDef &layer, std::string& inputName, float value = 0.);
 
     friend class LayerHandler;
     typedef void (TFImporter::*TFImporterNodeParser)(tensorflow::GraphDef&, const tensorflow::NodeDef&, LayerParams&);
@@ -558,6 +600,31 @@ private:
     void parseCustomLayer (tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams);
 };
 
+void TFImporter::setPadding(LayerParams &layerParams, const tensorflow::NodeDef &layer, std::string& inputName, float value)
+{
+    setPadMode(layerParams, layer);
+    int64_t pads[8];
+
+    if (!getExplicitPadding(layerParams, layer, pads))
+    {
+        return;
+    }
+
+    LayerParams padLp;
+    padLp.name = layer.name() + "/pad";
+    padLp.type = "Padding";
+    padLp.set("paddings", DictValue::arrayInt(pads, sizeof(pads) / sizeof(pads[0])));
+    padLp.set("value", value);
+
+    int id = dstNet.addLayer(padLp.name, padLp.type, padLp);
+    layer_id[padLp.name] = id;
+
+    connect(layer_id, dstNet, parsePin(inputName), id, 0);
+    inputName = padLp.name;
+
+    layerParams.set("pad_mode", "VALID");
+}
+
 class LayerHandler
 {
 public:
@@ -808,7 +875,7 @@ void TFImporter::parseConvolution(tensorflow::GraphDef& net, const tensorflow::N
 
     setStrides(layerParams, layer);
     if (!layerParams.has("pad_w") && !layerParams.has("pad_h"))
-        setPadding(layerParams, layer);
+        setPadding(layerParams, layer, input);
 
     // The final node of dilated convolution subgraph.
     next_layers = getNextLayers(net, name, "BatchToSpaceND");
@@ -1253,20 +1320,21 @@ void TFImporter::parseMaxPool(tensorflow::GraphDef& net, const tensorflow::NodeD
 {
     const std::string& name = layer.name();
     const int num_inputs = layer.input_size();
+    std::string inputName = layer.input(0);
 
     CV_CheckGT(num_inputs, 0, "");
     layerParams.set("pool", "max");
 
     setKSize(layerParams, layer);
     setStrides(layerParams, layer);
-    setPadding(layerParams, layer);
+    setPadding(layerParams, layer, inputName, -std::numeric_limits<float>::infinity());
     // Test_TensorFlow_nets.EAST_text_detection/1, NGRAPH/CPU
     layerParams.set("ceil_mode", false);
 
     int id = dstNet.addLayer(name, "Pooling", layerParams);
     layer_id[name] = id;
 
-    connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, num_inputs);
+    connectToAllBlobs(layer_id, dstNet, parsePin(inputName), id, num_inputs);
 }
 
 void TFImporter::parseAvgPool(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams)
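The -infinity padding value used for max pooling above is deliberate: padded cells must never win the max, whereas the default value of 0 could. A tiny illustration with made-up numbers:

```cpp
#include <algorithm>
#include <cstdio>
#include <limits>

int main()
{
    const float negInf = -std::numeric_limits<float>::infinity();
    // A border window of a padded row: {pad, 5, 2}.
    // With pad = -inf the pooled value is the true maximum, 5;
    // zero padding would be wrong whenever all real values are negative.
    float window[3] = {negInf, 5.f, 2.f};
    std::printf("%f\n", *std::max_element(window, window + 3));
    return 0;
}
```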
@@ -1279,7 +1347,7 @@ void TFImporter::parseAvgPool(tensorflow::GraphDef& net, const tensorflow::NodeD
     layerParams.set("ave_pool_padded_area", false);
     setKSize(layerParams, layer);
     setStrides(layerParams, layer);
-    setPadding(layerParams, layer);
+    setPadMode(layerParams, layer);
 
     int id = dstNet.addLayer(name, "Pooling", layerParams);
     layer_id[name] = id;
@@ -1694,7 +1762,7 @@ void TFImporter::parseConv2DBackpropInput(tensorflow::GraphDef& net, const tenso
     // input: "weights"
     // input: "input"
 
-    const std::string& name = layer.name();
+    std::string name = layer.name();
     const int num_inputs = layer.input_size();
 
     CV_CheckEQ(num_inputs, 3, "Expected output shape, weights and input nodes");
@@ -1725,7 +1793,21 @@ void TFImporter::parseConv2DBackpropInput(tensorflow::GraphDef& net, const tenso
     layerParams.set("num_output", kshape[1]);
 
     setStrides(layerParams, layer);
-    setPadding(layerParams, layer);
+    setPadMode(layerParams, layer);
+    int64_t pads[8];
+    bool explicit_pads = getExplicitPadding(layerParams, layer, pads);
+    int64_t begs[4] = {};
+    int64_t ends[4] = {-1, -1, -1, -1};
+    if (explicit_pads)
+    {
+        name += "/deconv";
+        layerParams.set("pad_mode", "VALID");
+        for (int i = 2; i < 4; ++i) // begins=[0, 0, a, b], ends=[-1, -1, c, d]
+        {
+            begs[i] = pads[2*i];
+            ends[i] = -1 - pads[2*i + 1];
+        }
+    }
 
     // For convolution layer, output shape computes as
     // o = 1 + (i - k + 2*p) / s
@@ -1742,8 +1824,9 @@ void TFImporter::parseConv2DBackpropInput(tensorflow::GraphDef& net, const tenso
     const int strideY = layerParams.get<int>("stride_h");
     const int strideX = layerParams.get<int>("stride_w");
     Mat outShape = getTensorContent(getConstBlob(layer, value_id, 0));
-    const int outH = outShape.at<int>(1);
-    const int outW = outShape.at<int>(2);
+    int shift = (getDataLayout(layer) == DATA_LAYOUT_NCHW);
+    const int outH = outShape.at<int>(1 + shift) + begs[2] - 1 - ends[2];
+    const int outW = outShape.at<int>(2 + shift) + begs[3] - 1 - ends[3];
     if (layerParams.get<String>("pad_mode") == "SAME")
     {
         layerParams.set("adj_w", (outW - 1) % strideX);
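The shape arithmetic here inverts the convolution relation quoted in the code comment; in our notation (a sketch):

\f[
o = 1 + \frac{i - k + 2p}{s}
\qquad\Longrightarrow\qquad
i = s\,(o - 1) + k - 2p + \mathrm{adj}, \quad 0 \le \mathrm{adj} < s
\f]

Because the integer division discards a remainder, several input widths map to the same convolution output; `adj_w = (outW - 1) % strideX` (and its height counterpart) restores the lost remainder so the transposed convolution reproduces the requested output size.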
@@ -1759,6 +1842,16 @@ void TFImporter::parseConv2DBackpropInput(tensorflow::GraphDef& net, const tenso
 
     // one input only
     connect(layer_id, dstNet, parsePin(layer.input(2)), id, 0);
+    if (explicit_pads) // If we have explicit paddings, remove extra data
+    {
+        layerParams.set("begin", DictValue::arrayInt(begs, sizeof(begs) / sizeof(begs[0])));
+        layerParams.set("end", DictValue::arrayInt(ends, sizeof(ends) / sizeof(ends[0])));
+
+        int id = dstNet.addLayer(layer.name(), "Slice", layerParams);
+        layer_id[layer.name()] = id;
+
+        connect(layer_id, dstNet, parsePin(name), id, 0);
+    }
 }
 
 void TFImporter::parseBlockLSTM(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams)
@@ -1766,8 +1859,8 @@ void TFImporter::parseBlockLSTM(tensorflow::GraphDef& net, const tensorflow::Nod
     // op: "BlockLSTM"
     // input: "lstm_block_wrapper/ToInt64/x" (ignore, number of time stamps)
     // input: "input"
-    // input: "lstm_block_wrapper/zeros" (ignore)
-    // input: "lstm_block_wrapper/zeros" (ignore)
+    // input: "lstm_block_wrapper/zeros"
+    // input: "lstm_block_wrapper/zeros"
     // input: "lstm_block_wrapper/kernel"
     // input: "lstm_block_wrapper/w_i_diag"
     // input: "lstm_block_wrapper/w_f_diag"
@@ -1793,9 +1886,11 @@ void TFImporter::parseBlockLSTM(tensorflow::GraphDef& net, const tensorflow::Nod
         }
     }
 
-    Mat W, Wh, Wx, b;
+    Mat W, Wh, Wx, b, cs_prev, h_prev;
     blobFromTensor(getConstBlob(layer, value_id, 4), W);
     blobFromTensor(getConstBlob(layer, value_id, 8), b);
+    blobFromTensor(getConstBlob(layer, value_id, 2), cs_prev);
+    blobFromTensor(getConstBlob(layer, value_id, 3), h_prev);
     const int outSize = W.cols / 4;
 
     // IGFO->IFOG
@@ -1811,10 +1906,12 @@ void TFImporter::parseBlockLSTM(tensorflow::GraphDef& net, const tensorflow::Nod
     Wx = W.rowRange(0, W.rows - outSize).t();
     Wh = W.rowRange(W.rows - outSize, W.rows).t();
 
-    layerParams.blobs.resize(3);
+    layerParams.blobs.resize(5);
     layerParams.blobs[0] = Wh;
     layerParams.blobs[1] = Wx;
     layerParams.blobs[2] = b;
+    layerParams.blobs[3] = h_prev;
+    layerParams.blobs[4] = cs_prev;
 
     if (hasLayerAttr(layer, "use_peephole"))
     {
@@ -1822,14 +1919,14 @@ void TFImporter::parseBlockLSTM(tensorflow::GraphDef& net, const tensorflow::Nod
         if (usePeephole)
         {
             layerParams.set("use_peephole", true);
-            layerParams.blobs.resize(6);
+            layerParams.blobs.resize(8);
             for (int i = 0; i < 3; ++i)
             {
                 Mat w;
                 blobFromTensor(getConstBlob(layer, value_id, 5 + i), w);
                 w = w.reshape(1, w.total()); // Single column.
                 w = Mat::diag(w); // Make a diagonal matrix.
-                layerParams.blobs[3 + i] = w;
+                layerParams.blobs[5 + i] = w;
             }
         }
     }
@@ -445,7 +445,7 @@ class Layer_LSTM_Test : public ::testing::Test
 {
 public:
     int numInp, numOut;
-    Mat Wh, Wx, b;
+    Mat Wh, Wx, b, h, c;
     Ptr<LSTMLayer> layer;
     std::vector<Mat> inputs, outputs;
 
@@ -460,12 +460,17 @@ public:
         Wh = Mat::ones(4 * numOut, numOut, CV_32F);
         Wx = Mat::ones(4 * numOut, numInp, CV_32F);
         b = Mat::ones(4 * numOut, 1, CV_32F);
+        h = Mat::ones(4, numOut, CV_32F);
+        c = Mat::ones(4, numOut, CV_32F);
 
         LayerParams lp;
-        lp.blobs.resize(3);
+        lp.blobs.resize(5);
         lp.blobs[0] = Wh;
         lp.blobs[1] = Wx;
         lp.blobs[2] = b;
+        lp.blobs[3] = h;
+        lp.blobs[4] = c;
 
         lp.set<bool>("produce_cell_output", produceCellOutput);
         lp.set<bool>("use_timestamp_dim", useTimestampDim);
 
@@ -513,10 +518,12 @@ TEST_F(Layer_LSTM_Test, get_set_test)
 TEST(Layer_LSTM_Test_Accuracy_with_, CaffeRecurrent)
 {
     LayerParams lp;
-    lp.blobs.resize(3);
+    lp.blobs.resize(5);
     lp.blobs[0] = blobFromNPY(_tf("lstm.prototxt.w_2.npy")); // Wh
     lp.blobs[1] = blobFromNPY(_tf("lstm.prototxt.w_0.npy")); // Wx
     lp.blobs[2] = blobFromNPY(_tf("lstm.prototxt.w_1.npy")); // bias
+    lp.blobs[3] = Mat::zeros(2, 17, CV_32F);                 // h_0
+    lp.blobs[4] = Mat::zeros(2, 17, CV_32F);                 // c_0
     Ptr<LSTMLayer> layer = LSTMLayer::create(lp);
 
     Mat inp = blobFromNPY(_tf("recurrent.input.npy"));
@@ -527,6 +534,68 @@ TEST(Layer_LSTM_Test_Accuracy_with_, CaffeRecurrent)
     normAssert(h_t_reference, outputs[0]);
 }
 
+TEST(Layer_LSTM_Test_Accuracy_with_, HiddenParams)
+{
+    Mat Wx = blobFromNPY(_tf("lstm.hidden.W.npy"));
+    Mat Wh = blobFromNPY(_tf("lstm.hidden.R.npy"));
+    Mat b = blobFromNPY(_tf("lstm.hidden.B.npy"));
+    Mat h0 = blobFromNPY(_tf("lstm.hidden.h0.npy"));
+    Mat c0 = blobFromNPY(_tf("lstm.hidden.c0.npy"));
+
+    const int numHidden = 3;
+    const int numDirs = Wx.size[0];
+    const int numFeatures = Wx.size[2];
+
+    b = b.reshape(1, b.size[0]);
+    Mat bx = b.colRange(0, b.cols / 2);
+    Mat bh = b.colRange(b.cols / 2, b.cols);
+    b = bx + bh;
+
+    // IFGO->IGFO
+    for (int k = 0; k < numDirs; ++k)
+    {
+        float* WxData = Wx.ptr<float>(k);
+        float* WhData = Wh.ptr<float>(k);
+        float* biasData = b.ptr<float>(k);
+        for (int j = 0; j < numHidden; ++j)
+        {
+            for (int i = 0; i < numFeatures; ++i)
+            {
+                std::swap(WxData[(numHidden + j) * numFeatures + i],
+                          WxData[(numHidden * 2 + j) * numFeatures + i]);
+            }
+            for (int i = 0; i < numHidden; ++i)
+            {
+                std::swap(WhData[(numHidden + j) * numHidden + i],
+                          WhData[(numHidden * 2 + j) * numHidden + i]);
+            }
+            std::swap(biasData[numHidden + j], biasData[numHidden * 2 + j]);
+        }
+    }
+
+    Wx = Wx.reshape(1, Wx.size[0] * Wx.size[1]);
+    Wh = Wh.reshape(1, Wh.size[0] * Wh.size[1]);
+    h0 = h0.reshape(1, h0.size[0] * h0.size[1]);
+    c0 = c0.reshape(1, c0.size[0] * c0.size[1]);
+
+    LayerParams lstmParams;
+    lstmParams.blobs.resize(5);
+    lstmParams.blobs[0] = Wh;
+    lstmParams.blobs[1] = Wx;
+    lstmParams.blobs[2] = b;
+    lstmParams.blobs[3] = h0;
+    lstmParams.blobs[4] = c0;
+    lstmParams.set("bidirectional", false);
+    Ptr<LSTMLayer> layer = LSTMLayer::create(lstmParams);
+
+    Mat inp = blobFromNPY(_tf("lstm.hidden.input.npy"));
+    std::vector<Mat> inputs(1, inp), outputs;
+    runLayer(layer, inputs, outputs);
+
+    Mat h_t_reference = blobFromNPY(_tf("lstm.hidden.output.npy"));
+    normAssert(h_t_reference, outputs[0]);
+}
+
 TEST(Layer_RNN_Test_Accuracy_with_, CaffeRecurrent)
 {
     Ptr<RNNLayer> layer = RNNLayer::create(LayerParams());
@@ -571,6 +640,9 @@ TEST(Layer_LSTM_Test_Accuracy_, Reverse)
     bias.at<float>(2, 0) = 1e10f; // Output gate - always output everything
     bias.at<float>(3, 0) = 0.f;   // Update signal
 
+    cv::Mat hInternal = cv::Mat::zeros(1, 1, CV_32FC1);
+    cv::Mat cInternal = cv::Mat::zeros(1, 1, CV_32FC1);
+
     LayerParams lp;
     lp.set("reverse", true);
     lp.set("use_timestamp_dim", true);
@@ -578,6 +650,8 @@ TEST(Layer_LSTM_Test_Accuracy_, Reverse)
     lp.blobs.push_back(Wh);
     lp.blobs.push_back(Wx);
     lp.blobs.push_back(bias);
+    lp.blobs.push_back(hInternal);
+    lp.blobs.push_back(cInternal);
 
     cv::Ptr<cv::dnn::LSTMLayer> layer = LSTMLayer::create(lp);
     std::vector<cv::Mat> outputs;
@@ -710,6 +710,16 @@ TEST_P(Test_ONNX_layers, LSTM_bidirectional)
     testONNXModels("lstm_bidirectional", npy, 0, 0, false, false);
 }
 
+TEST_P(Test_ONNX_layers, LSTM_hidden)
+{
+    testONNXModels("hidden_lstm", npy, 0, 0, false, false);
+}
+
+TEST_P(Test_ONNX_layers, LSTM_hidden_bidirectional)
+{
+    testONNXModels("hidden_lstm_bi", npy, 0, 0, false, false);
+}
+
 TEST_P(Test_ONNX_layers, Pad2d_Unfused)
 {
     testONNXModels("ReflectionPad2d");
@@ -203,6 +203,16 @@ TEST_P(Test_TensorFlow_layers, padding)
     runTensorFlowNet("keras_pad_concat");
 }
 
+TEST_P(Test_TensorFlow_layers, padding_asymmetric)
+{
+    runTensorFlowNet("conv2d_asymmetric_pads_nchw");
+    runTensorFlowNet("conv2d_asymmetric_pads_nhwc");
+    runTensorFlowNet("max_pool2d_asymmetric_pads_nchw");
+    runTensorFlowNet("max_pool2d_asymmetric_pads_nhwc");
+    runTensorFlowNet("conv2d_backprop_input_asymmetric_pads_nchw");
+    runTensorFlowNet("conv2d_backprop_input_asymmetric_pads_nhwc");
+}
+
 TEST_P(Test_TensorFlow_layers, padding_same)
 {
     // Reference output values are in range [0.0006, 2.798]
@@ -95,7 +95,7 @@ def make_umd(opencvjs, cvjs):
             root.cv = factory();
         } else if (typeof importScripts === 'function') {
             // Web worker
-            root.cv = factory;
+            root.cv = factory();
         } else {
             // Other shells, e.g. d8
             root.cv = factory();