mirror of
https://github.com/opencv/opencv.git
synced 2025-06-12 20:42:53 +08:00
Dilated convolution import from TensorFlow
This commit is contained in:
parent
9640bbe76d
commit
58b890b9f7
@ -81,7 +81,7 @@ public:
|
|||||||
|
|
||||||
Size outSize = Size(outputs[0].size[3], outputs[0].size[2]);
|
Size outSize = Size(outputs[0].size[3], outputs[0].size[2]);
|
||||||
getConvPoolPaddings(Size(input.size[3], input.size[2]), outSize,
|
getConvPoolPaddings(Size(input.size[3], input.size[2]), outSize,
|
||||||
kernel, stride, padMode, pad);
|
kernel, stride, padMode, dilation, pad);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool hasBias() const
|
bool hasBias() const
|
||||||
@ -183,7 +183,7 @@ public:
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
getConvPoolOutParams(Size(inpW, inpH), kernel, stride, padMode, out);
|
getConvPoolOutParams(Size(inpW, inpH), kernel, stride, padMode, dilation, out);
|
||||||
}
|
}
|
||||||
|
|
||||||
int ngroups = inpCn / blobs[0].size[1];
|
int ngroups = inpCn / blobs[0].size[1];
|
||||||
|
@ -167,12 +167,12 @@ void getConvolutionKernelParams(const LayerParams &params, int &kernelH, int &ke
|
|||||||
// we pad more on the right and bottom than on the top and left.
|
// we pad more on the right and bottom than on the top and left.
|
||||||
void getConvPoolOutParams(const Size& inp, const Size &kernel,
|
void getConvPoolOutParams(const Size& inp, const Size &kernel,
|
||||||
const Size &stride, const String &padMode,
|
const Size &stride, const String &padMode,
|
||||||
Size& out)
|
const Size &dilation, Size& out)
|
||||||
{
|
{
|
||||||
if (padMode == "VALID")
|
if (padMode == "VALID")
|
||||||
{
|
{
|
||||||
out.height = (inp.height - kernel.height + stride.height) / stride.height;
|
out.height = (inp.height - (dilation.height * (kernel.height - 1) + 1) + stride.height) / stride.height;
|
||||||
out.width = (inp.width- kernel.width + stride.width) / stride.width;
|
out.width = (inp.width - (dilation.width * (kernel.width - 1) + 1) + stride.width) / stride.width;
|
||||||
}
|
}
|
||||||
else if (padMode == "SAME")
|
else if (padMode == "SAME")
|
||||||
{
|
{
|
||||||
@ -187,7 +187,7 @@ void getConvPoolOutParams(const Size& inp, const Size &kernel,
|
|||||||
|
|
||||||
void getConvPoolPaddings(const Size& inp, const Size& out,
|
void getConvPoolPaddings(const Size& inp, const Size& out,
|
||||||
const Size &kernel, const Size &stride,
|
const Size &kernel, const Size &stride,
|
||||||
const String &padMode, Size &pad)
|
const String &padMode, const Size &dilation, Size &pad)
|
||||||
{
|
{
|
||||||
if (padMode == "VALID")
|
if (padMode == "VALID")
|
||||||
{
|
{
|
||||||
@ -195,8 +195,8 @@ void getConvPoolPaddings(const Size& inp, const Size& out,
|
|||||||
}
|
}
|
||||||
else if (padMode == "SAME")
|
else if (padMode == "SAME")
|
||||||
{
|
{
|
||||||
int Ph = std::max(0, (out.height - 1) * stride.height + kernel.height - inp.height);
|
int Ph = std::max(0, (out.height - 1) * stride.height + (dilation.height * (kernel.height - 1) + 1) - inp.height);
|
||||||
int Pw = std::max(0, (out.width - 1) * stride.width + kernel.width - inp.width);
|
int Pw = std::max(0, (out.width - 1) * stride.width + (dilation.width * (kernel.width - 1) + 1) - inp.width);
|
||||||
// For odd values of total padding, add more padding at the 'right'
|
// For odd values of total padding, add more padding at the 'right'
|
||||||
// side of the given dimension.
|
// side of the given dimension.
|
||||||
pad = cv::Size(Pw / 2, Ph / 2);
|
pad = cv::Size(Pw / 2, Ph / 2);
|
||||||
|
@ -64,11 +64,11 @@ void getPoolingKernelParams(const LayerParams &params, int &kernelH, int &kernel
|
|||||||
|
|
||||||
void getConvPoolOutParams(const Size& inp, const Size &kernel,
|
void getConvPoolOutParams(const Size& inp, const Size &kernel,
|
||||||
const Size &stride, const String &padMode,
|
const Size &stride, const String &padMode,
|
||||||
Size& out);
|
const Size &dilation, Size& out);
|
||||||
|
|
||||||
void getConvPoolPaddings(const Size& inp, const Size& out,
|
void getConvPoolPaddings(const Size& inp, const Size& out,
|
||||||
const Size &kernel, const Size &stride,
|
const Size &kernel, const Size &stride,
|
||||||
const String &padMode, Size &pad);
|
const String &padMode, const Size &dilation, Size &pad);
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -93,7 +93,7 @@ public:
|
|||||||
kernel = inp;
|
kernel = inp;
|
||||||
}
|
}
|
||||||
|
|
||||||
getConvPoolPaddings(inp, out, kernel, stride, padMode, pad);
|
getConvPoolPaddings(inp, out, kernel, stride, padMode, Size(1, 1), pad);
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual bool supportBackend(int backendId)
|
virtual bool supportBackend(int backendId)
|
||||||
@ -592,8 +592,7 @@ public:
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
getConvPoolOutParams(in, kernel, stride,
|
getConvPoolOutParams(in, kernel, stride, padMode, Size(1, 1), out);
|
||||||
padMode, out);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
outputs.resize(type == MAX ? 2 * inputs.size() : inputs.size());
|
outputs.resize(type == MAX ? 2 * inputs.size() : inputs.size());
|
||||||
|
@ -88,6 +88,8 @@ static Mat getTensorContent(const tensorflow::TensorProto &tensor)
|
|||||||
return Mat(1, content.size() / sizeof(float), CV_32FC1, (void*)content.c_str()).clone();
|
return Mat(1, content.size() / sizeof(float), CV_32FC1, (void*)content.c_str()).clone();
|
||||||
case tensorflow::DT_DOUBLE:
|
case tensorflow::DT_DOUBLE:
|
||||||
return Mat(1, content.size() / sizeof(double), CV_64FC1, (void*)content.c_str()).clone();
|
return Mat(1, content.size() / sizeof(double), CV_64FC1, (void*)content.c_str()).clone();
|
||||||
|
case tensorflow::DT_INT32:
|
||||||
|
return Mat(1, content.size() / sizeof(int32_t), CV_32SC1, (void*)content.c_str()).clone();
|
||||||
case tensorflow::DT_HALF:
|
case tensorflow::DT_HALF:
|
||||||
{
|
{
|
||||||
Mat halfs;
|
Mat halfs;
|
||||||
@ -563,7 +565,7 @@ void TFImporter::populateNet(Net dstNet)
|
|||||||
|
|
||||||
for (int li = 0; li < layersSize; li++)
|
for (int li = 0; li < layersSize; li++)
|
||||||
{
|
{
|
||||||
const tensorflow::NodeDef &layer = net.node(li);
|
tensorflow::NodeDef layer = net.node(li);
|
||||||
String name = layer.name();
|
String name = layer.name();
|
||||||
String type = layer.op();
|
String type = layer.op();
|
||||||
LayerParams layerParams;
|
LayerParams layerParams;
|
||||||
@ -571,8 +573,38 @@ void TFImporter::populateNet(Net dstNet)
|
|||||||
if(layers_to_ignore.find(li) != layers_to_ignore.end())
|
if(layers_to_ignore.find(li) != layers_to_ignore.end())
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
if (type == "Conv2D")
|
if (type == "Conv2D" || type == "SpaceToBatchND")
|
||||||
{
|
{
|
||||||
|
// The first node of dilated convolution subgraph.
|
||||||
|
// Extract input node, dilation rate and paddings.
|
||||||
|
std::string input = layer.input(0);
|
||||||
|
if (type == "SpaceToBatchND")
|
||||||
|
{
|
||||||
|
// op: "SpaceToBatchND"
|
||||||
|
// input: "input"
|
||||||
|
// input: "SpaceToBatchND/block_shape"
|
||||||
|
// input: "SpaceToBatchND/paddings"
|
||||||
|
CV_Assert(layer.input_size() == 3);
|
||||||
|
|
||||||
|
DictValue dilation = parseDims(getConstBlob(layer, value_id, 1));
|
||||||
|
CV_Assert(dilation.size() == 2 && dilation.get<int>(0) == dilation.get<int>(1));
|
||||||
|
layerParams.set("dilation", dilation.get<int>(0));
|
||||||
|
|
||||||
|
Mat paddings;
|
||||||
|
parseTensor<int>(getConstBlob(layer, value_id, 2), paddings);
|
||||||
|
|
||||||
|
// paddings is a 2x2 matrix: [[top, bot], [left, right]]
|
||||||
|
layerParams.set("pad_h", paddings.at<float>(0));
|
||||||
|
layerParams.set("pad_w", paddings.at<float>(2));
|
||||||
|
|
||||||
|
StrIntVector next_layers = getNextLayers(net, name, "Conv2D");
|
||||||
|
CV_Assert(next_layers.size() == 1);
|
||||||
|
layer = net.node(next_layers[0].second);
|
||||||
|
layers_to_ignore[next_layers[0].second] = next_layers[0].first;
|
||||||
|
name = layer.name();
|
||||||
|
type = layer.op();
|
||||||
|
}
|
||||||
|
|
||||||
layerParams.set("bias_term", false);
|
layerParams.set("bias_term", false);
|
||||||
layerParams.blobs.resize(1);
|
layerParams.blobs.resize(1);
|
||||||
|
|
||||||
@ -597,11 +629,21 @@ void TFImporter::populateNet(Net dstNet)
|
|||||||
setStrides(layerParams, layer);
|
setStrides(layerParams, layer);
|
||||||
setPadding(layerParams, layer);
|
setPadding(layerParams, layer);
|
||||||
|
|
||||||
|
// The final node of dilated convolution subgraph.
|
||||||
|
next_layers = getNextLayers(net, name, "BatchToSpaceND");
|
||||||
|
if (!next_layers.empty())
|
||||||
|
{
|
||||||
|
layerParams.set("pad_mode", ""); // We use padding values.
|
||||||
|
CV_Assert(next_layers.size() == 1);
|
||||||
|
ExcludeLayer(net, next_layers[0].second, 0, false);
|
||||||
|
layers_to_ignore[next_layers[0].second] = next_layers[0].first;
|
||||||
|
}
|
||||||
|
|
||||||
int id = dstNet.addLayer(name, "Convolution", layerParams);
|
int id = dstNet.addLayer(name, "Convolution", layerParams);
|
||||||
layer_id[name] = id;
|
layer_id[name] = id;
|
||||||
|
|
||||||
// one input only
|
// one input only
|
||||||
connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
|
connect(layer_id, dstNet, parsePin(input), id, 0);
|
||||||
}
|
}
|
||||||
else if (type == "BiasAdd" || type == "Add")
|
else if (type == "BiasAdd" || type == "Add")
|
||||||
{
|
{
|
||||||
|
@ -96,6 +96,8 @@ static void runTensorFlowNet(const std::string& prefix,
|
|||||||
TEST(Test_TensorFlow, single_conv)
|
TEST(Test_TensorFlow, single_conv)
|
||||||
{
|
{
|
||||||
runTensorFlowNet("single_conv");
|
runTensorFlowNet("single_conv");
|
||||||
|
runTensorFlowNet("atrous_conv2d_valid");
|
||||||
|
runTensorFlowNet("atrous_conv2d_same");
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(Test_TensorFlow, padding)
|
TEST(Test_TensorFlow, padding)
|
||||||
|
Loading…
Reference in New Issue
Block a user