mirror of
https://github.com/opencv/opencv.git
synced 2025-06-07 17:44:04 +08:00
Layers for fast-neural-style models: https://github.com/jcjohnson/fast-neural-style
This commit is contained in:
parent
60cbc46da1
commit
4b52b8df34
@ -377,6 +377,7 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
|
||||
* starting from the first one. The rest of dimensions won't
|
||||
* be padded.
|
||||
* @param value Value to be padded. Defaults to zero.
|
||||
* @param type Padding type: 'constant', 'reflect'
|
||||
* @param input_dims Torch's parameter. If @p input_dims is not equal to the
|
||||
* actual input dimensionality then the `[0]th` dimension
|
||||
* is considered as a batch dimension and @p paddings are shifted
|
||||
|
@ -112,16 +112,12 @@ static inline Mat slice(const Mat &m, const _Range &r0, const _Range &r1, const
|
||||
/**
 * Returns a Mat header over the single plane of @p m selected by the first two indices.
 *
 * @param m  Source blob; must have more than two dimensions.
 * @param n  Index along dimension 0 of @p m.
 * @param cn Index along dimension 1 of @p m.
 * @return   A Mat with `m.dims - 2` dimensions whose sizes are the trailing sizes of @p m
 *           and whose data pointer is the address of element (n, cn) inside @p m.
 *           The header is built over the existing buffer, so writes through it modify @p m.
 */
static inline Mat getPlane(const Mat &m, int n, int cn)
{
    CV_Assert(m.dims > 2);

    // Sizes of the returned plane: every dimension of m after the first two.
    int sz[CV_MAX_DIM];
    for(int i = 2; i < m.dims; i++)
    {
        sz[i-2] = m.size.p[i];
    }

    // Wrap the memory that starts at (n, cn). No explicit steps are passed to the
    // constructor. NOTE(review): this presumably requires the trailing dimensions
    // of m to be stored contiguously - confirm for non-continuous inputs.
    return Mat(m.dims - 2, sz, m.type(), (void*)m.ptr<float>(n, cn));
}
|
||||
|
||||
static inline MatShape shape(const int* dims, const int n = 4)
|
||||
@ -191,6 +187,14 @@ inline int clamp(int ax, const MatShape& shape)
|
||||
return clamp(ax, (int)shape.size());
|
||||
}
|
||||
|
||||
/**
 * Converts a possibly open-ended or negative range into explicit bounds for an axis.
 *
 * The start is raised to zero when it is negative. A positive end is limited to
 * @p axisSize; a non-positive end is counted from the back of the axis, so that
 * -1 maps to @p axisSize itself.
 *
 * @param r        Requested range.
 * @param axisSize Number of elements along the axis.
 * @return         Range with `start < end` and `end <= axisSize`; the assertion fails otherwise.
 */
inline Range clamp(const Range& r, int axisSize)
{
    const int first = std::max(r.start, 0);

    int last;
    if (r.end > 0)
        last = std::min(r.end, axisSize);
    else
        last = axisSize + r.end + 1;  // Reverse indexation: -1 is the end of the axis.

    Range clamped(first, last);
    CV_Assert(clamped.start < clamped.end, clamped.end <= axisSize);
    return clamped;
}
|
||||
|
||||
CV__DNN_EXPERIMENTAL_NS_END
|
||||
}
|
||||
}
|
||||
|
@ -10,6 +10,7 @@ Implementation of padding layer, which adds paddings to input blob.
|
||||
*/
|
||||
|
||||
#include "../precomp.hpp"
|
||||
#include "layers_common.hpp"
|
||||
#include "op_halide.hpp"
|
||||
#include <vector>
|
||||
|
||||
@ -26,6 +27,7 @@ public:
|
||||
setParamsFrom(params);
|
||||
paddingValue = params.get<float>("value", 0);
|
||||
inputDims = params.get<int>("input_dims", -1);
|
||||
paddingType = params.get<String>("type", "constant");
|
||||
|
||||
CV_Assert(params.has("paddings"));
|
||||
const DictValue& paddingsParam = params.get("paddings");
|
||||
@ -94,9 +96,46 @@ public:
|
||||
CV_TRACE_FUNCTION();
|
||||
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||
|
||||
if (paddingType == "constant")
|
||||
{
|
||||
outputs[0].setTo(paddingValue);
|
||||
inputs[0]->copyTo(outputs[0](dstRanges));
|
||||
}
|
||||
else if (paddingType == "reflect")
|
||||
{
|
||||
CV_Assert(inputs.size() == 1);
|
||||
CV_Assert(outputs.size() == 1);
|
||||
CV_Assert(inputs[0]->dims == 4);
|
||||
CV_Assert(outputs[0].dims == 4);
|
||||
|
||||
if (inputs[0]->size[0] != outputs[0].size[0] || inputs[0]->size[1] != outputs[0].size[1])
|
||||
CV_Error(Error::StsNotImplemented, "Only spatial reflection padding is supported.");
|
||||
|
||||
const int inpHeight = inputs[0]->size[2];
|
||||
const int inpWidth = inputs[0]->size[3];
|
||||
const int outHeight = outputs[0].size[2];
|
||||
const int outWidth = outputs[0].size[3];
|
||||
const int padTop = dstRanges[2].start;
|
||||
const int padBottom = outHeight - dstRanges[2].end;
|
||||
const int padLeft = dstRanges[3].start;
|
||||
const int padRight = outWidth - dstRanges[3].end;
|
||||
CV_Assert(padTop < inpHeight, padBottom < inpHeight,
|
||||
padLeft < inpWidth, padRight < inpWidth);
|
||||
|
||||
for (size_t n = 0; n < inputs[0]->size[0]; ++n)
|
||||
{
|
||||
for (size_t ch = 0; ch < inputs[0]->size[1]; ++ch)
|
||||
{
|
||||
copyMakeBorder(getPlane(*inputs[0], n, ch),
|
||||
getPlane(outputs[0], n, ch),
|
||||
padTop, padBottom, padLeft, padRight,
|
||||
BORDER_REFLECT_101);
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
CV_Error(Error::StsNotImplemented, "Unknown padding type: " + paddingType);
|
||||
}
|
||||
|
||||
virtual Ptr<BackendNode> initHalide(const std::vector<Ptr<BackendWrapper> > &inputs)
|
||||
{
|
||||
@ -124,6 +163,7 @@ private:
|
||||
std::vector<Range> dstRanges;
|
||||
int inputDims;
|
||||
float paddingValue;
|
||||
std::string paddingType;
|
||||
};
|
||||
|
||||
Ptr<PaddingLayer> PaddingLayer::create(const LayerParams ¶ms)
|
||||
|
@ -58,7 +58,7 @@ public:
|
||||
axis = params.get<int>("axis", 1);
|
||||
if (params.has("slice_point"))
|
||||
{
|
||||
CV_Assert(!params.has("begin") && !params.has("size"));
|
||||
CV_Assert(!params.has("begin") && !params.has("size") && !params.has("end"));
|
||||
const DictValue &indicesValue = params.get("slice_point");
|
||||
sliceRanges.resize(indicesValue.size() + 1,
|
||||
std::vector<Range>(axis + 1, Range::all()));
|
||||
@ -71,24 +71,34 @@ public:
|
||||
}
|
||||
sliceRanges.back()[axis].start = prevSlice;
|
||||
}
|
||||
else if (params.has("begin") && params.has("size"))
|
||||
else if (params.has("begin"))
|
||||
{
|
||||
CV_Assert(params.has("size") ^ params.has("end"));
|
||||
const DictValue &begins = params.get("begin");
|
||||
const DictValue &sizes = params.get("size");
|
||||
CV_Assert(begins.size() == sizes.size());
|
||||
const DictValue &sizesOrEnds = params.has("size") ? params.get("size") : params.get("end");
|
||||
CV_Assert(begins.size() == sizesOrEnds.size());
|
||||
|
||||
sliceRanges.resize(1);
|
||||
sliceRanges[0].resize(begins.size(), Range::all());
|
||||
for (int i = 0; i < begins.size(); ++i)
|
||||
{
|
||||
int start = begins.get<int>(i);
|
||||
int size = sizes.get<int>(i);
|
||||
int sizeOrEnd = sizesOrEnds.get<int>(i); // It may be negative to reverse indexation.
|
||||
CV_Assert(start >= 0);
|
||||
CV_Assert(size == -1 || size > 0); // -1 value means range [start, axis_size).
|
||||
|
||||
sliceRanges[0][i].start = start;
|
||||
if (size > 0)
|
||||
sliceRanges[0][i].end = start + size;
|
||||
if (params.has("size"))
|
||||
{
|
||||
int size = sizeOrEnd;
|
||||
CV_Assert(size == -1 || size > 0); // -1 value means range [start, axis_size).
|
||||
sliceRanges[0][i].end = start > 0 ? start + size : -1; // We'll finalize a negative value later.
|
||||
}
|
||||
else
|
||||
{
|
||||
int end = sizeOrEnd;
|
||||
CV_Assert(end < 0 || end > start); // End index is excluded.
|
||||
sliceRanges[0][i].end = end; // We'll finalize a negative value later.
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -109,8 +119,7 @@ public:
|
||||
CV_Assert(sliceRanges[i].size() <= inpShape.size());
|
||||
for (int j = 0; j < sliceRanges[i].size(); ++j)
|
||||
{
|
||||
outputs[i][j] = std::min(sliceRanges[i][j].end, inpShape[j]) -
|
||||
std::max(sliceRanges[i][j].start, 0);
|
||||
outputs[i][j] = clamp(sliceRanges[i][j], inpShape[j]).size();
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -152,8 +161,7 @@ public:
|
||||
// Clamp.
|
||||
for (int j = 0; j < sliceRanges[i].size(); ++j)
|
||||
{
|
||||
sliceRanges[i][j].start = std::max(0, sliceRanges[i][j].start);
|
||||
sliceRanges[i][j].end = std::min(sliceRanges[i][j].end, inpShape[j]);
|
||||
sliceRanges[i][j] = clamp(sliceRanges[i][j], inpShape[j]);
|
||||
}
|
||||
// Fill the rest of ranges.
|
||||
for (int j = sliceRanges[i].size(); j < inpShape[-1]; ++j)
|
||||
|
@ -617,7 +617,7 @@ struct TorchImporter : public ::cv::dnn::Importer
|
||||
curModule->modules.push_back(cv::Ptr<Module>(new Module(nnName, "Sigmoid")));
|
||||
readObject();
|
||||
}
|
||||
else if (nnName == "SpatialBatchNormalization")
|
||||
else if (nnName == "SpatialBatchNormalization" || nnName == "InstanceNormalization")
|
||||
{
|
||||
newModule->apiType = "BatchNorm";
|
||||
readTorchTable(scalarParams, tensorParams);
|
||||
@ -626,19 +626,31 @@ struct TorchImporter : public ::cv::dnn::Importer
|
||||
float eps = float(scalarParams.get<double>("eps"));
|
||||
layerParams.set("eps", eps);
|
||||
|
||||
CV_Assert((tensorParams.count("running_var") || tensorParams.count("running_std")) &&
|
||||
tensorParams.count("running_mean"));
|
||||
if (tensorParams.count("running_mean"))
|
||||
{
|
||||
layerParams.blobs.push_back(tensorParams["running_mean"].second);
|
||||
}
|
||||
else
|
||||
{
|
||||
CV_Assert(scalarParams.has("nOutput"));
|
||||
layerParams.blobs.push_back(Mat::zeros(1, scalarParams.get<int>("nOutput"), CV_32F));
|
||||
}
|
||||
|
||||
if (tensorParams.count("running_var"))
|
||||
{
|
||||
layerParams.blobs.push_back(tensorParams["running_var"].second);
|
||||
}
|
||||
else
|
||||
else if (tensorParams.count("running_std"))
|
||||
{
|
||||
layerParams.blobs.push_back(tensorParams["running_std"].second);
|
||||
pow(layerParams.blobs.back(), -2, layerParams.blobs.back());
|
||||
subtract(layerParams.blobs.back(), eps, layerParams.blobs.back());
|
||||
}
|
||||
else
|
||||
{
|
||||
CV_Assert(scalarParams.has("nOutput"));
|
||||
layerParams.blobs.push_back(Mat::ones(1, scalarParams.get<int>("nOutput"), CV_32F));
|
||||
}
|
||||
|
||||
if (tensorParams.count("weight"))
|
||||
{
|
||||
@ -652,6 +664,16 @@ struct TorchImporter : public ::cv::dnn::Importer
|
||||
layerParams.blobs.push_back(tensorParams["bias"].second);
|
||||
}
|
||||
|
||||
if (nnName == "InstanceNormalization")
|
||||
{
|
||||
cv::Ptr<Module> mvnModule(new Module(nnName));
|
||||
mvnModule->apiType = "MVN";
|
||||
curModule->modules.push_back(mvnModule);
|
||||
|
||||
layerParams.blobs[0].setTo(0); // batch norm's mean
|
||||
layerParams.blobs[1].setTo(1); // batch norm's std
|
||||
}
|
||||
|
||||
curModule->modules.push_back(newModule);
|
||||
}
|
||||
else if (nnName == "PReLU")
|
||||
@ -691,7 +713,9 @@ struct TorchImporter : public ::cv::dnn::Importer
|
||||
layerParams.set("scale", scale);
|
||||
curModule->modules.push_back(newModule);
|
||||
}
|
||||
else if (nnName == "Identity")
|
||||
// TotalVariation layer is from fast-neural-style project: https://github.com/jcjohnson/fast-neural-style
|
||||
// It's a loss function that has an Identity forward.
|
||||
else if (nnName == "Identity" || nnName == "TotalVariation")
|
||||
{
|
||||
readTorchTable(scalarParams, tensorParams);
|
||||
newModule->apiType = "Identity";
|
||||
@ -866,7 +890,7 @@ struct TorchImporter : public ::cv::dnn::Importer
|
||||
layerParams.set("scale", scalarParams.get<float>("constant_scalar"));
|
||||
curModule->modules.push_back(newModule);
|
||||
}
|
||||
else if (nnName == "SpatialZeroPadding")
|
||||
else if (nnName == "SpatialZeroPadding" || nnName == "SpatialReflectionPadding")
|
||||
{
|
||||
readTorchTable(scalarParams, tensorParams);
|
||||
CV_Assert(scalarParams.has("pad_l"), scalarParams.has("pad_r"),
|
||||
@ -889,6 +913,26 @@ struct TorchImporter : public ::cv::dnn::Importer
|
||||
paddings[5] = padRight;
|
||||
layerParams.set("paddings", DictValue::arrayInt<int*>(&paddings[0], paddings.size()));
|
||||
layerParams.set("input_dims", 3);
|
||||
|
||||
if (nnName == "SpatialReflectionPadding")
|
||||
layerParams.set("type", "reflect");
|
||||
|
||||
curModule->modules.push_back(newModule);
|
||||
}
|
||||
else if (nnName == "ShaveImage")
|
||||
{
|
||||
// ShaveImage layer is from fast-neural-style project: https://github.com/jcjohnson/fast-neural-style
|
||||
// It may be mapped to Slice layer.
|
||||
readTorchTable(scalarParams, tensorParams);
|
||||
CV_Assert(scalarParams.has("size"));
|
||||
int size = scalarParams.get<int>("size");
|
||||
|
||||
int begins[] = {0, 0, size, size};
|
||||
int ends[] = {-1, -1, -size - 1, -size - 1};
|
||||
|
||||
newModule->apiType = "Slice";
|
||||
layerParams.set("begin", DictValue::arrayInt<int*>(&begins[0], 4));
|
||||
layerParams.set("end", DictValue::arrayInt<int*>(&ends[0], 4));
|
||||
curModule->modules.push_back(newModule);
|
||||
}
|
||||
else
|
||||
|
@ -231,6 +231,7 @@ TEST(Torch_Importer, net_padding)
|
||||
{
|
||||
runTorchNet("net_padding", DNN_TARGET_CPU, "", false, true);
|
||||
runTorchNet("net_spatial_zero_padding", DNN_TARGET_CPU, "", false, true);
|
||||
runTorchNet("net_spatial_reflection_padding", DNN_TARGET_CPU, "", false, true);
|
||||
}
|
||||
|
||||
TEST(Torch_Importer, ENet_accuracy)
|
||||
@ -338,6 +339,49 @@ OCL_TEST(Torch_Importer, ENet_accuracy)
|
||||
}
|
||||
}
|
||||
|
||||
// Check accuracy of style transfer models from https://github.com/jcjohnson/fast-neural-style
|
||||
// th fast_neural_style.lua \
|
||||
// -input_image ~/opencv_extra/testdata/dnn/googlenet_1.png \
|
||||
// -output_image lena.png \
|
||||
// -median_filter 0 \
|
||||
// -image_size 0 \
|
||||
// -model models/eccv16/starry_night.t7
|
||||
// th fast_neural_style.lua \
|
||||
// -input_image ~/opencv_extra/testdata/dnn/googlenet_1.png \
|
||||
// -output_image lena.png \
|
||||
// -median_filter 0 \
|
||||
// -image_size 0 \
|
||||
// -model models/instance_norm/feathers.t7
|
||||
TEST(Torch_Importer, FastNeuralStyle_accuracy)
{
    // Model files and the reference image expected for each of them (paired by index).
    const std::string modelFiles[] = {"dnn/fast_neural_style_eccv16_starry_night.t7",
                                      "dnn/fast_neural_style_instance_norm_feathers.t7"};
    const std::string referenceFiles[] = {"dnn/lena_starry_night.png", "dnn/lena_feathers.png"};

    // Per-channel mean: subtracted when the input blob is built, added back to the output.
    const Scalar meanValue(103.939, 116.779, 123.68);

    for (int idx = 0; idx < 2; ++idx)
    {
        Net net = readNetFromTorch(findDataFile(modelFiles[idx], false));

        const Mat image = imread(findDataFile("dnn/googlenet_1.png", false));
        const Mat blob = blobFromImage(image, 1.0, Size(), meanValue, false);

        net.setInput(blob);
        Mat result = net.forward();

        // Deprocessing: restore the mean for every channel and limit values to [0, 255].
        for (int c = 0; c < 3; ++c)
            getPlane(result, 0, c) += meanValue[c];
        result = cv::min(cv::max(0, result), 255);

        const Mat reference = imread(findDataFile(referenceFiles[idx]));
        const Mat referenceBlob = blobFromImage(reference, 1.0, Size(), Scalar(), false);

        normAssert(result, referenceBlob, "", 0.5, 1.1);
    }
}
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
51
samples/dnn/fast_neural_style.py
Normal file
51
samples/dnn/fast_neural_style.py
Normal file
@ -0,0 +1,51 @@
|
||||
import cv2 as cv
|
||||
import numpy as np
|
||||
import argparse
|
||||
|
||||
# Command-line interface of the sample.
parser = argparse.ArgumentParser(
        description='This script is used to run style transfer models from '
                    'https://github.com/jcjohnson/fast-neural-style using OpenCV')
parser.add_argument('--input', help='Path to image or video. Skip to capture frames from camera')
# The network cannot be created without a model file, so fail early with a clear message.
parser.add_argument('--model', required=True, help='Path to .t7 model')
parser.add_argument('--width', default=-1, type=int, help='Resize input to specific width.')
parser.add_argument('--height', default=-1, type=int, help='Resize input to specific height.')
parser.add_argument('--median_filter', default=0, type=int, help='Kernel size of postprocessing blurring.')
args = parser.parse_args()

net = cv.dnn.readNetFromTorch(args.model)

# Read frames from the given file, or from camera 0 when no input is given.
if args.input:
    cap = cv.VideoCapture(args.input)
else:
    cap = cv.VideoCapture(0)

cv.namedWindow('Styled image', cv.WINDOW_NORMAL)
# Process frames until any key is pressed.
while cv.waitKey(1) < 0:
    hasFrame, frame = cap.read()
    if not hasFrame:
        # No more frames: keep the last result on screen until a key is pressed.
        cv.waitKey()
        break

    # -1 means "keep the size of the frame".
    inWidth = args.width if args.width != -1 else frame.shape[1]
    inHeight = args.height if args.height != -1 else frame.shape[0]
    inp = cv.dnn.blobFromImage(frame, 1.0, (inWidth, inHeight),
                               (103.939, 116.779, 123.68), swapRB=False, crop=False)

    net.setInput(inp)
    out = net.forward()

    # Drop the leading dimension, add back the per-channel mean that was passed to
    # blobFromImage, rescale by 1/255 and move the channels to the last axis.
    out = out.reshape(3, out.shape[2], out.shape[3])
    out[0] += 103.939
    out[1] += 116.779
    out[2] += 123.68
    out /= 255
    out = out.transpose(1, 2, 0)

    # getPerfProfile returns ticks; dividing by (ticks per second / 1000) gives milliseconds.
    t, _ = net.getPerfProfile()
    freq = cv.getTickFrequency() / 1000
    # Parenthesised single-argument form works on both Python 2 and Python 3.
    print('%s ms' % (t / freq))

    if args.median_filter:
        # NOTE(review): `out` is a floating-point image here; per the OpenCV docs
        # medianBlur presumably accepts such input only for kernel sizes 3 and 5 - confirm.
        out = cv.medianBlur(out, args.median_filter)

    cv.imshow('Styled image', out)
|
Loading…
Reference in New Issue
Block a user