mirror of
https://github.com/opencv/opencv.git
synced 2025-06-07 09:25:45 +08:00
Custom deep learning layers in Python
This commit is contained in:
parent
ca1975cada
commit
d5b9563263
@ -190,3 +190,37 @@ In our case resize's output shape will be stored in layer's `blobs[0]`.
|
||||
Next we register a layer and try to import the model.
|
||||
|
||||
@snippet dnn/custom_layers.cpp Register ResizeBilinearLayer
|
||||
|
||||
## Define a custom layer in Python
|
||||
The following example shows how to customize OpenCV's layers in Python.
|
||||
|
||||
Let's consider [Holistically-Nested Edge Detection](https://arxiv.org/abs/1504.06375)
|
||||
deep learning model. It was trained with one and only one difference compared to
|
||||
a current version of [Caffe framework](http://caffe.berkeleyvision.org/). `Crop`
|
||||
layers that receive two input blobs and crop the first one to match spatial dimensions
|
||||
of the second one used to crop from the center. Nowadays Caffe's layer does it
|
||||
from the top-left corner. So using the latest version of Caffe or OpenCV you'll
|
||||
get shifted results with filled borders.
|
||||
|
||||
Next we're going to replace OpenCV's `Crop` layer that makes top-left cropping by
|
||||
a centric one.
|
||||
|
||||
- Create a class with `getMemoryShapes` and `forward` methods
|
||||
|
||||
@snippet dnn/edge_detection.py CropLayer
|
||||
|
||||
@note Both methods should return lists.
|
||||
|
||||
- Register a new layer.
|
||||
|
||||
@snippet dnn/edge_detection.py Register
|
||||
|
||||
That's it! We've replaced an existing OpenCV layer with a custom one.
|
||||
You may find a full script in the [source code](https://github.com/opencv/opencv/tree/master/samples/dnn/edge_detection.py).
|
||||
|
||||
<table border="0">
|
||||
<tr>
|
||||
<td></td>
|
||||
<td></td>
|
||||
</tr>
|
||||
</table>
|
||||
|
BIN
doc/tutorials/dnn/images/lena_hed.jpg
Normal file
BIN
doc/tutorials/dnn/images/lena_hed.jpg
Normal file
Binary file not shown.
After Width: | Height: | Size: 38 KiB |
@ -40,4 +40,182 @@ bool pyopencv_to(PyObject *o, std::vector<Mat> &blobs, const char *name) //requi
|
||||
return pyopencvVecConverter<Mat>::to(o, blobs, ArgInfo(name, false));
|
||||
}
|
||||
|
||||
#endif
|
||||
template<typename T>
|
||||
PyObject* pyopencv_from(const dnn::DictValue &dv)
|
||||
{
|
||||
if (dv.size() > 1)
|
||||
{
|
||||
std::vector<T> vec(dv.size());
|
||||
for (int i = 0; i < dv.size(); ++i)
|
||||
vec[i] = dv.get<T>(i);
|
||||
return pyopencv_from_generic_vec(vec);
|
||||
}
|
||||
else
|
||||
return pyopencv_from(dv.get<T>());
|
||||
}
|
||||
|
||||
template<>
|
||||
PyObject* pyopencv_from(const dnn::DictValue &dv)
|
||||
{
|
||||
if (dv.isInt()) return pyopencv_from<int>(dv);
|
||||
if (dv.isReal()) return pyopencv_from<float>(dv);
|
||||
if (dv.isString()) return pyopencv_from<String>(dv);
|
||||
CV_Error(Error::StsNotImplemented, "Unknown value type");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
template<>
|
||||
PyObject* pyopencv_from(const dnn::LayerParams& lp)
|
||||
{
|
||||
PyObject* dict = PyDict_New();
|
||||
for (std::map<String, dnn::DictValue>::const_iterator it = lp.begin(); it != lp.end(); ++it)
|
||||
{
|
||||
CV_Assert(!PyDict_SetItemString(dict, it->first.c_str(), pyopencv_from(it->second)));
|
||||
}
|
||||
return dict;
|
||||
}
|
||||
|
||||
// Adapter that exposes a Python-implemented class as a cv::dnn::Layer.
// One Python instance is created per network layer; every call into Python
// is made while holding the GIL.
class pycvLayer CV_FINAL : public dnn::Layer
{
public:
    pycvLayer(const dnn::LayerParams &params, PyObject* pyLayer) : Layer(params)
    {
        PyGILState_STATE gstate = PyGILState_Ensure();

        // Instantiate the Python class as pyLayer(params_dict, blobs_list).
        // PyTuple_SetItem steals the references produced by pyopencv_from.
        PyObject* args = PyTuple_New(2);
        CV_Assert(!PyTuple_SetItem(args, 0, pyopencv_from(params)));
        CV_Assert(!PyTuple_SetItem(args, 1, pyopencv_from(params.blobs)));
        o = PyObject_CallObject(pyLayer, args);

        Py_DECREF(args);
        PyGILState_Release(gstate);
        if (!o)
            CV_Error(Error::StsError, "Failed to create an instance of custom layer");
    }

    // Associate a Python class with a layer type name. Several classes may be
    // stacked for the same type; the most recently registered one is used.
    static void registerLayer(const std::string& type, PyObject* o)
    {
        std::map<std::string, std::vector<PyObject*> >::iterator it = pyLayers.find(type);
        if (it != pyLayers.end())
            it->second.push_back(o);
        else
            pyLayers[type] = std::vector<PyObject*>(1, o);
    }

    // Remove the most recently registered Python class for the given type.
    static void unregisterLayer(const std::string& type)
    {
        std::map<std::string, std::vector<PyObject*> >::iterator it = pyLayers.find(type);
        if (it != pyLayers.end())
        {
            if (it->second.size() > 1)
                it->second.pop_back();
            else
                pyLayers.erase(it);
        }
    }

    // Factory callback passed to dnn::LayerFactory::registerLayer.
    static Ptr<dnn::Layer> create(dnn::LayerParams &params)
    {
        std::map<std::string, std::vector<PyObject*> >::iterator it = pyLayers.find(params.type);
        if (it == pyLayers.end())
            CV_Error(Error::StsNotImplemented, "Layer with a type \"" + params.type +
                                               "\" is not implemented");
        CV_Assert(!it->second.empty());
        return Ptr<dnn::Layer>(new pycvLayer(params, it->second.back()));
    }

    virtual bool getMemoryShapes(const std::vector<std::vector<int> > &inputs,
                                 const int,
                                 std::vector<std::vector<int> > &outputs,
                                 std::vector<std::vector<int> > &) const CV_OVERRIDE
    {
        PyGILState_STATE gstate = PyGILState_Ensure();

        PyObject* args = PyList_New(inputs.size());
        for (size_t i = 0; i < inputs.size(); ++i)
            PyList_SET_ITEM(args, i, pyopencv_from_generic_vec(inputs[i]));

        // Build the method-name object once and release it after the call:
        // the previous version leaked both the name object and the result.
        PyObject* methodName = PyString_FromString("getMemoryShapes");
        PyObject* res = PyObject_CallMethodObjArgs(o, methodName, args, NULL);
        Py_DECREF(methodName);
        Py_DECREF(args);
        if (!res)
        {
            PyGILState_Release(gstate);
            CV_Error(Error::StsNotImplemented, "Failed to call \"getMemoryShapes\" method");
        }
        // Convert while still holding the GIL: res is a Python object and the
        // previous version touched it after PyGILState_Release.
        pyopencv_to_generic_vec(res, outputs, ArgInfo("", 0));
        Py_DECREF(res);
        PyGILState_Release(gstate);
        return false;
    }

    virtual void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &) CV_OVERRIDE
    {
        PyGILState_STATE gstate = PyGILState_Ensure();

        std::vector<Mat> inps(inputs.size());
        for (size_t i = 0; i < inputs.size(); ++i)
            inps[i] = *inputs[i];

        PyObject* args = pyopencv_from(inps);
        PyObject* methodName = PyString_FromString("forward");
        PyObject* res = PyObject_CallMethodObjArgs(o, methodName, args, NULL);
        Py_DECREF(methodName);
        Py_DECREF(args);
        if (!res)
        {
            PyGILState_Release(gstate);
            CV_Error(Error::StsNotImplemented, "Failed to call \"forward\" method");
        }

        // Convert the Python result under the GIL, then copy into the
        // preallocated output blobs.
        std::vector<Mat> pyOutputs;
        pyopencv_to(res, pyOutputs, ArgInfo("", 0));
        Py_DECREF(res);
        PyGILState_Release(gstate);

        CV_Assert(pyOutputs.size() == outputs.size());
        for (size_t i = 0; i < outputs.size(); ++i)
        {
            CV_Assert(pyOutputs[i].size == outputs[i].size);
            CV_Assert(pyOutputs[i].type() == outputs[i].type());
            pyOutputs[i].copyTo(outputs[i]);
        }
    }

    // InputArray-based overload is intentionally unsupported; the Mat-based
    // forward above is the only entry point for Python layers here.
    virtual void forward(InputArrayOfArrays, OutputArrayOfArrays, OutputArrayOfArrays) CV_OVERRIDE
    {
        CV_Error(Error::StsNotImplemented, "");
    }

private:
    // Map layers types to python classes.
    static std::map<std::string, std::vector<PyObject*> > pyLayers;
    PyObject* o; // Instance of implemented python layer.
};
|
||||
|
||||
// Definition of the static registry declared inside pycvLayer.
std::map<std::string, std::vector<PyObject*> > pycvLayer::pyLayers;
|
||||
|
||||
// Python-visible cv.dnn_registerLayer(type, class): binds a callable Python
// class to a dnn layer type name.
static PyObject *pyopencv_cv_dnn_registerLayer(PyObject*, PyObject *args, PyObject *kw)
{
    const char *keywords[] = { "type", "class", NULL };
    char* layerType;
    PyObject *classInstance;

    if (!PyArg_ParseTupleAndKeywords(args, kw, "sO", (char**)keywords, &layerType, &classInstance))
        return NULL;
    if (!PyCallable_Check(classInstance)) {
        PyErr_SetString(PyExc_TypeError, "class must be callable");
        return NULL;
    }

    // PyArg_ParseTupleAndKeywords yields a borrowed reference; take our own
    // so the registered class cannot be garbage-collected while the layer
    // factory still points at it (the previous version stored the borrowed
    // pointer, which dangles once the caller drops its last reference).
    Py_INCREF(classInstance);
    pycvLayer::registerLayer(layerType, classInstance);
    dnn::LayerFactory::registerLayer(layerType, pycvLayer::create);
    Py_RETURN_NONE;
}
|
||||
|
||||
// Python-visible cv.dnn_unregisterLayer(type): removes the most recently
// registered Python class for the given layer type from both registries.
static PyObject *pyopencv_cv_dnn_unregisterLayer(PyObject*, PyObject *args, PyObject *kw)
{
    const char *keywords[] = { "type", NULL };
    char* typeName = NULL;

    if (!PyArg_ParseTupleAndKeywords(args, kw, "s", (char**)keywords, &typeName))
        return NULL;

    pycvLayer::unregisterLayer(typeName);
    dnn::LayerFactory::unregisterLayer(typeName);
    Py_RETURN_NONE;
}
|
||||
|
||||
#endif // HAVE_OPENCV_DNN
|
||||
|
@ -1783,6 +1783,10 @@ static PyMethodDef special_methods[] = {
|
||||
{"createTrackbar", pycvCreateTrackbar, METH_VARARGS, "createTrackbar(trackbarName, windowName, value, count, onChange) -> None"},
|
||||
{"createButton", (PyCFunction)pycvCreateButton, METH_VARARGS | METH_KEYWORDS, "createButton(buttonName, onChange [, userData, buttonType, initialButtonState]) -> None"},
|
||||
{"setMouseCallback", (PyCFunction)pycvSetMouseCallback, METH_VARARGS | METH_KEYWORDS, "setMouseCallback(windowName, onMouse [, param]) -> None"},
|
||||
#endif
|
||||
#ifdef HAVE_OPENCV_DNN
|
||||
{"dnn_registerLayer", (PyCFunction)pyopencv_cv_dnn_registerLayer, METH_VARARGS | METH_KEYWORDS, "registerLayer(type, class) -> None"},
|
||||
{"dnn_unregisterLayer", (PyCFunction)pyopencv_cv_dnn_unregisterLayer, METH_VARARGS | METH_KEYWORDS, "unregisterLayer(type) -> None"},
|
||||
#endif
|
||||
{NULL, NULL},
|
||||
};
|
||||
|
69
samples/dnn/edge_detection.py
Normal file
69
samples/dnn/edge_detection.py
Normal file
@ -0,0 +1,69 @@
|
||||
import cv2 as cv
import argparse

# Command-line interface for the Holistically-Nested Edge Detection demo.
parser = argparse.ArgumentParser(
    description='This sample shows how to define custom OpenCV deep learning layers in Python. '
                'Holistically-Nested Edge Detection (https://arxiv.org/abs/1504.06375) neural network '
                'is used as an example model. Find a pre-trained model at https://github.com/s9xie/hed.')
parser.add_argument('--input', help='Path to image or video. Skip to capture frames from camera')
parser.add_argument('--prototxt', help='Path to deploy.prototxt', required=True)
parser.add_argument('--caffemodel', help='Path to hed_pretrained_bsds.caffemodel', required=True)
parser.add_argument('--width', help='Resize input image to a specific width', default=500, type=int)
parser.add_argument('--height', help='Resize input image to a specific height', default=500, type=int)
args = parser.parse_args()
||||
#! [CropLayer]
class CropLayer(object):
    """Custom dnn layer that crops the first input blob to the spatial size
    of the second one, taking the crop from the center (old Caffe behavior)
    rather than from the top-left corner.
    """

    def __init__(self, params, blobs):
        # Crop window bounds, computed in getMemoryShapes and used in forward.
        self.xstart = 0
        self.xend = 0
        self.ystart = 0
        self.yend = 0

    # Our layer receives two inputs. We need to crop the first input blob
    # to match a shape of the second one (keeping batch size and number of channels)
    def getMemoryShapes(self, inputs):
        inputShape, targetShape = inputs[0], inputs[1]
        batchSize, numChannels = inputShape[0], inputShape[1]
        height, width = targetShape[2], targetShape[3]

        # Integer (floor) division: plain '/' yields float offsets in
        # Python 3, which would break the slicing in forward().
        self.ystart = (inputShape[2] - targetShape[2]) // 2
        self.xstart = (inputShape[3] - targetShape[3]) // 2
        self.yend = self.ystart + height
        self.xend = self.xstart + width

        return [[batchSize, numChannels, height, width]]

    def forward(self, inputs):
        # Both getMemoryShapes and forward must return lists.
        return [inputs[0][:,:,self.ystart:self.yend,self.xstart:self.xend]]
#! [CropLayer]
|
||||
|
||||
#! [Register]
cv.dnn_registerLayer('Crop', CropLayer)
#! [Register]

# Load the model.
net = cv.dnn.readNet(args.prototxt, args.caffemodel)

kWinName = 'Holistically-Nested Edge Detection'
cv.namedWindow('Input', cv.WINDOW_NORMAL)
cv.namedWindow(kWinName, cv.WINDOW_NORMAL)

# Read from a file when --input is given, otherwise from the default camera.
cap = cv.VideoCapture(args.input if args.input else 0)
while cv.waitKey(1) < 0:
    hasFrame, frame = cap.read()
    if not hasFrame:
        # No more frames: wait for a key press, then exit the loop.
        cv.waitKey()
        break

    cv.imshow('Input', frame)

    # Mean values are per-channel BGR means — presumably the ones the model
    # was trained with (TODO confirm against the HED training setup).
    inp = cv.dnn.blobFromImage(frame, scalefactor=1.0, size=(args.width, args.height),
                               mean=(104.00698793, 116.66876762, 122.67891434),
                               swapRB=False, crop=False)
    net.setInput(inp)

    # Take the first channel of the first batch element and scale the edge
    # map back to the original frame size for display.
    out = net.forward()
    out = out[0, 0]
    out = cv.resize(out, (frame.shape[1], frame.shape[0]))
    cv.imshow(kWinName, out)
Loading…
Reference in New Issue
Block a user