mirror of
https://github.com/opencv/opencv.git
synced 2025-06-07 09:25:45 +08:00
Merge pull request #25810 from fengyuentau:python/fix_parsing_3d_mat_in_dnn
python: attempts to fix 3d mat parsing problem for dnn #25810 Fixes https://github.com/opencv/opencv/issues/25762 https://github.com/opencv/opencv/issues/23242 Relates https://github.com/opencv/opencv/issues/25763 https://github.com/opencv/opencv/issues/19091 Although `cv.Mat` has already been introduced to workaround this problem, people do not know it and it kind of leads to confusion with `numpy.array`. This patch adds a "switch" to turn off the auto multichannel feature when the API is from cv::dnn::Net (more specifically, `setInput`) and the parameter is of type `Mat`. This patch only leads to changes of three places in `pyopencv_generated_types_content.h`: ```.diff static PyObject* pyopencv_cv_dnn_dnn_Net_setInput(PyObject* self, PyObject* py_args, PyObject* kw) { ... - pyopencv_to_safe(pyobj_blob, blob, ArgInfo("blob", 0)) && + pyopencv_to_safe(pyobj_blob, blob, ArgInfo("blob", 8)) && ... } // I guess we also need to change this as one-channel blob is expected for param static PyObject* pyopencv_cv_dnn_dnn_Net_setParam(PyObject* self, PyObject* py_args, PyObject* kw) { ... - pyopencv_to_safe(pyobj_blob, blob, ArgInfo("blob", 0)) ) + pyopencv_to_safe(pyobj_blob, blob, ArgInfo("blob", 8)) ) ... - pyopencv_to_safe(pyobj_blob, blob, ArgInfo("blob", 0)) ) + pyopencv_to_safe(pyobj_blob, blob, ArgInfo("blob", 8)) ) ... } ``` Others are unchanged, e.g. `dnn_SegmentationModel` and stuff like that. ### Pull Request Readiness Checklist See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [x] I agree to contribute to the project under Apache 2 License. - [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [x] The PR is proposed to the proper branch - [x] There is a reference to the original bug report and related work - [x] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. - [x] The feature is well documented and sample code can be built with the project CMake
This commit is contained in:
parent
e644ab0b40
commit
5510718381
@ -478,6 +478,7 @@ Cv64suf;
|
|||||||
#define CV_OUT
|
#define CV_OUT
|
||||||
#define CV_PROP
|
#define CV_PROP
|
||||||
#define CV_PROP_RW
|
#define CV_PROP_RW
|
||||||
|
#define CV_ND // Indicates that input data should be parsed into Mat without channels
|
||||||
#define CV_WRAP
|
#define CV_WRAP
|
||||||
#define CV_WRAP_AS(synonym)
|
#define CV_WRAP_AS(synonym)
|
||||||
#define CV_WRAP_MAPPABLE(mappable)
|
#define CV_WRAP_MAPPABLE(mappable)
|
||||||
|
@ -642,13 +642,13 @@ CV__DNN_INLINE_NS_BEGIN
|
|||||||
* @param outputName name for layer which output is needed to get
|
* @param outputName name for layer which output is needed to get
|
||||||
* @details If @p outputName is empty, runs forward pass for the whole network.
|
* @details If @p outputName is empty, runs forward pass for the whole network.
|
||||||
*/
|
*/
|
||||||
CV_WRAP void forward(OutputArrayOfArrays outputBlobs, const String& outputName = String());
|
CV_WRAP void forward(CV_ND OutputArrayOfArrays outputBlobs, const String& outputName = String());
|
||||||
|
|
||||||
/** @brief Runs forward pass to compute outputs of layers listed in @p outBlobNames.
|
/** @brief Runs forward pass to compute outputs of layers listed in @p outBlobNames.
|
||||||
* @param outputBlobs contains blobs for first outputs of specified layers.
|
* @param outputBlobs contains blobs for first outputs of specified layers.
|
||||||
* @param outBlobNames names for layers which outputs are needed to get
|
* @param outBlobNames names for layers which outputs are needed to get
|
||||||
*/
|
*/
|
||||||
CV_WRAP void forward(OutputArrayOfArrays outputBlobs,
|
CV_WRAP void forward(CV_ND OutputArrayOfArrays outputBlobs,
|
||||||
const std::vector<String>& outBlobNames);
|
const std::vector<String>& outBlobNames);
|
||||||
|
|
||||||
/** @brief Runs forward pass to compute outputs of layers listed in @p outBlobNames.
|
/** @brief Runs forward pass to compute outputs of layers listed in @p outBlobNames.
|
||||||
@ -727,7 +727,7 @@ CV__DNN_INLINE_NS_BEGIN
|
|||||||
* as:
|
* as:
|
||||||
* \f[input(n,c,h,w) = scalefactor \times (blob(n,c,h,w) - mean_c)\f]
|
* \f[input(n,c,h,w) = scalefactor \times (blob(n,c,h,w) - mean_c)\f]
|
||||||
*/
|
*/
|
||||||
CV_WRAP void setInput(InputArray blob, const String& name = "",
|
CV_WRAP void setInput(CV_ND InputArray blob, const String& name = "",
|
||||||
double scalefactor = 1.0, const Scalar& mean = Scalar());
|
double scalefactor = 1.0, const Scalar& mean = Scalar());
|
||||||
|
|
||||||
/** @brief Sets the new value for the learned param of the layer.
|
/** @brief Sets the new value for the learned param of the layer.
|
||||||
@ -738,8 +738,8 @@ CV__DNN_INLINE_NS_BEGIN
|
|||||||
* @note If shape of the new blob differs from the previous shape,
|
* @note If shape of the new blob differs from the previous shape,
|
||||||
* then the following forward pass may fail.
|
* then the following forward pass may fail.
|
||||||
*/
|
*/
|
||||||
CV_WRAP void setParam(int layer, int numParam, const Mat &blob);
|
CV_WRAP void setParam(int layer, int numParam, CV_ND const Mat &blob);
|
||||||
CV_WRAP inline void setParam(const String& layerName, int numParam, const Mat &blob) { return setParam(getLayerId(layerName), numParam, blob); }
|
CV_WRAP inline void setParam(const String& layerName, int numParam, CV_ND const Mat &blob) { return setParam(getLayerId(layerName), numParam, blob); }
|
||||||
|
|
||||||
/** @brief Returns parameter blob of the layer.
|
/** @brief Returns parameter blob of the layer.
|
||||||
* @param layer name or id of the layer.
|
* @param layer name or id of the layer.
|
||||||
|
@ -455,10 +455,6 @@ class dnn_test(NewOpenCVTests):
|
|||||||
"Verify OPENCV_DNN_TEST_DATA_PATH configuration parameter.")
|
"Verify OPENCV_DNN_TEST_DATA_PATH configuration parameter.")
|
||||||
|
|
||||||
input = np.load(input_file)
|
input = np.load(input_file)
|
||||||
# we have to expand the shape of input tensor because Python bindings cut 3D tensors to 2D
|
|
||||||
# it should be fixed in future. see : https://github.com/opencv/opencv/issues/19091
|
|
||||||
# please remove `expand_dims` after that
|
|
||||||
input = np.expand_dims(input, axis=3)
|
|
||||||
gold_output = np.load(output_file)
|
gold_output = np.load(output_file)
|
||||||
|
|
||||||
for backend, target in self.dnnBackendsAndTargets:
|
for backend, target in self.dnnBackendsAndTargets:
|
||||||
@ -469,10 +465,63 @@ class dnn_test(NewOpenCVTests):
|
|||||||
net.setPreferableBackend(backend)
|
net.setPreferableBackend(backend)
|
||||||
net.setPreferableTarget(target)
|
net.setPreferableTarget(target)
|
||||||
|
|
||||||
|
# Check whether 3d shape is parsed correctly for setInput
|
||||||
net.setInput(input)
|
net.setInput(input)
|
||||||
real_output = net.forward()
|
|
||||||
|
|
||||||
normAssert(self, real_output, gold_output, "", getDefaultThreshold(target))
|
# Case 0: test API `forward(const String& outputName = String()`
|
||||||
|
real_output = net.forward() # Retval is a np.array of shape [2, 5, 3]
|
||||||
|
normAssert(self, real_output, gold_output, "Case 1", getDefaultThreshold(target))
|
||||||
|
|
||||||
|
'''
|
||||||
|
Pre-allocate output memory with correct shape.
|
||||||
|
Normally Python users do not use in this way,
|
||||||
|
but we have to test it since we design API in this way
|
||||||
|
'''
|
||||||
|
# Case 1: a np.array with a string of output name.
|
||||||
|
# It tests API `forward(OutputArrayOfArrays outputBlobs, const String& outputName = String()`
|
||||||
|
# when outputBlobs is a np.array and we expect it to be the only output.
|
||||||
|
real_output = np.empty([2, 5, 3], dtype=np.float32)
|
||||||
|
real_output = net.forward(real_output, "237") # Retval is a tuple with a np.array of shape [2, 5, 3]
|
||||||
|
normAssert(self, real_output, gold_output, "Case 1", getDefaultThreshold(target))
|
||||||
|
|
||||||
|
# Case 2: a tuple of np.array with a string of output name.
|
||||||
|
# It tests API `forward(OutputArrayOfArrays outputBlobs, const String& outputName = String()`
|
||||||
|
# when outputBlobs is a container of several np.array and we expect to save all outputs accordingly.
|
||||||
|
real_output = tuple(np.empty([2, 5, 3], dtype=np.float32))
|
||||||
|
real_output = net.forward(real_output, "237") # Retval is a tuple with a np.array of shape [2, 5, 3]
|
||||||
|
normAssert(self, real_output, gold_output, "Case 2", getDefaultThreshold(target))
|
||||||
|
|
||||||
|
# Case 3: a tuple of np.array with a string of output name.
|
||||||
|
# It tests API `forward(OutputArrayOfArrays outputBlobs, const std::vector<String>& outBlobNames)`
|
||||||
|
real_output = tuple(np.empty([2, 5, 3], dtype=np.float32))
|
||||||
|
# Note that it does not support parsing a list , e.g. ["237"]
|
||||||
|
real_output = net.forward(real_output, ("237")) # Retval is a tuple with a np.array of shape [2, 5, 3]
|
||||||
|
normAssert(self, real_output, gold_output, "Case 3", getDefaultThreshold(target))
|
||||||
|
|
||||||
|
def test_set_param_3d(self):
|
||||||
|
model_path = self.find_dnn_file('dnn/onnx/models/matmul_3d_init.onnx')
|
||||||
|
input_file = self.find_dnn_file('dnn/onnx/data/input_matmul_3d_init.npy')
|
||||||
|
output_file = self.find_dnn_file('dnn/onnx/data/output_matmul_3d_init.npy')
|
||||||
|
|
||||||
|
input = np.load(input_file)
|
||||||
|
output = np.load(output_file)
|
||||||
|
|
||||||
|
for backend, target in self.dnnBackendsAndTargets:
|
||||||
|
printParams(backend, target)
|
||||||
|
|
||||||
|
net = cv.dnn.readNet(model_path)
|
||||||
|
|
||||||
|
node_name = net.getLayerNames()[0]
|
||||||
|
w = net.getParam(node_name, 0) # returns the original tensor of three-dimensional shape
|
||||||
|
net.setParam(node_name, 0, w) # set param once again to see whether tensor is converted with correct shape
|
||||||
|
|
||||||
|
net.setPreferableBackend(backend)
|
||||||
|
net.setPreferableTarget(target)
|
||||||
|
|
||||||
|
net.setInput(input)
|
||||||
|
res_output = net.forward()
|
||||||
|
|
||||||
|
normAssert(self, output, res_output, "", getDefaultThreshold(target))
|
||||||
|
|
||||||
def test_scalefactor_assign(self):
|
def test_scalefactor_assign(self):
|
||||||
params = cv.dnn.Image2BlobParams()
|
params = cv.dnn.Image2BlobParams()
|
||||||
|
@ -46,19 +46,22 @@ private:
|
|||||||
static const uint32_t arg_outputarg_flag = 0x1;
|
static const uint32_t arg_outputarg_flag = 0x1;
|
||||||
static const uint32_t arg_arithm_op_src_flag = 0x2;
|
static const uint32_t arg_arithm_op_src_flag = 0x2;
|
||||||
static const uint32_t arg_pathlike_flag = 0x4;
|
static const uint32_t arg_pathlike_flag = 0x4;
|
||||||
|
static const uint32_t arg_nd_mat_flag = 0x8;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
const char* name;
|
const char* name;
|
||||||
bool outputarg;
|
bool outputarg;
|
||||||
bool arithm_op_src;
|
bool arithm_op_src;
|
||||||
bool pathlike;
|
bool pathlike;
|
||||||
|
bool nd_mat;
|
||||||
// more fields may be added if necessary
|
// more fields may be added if necessary
|
||||||
|
|
||||||
ArgInfo(const char* name_, uint32_t arg_) :
|
ArgInfo(const char* name_, uint32_t arg_) :
|
||||||
name(name_),
|
name(name_),
|
||||||
outputarg((arg_ & arg_outputarg_flag) != 0),
|
outputarg((arg_ & arg_outputarg_flag) != 0),
|
||||||
arithm_op_src((arg_ & arg_arithm_op_src_flag) != 0),
|
arithm_op_src((arg_ & arg_arithm_op_src_flag) != 0),
|
||||||
pathlike((arg_ & arg_pathlike_flag) != 0) {}
|
pathlike((arg_ & arg_pathlike_flag) != 0),
|
||||||
|
nd_mat((arg_ & arg_nd_mat_flag) != 0) {}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
ArgInfo(const ArgInfo&) = delete;
|
ArgInfo(const ArgInfo&) = delete;
|
||||||
|
@ -173,7 +173,7 @@ bool pyopencv_to(PyObject* o, Mat& m, const ArgInfo& info)
|
|||||||
|
|
||||||
CV_LOG_DEBUG(NULL, "Incoming ndarray '" << info.name << "': ndims=" << ndims << " _sizes=" << pycv_dumpArray(_sizes, ndims) << " _strides=" << pycv_dumpArray(_strides, ndims));
|
CV_LOG_DEBUG(NULL, "Incoming ndarray '" << info.name << "': ndims=" << ndims << " _sizes=" << pycv_dumpArray(_sizes, ndims) << " _strides=" << pycv_dumpArray(_strides, ndims));
|
||||||
|
|
||||||
bool ismultichannel = ndims == 3 && _sizes[2] <= CV_CN_MAX;
|
bool ismultichannel = ndims == 3 && _sizes[2] <= CV_CN_MAX && !info.nd_mat;
|
||||||
if (pyopencv_Mat_TypePtr && PyObject_TypeCheck(o, pyopencv_Mat_TypePtr))
|
if (pyopencv_Mat_TypePtr && PyObject_TypeCheck(o, pyopencv_Mat_TypePtr))
|
||||||
{
|
{
|
||||||
bool wrapChannels = false;
|
bool wrapChannels = false;
|
||||||
|
@ -340,22 +340,38 @@ static bool pyopencv_to_generic_vec(PyObject* obj, std::vector<Tp>& value, const
|
|||||||
{
|
{
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
if (!PySequence_Check(obj))
|
if (info.nd_mat && PyArray_Check(obj))
|
||||||
{
|
{
|
||||||
failmsg("Can't parse '%s'. Input argument doesn't provide sequence protocol", info.name);
|
/*
|
||||||
return false;
|
If obj is marked as nd mat and of array type, it is parsed to a single
|
||||||
}
|
mat in the target vector to avoid being split into multiple mats
|
||||||
const size_t n = static_cast<size_t>(PySequence_Size(obj));
|
*/
|
||||||
value.resize(n);
|
value.resize(1);
|
||||||
for (size_t i = 0; i < n; i++)
|
if (!pyopencv_to(obj, value.front(), info))
|
||||||
{
|
|
||||||
SafeSeqItem item_wrap(obj, i);
|
|
||||||
if (!pyopencv_to(item_wrap.item, value[i], info))
|
|
||||||
{
|
{
|
||||||
failmsg("Can't parse '%s'. Sequence item with index %lu has a wrong type", info.name, i);
|
failmsg("Can't parse '%s'. Array item has a wrong type", info.name);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
else // parse as sequence
|
||||||
|
{
|
||||||
|
if (!PySequence_Check(obj))
|
||||||
|
{
|
||||||
|
failmsg("Can't parse '%s'. Input argument doesn't provide sequence protocol", info.name);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
const size_t n = static_cast<size_t>(PySequence_Size(obj));
|
||||||
|
value.resize(n);
|
||||||
|
for (size_t i = 0; i < n; i++)
|
||||||
|
{
|
||||||
|
SafeSeqItem item_wrap(obj, i);
|
||||||
|
if (!pyopencv_to(item_wrap.item, value[i], info))
|
||||||
|
{
|
||||||
|
failmsg("Can't parse '%s'. Sequence item with index %lu has a wrong type", info.name, i);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -488,6 +488,10 @@ class ArgInfo(object):
|
|||||||
return self.name + '_'
|
return self.name + '_'
|
||||||
return self.name
|
return self.name
|
||||||
|
|
||||||
|
@property
|
||||||
|
def nd_mat(self):
|
||||||
|
return '/ND' in self._modifiers
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def inputarg(self):
|
def inputarg(self):
|
||||||
return '/O' not in self._modifiers
|
return '/O' not in self._modifiers
|
||||||
@ -528,6 +532,7 @@ class ArgInfo(object):
|
|||||||
arg = 0x01 if self.outputarg else 0x0
|
arg = 0x01 if self.outputarg else 0x0
|
||||||
arg += 0x02 if self.arithm_op_src_arg else 0x0
|
arg += 0x02 if self.arithm_op_src_arg else 0x0
|
||||||
arg += 0x04 if self.pathlike else 0x0
|
arg += 0x04 if self.pathlike else 0x0
|
||||||
|
arg += 0x08 if self.nd_mat else 0x0
|
||||||
return "ArgInfo(\"%s\", %d)" % (self.name, arg)
|
return "ArgInfo(\"%s\", %d)" % (self.name, arg)
|
||||||
|
|
||||||
|
|
||||||
|
@ -82,6 +82,10 @@ class CppHeaderParser(object):
|
|||||||
modlist = []
|
modlist = []
|
||||||
|
|
||||||
# pass 0: extracts the modifiers
|
# pass 0: extracts the modifiers
|
||||||
|
if "CV_ND" in arg_str:
|
||||||
|
modlist.append("/ND")
|
||||||
|
arg_str = arg_str.replace("CV_ND", "")
|
||||||
|
|
||||||
if "CV_OUT" in arg_str:
|
if "CV_OUT" in arg_str:
|
||||||
modlist.append("/O")
|
modlist.append("/O")
|
||||||
arg_str = arg_str.replace("CV_OUT", "")
|
arg_str = arg_str.replace("CV_OUT", "")
|
||||||
|
Loading…
Reference in New Issue
Block a user