mirror of
https://github.com/opencv/opencv.git
synced 2025-01-18 06:03:15 +08:00
Merge remote-tracking branch 'upstream/3.4' into merge-3.4
This commit is contained in:
commit
b45273eccb
@ -414,6 +414,29 @@ void Mat::copyTo( OutputArray _dst, InputArray _mask ) const
|
||||
copymask(ptrs[0], 0, ptrs[2], 0, ptrs[1], 0, sz, &esz);
|
||||
}
|
||||
|
||||
|
||||
static bool can_apply_memset(const Mat &mat, const Scalar &s, int &fill_value)
|
||||
{
|
||||
// check if depth is 1 byte.
|
||||
switch (mat.depth())
|
||||
{
|
||||
case CV_8U: fill_value = saturate_cast<uchar>( s.val[0] ); break;
|
||||
case CV_8S: fill_value = saturate_cast<schar>( s.val[0] ); break;
|
||||
default: return false;
|
||||
}
|
||||
|
||||
// check if all element is same.
|
||||
const int64* is = (const int64*)&s.val[0];
|
||||
switch (mat.channels())
|
||||
{
|
||||
case 1: return true;
|
||||
case 2: return (is[0] == is[1]);
|
||||
case 3: return (is[0] == is[1] && is[1] == is[2]);
|
||||
case 4: return (is[0] == is[1] && is[1] == is[2] && is[2] == is[3]);
|
||||
default: return false;
|
||||
}
|
||||
}
|
||||
|
||||
Mat& Mat::operator = (const Scalar& s)
|
||||
{
|
||||
CV_INSTRUMENT_REGION();
|
||||
@ -434,6 +457,14 @@ Mat& Mat::operator = (const Scalar& s)
|
||||
}
|
||||
else
|
||||
{
|
||||
int fill_value = 0;
|
||||
if ( can_apply_memset(*this, s, fill_value) )
|
||||
{
|
||||
for (size_t i = 0; i < it.nplanes; i++, ++it)
|
||||
memset(dptr, fill_value, elsize);
|
||||
return *this;
|
||||
}
|
||||
|
||||
if( it.nplanes > 0 )
|
||||
{
|
||||
double scalar[12];
|
||||
|
@ -561,7 +561,7 @@ void cv::cuda::GpuMat::convertTo(OutputArray _dst, int rtype, Stream& stream) co
|
||||
{convertToNoScale<double, uchar>, convertToNoScale<double, schar>, convertToNoScale<double, ushort>, convertToNoScale<double, short>, convertToNoScale<double, int>, convertToNoScale<double, float>, 0}
|
||||
};
|
||||
|
||||
funcs[sdepth][ddepth](reshape(1), dst.reshape(1), stream);
|
||||
funcs[sdepth][ddepth](src.reshape(1), dst.reshape(1), stream);
|
||||
}
|
||||
|
||||
void cv::cuda::GpuMat::convertTo(OutputArray _dst, int rtype, double alpha, double beta, Stream& stream) const
|
||||
@ -591,7 +591,7 @@ void cv::cuda::GpuMat::convertTo(OutputArray _dst, int rtype, double alpha, doub
|
||||
{convertToScale<double, uchar>, convertToScale<double, schar>, convertToScale<double, ushort>, convertToScale<double, short>, convertToScale<double, int>, convertToScale<double, float>, convertToScale<double, double>}
|
||||
};
|
||||
|
||||
funcs[sdepth][ddepth](reshape(1), dst.reshape(1), alpha, beta, stream);
|
||||
funcs[sdepth][ddepth](src.reshape(1), dst.reshape(1), alpha, beta, stream);
|
||||
}
|
||||
|
||||
void cv::cuda::convertFp16(InputArray _src, OutputArray _dst, Stream& stream)
|
||||
|
@ -6464,6 +6464,9 @@ struct Image2D::Impl
|
||||
CV_Error(Error::OpenCLApiCallError, "OpenCL runtime not found!");
|
||||
|
||||
cl_context context = (cl_context)Context::getDefault().ptr();
|
||||
if (!context)
|
||||
return false;
|
||||
|
||||
// Figure out how many formats are supported by this context.
|
||||
cl_uint numFormats = 0;
|
||||
cl_int err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE,
|
||||
|
@ -119,7 +119,6 @@ public:
|
||||
CV_CheckEQ(inputs.size(), (size_t)2, "");
|
||||
numOutput = inputs[1].back();
|
||||
cAxis = inputs[0].size() - 1;
|
||||
CV_CheckEQ(numOutput, inputs[0][cAxis - 1], "");
|
||||
int dims = inputs[0].size();
|
||||
CV_CheckEQ(inputs[1].size(), (size_t)dims, "");
|
||||
CV_CheckGE(dims, 2, "");
|
||||
|
@ -108,6 +108,8 @@ public:
|
||||
type = AVE;
|
||||
else if (pool == "stochastic")
|
||||
type = STOCHASTIC;
|
||||
else if (pool == "sum")
|
||||
type = SUM;
|
||||
else
|
||||
CV_Error(Error::StsBadArg, "Unknown pooling type \"" + pool + "\"");
|
||||
|
||||
@ -209,7 +211,7 @@ public:
|
||||
return type == MAX || type == AVE;
|
||||
}
|
||||
else
|
||||
return type != STOCHASTIC;
|
||||
return type != STOCHASTIC && type != SUM;
|
||||
}
|
||||
#endif
|
||||
if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
|
||||
@ -304,7 +306,7 @@ public:
|
||||
maxPooling(inputs[0], outputs[0], mask);
|
||||
break;
|
||||
}
|
||||
case AVE:
|
||||
case AVE: case SUM:
|
||||
CV_Assert_N(inputs.size() == 1, outputs.size() == 1);
|
||||
avePooling(inputs[0], outputs[0]);
|
||||
break;
|
||||
@ -513,7 +515,7 @@ public:
|
||||
virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs,
|
||||
const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
|
||||
{
|
||||
CV_Assert_N((inputs.size() == 1 && (type == MAX || type == AVE)) || inputs.size() == 2, nodes.size() == inputs.size());
|
||||
CV_Assert_N((inputs.size() == 1 && (type == MAX || type == AVE || type == SUM)) || inputs.size() == 2, nodes.size() == inputs.size());
|
||||
auto& ieInpNode = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
|
||||
|
||||
ngraph::op::PadType pad_type = ngraph::op::PadType::EXPLICIT;
|
||||
@ -528,6 +530,19 @@ public:
|
||||
exclude_pad, rounding_type, pad_type);
|
||||
return Ptr<BackendNode>(new InfEngineNgraphNode(ave_pool));
|
||||
}
|
||||
else if (type == SUM) {
|
||||
ngraph::Shape inpShape = ieInpNode->get_shape();
|
||||
CV_Assert(inpShape.size() == 2 + kernel_size.size());
|
||||
std::vector<int64_t> axes;
|
||||
for (size_t i = 0; i < kernel_size.size(); i++)
|
||||
{
|
||||
if (inpShape[2 + i] == kernel_size[i])
|
||||
axes.push_back(2 + i);
|
||||
}
|
||||
auto reduction_axes = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{axes.size()}, axes);
|
||||
auto reduce_sum = std::make_shared<ngraph::op::v1::ReduceSum>(ieInpNode, reduction_axes, true);
|
||||
return Ptr<BackendNode>(new InfEngineNgraphNode(reduce_sum));
|
||||
}
|
||||
else if (type == MAX) {
|
||||
auto max_pool = std::make_shared<ngraph::op::v1::MaxPool>(ieInpNode, ngraph::Strides(strides),
|
||||
ngraph::Shape(pads_begin), ngraph::Shape(pads_end), ngraph::Shape(kernel_size),
|
||||
@ -887,7 +902,7 @@ public:
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (poolingType == AVE)
|
||||
else if (poolingType == AVE || poolingType == SUM)
|
||||
{
|
||||
for( ; x0 < x1; ++x0)
|
||||
{
|
||||
@ -898,7 +913,7 @@ public:
|
||||
xend = min(xend, inp_width);
|
||||
float inv_kernel_area = avePoolPaddedArea ? xdelta * ydelta * ddelta :
|
||||
((dend - dstart) * (yend - ystart) * (xend - xstart));
|
||||
inv_kernel_area = 1.0 / inv_kernel_area;
|
||||
inv_kernel_area = poolingType == AVE ? 1.0 / inv_kernel_area : 1.0;
|
||||
#if CV_SIMD128
|
||||
if( isPool2D && xstart > 0 && x0 + 7 < x1 && (x0 + 7) * stride_w - pad_l + kernel_w < inp_width )
|
||||
{
|
||||
@ -1243,6 +1258,7 @@ private:
|
||||
MAX,
|
||||
AVE,
|
||||
STOCHASTIC,
|
||||
SUM,
|
||||
ROI, // RoI pooling, https://arxiv.org/pdf/1504.08083.pdf
|
||||
PSROI // Position-sensitive RoI pooling, https://arxiv.org/pdf/1605.06409.pdf
|
||||
};
|
||||
|
@ -262,6 +262,24 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
class ExpandSubgraph : public Subgraph
|
||||
{
|
||||
public:
|
||||
ExpandSubgraph()
|
||||
{
|
||||
int input = addNodeToMatch("");
|
||||
int values = addNodeToMatch("");
|
||||
int init = addNodeToMatch("ConstantOfShape", values);
|
||||
int coeff = addNodeToMatch("Constant");
|
||||
int mul = addNodeToMatch("Mul", init, coeff);
|
||||
int shape = addNodeToMatch("Constant");
|
||||
int condition = addNodeToMatch("Equal", shape, mul);
|
||||
int where = addNodeToMatch("Where", condition, init, addNodeToMatch("Constant"));
|
||||
addNodeToMatch("Expand", input, where);
|
||||
setFusedNode("Expand", input, shape);
|
||||
}
|
||||
};
|
||||
|
||||
class MulCastSubgraph : public Subgraph
|
||||
{
|
||||
public:
|
||||
@ -459,6 +477,7 @@ void simplifySubgraphs(opencv_onnx::GraphProto& net)
|
||||
subgraphs.push_back(makePtr<NormalizeSubgraph3>());
|
||||
subgraphs.push_back(makePtr<BatchNormalizationSubgraph1>());
|
||||
subgraphs.push_back(makePtr<BatchNormalizationSubgraph2>());
|
||||
subgraphs.push_back(makePtr<ExpandSubgraph>());
|
||||
|
||||
simplifySubgraphs(Ptr<ImportGraphWrapper>(new ONNXGraphWrapper(net)), subgraphs);
|
||||
}
|
||||
|
@ -387,26 +387,42 @@ void ONNXImporter::populateNet(Net dstNet)
|
||||
layerParams.set("ceil_mode", layerParams.has("pad_mode"));
|
||||
layerParams.set("ave_pool_padded_area", framework_name == "pytorch");
|
||||
}
|
||||
else if (layer_type == "GlobalAveragePool" || layer_type == "GlobalMaxPool" || layer_type == "ReduceMean")
|
||||
else if (layer_type == "GlobalAveragePool" || layer_type == "GlobalMaxPool" ||
|
||||
layer_type == "ReduceMean" || layer_type == "ReduceSum")
|
||||
{
|
||||
CV_Assert(node_proto.input_size() == 1);
|
||||
layerParams.type = "Pooling";
|
||||
layerParams.set("pool", layer_type == "GlobalMaxPool"? "MAX" : "AVE");
|
||||
String pool;
|
||||
if (layer_type == "GlobalMaxPool")
|
||||
pool = "MAX";
|
||||
else if (layer_type == "ReduceSum")
|
||||
pool = "SUM";
|
||||
else
|
||||
pool = "AVE";
|
||||
layerParams.set("pool", pool);
|
||||
layerParams.set("global_pooling", layer_type == "GlobalAveragePool" || layer_type == "GlobalMaxPool");
|
||||
|
||||
if (layer_type == "ReduceMean")
|
||||
if (layer_type == "ReduceMean" || layer_type == "ReduceSum")
|
||||
{
|
||||
if (layerParams.get<int>("keepdims") == 0 || !layerParams.has("axes"))
|
||||
CV_Error(Error::StsNotImplemented, "Unsupported mode of ReduceMean operation.");
|
||||
if (!layerParams.has("axes"))
|
||||
CV_Error(Error::StsNotImplemented, "Unsupported mode of " + layer_type + " operation.");
|
||||
|
||||
MatShape inpShape = outShapes[node_proto.input(0)];
|
||||
DictValue axes = layerParams.get("axes");
|
||||
bool keepdims = layerParams.get<int>("keepdims");
|
||||
MatShape targetShape = inpShape;
|
||||
for (int i = 0; i < axes.size(); i++) {
|
||||
int axis = clamp(axes.get<int>(i), inpShape.size());
|
||||
if (keepdims) {
|
||||
targetShape[axis] = 1;
|
||||
} else {
|
||||
targetShape.erase(targetShape.begin() + axis);
|
||||
}
|
||||
}
|
||||
|
||||
if (inpShape.size() == 3 && axes.size() <= 2)
|
||||
{
|
||||
int axis = axes.get<int>(0);
|
||||
int axis = clamp(axes.get<int>(0), inpShape.size());
|
||||
CV_CheckNE(axis, 0, "");
|
||||
outShapes[layerParams.name] = inpShape;
|
||||
outShapes[layerParams.name][axis] = 1;
|
||||
|
||||
LayerParams reshapeLp;
|
||||
reshapeLp.name = layerParams.name + "/reshape";
|
||||
@ -426,13 +442,12 @@ void ONNXImporter::populateNet(Net dstNet)
|
||||
avgLp.name = layerParams.name + "/avg";
|
||||
avgLp.type = "Pooling";
|
||||
CV_Assert(layer_id.find(avgLp.name) == layer_id.end());
|
||||
avgLp.set("pool", "ave");
|
||||
avgLp.set("pool", pool);
|
||||
if (axes.size() == 2)
|
||||
{
|
||||
CV_CheckEQ(axes.get<int>(0), 1, "Unsupported ReduceMean mode");
|
||||
CV_CheckEQ(axes.get<int>(1), 2, "Unsupported ReduceMean mode");
|
||||
CV_CheckEQ(clamp(axes.get<int>(0), inpShape.size()), 1, ("Unsupported " + layer_type + " mode").c_str());
|
||||
CV_CheckEQ(clamp(axes.get<int>(1), inpShape.size()), 2, ("Unsupported " + layer_type + " mode").c_str());
|
||||
avgLp.set("global_pooling", true);
|
||||
outShapes[layerParams.name][axes.get<int>(1)] = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -443,28 +458,33 @@ void ONNXImporter::populateNet(Net dstNet)
|
||||
node_proto.set_input(0, reshapeLp.name);
|
||||
node_proto.set_output(0, avgLp.name);
|
||||
addLayer(dstNet, avgLp, node_proto, layer_id, outShapes);
|
||||
|
||||
layerParams.type = "Flatten";
|
||||
layerParams.set("axis", 0);
|
||||
layerParams.set("end_axis", 1);
|
||||
|
||||
node_proto.set_input(0, avgLp.name);
|
||||
node_proto.set_output(0, layerParams.name);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (inpShape.size() != 4 && inpShape.size() != 5)
|
||||
CV_Error(Error::StsNotImplemented, "Unsupported input shape of reduce_mean operation.");
|
||||
CV_Error(Error::StsNotImplemented, "Unsupported input shape of " + layer_type + " operation.");
|
||||
|
||||
CV_Assert(axes.size() <= inpShape.size() - 2);
|
||||
std::vector<int> kernel_size(inpShape.size() - 2, 1);
|
||||
for (int i = 0; i < axes.size(); i++) {
|
||||
int axis = axes.get<int>(i);
|
||||
int axis = clamp(axes.get<int>(i), inpShape.size());
|
||||
CV_Assert_N(axis >= 2 + i, axis < inpShape.size());
|
||||
kernel_size[axis - 2] = inpShape[axis];
|
||||
}
|
||||
layerParams.set("kernel_size", DictValue::arrayInt(&kernel_size[0], kernel_size.size()));
|
||||
LayerParams poolLp = layerParams;
|
||||
poolLp.name = layerParams.name + "/avg";
|
||||
CV_Assert(layer_id.find(poolLp.name) == layer_id.end());
|
||||
poolLp.set("kernel_size", DictValue::arrayInt(&kernel_size[0], kernel_size.size()));
|
||||
|
||||
node_proto.set_output(0, poolLp.name);
|
||||
addLayer(dstNet, poolLp, node_proto, layer_id, outShapes);
|
||||
}
|
||||
|
||||
layerParams.type = "Reshape";
|
||||
layerParams.set("dim", DictValue::arrayInt(&targetShape[0], targetShape.size()));
|
||||
|
||||
node_proto.set_input(0, node_proto.output(0));
|
||||
node_proto.set_output(0, layerParams.name);
|
||||
}
|
||||
}
|
||||
else if (layer_type == "Slice")
|
||||
@ -641,6 +661,17 @@ void ONNXImporter::populateNet(Net dstNet)
|
||||
{
|
||||
layerParams.type = "Scale";
|
||||
layerParams.set("bias_term", true);
|
||||
int axis = 1;
|
||||
for (int i = 0; i < graph_proto.initializer_size(); i++)
|
||||
{
|
||||
opencv_onnx::TensorProto tensor_proto = graph_proto.initializer(i);
|
||||
if (tensor_proto.name() == node_proto.input(const_blob_id))
|
||||
{
|
||||
axis = inpShape.size() - tensor_proto.dims_size();
|
||||
break;
|
||||
}
|
||||
}
|
||||
layerParams.set("axis", axis);
|
||||
blob = blob.reshape(1, 1);
|
||||
layerParams.blobs.push_back((isSub ? -1 : 1) * blob);
|
||||
}
|
||||
@ -911,13 +942,20 @@ void ONNXImporter::populateNet(Net dstNet)
|
||||
CV_Assert(node_proto.input_size() == 2);
|
||||
layerParams.type = "InnerProduct";
|
||||
layerParams.set("bias_term", false);
|
||||
CV_Assert(constBlobs.find(node_proto.input(0)) == constBlobs.end());
|
||||
int firstInpDims = outShapes[node_proto.input(0)].size();
|
||||
int secondInpDims;
|
||||
|
||||
if (constBlobs.find(node_proto.input(1)) != constBlobs.end())
|
||||
{
|
||||
Mat blob = getBlob(node_proto, constBlobs, 1);
|
||||
secondInpDims = blob.dims;
|
||||
layerParams.blobs.push_back(blob.t());
|
||||
layerParams.set("num_output", layerParams.blobs[0].size[0]);
|
||||
} else {
|
||||
secondInpDims = outShapes[node_proto.input(1)].size();
|
||||
}
|
||||
layerParams.set("axis", firstInpDims - secondInpDims + 1);
|
||||
}
|
||||
else if (layer_type == "Mul" || layer_type == "Div")
|
||||
{
|
||||
@ -983,15 +1021,10 @@ void ONNXImporter::populateNet(Net dstNet)
|
||||
{
|
||||
Mat inp0 = getBlob(node_proto, constBlobs, 0);
|
||||
Mat inp1 = getBlob(node_proto, constBlobs, 1);
|
||||
if (inp0.size != inp1.size)
|
||||
if (inp0.size != inp1.size && inp1.total() != 1)
|
||||
CV_Error(Error::StsNotImplemented, "Constant multiply with different shapes");
|
||||
|
||||
Mat out;
|
||||
if (isDiv)
|
||||
divide(inp0, inp1, out);
|
||||
else
|
||||
multiply(inp0, inp1, out);
|
||||
|
||||
Mat out = isDiv ? inp0 / inp1 : inp0.mul(inp1);
|
||||
out = out.reshape(1, inp0.dims, inp0.size);
|
||||
out.dims = inp0.dims; // to workaround dims == 1
|
||||
addConstant(layerParams.name, out, constBlobs, outShapes);
|
||||
@ -1162,9 +1195,45 @@ void ONNXImporter::populateNet(Net dstNet)
|
||||
Mat newShapeMat = getBlob(node_proto, constBlobs, 1);
|
||||
MatShape targetShape(newShapeMat.ptr<int>(), newShapeMat.ptr<int>() + newShapeMat.total());
|
||||
|
||||
shapeIt = outShapes.find(node_proto.input(0));
|
||||
CV_Assert(shapeIt != outShapes.end());
|
||||
MatShape inpShape = shapeIt->second;
|
||||
MatShape inpShape;
|
||||
bool haveVariables = constBlobs.find(node_proto.input(0)) == constBlobs.end();
|
||||
if (haveVariables)
|
||||
{
|
||||
shapeIt = outShapes.find(node_proto.input(0));
|
||||
CV_Assert(shapeIt != outShapes.end());
|
||||
inpShape = shapeIt->second;
|
||||
}
|
||||
else
|
||||
{
|
||||
inpShape = shape(getBlob(node_proto, constBlobs, 0));
|
||||
}
|
||||
|
||||
String srcName = node_proto.input(0);
|
||||
// Unsqueeze and repeat along new axis
|
||||
if (targetShape.size() == inpShape.size() + 1)
|
||||
{
|
||||
for (int i = 0; i < targetShape.size(); i++)
|
||||
{
|
||||
if (targetShape[i] == -1 && i < inpShape.size())
|
||||
targetShape[i] = inpShape[i];
|
||||
else if (i < inpShape.size() && targetShape[i] != inpShape[i])
|
||||
inpShape.insert(inpShape.begin() + i, 1);
|
||||
}
|
||||
if (haveVariables)
|
||||
{
|
||||
LayerParams reshapeLp;
|
||||
reshapeLp.name = layerParams.name + "/reshape";
|
||||
reshapeLp.type = "Reshape";
|
||||
CV_Assert(layer_id.find(reshapeLp.name) == layer_id.end());
|
||||
reshapeLp.set("dim", DictValue::arrayInt(&inpShape[0], inpShape.size()));
|
||||
|
||||
opencv_onnx::NodeProto proto;
|
||||
proto.add_input(node_proto.input(0));
|
||||
proto.add_output(reshapeLp.name);
|
||||
addLayer(dstNet, reshapeLp, proto, layer_id, outShapes);
|
||||
srcName = reshapeLp.name;
|
||||
}
|
||||
}
|
||||
CV_CheckEQ(inpShape.size(), targetShape.size(), "Unsupported Expand op with different dims");
|
||||
|
||||
std::vector<int> broadcast_axes;
|
||||
@ -1179,6 +1248,19 @@ void ONNXImporter::populateNet(Net dstNet)
|
||||
}
|
||||
}
|
||||
|
||||
if (!haveVariables)
|
||||
{
|
||||
if (broadcast_axes.size() != 1)
|
||||
CV_Error(Error::StsNotImplemented, "Expand op doesn't support multiple axes for constant input");
|
||||
|
||||
Mat input = getBlob(node_proto, constBlobs, 0);
|
||||
input = input.reshape(0, total(inpShape, 0, broadcast_axes[0]));
|
||||
Mat output = cv::repeat(input, 1, targetShape[broadcast_axes[0]]);
|
||||
output = output.reshape(0, targetShape);
|
||||
addConstant(layerParams.name, output, constBlobs, outShapes);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (broadcast_axes.size() == 2 &&
|
||||
broadcast_axes[0] == broadcast_axes[1] - 1 && broadcast_axes[1] == inpShape.size() - 1)
|
||||
{
|
||||
@ -1213,6 +1295,7 @@ void ONNXImporter::populateNet(Net dstNet)
|
||||
CV_Assert(layer_id.find(copyLP.name) == layer_id.end());
|
||||
input_names.push_back(copyLP.name);
|
||||
|
||||
node_proto.set_input(0, srcName);
|
||||
node_proto.set_output(0, copyLP.name);
|
||||
addLayer(dstNet, copyLP, node_proto, layer_id, outShapes);
|
||||
}
|
||||
@ -1223,6 +1306,7 @@ void ONNXImporter::populateNet(Net dstNet)
|
||||
}
|
||||
layerParams.set("axis", broadcast_axes[0]);
|
||||
layerParams.type = "Concat";
|
||||
node_proto.set_output(0, layerParams.name);
|
||||
}
|
||||
else
|
||||
CV_Error(Error::StsNotImplemented, "Unsupported Expand op");
|
||||
@ -1395,6 +1479,7 @@ void ONNXImporter::populateNet(Net dstNet)
|
||||
|
||||
inpShape.erase(inpShape.begin() + axis);
|
||||
layerParams.type = "Reshape";
|
||||
layerParams.set("axis", 0);
|
||||
layerParams.set("dim", DictValue::arrayInt(&inpShape[0], inpShape.size()));
|
||||
node_proto.set_input(0, sliceLp.name);
|
||||
}
|
||||
|
@ -2067,7 +2067,7 @@ void TFImporter::populateNet(Net dstNet)
|
||||
connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
|
||||
connect(layer_id, dstNet, parsePin(layer.input(1)), id, 1);
|
||||
}
|
||||
else if (type == "Mean")
|
||||
else if (type == "Mean" || type == "Sum")
|
||||
{
|
||||
// Computes the mean of elements across dimensions of a tensor.
|
||||
// If keepdims is false (default) reduces input_tensor along the dimensions given in axis,
|
||||
@ -2116,7 +2116,7 @@ void TFImporter::populateNet(Net dstNet)
|
||||
LayerParams avgLp;
|
||||
std::string avgName = name + "/avg";
|
||||
CV_Assert(layer_id.find(avgName) == layer_id.end());
|
||||
avgLp.set("pool", "ave");
|
||||
avgLp.set("pool", type == "Mean" ? "ave" : "sum");
|
||||
// pooling kernel H x 1
|
||||
avgLp.set("global_pooling_h", true);
|
||||
avgLp.set("kernel_w", 1);
|
||||
@ -2153,11 +2153,44 @@ void TFImporter::populateNet(Net dstNet)
|
||||
layer_id[name] = id;
|
||||
connect(layer_id, dstNet, Pin(avgName), id, 0);
|
||||
connect(layer_id, dstNet, Pin(layerShapeName), id, 1);
|
||||
} else if (indices.total() == 1) {
|
||||
int axis = toNCHW(indices.at<int>(0));
|
||||
if (axis == 2 || axis == 3)
|
||||
{
|
||||
layerParams.set("pool", type == "Mean" ? "ave" : "sum");
|
||||
layerParams.set(axis == 2 ? "kernel_w" : "kernel_h", 1);
|
||||
layerParams.set(axis == 2 ? "global_pooling_h" : "global_pooling_w", true);
|
||||
int id = dstNet.addLayer(name, "Pooling", layerParams);
|
||||
layer_id[name] = id;
|
||||
connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
|
||||
|
||||
if (!keepDims)
|
||||
{
|
||||
// To keep correct order after squeeze dims we first need to change layout from NCHW to NHWC
|
||||
LayerParams permLP;
|
||||
int order[] = {0, 2, 3, 1}; // From OpenCV's NCHW to NHWC.
|
||||
permLP.set("order", DictValue::arrayInt<int*>(order, 4));
|
||||
std::string permName = name + "/nchw";
|
||||
CV_Assert(layer_id.find(permName) == layer_id.end());
|
||||
int permId = dstNet.addLayer(permName, "Permute", permLP);
|
||||
layer_id[permName] = permId;
|
||||
connect(layer_id, dstNet, Pin(name), permId, 0);
|
||||
|
||||
LayerParams squeezeLp;
|
||||
std::string squeezeName = name + "/squeeze";
|
||||
CV_Assert(layer_id.find(squeezeName) == layer_id.end());
|
||||
squeezeLp.set("axis", indices.at<int>(0));
|
||||
squeezeLp.set("end_axis", indices.at<int>(0) + 1);
|
||||
int squeezeId = dstNet.addLayer(squeezeName, "Flatten", squeezeLp);
|
||||
layer_id[squeezeName] = squeezeId;
|
||||
connect(layer_id, dstNet, Pin(permName), squeezeId, 0);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (indices.total() != 2 || indices.at<int>(0) != 1 || indices.at<int>(1) != 2)
|
||||
CV_Error(Error::StsNotImplemented, "Unsupported mode of reduce_mean operation.");
|
||||
CV_Error(Error::StsNotImplemented, "Unsupported mode of reduce_mean or reduce_sum operation.");
|
||||
|
||||
layerParams.set("pool", "ave");
|
||||
layerParams.set("pool", type == "Mean" ? "ave" : "sum");
|
||||
layerParams.set("global_pooling", true);
|
||||
int id = dstNet.addLayer(name, "Pooling", layerParams);
|
||||
layer_id[name] = id;
|
||||
|
@ -786,6 +786,8 @@ TEST_P(Test_Darknet_layers, connected)
|
||||
|
||||
// Runs the Darknet "relu" layer test; the skip tag is applied for the
// NN Builder 2019 inference-engine backend on a Myriad target.
TEST_P(Test_Darknet_layers, relu)
{
    const bool nnBuilderOnMyriad =
        backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && target == DNN_TARGET_MYRIAD;
    if (nnBuilderOnMyriad)
    {
        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD);
    }

    testDarknetLayer("relu");
}
|
||||
|
||||
|
@ -2098,4 +2098,436 @@ TEST_P(Layer_Test_BatchNorm, fusion)
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(/**/, Layer_Test_BatchNorm, dnnBackendsAndTargets());
|
||||
|
||||
class TestLayerFusion : public DNNTestLayer {
|
||||
public:
|
||||
static void makeDefaultTestConvolutionLayer(LayerParams& convParams, int in_channels, int num_filters, bool bias_term)
|
||||
{
|
||||
const int kernel_h = 3, kernel_w = 3;
|
||||
const int pad_h = kernel_h / 2, pad_w = kernel_w / 2;
|
||||
|
||||
convParams.set("kernel_h", kernel_h);
|
||||
convParams.set("kernel_w", kernel_w);
|
||||
convParams.set("pad_h", pad_h);
|
||||
convParams.set("pad_w", pad_w);
|
||||
convParams.set("num_output", num_filters);
|
||||
convParams.set("bias_term", bias_term);
|
||||
convParams.type = "Convolution";
|
||||
convParams.name = "convolution";
|
||||
|
||||
float conv_init_magnitude = 1.0f / in_channels / kernel_h / kernel_w;
|
||||
int weightsShape[] = {num_filters, in_channels, kernel_h, kernel_w};
|
||||
Mat weights(4, &weightsShape[0], CV_32F);
|
||||
randu(weights, -conv_init_magnitude, conv_init_magnitude);
|
||||
convParams.blobs.push_back(weights);
|
||||
if (bias_term)
|
||||
{
|
||||
Mat bias(1, num_filters, CV_32F);
|
||||
randu(bias, -1.0f, 1.0f);
|
||||
convParams.blobs.push_back(bias);
|
||||
}
|
||||
}
|
||||
|
||||
static void makeDefaultTestActivationLayer(LayerParams& activationParams, const std::string& type, int in_channels)
|
||||
{
|
||||
activationParams.type = type;
|
||||
activationParams.name = "activation";
|
||||
if (activationParams.type == "ReLU")
|
||||
activationParams.set("negative_slope", 0.1f);
|
||||
else if (activationParams.type == "Power")
|
||||
{
|
||||
activationParams.set("power", 2.0f);
|
||||
activationParams.set("scale", 0.5f);
|
||||
activationParams.set("shift", 0.3f);
|
||||
}
|
||||
else if (activationParams.type == "ReLU6")
|
||||
{
|
||||
activationParams.set("min_value", -1.0f);
|
||||
activationParams.set("max_value", 1.0f);
|
||||
}
|
||||
else if (activationParams.type == "ChannelsPReLU")
|
||||
{
|
||||
Mat scales(1, in_channels, CV_32F);
|
||||
randu(scales, -1.0f, 1.0f);
|
||||
activationParams.blobs.push_back(scales);
|
||||
}
|
||||
}
|
||||
|
||||
static void makeDefaultTestEltwiseLayer(LayerParams& eltwiseParams, const std::string& op, bool withCoefficients)
|
||||
{
|
||||
eltwiseParams.type = "Eltwise";
|
||||
eltwiseParams.name = "eltwise";
|
||||
eltwiseParams.set("operation", op);
|
||||
if (withCoefficients)
|
||||
{
|
||||
float coeff[] = {0.3f, 0.5f};
|
||||
eltwiseParams.set("coeff", DictValue::arrayReal<float*>(coeff, 2));
|
||||
}
|
||||
}
|
||||
|
||||
static void test(Mat& input, Net& net, Backend backendId, Target targetId, std::vector<int> expectedFusedLayers = std::vector<int>(), double l1 = 0.0, double lInf = 0.0)
|
||||
{
|
||||
DNNTestLayer::checkBackend(backendId, targetId);
|
||||
|
||||
net.enableFusion(false);
|
||||
net.setPreferableBackend(DNN_BACKEND_OPENCV);
|
||||
net.setPreferableTarget(DNN_TARGET_CPU);
|
||||
net.setInput(input);
|
||||
Mat outputReference = net.forward().clone();
|
||||
std::vector<double> refTimings;
|
||||
net.getPerfProfile(refTimings);
|
||||
for (int i = 0; i < refTimings.size(); i++)
|
||||
{
|
||||
CV_Assert(refTimings[i] != 0.0);
|
||||
}
|
||||
|
||||
net.enableFusion(true);
|
||||
net.setPreferableBackend(backendId);
|
||||
net.setPreferableTarget(targetId);
|
||||
net.setInput(input);
|
||||
Mat outputTest = net.forward().clone();
|
||||
std::vector<double> testTimings;
|
||||
net.getPerfProfile(testTimings);
|
||||
for (int i = 0; i < testTimings.size(); i++)
|
||||
{
|
||||
if(std::find(expectedFusedLayers.begin(), expectedFusedLayers.end(), i + 1) != expectedFusedLayers.end())
|
||||
{
|
||||
EXPECT_EQ(testTimings[i], 0.0);
|
||||
}
|
||||
else
|
||||
{
|
||||
EXPECT_NE(testTimings[i], 0.0);
|
||||
}
|
||||
}
|
||||
|
||||
// double ref_max_value, ref_min_value;
|
||||
// minMaxLoc(outputReference.reshape(1, 1), &ref_min_value, &ref_max_value);
|
||||
// std::cout << "reference range: " << ref_min_value << ' ' << ref_max_value << std::endl;
|
||||
|
||||
double default_l1, default_lInf;
|
||||
DNNTestLayer::getDefaultThresholds(backendId, targetId, &default_l1, &default_lInf);
|
||||
if (l1 == 0.0)
|
||||
l1 = default_l1;
|
||||
if (lInf == 0.0)
|
||||
lInf = default_lInf;
|
||||
normAssert(outputReference, outputTest, "", l1, lInf);
|
||||
}
|
||||
|
||||
static testing::internal::ParamGenerator<std::string> eltwiseOpList()
|
||||
{
|
||||
// TODO: automate list generation
|
||||
return Values("sum", "max", "prod", "div");
|
||||
}
|
||||
|
||||
static testing::internal::ParamGenerator<std::string> activationLayersList()
|
||||
{
|
||||
// TODO: automate list generation
|
||||
return Values("ReLU", "ReLU6", "ChannelsPReLU", "TanH", "Swish", "Mish", "Sigmoid", "ELU", "AbsVal", "BNLL", "Power");
|
||||
}
|
||||
|
||||
static testing::internal::ParamGenerator<tuple<Backend, Target> > dnnBackendsAndTargetsForFusionTests()
|
||||
{
|
||||
return dnnBackendsAndTargets(false, false, true, false, false, false); // OCV OpenCL + OCV CPU
|
||||
}
|
||||
};
|
||||
|
||||
typedef TestWithParam<tuple<bool, std::string, tuple<Backend, Target> > > ConvolutionActivationFusion;
|
||||
TEST_P(ConvolutionActivationFusion, Accuracy)
|
||||
{
|
||||
// input
|
||||
// |
|
||||
// -----------------------
|
||||
// | convolution |
|
||||
// -----------------------
|
||||
// |
|
||||
// -----------------------
|
||||
// | activation |
|
||||
// -----------------------
|
||||
// |
|
||||
// output
|
||||
|
||||
const int batch_size = 2, in_channels = 16;
|
||||
const int in_height = 16, in_width = 16;
|
||||
int inputShape[] = {batch_size, in_channels, in_height, in_width};
|
||||
Mat input(4, &inputShape[0], CV_32F);
|
||||
randu(input, 1.0f, 2.0f);
|
||||
|
||||
bool bias_term = get<0>(GetParam());
|
||||
LayerParams convParams;
|
||||
TestLayerFusion::makeDefaultTestConvolutionLayer(convParams, in_channels, in_channels, bias_term);
|
||||
|
||||
std::string actType = get<1>(GetParam());
|
||||
LayerParams activationParams;
|
||||
TestLayerFusion::makeDefaultTestActivationLayer(activationParams, actType, in_channels);
|
||||
|
||||
Backend backendId = get<0>(get<2>(GetParam()));
|
||||
Target targetId = get<1>(get<2>(GetParam()));
|
||||
|
||||
// bug: https://github.com/opencv/opencv/issues/17964
|
||||
if (actType == "Power" && backendId == DNN_BACKEND_OPENCV && (targetId == DNN_TARGET_OPENCL || targetId == DNN_TARGET_OPENCL_FP16))
|
||||
applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL);
|
||||
|
||||
// bug: https://github.com/opencv/opencv/issues/17953
|
||||
if (actType == "ChannelsPReLU" && bias_term == false &&
|
||||
backendId == DNN_BACKEND_OPENCV && (targetId == DNN_TARGET_OPENCL || targetId == DNN_TARGET_OPENCL_FP16))
|
||||
{
|
||||
applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL);
|
||||
}
|
||||
|
||||
Net net;
|
||||
int convId = net.addLayer(convParams.name, convParams.type, convParams);
|
||||
int activId = net.addLayerToPrev(activationParams.name, activationParams.type, activationParams);
|
||||
net.connect(0, 0, convId, 0);
|
||||
|
||||
std::vector<int> expectedFusedLayers;
|
||||
if (backendId == DNN_BACKEND_OPENCV)
|
||||
{
|
||||
if (targetId == DNN_TARGET_CPU)
|
||||
expectedFusedLayers.push_back(activId); // all activations are fused
|
||||
else if (targetId == DNN_TARGET_OPENCL || targetId == DNN_TARGET_OPENCL_FP16)
|
||||
{
|
||||
if (actType == "ReLU" || actType == "ChannelsPReLU" || actType == "ReLU6" || actType == "TanH" || actType == "Power")
|
||||
expectedFusedLayers.push_back(activId);
|
||||
}
|
||||
}
|
||||
|
||||
TestLayerFusion::test(input, net, backendId, targetId, expectedFusedLayers);
|
||||
}
|
||||
INSTANTIATE_TEST_CASE_P(TestLayerFusion, ConvolutionActivationFusion, Combine(
|
||||
/* bias */ testing::Bool(),
|
||||
/* activation */ TestLayerFusion::activationLayersList(),
|
||||
TestLayerFusion::dnnBackendsAndTargetsForFusionTests()
|
||||
));
|
||||
|
||||
typedef TestWithParam<tuple<bool, std::string, bool, tuple<Backend, Target> > > ConvolutionEltwiseFusion;
|
||||
TEST_P(ConvolutionEltwiseFusion, Accuracy)
|
||||
{
|
||||
// input
|
||||
// |
|
||||
// -------------------------------
|
||||
// | |
|
||||
// | ---------------
|
||||
// | | convolution |
|
||||
// | ---------------
|
||||
// | |
|
||||
// | ---------------- |
|
||||
// --------| eltwise op |-------
|
||||
// ----------------
|
||||
// |
|
||||
// output
|
||||
|
||||
const int batch_size = 2, in_channels = 16;
|
||||
const int in_height = 16, in_width = 16;
|
||||
int inputShape[] = {batch_size, in_channels, in_height, in_width};
|
||||
Mat input(4, &inputShape[0], CV_32F);
|
||||
randu(input, 1.0f, 2.0f); // avoid small values to test eltwise div
|
||||
|
||||
bool bias_term = get<0>(GetParam());
|
||||
LayerParams convParams;
|
||||
TestLayerFusion::makeDefaultTestConvolutionLayer(convParams, in_channels, in_channels, bias_term);
|
||||
|
||||
std::string eltwiseOp = get<1>(GetParam());
|
||||
bool weightedEltwise = get<2>(GetParam());
|
||||
if (eltwiseOp != "sum" && weightedEltwise)
|
||||
throw SkipTestException("weighted eltwise not supported");
|
||||
LayerParams eltwiseParams;
|
||||
TestLayerFusion::makeDefaultTestEltwiseLayer(eltwiseParams, eltwiseOp, weightedEltwise);
|
||||
|
||||
Net net;
|
||||
int convId = net.addLayer(convParams.name, convParams.type, convParams);
|
||||
int eltwiseId = net.addLayer(eltwiseParams.name, eltwiseParams.type, eltwiseParams);
|
||||
net.connect(0, 0, convId, 0);
|
||||
net.connect(convId, 0, eltwiseId, 0);
|
||||
net.connect(0, 0, eltwiseId, 1);
|
||||
|
||||
Backend backendId = get<0>(get<3>(GetParam()));
|
||||
Target targetId = get<1>(get<3>(GetParam()));
|
||||
TestLayerFusion::test(input, net, backendId, targetId);
|
||||
}
|
||||
INSTANTIATE_TEST_CASE_P(TestLayerFusion, ConvolutionEltwiseFusion, Combine(
|
||||
/* bias */ testing::Bool(),
|
||||
/* eltwise op */ TestLayerFusion::eltwiseOpList(),
|
||||
/* eltwise weighted */ testing::Bool(),
|
||||
TestLayerFusion::dnnBackendsAndTargetsForFusionTests()
|
||||
));
|
||||
|
||||
typedef TestWithParam<tuple<bool, std::string, bool, std::string, tuple<Backend, Target> > > ConvolutionEltwiseActivationFusion;
|
||||
TEST_P(ConvolutionEltwiseActivationFusion, Accuracy)
|
||||
{
|
||||
// input
|
||||
// |
|
||||
// -------------------------------
|
||||
// | |
|
||||
// | ---------------
|
||||
// | | convolution |
|
||||
// | ---------------
|
||||
// | |
|
||||
// | ---------------- |
|
||||
// --------| eltwise op |-------
|
||||
// ----------------
|
||||
// |
|
||||
// ----------------
|
||||
// | activation |
|
||||
// ----------------
|
||||
// |
|
||||
// output
|
||||
|
||||
const int batch_size = 2, in_channels = 16;
|
||||
const int in_height = 16, in_width = 16;
|
||||
int inputShape[] = {batch_size, in_channels, in_height, in_width};
|
||||
Mat input(4, &inputShape[0], CV_32F);
|
||||
randu(input, 1.0f, 2.0f); // avoid small values to test eltwise div
|
||||
|
||||
bool bias_term = get<0>(GetParam());
|
||||
LayerParams convParams;
|
||||
TestLayerFusion::makeDefaultTestConvolutionLayer(convParams, in_channels, in_channels, bias_term);
|
||||
|
||||
std::string eltwiseOp = get<1>(GetParam());
|
||||
bool weightedEltwise = get<2>(GetParam());
|
||||
if (eltwiseOp != "sum" && weightedEltwise)
|
||||
throw SkipTestException("weighted eltwise not supported");
|
||||
LayerParams eltwiseParams;
|
||||
TestLayerFusion::makeDefaultTestEltwiseLayer(eltwiseParams, eltwiseOp, false);
|
||||
|
||||
std::string actType = get<3>(GetParam());
|
||||
LayerParams activationParams;
|
||||
TestLayerFusion::makeDefaultTestActivationLayer(activationParams, actType, in_channels);
|
||||
|
||||
Backend backendId = get<0>(get<4>(GetParam()));
|
||||
Target targetId = get<1>(get<4>(GetParam()));
|
||||
|
||||
// bug: https://github.com/opencv/opencv/issues/17945
|
||||
if (eltwiseOp != "sum" && backendId == DNN_BACKEND_OPENCV && (targetId == DNN_TARGET_OPENCL || targetId == DNN_TARGET_OPENCL_FP16))
|
||||
applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL);
|
||||
|
||||
// bug: https://github.com/opencv/opencv/issues/17953
|
||||
if (eltwiseOp == "sum" && actType == "ChannelsPReLU" && bias_term == false &&
|
||||
backendId == DNN_BACKEND_OPENCV && (targetId == DNN_TARGET_OPENCL || targetId == DNN_TARGET_OPENCL_FP16))
|
||||
{
|
||||
applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL);
|
||||
}
|
||||
|
||||
// bug: https://github.com/opencv/opencv/issues/17964
|
||||
if (actType == "Power" && backendId == DNN_BACKEND_OPENCV && (targetId == DNN_TARGET_OPENCL || targetId == DNN_TARGET_OPENCL_FP16))
|
||||
applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL);
|
||||
|
||||
Net net;
|
||||
int convId = net.addLayer(convParams.name, convParams.type, convParams);
|
||||
int eltwiseId = net.addLayer(eltwiseParams.name, eltwiseParams.type, eltwiseParams);
|
||||
int activId = net.addLayer(activationParams.name, activationParams.type, activationParams);
|
||||
net.connect(0, 0, convId, 0);
|
||||
net.connect(convId, 0, eltwiseId, 0);
|
||||
net.connect(0, 0, eltwiseId, 1);
|
||||
net.connect(eltwiseId, 0, activId, 0);
|
||||
|
||||
std::vector<int> expectedFusedLayers;
|
||||
if (backendId == DNN_BACKEND_OPENCV)
|
||||
{
|
||||
if (targetId == DNN_TARGET_CPU)
|
||||
expectedFusedLayers.push_back(activId); // activation is fused with eltwise layer
|
||||
else if (targetId == DNN_TARGET_OPENCL || targetId == DNN_TARGET_OPENCL_FP16)
|
||||
{
|
||||
if (actType == "ReLU" || actType == "ChannelsPReLU" || actType == "Power")
|
||||
{
|
||||
expectedFusedLayers.push_back(eltwiseId);
|
||||
expectedFusedLayers.push_back(activId);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TestLayerFusion::test(input, net, backendId, targetId, expectedFusedLayers);
|
||||
}
|
||||
INSTANTIATE_TEST_CASE_P(TestLayerFusion, ConvolutionEltwiseActivationFusion, Combine(
|
||||
/* bias */ testing::Bool(),
|
||||
/* eltwise op */ TestLayerFusion::eltwiseOpList(),
|
||||
/* eltwise weighted */ testing::Bool(),
|
||||
/* activation */ TestLayerFusion::activationLayersList(),
|
||||
TestLayerFusion::dnnBackendsAndTargetsForFusionTests()
|
||||
));
|
||||
|
||||
typedef TestWithParam<tuple<bool, std::string, std::string, bool, tuple<Backend, Target> > > ConvolutionActivationEltwiseFusion;
|
||||
TEST_P(ConvolutionActivationEltwiseFusion, Accuracy)
|
||||
{
|
||||
// input
|
||||
// |
|
||||
// -------------------------------
|
||||
// | |
|
||||
// | ----------------
|
||||
// | | convolution |
|
||||
// | ----------------
|
||||
// | |
|
||||
// | ----------------
|
||||
// | | activation |
|
||||
// | ----------------
|
||||
// | |
|
||||
// | ---------------- |
|
||||
// --------| eltwise sum |-------
|
||||
// ----------------
|
||||
// |
|
||||
|
||||
const int batch_size = 2, in_channels = 16;
|
||||
const int in_height = 16, in_width = 16;
|
||||
int inputShape[] = {batch_size, in_channels, in_height, in_width};
|
||||
Mat input(4, &inputShape[0], CV_32F);
|
||||
randu(input, 1.0f, 2.0f); // avoid small values to test eltwise div
|
||||
|
||||
bool bias_term = get<0>(GetParam());
|
||||
LayerParams convParams;
|
||||
TestLayerFusion::makeDefaultTestConvolutionLayer(convParams, in_channels, in_channels, bias_term);
|
||||
|
||||
std::string actType = get<1>(GetParam());
|
||||
LayerParams activationParams;
|
||||
TestLayerFusion::makeDefaultTestActivationLayer(activationParams, actType, in_channels);
|
||||
|
||||
std::string eltwiseOp = get<2>(GetParam());
|
||||
bool weightedEltwise = get<3>(GetParam());
|
||||
if (eltwiseOp != "sum" && weightedEltwise)
|
||||
throw SkipTestException("weighted eltwise not supported");
|
||||
LayerParams eltwiseParams;
|
||||
TestLayerFusion::makeDefaultTestEltwiseLayer(eltwiseParams, eltwiseOp, false);
|
||||
|
||||
Backend backendId = get<0>(get<4>(GetParam()));
|
||||
Target targetId = get<1>(get<4>(GetParam()));
|
||||
|
||||
// bug: https://github.com/opencv/opencv/issues/17964
|
||||
if (actType == "Power" && backendId == DNN_BACKEND_OPENCV && (targetId == DNN_TARGET_OPENCL || targetId == DNN_TARGET_OPENCL_FP16))
|
||||
applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL);
|
||||
|
||||
// bug: https://github.com/opencv/opencv/issues/17953
|
||||
if (actType == "ChannelsPReLU" && bias_term == false &&
|
||||
backendId == DNN_BACKEND_OPENCV && (targetId == DNN_TARGET_OPENCL || targetId == DNN_TARGET_OPENCL_FP16))
|
||||
{
|
||||
applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL);
|
||||
}
|
||||
|
||||
Net net;
|
||||
int convId = net.addLayer(convParams.name, convParams.type, convParams);
|
||||
int activId = net.addLayer(activationParams.name, activationParams.type, activationParams);
|
||||
int eltwiseId = net.addLayer(eltwiseParams.name, eltwiseParams.type, eltwiseParams);
|
||||
net.connect(0, 0, convId, 0);
|
||||
net.connect(convId, 0, activId, 0);
|
||||
net.connect(activId, 0, eltwiseId, 0);
|
||||
net.connect(0, 0, eltwiseId, 1);
|
||||
|
||||
std::vector<int> expectedFusedLayers;
|
||||
if (backendId == DNN_BACKEND_OPENCV)
|
||||
{
|
||||
if (targetId == DNN_TARGET_CPU)
|
||||
expectedFusedLayers.push_back(activId); // activation fused with convolution
|
||||
else if (targetId == DNN_TARGET_OPENCL || targetId == DNN_TARGET_OPENCL_FP16)
|
||||
{
|
||||
if (actType == "ReLU" || actType == "ChannelsPReLU" || actType == "ReLU6" || actType == "TanH" || actType == "Power")
|
||||
expectedFusedLayers.push_back(activId); // activation fused with convolution
|
||||
}
|
||||
}
|
||||
|
||||
TestLayerFusion::test(input, net, backendId, targetId, expectedFusedLayers);
|
||||
}
|
||||
INSTANTIATE_TEST_CASE_P(TestLayerFusion, ConvolutionActivationEltwiseFusion, Combine(
|
||||
/* bias */ testing::Bool(),
|
||||
/* activation */ TestLayerFusion::activationLayersList(),
|
||||
/* eltwise op */ TestLayerFusion::eltwiseOpList(),
|
||||
/* eltwise weighted */ testing::Bool(),
|
||||
TestLayerFusion::dnnBackendsAndTargetsForFusionTests()
|
||||
));
|
||||
|
||||
}} // namespace
|
||||
|
@ -270,6 +270,11 @@ TEST_P(Test_ONNX_layers, ReduceMean)
|
||||
testONNXModels("reduce_mean_axis2");
|
||||
}
|
||||
|
||||
// Runs the "reduce_sum" ONNX test model through testONNXModels().
TEST_P(Test_ONNX_layers, ReduceSum)
{
    testONNXModels("reduce_sum");
}
|
||||
|
||||
TEST_P(Test_ONNX_layers, ReduceMean3D)
|
||||
{
|
||||
if (backend == DNN_BACKEND_CUDA)
|
||||
@ -436,10 +441,20 @@ TEST_P(Test_ONNX_layers, MatMul)
|
||||
testONNXModels("matmul_4d");
|
||||
}
|
||||
|
||||
// Runs the "matmul_add" ONNX test model. Skip tags are applied first for the
// Inference Engine NN Builder backend and for the OpenCV backend on the
// OpenCL FP16 target.
TEST_P(Test_ONNX_layers, MatMulAdd)
{
    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
    {
        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER);
    }
    if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16)
    {
        applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16);
    }

    testONNXModels("matmul_add");
}
|
||||
|
||||
// Runs the three "expand_*" ONNX test models, in the order listed.
TEST_P(Test_ONNX_layers, Expand)
{
    static const char* const kModels[] = {
        "expand_batch",
        "expand_channels",
        "expand_neg_batch"
    };
    for (size_t i = 0; i < sizeof(kModels) / sizeof(kModels[0]); ++i)
    {
        testONNXModels(kModels[i]);
    }
}
|
||||
|
||||
TEST_P(Test_ONNX_layers, ExpandHW)
|
||||
|
@ -128,6 +128,13 @@ TEST_P(Test_TensorFlow_layers, reduce_mean)
|
||||
runTensorFlowNet("global_pool_by_axis");
|
||||
}
|
||||
|
||||
// Runs the "sum_pool_by_axis" TensorFlow test network; a skip tag is applied
// first for the Inference Engine NN Builder backend.
TEST_P(Test_TensorFlow_layers, reduce_sum)
{
    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
    {
        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER);
    }

    runTensorFlowNet("sum_pool_by_axis");
}
|
||||
|
||||
TEST_P(Test_TensorFlow_layers, conv_single_conv)
|
||||
{
|
||||
runTensorFlowNet("single_conv");
|
||||
@ -354,6 +361,11 @@ TEST_P(Test_TensorFlow_layers, pooling_reduce_mean)
|
||||
runTensorFlowNet("reduce_mean"); // an average pooling over all spatial dimensions.
|
||||
}
|
||||
|
||||
// Runs the "reduce_sum" TensorFlow test network.
TEST_P(Test_TensorFlow_layers, pooling_reduce_sum)
{
    runTensorFlowNet("reduce_sum");  // a SUM pooling over all spatial dimensions.
}
|
||||
|
||||
TEST_P(Test_TensorFlow_layers, max_pool_grad)
|
||||
{
|
||||
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
|
||||
|
@ -452,14 +452,10 @@ public:
|
||||
root_[i] = pool_.allocate<KMeansNode>();
|
||||
std::memset(root_[i], 0, sizeof(KMeansNode));
|
||||
|
||||
if(is_kdtree_distance::val || is_vector_space_distance::val) {
|
||||
computeNodeStatistics(root_[i], indices_[i], (unsigned int)size_);
|
||||
computeClustering(root_[i], indices_[i], (int)size_, branching_,0);
|
||||
}
|
||||
else {
|
||||
computeBitfieldNodeStatistics(root_[i], indices_[i], (unsigned int)size_);
|
||||
computeBitfieldClustering(root_[i], indices_[i], (int)size_, branching_,0);
|
||||
}
|
||||
Distance* dummy = NULL;
|
||||
computeNodeStatistics(root_[i], indices_[i], (unsigned int)size_, dummy);
|
||||
|
||||
computeClustering(root_[i], indices_[i], (int)size_, branching_,0);
|
||||
}
|
||||
}
|
||||
|
||||
@ -818,6 +814,413 @@ private:
|
||||
}
|
||||
|
||||
|
||||
template<typename DistType>
|
||||
void computeNodeStatistics(KMeansNodePtr node, int* indices,
|
||||
unsigned int indices_length,
|
||||
const DistType* identifier)
|
||||
{
|
||||
(void)identifier;
|
||||
computeNodeStatistics(node, indices, indices_length);
|
||||
}
|
||||
|
||||
/**
 * Dispatch overload selected for cvflann::HammingLUT: forwards to
 * computeBitfieldNodeStatistics().
 *
 * Params:
 *     node = the node whose statistics are computed
 *     indices = indices of the points belonging to the node
 *     indices_length = number of entries in indices
 *     identifier = unused; only its static type selects this overload
 */
void computeNodeStatistics(KMeansNodePtr node, int* indices,
                           unsigned int indices_length,
                           const cvflann::HammingLUT* identifier)
{
    static_cast<void>(identifier);  // tag parameter, never dereferenced
    computeBitfieldNodeStatistics(node, indices, indices_length);
}
|
||||
|
||||
/**
 * Dispatch overload selected for cvflann::Hamming<unsigned char>: forwards to
 * computeBitfieldNodeStatistics().
 *
 * Params:
 *     node = the node whose statistics are computed
 *     indices = indices of the points belonging to the node
 *     indices_length = number of entries in indices
 *     identifier = unused; only its static type selects this overload
 */
void computeNodeStatistics(KMeansNodePtr node, int* indices,
                           unsigned int indices_length,
                           const cvflann::Hamming<unsigned char>* identifier)
{
    static_cast<void>(identifier);  // tag parameter, never dereferenced
    computeBitfieldNodeStatistics(node, indices, indices_length);
}
|
||||
|
||||
/**
 * Dispatch overload selected for cvflann::Hamming2<unsigned char>: forwards to
 * computeBitfieldNodeStatistics().
 *
 * Params:
 *     node = the node whose statistics are computed
 *     indices = indices of the points belonging to the node
 *     indices_length = number of entries in indices
 *     identifier = unused; only its static type selects this overload
 */
void computeNodeStatistics(KMeansNodePtr node, int* indices,
                           unsigned int indices_length,
                           const cvflann::Hamming2<unsigned char>* identifier)
{
    static_cast<void>(identifier);  // tag parameter, never dereferenced
    computeBitfieldNodeStatistics(node, indices, indices_length);
}
|
||||
|
||||
|
||||
void refineClustering(int* indices, int indices_length, int branching, CentersType** centers,
|
||||
std::vector<DistanceType>& radiuses, int* belongs_to, int* count)
|
||||
{
|
||||
cv::AutoBuffer<double> dcenters_buf(branching*veclen_);
|
||||
Matrix<double> dcenters(dcenters_buf.data(), branching, veclen_);
|
||||
|
||||
bool converged = false;
|
||||
int iteration = 0;
|
||||
while (!converged && iteration<iterations_) {
|
||||
converged = true;
|
||||
iteration++;
|
||||
|
||||
// compute the new cluster centers
|
||||
for (int i=0; i<branching; ++i) {
|
||||
memset(dcenters[i],0,sizeof(double)*veclen_);
|
||||
radiuses[i] = 0;
|
||||
}
|
||||
for (int i=0; i<indices_length; ++i) {
|
||||
ElementType* vec = dataset_[indices[i]];
|
||||
double* center = dcenters[belongs_to[i]];
|
||||
for (size_t k=0; k<veclen_; ++k) {
|
||||
center[k] += vec[k];
|
||||
}
|
||||
}
|
||||
for (int i=0; i<branching; ++i) {
|
||||
int cnt = count[i];
|
||||
for (size_t k=0; k<veclen_; ++k) {
|
||||
dcenters[i][k] /= cnt;
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<int> new_centroids(indices_length);
|
||||
std::vector<DistanceType> sq_dists(indices_length);
|
||||
|
||||
// reassign points to clusters
|
||||
KMeansDistanceComputer<Matrix<double> > invoker(
|
||||
distance_, dataset_, branching, indices, dcenters, veclen_, new_centroids, sq_dists);
|
||||
parallel_for_(cv::Range(0, (int)indices_length), invoker);
|
||||
|
||||
for (int i=0; i < (int)indices_length; ++i) {
|
||||
DistanceType sq_dist(sq_dists[i]);
|
||||
int new_centroid(new_centroids[i]);
|
||||
if (sq_dist > radiuses[new_centroid]) {
|
||||
radiuses[new_centroid] = sq_dist;
|
||||
}
|
||||
if (new_centroid != belongs_to[i]) {
|
||||
count[belongs_to[i]]--;
|
||||
count[new_centroid]++;
|
||||
belongs_to[i] = new_centroid;
|
||||
converged = false;
|
||||
}
|
||||
}
|
||||
|
||||
for (int i=0; i<branching; ++i) {
|
||||
// if one cluster converges to an empty cluster,
|
||||
// move an element into that cluster
|
||||
if (count[i]==0) {
|
||||
int j = (i+1)%branching;
|
||||
while (count[j]<=1) {
|
||||
j = (j+1)%branching;
|
||||
}
|
||||
|
||||
for (int k=0; k<indices_length; ++k) {
|
||||
if (belongs_to[k]==j) {
|
||||
// for cluster j, we move the furthest element from the center to the empty cluster i
|
||||
if ( distance_(dataset_[indices[k]], dcenters[j], veclen_) == radiuses[j] ) {
|
||||
belongs_to[k] = i;
|
||||
count[j]--;
|
||||
count[i]++;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
converged = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (int i=0; i<branching; ++i) {
|
||||
centers[i] = new CentersType[veclen_];
|
||||
memoryCounter_ += (int)(veclen_*sizeof(CentersType));
|
||||
for (size_t k=0; k<veclen_; ++k) {
|
||||
centers[i][k] = (CentersType)dcenters[i][k];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void refineBitfieldClustering(int* indices, int indices_length, int branching, CentersType** centers,
|
||||
std::vector<DistanceType>& radiuses, int* belongs_to, int* count)
|
||||
{
|
||||
for (int i=0; i<branching; ++i) {
|
||||
centers[i] = new CentersType[veclen_];
|
||||
memoryCounter_ += (int)(veclen_*sizeof(CentersType));
|
||||
}
|
||||
|
||||
const unsigned int accumulator_veclen = static_cast<unsigned int>(
|
||||
veclen_*sizeof(ElementType)*BITS_PER_CHAR);
|
||||
cv::AutoBuffer<unsigned int> dcenters_buf(branching*accumulator_veclen);
|
||||
Matrix<unsigned int> dcenters(dcenters_buf.data(), branching, accumulator_veclen);
|
||||
|
||||
bool converged = false;
|
||||
int iteration = 0;
|
||||
while (!converged && iteration<iterations_) {
|
||||
converged = true;
|
||||
iteration++;
|
||||
|
||||
// compute the new cluster centers
|
||||
for (int i=0; i<branching; ++i) {
|
||||
memset(dcenters[i],0,sizeof(unsigned int)*accumulator_veclen);
|
||||
radiuses[i] = 0;
|
||||
}
|
||||
for (int i=0; i<indices_length; ++i) {
|
||||
unsigned char* vec = (unsigned char*)dataset_[indices[i]];
|
||||
unsigned int* dcenter = dcenters[belongs_to[i]];
|
||||
for (size_t k=0, l=0; k<accumulator_veclen; k+=BITS_PER_CHAR, ++l) {
|
||||
dcenter[k] += (vec[l]) & 0x01;
|
||||
dcenter[k+1] += (vec[l]>>1) & 0x01;
|
||||
dcenter[k+2] += (vec[l]>>2) & 0x01;
|
||||
dcenter[k+3] += (vec[l]>>3) & 0x01;
|
||||
dcenter[k+4] += (vec[l]>>4) & 0x01;
|
||||
dcenter[k+5] += (vec[l]>>5) & 0x01;
|
||||
dcenter[k+6] += (vec[l]>>6) & 0x01;
|
||||
dcenter[k+7] += (vec[l]>>7) & 0x01;
|
||||
}
|
||||
}
|
||||
for (int i=0; i<branching; ++i) {
|
||||
double cnt = static_cast<double>(count[i]);
|
||||
unsigned int* dcenter = dcenters[i];
|
||||
unsigned char* charCenter = (unsigned char*)centers[i];
|
||||
for (size_t k=0, l=0; k<accumulator_veclen; k+=BITS_PER_CHAR, ++l) {
|
||||
charCenter[l] = static_cast<unsigned char>(
|
||||
(((int)(0.5 + (double)(dcenter[k]) / cnt)))
|
||||
| (((int)(0.5 + (double)(dcenter[k+1]) / cnt))<<1)
|
||||
| (((int)(0.5 + (double)(dcenter[k+2]) / cnt))<<2)
|
||||
| (((int)(0.5 + (double)(dcenter[k+3]) / cnt))<<3)
|
||||
| (((int)(0.5 + (double)(dcenter[k+4]) / cnt))<<4)
|
||||
| (((int)(0.5 + (double)(dcenter[k+5]) / cnt))<<5)
|
||||
| (((int)(0.5 + (double)(dcenter[k+6]) / cnt))<<6)
|
||||
| (((int)(0.5 + (double)(dcenter[k+7]) / cnt))<<7));
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<int> new_centroids(indices_length);
|
||||
std::vector<DistanceType> dists(indices_length);
|
||||
|
||||
// reassign points to clusters
|
||||
KMeansDistanceComputer<ElementType**> invoker(
|
||||
distance_, dataset_, branching, indices, centers, veclen_, new_centroids, dists);
|
||||
parallel_for_(cv::Range(0, (int)indices_length), invoker);
|
||||
|
||||
for (int i=0; i < indices_length; ++i) {
|
||||
DistanceType dist(dists[i]);
|
||||
int new_centroid(new_centroids[i]);
|
||||
if (dist > radiuses[new_centroid]) {
|
||||
radiuses[new_centroid] = dist;
|
||||
}
|
||||
if (new_centroid != belongs_to[i]) {
|
||||
count[belongs_to[i]]--;
|
||||
count[new_centroid]++;
|
||||
belongs_to[i] = new_centroid;
|
||||
converged = false;
|
||||
}
|
||||
}
|
||||
|
||||
for (int i=0; i<branching; ++i) {
|
||||
// if one cluster converges to an empty cluster,
|
||||
// move an element into that cluster
|
||||
if (count[i]==0) {
|
||||
int j = (i+1)%branching;
|
||||
while (count[j]<=1) {
|
||||
j = (j+1)%branching;
|
||||
}
|
||||
|
||||
for (int k=0; k<indices_length; ++k) {
|
||||
if (belongs_to[k]==j) {
|
||||
// for cluster j, we move the furthest element from the center to the empty cluster i
|
||||
if ( distance_(dataset_[indices[k]], centers[j], veclen_) == radiuses[j] ) {
|
||||
belongs_to[k] = i;
|
||||
count[j]--;
|
||||
count[i]++;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
converged = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
 * Creates one child node per cluster and recurses into each of them.
 *
 * For every cluster the points assigned to it are moved (by swapping) into a
 * contiguous range of indices / belongs_to, the child's variance and mean
 * radius are computed from the distances to the zero vector, and
 * computeClustering() is called on that range one level deeper.
 *
 * Params:
 *     node = parent node; its childs array is allocated here
 *     indices = indices of the points of the parent node (reordered in place)
 *     indices_length = number of entries in indices
 *     branching = number of clusters / children
 *     level = level of the parent node
 *     centers = cluster centers; centers[c] becomes the pivot of child c
 *     radiuses = cluster radiuses; radiuses[c] becomes the radius of child c
 *     belongs_to = cluster id of every point (reordered together with indices)
 *     count = number of points per cluster
 */
void computeSubClustering(KMeansNodePtr node, int* indices, int indices_length,
                          int branching, int level, CentersType** centers,
                          std::vector<DistanceType>& radiuses, int* belongs_to, int* count)
{
    // compute kmeans clustering for each of the resulting clusters
    node->childs = pool_.allocate<KMeansNodePtr>(branching);

    int range_begin = 0;
    int range_end = range_begin;
    for (int c = 0; c < branching; ++c) {
        const int cluster_size = count[c];

        DistanceType variance = 0;
        DistanceType mean_radius = 0;
        for (int i = 0; i < indices_length; ++i) {
            if (belongs_to[i] != c) {
                continue;
            }
            const DistanceType d = distance_(dataset_[indices[i]], ZeroIterator<ElementType>(), veclen_);
            variance += d;
            mean_radius += static_cast<DistanceType>( sqrt(d) );
            // pack the members of cluster c into [range_begin, range_end)
            std::swap(indices[i], indices[range_end]);
            std::swap(belongs_to[i], belongs_to[range_end]);
            range_end++;
        }
        variance /= cluster_size;
        mean_radius /= cluster_size;
        variance -= distance_(centers[c], ZeroIterator<ElementType>(), veclen_);

        KMeansNodePtr child = pool_.allocate<KMeansNode>();
        node->childs[c] = child;
        std::memset(child, 0, sizeof(KMeansNode));
        child->radius = radiuses[c];
        child->pivot = centers[c];
        child->variance = variance;
        child->mean_radius = mean_radius;
        computeClustering(child, indices+range_begin, range_end-range_begin, branching, level+1);
        range_begin = range_end;
    }
}
|
||||
|
||||
|
||||
/**
 * Bitfield counterpart of computeSubClustering(): creates one child node per
 * cluster and recurses into each of them.
 *
 * The variance is accumulated as an unsigned 64-bit integer from
 * ensureSquareDistance(), the mean radius from ensureSimpleDistance(); both
 * averages are rounded by adding 0.5 before the conversion back.
 *
 * Params:
 *     node = parent node; its childs array is allocated here
 *     indices = indices of the points of the parent node (reordered in place)
 *     indices_length = number of entries in indices
 *     branching = number of clusters / children
 *     level = level of the parent node
 *     centers = cluster centers; centers[c] becomes the pivot of child c
 *     radiuses = cluster radiuses; radiuses[c] becomes the radius of child c
 *     belongs_to = cluster id of every point (reordered together with indices)
 *     count = number of points per cluster
 */
void computeAnyBitfieldSubClustering(KMeansNodePtr node, int* indices, int indices_length,
                                     int branching, int level, CentersType** centers,
                                     std::vector<DistanceType>& radiuses, int* belongs_to, int* count)
{
    // compute kmeans clustering for each of the resulting clusters
    node->childs = pool_.allocate<KMeansNodePtr>(branching);

    int range_begin = 0;
    int range_end = range_begin;
    for (int c = 0; c < branching; ++c) {
        const int cluster_size = count[c];

        unsigned long long variance = 0ull;
        DistanceType mean_radius = 0;
        for (int i = 0; i < indices_length; ++i) {
            if (belongs_to[i] != c) {
                continue;
            }
            const DistanceType d = distance_(dataset_[indices[i]], ZeroIterator<ElementType>(), veclen_);
            variance += static_cast<unsigned long long>( ensureSquareDistance<Distance>(d) );
            mean_radius += ensureSimpleDistance<Distance>(d);
            // pack the members of cluster c into [range_begin, range_end)
            std::swap(indices[i], indices[range_end]);
            std::swap(belongs_to[i], belongs_to[range_end]);
            range_end++;
        }
        mean_radius = static_cast<DistanceType>(
            0.5f + static_cast<float>(mean_radius) / static_cast<float>(cluster_size));
        variance = static_cast<unsigned long long>(
            0.5 + static_cast<double>(variance) / static_cast<double>(cluster_size));
        // NOTE(review): unsigned subtraction - wraps around if the center term
        // is larger than the averaged term; confirm that cannot happen.
        variance -= static_cast<unsigned long long>(
            ensureSquareDistance<Distance>(
                distance_(centers[c], ZeroIterator<ElementType>(), veclen_)));

        KMeansNodePtr child = pool_.allocate<KMeansNode>();
        node->childs[c] = child;
        std::memset(child, 0, sizeof(KMeansNode));
        child->radius = radiuses[c];
        child->pivot = centers[c];
        child->variance = static_cast<DistanceType>(variance);
        child->mean_radius = mean_radius;
        computeClustering(child, indices+range_begin, range_end-range_begin, branching, level+1);
        range_begin = range_end;
    }
}
|
||||
|
||||
|
||||
template<typename DistType>
|
||||
void refineAndSplitClustering(
|
||||
KMeansNodePtr node, int* indices, int indices_length, int branching,
|
||||
int level, CentersType** centers, std::vector<DistanceType>& radiuses,
|
||||
int* belongs_to, int* count, const DistType* identifier)
|
||||
{
|
||||
(void)identifier;
|
||||
refineClustering(indices, indices_length, branching, centers, radiuses, belongs_to, count);
|
||||
|
||||
computeSubClustering(node, indices, indices_length, branching,
|
||||
level, centers, radiuses, belongs_to, count);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* The methods responsible with doing the recursive hierarchical clustering on
|
||||
* binary vectors.
|
||||
* As some might have heared that KMeans on binary data doesn't make sense,
|
||||
* it's worth a little explanation why it actually fairly works. As
|
||||
* with the Hierarchical Clustering algortihm, we seed several centers for the
|
||||
* current node by picking some of its points. Then in a first pass each point
|
||||
* of the node is then related to its closest center. Now let's have a look at
|
||||
* the 5 central dimensions of the 9 following points:
|
||||
*
|
||||
* xxxxxx11100xxxxx (1)
|
||||
* xxxxxx11010xxxxx (2)
|
||||
* xxxxxx11001xxxxx (3)
|
||||
* xxxxxx10110xxxxx (4)
|
||||
* xxxxxx10101xxxxx (5)
|
||||
* xxxxxx10011xxxxx (6)
|
||||
* xxxxxx01110xxxxx (7)
|
||||
* xxxxxx01101xxxxx (8)
|
||||
* xxxxxx01011xxxxx (9)
|
||||
* sum _____
|
||||
* of 1: 66555
|
||||
*
|
||||
* Even if the barycenter notion doesn't apply, we can set a center
|
||||
* xxxxxx11111xxxxx that will better fit the five dimensions we are focusing
|
||||
* on for these points.
|
||||
*
|
||||
* Note that convergence isn't ensured anymore. In practice, using Gonzales
|
||||
* as seeding algorithm should be fine for getting convergence ("iterations"
|
||||
* value can be set to -1). But with KMeans++ seeding you should definitely
|
||||
* set a maximum number of iterations (but make it higher than the "iterations"
|
||||
* default value of 11).
|
||||
*
|
||||
* Params:
|
||||
* node = the node to cluster
|
||||
* indices = indices of the points belonging to the current node
|
||||
* indices_length = number of points in the current node
|
||||
* branching = the branching factor to use in the clustering
|
||||
* level = 0 for the root node, it increases with the subdivision levels
|
||||
* centers = clusters centers to compute
|
||||
* radiuses = radiuses of clusters
|
||||
* belongs_to = LookUp Table returning, for a given indice id, the center id it belongs to
|
||||
* count = array storing the number of indices for a given center id
|
||||
* identifier = dummy pointer on an instance of Distance (use to branch correctly among templates)
|
||||
*/
|
||||
void refineAndSplitClustering(
|
||||
KMeansNodePtr node, int* indices, int indices_length, int branching,
|
||||
int level, CentersType** centers, std::vector<DistanceType>& radiuses,
|
||||
int* belongs_to, int* count, const cvflann::HammingLUT* identifier)
|
||||
{
|
||||
(void)identifier;
|
||||
refineBitfieldClustering(
|
||||
indices, indices_length, branching, centers, radiuses, belongs_to, count);
|
||||
|
||||
computeAnyBitfieldSubClustering(node, indices, indices_length, branching,
|
||||
level, centers, radiuses, belongs_to, count);
|
||||
}
|
||||
|
||||
|
||||
/**
 * Dispatch overload selected for cvflann::Hamming<unsigned char>: runs
 * refineBitfieldClustering() followed by computeAnyBitfieldSubClustering().
 *
 * Params:
 *     node = the node to cluster
 *     indices = indices of the points belonging to the current node
 *     indices_length = number of points in the current node
 *     branching = the branching factor to use in the clustering
 *     level = level of the current node
 *     centers = cluster centers to compute
 *     radiuses = radiuses of clusters
 *     belongs_to = for a given point position, the id of the center it belongs to
 *     count = number of points for a given center id
 *     identifier = unused; only its static type selects this overload
 */
void refineAndSplitClustering(
        KMeansNodePtr node, int* indices, int indices_length, int branching,
        int level, CentersType** centers, std::vector<DistanceType>& radiuses,
        int* belongs_to, int* count, const cvflann::Hamming<unsigned char>* identifier)
{
    static_cast<void>(identifier);  // tag parameter, never dereferenced

    refineBitfieldClustering(
        indices, indices_length, branching, centers, radiuses, belongs_to, count);
    computeAnyBitfieldSubClustering(node, indices, indices_length, branching,
                                    level, centers, radiuses, belongs_to, count);
}
|
||||
|
||||
|
||||
/**
 * Dispatch overload selected for cvflann::Hamming2<unsigned char>: runs
 * refineBitfieldClustering() followed by computeAnyBitfieldSubClustering().
 *
 * Params:
 *     node = the node to cluster
 *     indices = indices of the points belonging to the current node
 *     indices_length = number of points in the current node
 *     branching = the branching factor to use in the clustering
 *     level = level of the current node
 *     centers = cluster centers to compute
 *     radiuses = radiuses of clusters
 *     belongs_to = for a given point position, the id of the center it belongs to
 *     count = number of points for a given center id
 *     identifier = unused; only its static type selects this overload
 */
void refineAndSplitClustering(
        KMeansNodePtr node, int* indices, int indices_length, int branching,
        int level, CentersType** centers, std::vector<DistanceType>& radiuses,
        int* belongs_to, int* count, const cvflann::Hamming2<unsigned char>* identifier)
{
    static_cast<void>(identifier);  // tag parameter, never dereferenced

    refineBitfieldClustering(
        indices, indices_length, branching, centers, radiuses, belongs_to, count);
    computeAnyBitfieldSubClustering(node, indices, indices_length, branching,
                                    level, centers, radiuses, belongs_to, count);
}
|
||||
|
||||
|
||||
/**
|
||||
* The method responsible with actually doing the recursive hierarchical
|
||||
@ -882,372 +1285,16 @@ private:
|
||||
count[belongs_to[i]]++;
|
||||
}
|
||||
|
||||
cv::AutoBuffer<double> dcenters_buf(branching*veclen_);
|
||||
Matrix<double> dcenters(dcenters_buf.data(), branching, veclen_);
|
||||
for (int i=0; i<centers_length; ++i) {
|
||||
ElementType* vec = dataset_[centers_idx[i]];
|
||||
for (size_t k=0; k<veclen_; ++k) {
|
||||
dcenters[i][k] = double(vec[k]);
|
||||
}
|
||||
}
|
||||
|
||||
bool converged = false;
|
||||
int iteration = 0;
|
||||
while (!converged && iteration<iterations_) {
|
||||
converged = true;
|
||||
iteration++;
|
||||
|
||||
// compute the new cluster centers
|
||||
for (int i=0; i<branching; ++i) {
|
||||
memset(dcenters[i],0,sizeof(double)*veclen_);
|
||||
radiuses[i] = 0;
|
||||
}
|
||||
for (int i=0; i<indices_length; ++i) {
|
||||
ElementType* vec = dataset_[indices[i]];
|
||||
double* center = dcenters[belongs_to[i]];
|
||||
for (size_t k=0; k<veclen_; ++k) {
|
||||
center[k] += vec[k];
|
||||
}
|
||||
}
|
||||
for (int i=0; i<branching; ++i) {
|
||||
int cnt = count[i];
|
||||
for (size_t k=0; k<veclen_; ++k) {
|
||||
dcenters[i][k] /= cnt;
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<int> new_centroids(indices_length);
|
||||
std::vector<DistanceType> sq_dists(indices_length);
|
||||
|
||||
// reassign points to clusters
|
||||
KMeansDistanceComputer<Matrix<double> > invoker(distance_, dataset_, branching, indices, dcenters, veclen_, new_centroids, sq_dists);
|
||||
parallel_for_(cv::Range(0, (int)indices_length), invoker);
|
||||
|
||||
for (int i=0; i < (int)indices_length; ++i) {
|
||||
DistanceType sq_dist(sq_dists[i]);
|
||||
int new_centroid(new_centroids[i]);
|
||||
if (sq_dist > radiuses[new_centroid]) {
|
||||
radiuses[new_centroid] = sq_dist;
|
||||
}
|
||||
if (new_centroid != belongs_to[i]) {
|
||||
count[belongs_to[i]]--;
|
||||
count[new_centroid]++;
|
||||
belongs_to[i] = new_centroid;
|
||||
converged = false;
|
||||
}
|
||||
}
|
||||
|
||||
for (int i=0; i<branching; ++i) {
|
||||
// if one cluster converges to an empty cluster,
|
||||
// move an element into that cluster
|
||||
if (count[i]==0) {
|
||||
int j = (i+1)%branching;
|
||||
while (count[j]<=1) {
|
||||
j = (j+1)%branching;
|
||||
}
|
||||
|
||||
for (int k=0; k<indices_length; ++k) {
|
||||
if (belongs_to[k]==j) {
|
||||
// for cluster j, we move the furthest element from the center to the empty cluster i
|
||||
if ( distance_(dataset_[indices[k]], dcenters[j], veclen_) == radiuses[j] ) {
|
||||
belongs_to[k] = i;
|
||||
count[j]--;
|
||||
count[i]++;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
converged = false;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
CentersType** centers = new CentersType*[branching];
|
||||
|
||||
for (int i=0; i<branching; ++i) {
|
||||
centers[i] = new CentersType[veclen_];
|
||||
memoryCounter_ += (int)(veclen_*sizeof(CentersType));
|
||||
for (size_t k=0; k<veclen_; ++k) {
|
||||
centers[i][k] = (CentersType)dcenters[i][k];
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// compute kmeans clustering for each of the resulting clusters
|
||||
node->childs = pool_.allocate<KMeansNodePtr>(branching);
|
||||
int start = 0;
|
||||
int end = start;
|
||||
for (int c=0; c<branching; ++c) {
|
||||
int s = count[c];
|
||||
|
||||
DistanceType variance = 0;
|
||||
DistanceType mean_radius =0;
|
||||
for (int i=0; i<indices_length; ++i) {
|
||||
if (belongs_to[i]==c) {
|
||||
DistanceType d = distance_(dataset_[indices[i]], ZeroIterator<ElementType>(), veclen_);
|
||||
variance += d;
|
||||
mean_radius += static_cast<DistanceType>( sqrt(d) );
|
||||
std::swap(indices[i],indices[end]);
|
||||
std::swap(belongs_to[i],belongs_to[end]);
|
||||
end++;
|
||||
}
|
||||
}
|
||||
variance /= s;
|
||||
mean_radius /= s;
|
||||
variance -= distance_(centers[c], ZeroIterator<ElementType>(), veclen_);
|
||||
|
||||
node->childs[c] = pool_.allocate<KMeansNode>();
|
||||
std::memset(node->childs[c], 0, sizeof(KMeansNode));
|
||||
node->childs[c]->radius = radiuses[c];
|
||||
node->childs[c]->pivot = centers[c];
|
||||
node->childs[c]->variance = variance;
|
||||
node->childs[c]->mean_radius = mean_radius;
|
||||
computeClustering(node->childs[c],indices+start, end-start, branching, level+1);
|
||||
start=end;
|
||||
}
|
||||
Distance* dummy = NULL;
|
||||
refineAndSplitClustering(node, indices, indices_length, branching, level,
|
||||
centers, radiuses, belongs_to, count, dummy);
|
||||
|
||||
delete[] centers;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* The method responsible with doing the recursive hierarchical clustering on
|
||||
* binary vectors.
|
||||
* As some might have heard that KMeans on binary data doesn't make sense,
|
||||
* it's worth a little explanation why it actually fairly works. As
|
||||
* with the Hierarchical Clustering algorithm, we seed several centers for the
|
||||
* current node by picking some of its points. Then in a first pass each point
|
||||
* of the node is then related to its closest center. Now let's have a look at
|
||||
* the 5 central dimensions of the 9 following points:
|
||||
*
|
||||
* xxxxxx11100xxxxx (1)
|
||||
* xxxxxx11010xxxxx (2)
|
||||
* xxxxxx11001xxxxx (3)
|
||||
* xxxxxx10110xxxxx (4)
|
||||
* xxxxxx10101xxxxx (5)
|
||||
* xxxxxx10011xxxxx (6)
|
||||
* xxxxxx01110xxxxx (7)
|
||||
* xxxxxx01101xxxxx (8)
|
||||
* xxxxxx01011xxxxx (9)
|
||||
* sum _____
|
||||
* of 1: 66555
|
||||
*
|
||||
* Even if the barycenter notion doesn't apply, we can set a center
|
||||
* xxxxxx11111xxxxx that will better fit the five dimensions we are focusing
|
||||
* on for these points.
|
||||
*
|
||||
* Note that convergence isn't ensured anymore. In practice, using Gonzales
|
||||
* as seeding algorithm should be fine for getting convergence ("iterations"
|
||||
* value can be set to -1). But with KMeans++ seeding you should definitely
|
||||
* set a maximum number of iterations (but make it higher than the "iterations"
|
||||
* default value of 11).
|
||||
*
|
||||
* Params:
|
||||
* node = the node to cluster
|
||||
* indices = indices of the points belonging to the current node
|
||||
* indices_length = number of points in the current node
|
||||
* branching = the branching factor to use in the clustering
|
||||
* level = 0 for the root node, it increases with the subdivision levels
|
||||
*/
|
||||
void computeBitfieldClustering(KMeansNodePtr node, int* indices,
                               int indices_length, int branching, int level)
{
    node->size = indices_length;
    node->level = level;

    // Fewer points than clusters requested: this node becomes a leaf.
    if (indices_length < branching) {
        std::sort(indices, indices + indices_length);
        node->indices = indices;
        node->childs = NULL;
        return;
    }

    // Seed the clustering by picking some points of the node as centers.
    cv::AutoBuffer<int> seeds_buf(branching);
    int* seeds = seeds_buf.data();
    int seeds_count;
    (this->*chooseCenters)(branching, indices, indices_length, seeds, seeds_count);

    // Not enough seeds could be chosen: this node becomes a leaf as well.
    if (seeds_count < branching) {
        std::sort(indices, indices + indices_length);
        node->indices = indices;
        node->childs = NULL;
        return;
    }

    // One accumulator per bit of a vector, for each of the clusters.
    const unsigned int accumulator_veclen = static_cast<unsigned int>(
        veclen_ * sizeof(ElementType) * BITS_PER_CHAR);
    cv::AutoBuffer<unsigned int> bit_sums_buf(branching * accumulator_veclen);
    Matrix<unsigned int> bit_sums(bit_sums_buf.data(), branching, accumulator_veclen);

    // The per-cluster center buffers are handed over to the child nodes as
    // pivots below; only the array of pointers is released by this method.
    CentersType** centers = new CentersType*[branching];
    for (int c = 0; c < branching; ++c) {
        centers[c] = new CentersType[veclen_];
        memoryCounter_ += (int)(veclen_ * sizeof(CentersType));
    }

    std::vector<DistanceType> radiuses(branching);
    cv::AutoBuffer<int> count_buf(branching);
    int* count = count_buf.data();
    for (int c = 0; c < branching; ++c) {
        count[c] = 0;
        radiuses[c] = 0;
    }

    // First pass: attach every point to its closest seed.
    cv::AutoBuffer<int> owner_buf(indices_length);
    int* owner = owner_buf.data();
    for (int p = 0; p < indices_length; ++p) {
        int best = 0;
        DistanceType best_dist = distance_(dataset_[indices[p]], dataset_[seeds[0]], veclen_);
        for (int c = 1; c < branching; ++c) {
            const DistanceType candidate = distance_(dataset_[indices[p]], dataset_[seeds[c]], veclen_);
            if (candidate < best_dist) {
                best = c;
                best_dist = candidate;
            }
        }
        owner[p] = best;
        if (radiuses[best] < best_dist) {
            radiuses[best] = best_dist;
        }
        ++count[best];
    }

    bool converged = false;
    for (int iteration = 0; !converged && iteration < iterations_; ++iteration) {
        converged = true;

        // Reset the per-bit accumulators and the radiuses.
        for (int c = 0; c < branching; ++c) {
            unsigned int* sums = bit_sums[c];
            std::fill(sums, sums + accumulator_veclen, 0u);
            radiuses[c] = 0;
        }

        // Count, for each cluster, how many of its points have each bit set.
        for (int p = 0; p < indices_length; ++p) {
            const unsigned char* bytes = (const unsigned char*)dataset_[indices[p]];
            unsigned int* sums = bit_sums[owner[p]];
            for (size_t k = 0, l = 0; k < accumulator_veclen; k += BITS_PER_CHAR, ++l) {
                for (unsigned int bit = 0; bit < 8; ++bit) {
                    sums[k + bit] += (bytes[l] >> bit) & 0x01;
                }
            }
        }

        // New center of a cluster: each bit is the rounded proportion of the
        // cluster points having that bit set.
        for (int c = 0; c < branching; ++c) {
            const double cnt = static_cast<double>(count[c]);
            const unsigned int* sums = bit_sums[c];
            unsigned char* center_bytes = (unsigned char*)centers[c];
            for (size_t k = 0, l = 0; k < accumulator_veclen; k += BITS_PER_CHAR, ++l) {
                int packed = 0;
                for (unsigned int bit = 0; bit < 8; ++bit) {
                    packed |= ((int)(0.5 + (double)(sums[k + bit]) / cnt)) << bit;
                }
                center_bytes[l] = static_cast<unsigned char>(packed);
            }
        }

        // Reassign the points to the clusters.
        std::vector<int> nearest(indices_length);
        std::vector<DistanceType> nearest_dist(indices_length);
        KMeansDistanceComputer<ElementType**> invoker(distance_, dataset_, branching, indices, centers, veclen_, nearest, nearest_dist);
        parallel_for_(cv::Range(0, (int)indices_length), invoker);

        for (int p = 0; p < indices_length; ++p) {
            const DistanceType d(nearest_dist[p]);
            const int target(nearest[p]);
            if (radiuses[target] < d) {
                radiuses[target] = d;
            }
            if (target == owner[p]) {
                continue;
            }
            --count[owner[p]];
            ++count[target];
            owner[p] = target;
            converged = false;
        }

        // If a cluster ended up empty, move into it the furthest element of
        // the next cluster that holds more than one element.
        for (int c = 0; c < branching; ++c) {
            if (count[c] != 0) {
                continue;
            }

            int donor = (c + 1) % branching;
            while (count[donor] <= 1) {
                donor = (donor + 1) % branching;
            }

            for (int p = 0; p < indices_length; ++p) {
                if (owner[p] != donor) {
                    continue;
                }
                if (distance_(dataset_[indices[p]], centers[donor], veclen_) == radiuses[donor]) {
                    owner[p] = c;
                    --count[donor];
                    ++count[c];
                    break;
                }
            }
            converged = false;
        }
    }

    // Build one child per cluster and recurse on it. The points of cluster c
    // are swapped into the contiguous range [start, end) of `indices`.
    node->childs = pool_.allocate<KMeansNodePtr>(branching);
    int start = 0;
    int end = start;
    for (int c = 0; c < branching; ++c) {
        const int s = count[c];

        unsigned long long variance = 0ull;
        DistanceType mean_radius = 0;
        for (int i = 0; i < indices_length; ++i) {
            if (belongsTo(owner, i) != c) {
                continue;
            }
            DistanceType d = distance_(dataset_[indices[i]], ZeroIterator<ElementType>(), veclen_);
            variance += static_cast<unsigned long long>( ensureSquareDistance<Distance>(d) );
            mean_radius += ensureSimpleDistance<Distance>(d);
            std::swap(indices[i], indices[end]);
            std::swap(owner[i], owner[end]);
            end++;
        }
        mean_radius = static_cast<DistanceType>(
            0.5f + static_cast<float>(mean_radius) / static_cast<float>(s));
        variance = static_cast<unsigned long long>(
            0.5 + static_cast<double>(variance) / static_cast<double>(s));
        variance -= static_cast<unsigned long long>(
            ensureSquareDistance<Distance>(
                distance_(centers[c], ZeroIterator<ElementType>(), veclen_)));

        KMeansNodePtr child = pool_.allocate<KMeansNode>();
        node->childs[c] = child;
        std::memset(child, 0, sizeof(KMeansNode));
        child->radius = radiuses[c];
        child->pivot = centers[c];
        child->variance = static_cast<DistanceType>(variance);
        child->mean_radius = mean_radius;
        computeBitfieldClustering(child, indices + start, end - start, branching, level + 1);
        start = end;
    }

    delete[] centers;
}

// Reads the cluster a point is currently attached to.
static int belongsTo(const int* owner, int i)
{
    return owner[i];
}
|
||||
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Performs one descent in the hierarchical k-means tree. The branches not
|
||||
* visited are stored in a priority queue.
|
||||
|
250
samples/cpp/flann_search_dataset.cpp
Normal file
250
samples/cpp/flann_search_dataset.cpp
Normal file
@ -0,0 +1,250 @@
|
||||
// flann_search_dataset.cpp
|
||||
// Naive program to search a query picture in a dataset illustrating usage of FLANN
|
||||
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
#include "opencv2/core.hpp"
|
||||
#include "opencv2/core/utils/filesystem.hpp"
|
||||
#include "opencv2/highgui.hpp"
|
||||
#include "opencv2/features2d.hpp"
|
||||
#include "opencv2/flann.hpp"
|
||||
|
||||
using namespace cv;
|
||||
using std::cout;
|
||||
using std::endl;
|
||||
|
||||
// Descriptor selector: main() has a branch for _SIFT_ and one for _ORB_.
#define _ORB_

// Command line options, handed to the CommandLineParser built in main().
const char* keys =
    "{ help h | | Print help message. }"
    "{ dataset | | Path to the images folder used as dataset. }"
    "{ image | | Path to the image to search for in the dataset. }"
    "{ save | | Path and filename where to save the flann structure to. }"
    "{ load | | Path and filename where to load the flann structure from. }";
|
||||
|
||||
// Match statistics of one image of the dataset.
struct img_info
{
    int img_index;                // index of the image among the dataset images
    unsigned int nbr_of_matches;  // number of matches attributed to that image

    // Stores the image index together with its number of matches.
    img_info(int index, unsigned int matches)
    {
        img_index = index;
        nbr_of_matches = matches;
    }
};
|
||||
|
||||
|
||||
int main( int argc, char* argv[] )
|
||||
{
|
||||
//-- Test the program options
|
||||
CommandLineParser parser( argc, argv, keys );
|
||||
if (parser.has("help"))
|
||||
{
|
||||
parser.printMessage();
|
||||
return -1;
|
||||
}
|
||||
|
||||
const cv::String img_path = parser.get<String>("image");
|
||||
Mat img = imread( samples::findFile( img_path ), IMREAD_GRAYSCALE );
|
||||
if (img.empty() )
|
||||
{
|
||||
cout << "Could not open the image "<< img_path << endl;
|
||||
return -1;
|
||||
}
|
||||
|
||||
const cv::String db_path = parser.get<String>("dataset");
|
||||
if (!utils::fs::isDirectory(db_path))
|
||||
{
|
||||
cout << "Dataset folder "<< db_path.c_str() <<" doesn't exist!" << endl;
|
||||
return -1;
|
||||
}
|
||||
|
||||
const cv::String load_db_path = parser.get<String>("load");
|
||||
if ((load_db_path != String()) && (!utils::fs::exists(load_db_path)))
|
||||
{
|
||||
cout << "File " << load_db_path.c_str()
|
||||
<< " where to load the flann structure from doesn't exist!" << endl;
|
||||
return -1;
|
||||
}
|
||||
|
||||
const cv::String save_db_path = parser.get<String>("save");
|
||||
|
||||
//-- Step 1: Detect the keypoints using a detector, compute the descriptors
|
||||
// in the folder containing the images of the dataset
|
||||
#ifdef _SIFT_
|
||||
int minHessian = 400;
|
||||
Ptr<Feature2D> detector = SIFT::create( minHessian );
|
||||
#elif defined(_ORB_)
|
||||
Ptr<Feature2D> detector = ORB::create();
|
||||
#else
|
||||
cout << "Missing or unknown defined descriptor. "
|
||||
"Only SIFT and ORB are currently interfaced here" << endl;
|
||||
return -1;
|
||||
#endif
|
||||
|
||||
std::vector<KeyPoint> db_keypoints;
|
||||
Mat db_descriptors;
|
||||
std::vector<unsigned int> db_images_indice_range; //store the range of indices per image
|
||||
std::vector<int> db_indice_2_image_lut; //match descriptor indice to its image
|
||||
|
||||
db_images_indice_range.push_back(0);
|
||||
std::vector<cv::String> files;
|
||||
utils::fs::glob(db_path, cv::String(), files);
|
||||
for (std::vector<cv::String>::iterator itr = files.begin(); itr != files.end(); ++itr)
|
||||
{
|
||||
Mat tmp_img = imread( *itr, IMREAD_GRAYSCALE );
|
||||
if (!tmp_img.empty())
|
||||
{
|
||||
std::vector<KeyPoint> kpts;
|
||||
Mat descriptors;
|
||||
detector->detectAndCompute( tmp_img, noArray(), kpts, descriptors );
|
||||
|
||||
db_keypoints.insert( db_keypoints.end(), kpts.begin(), kpts.end() );
|
||||
db_descriptors.push_back( descriptors );
|
||||
db_images_indice_range.push_back( db_images_indice_range.back()
|
||||
+ static_cast<unsigned int>(kpts.size()) );
|
||||
}
|
||||
}
|
||||
|
||||
//-- Set the LUT
|
||||
db_indice_2_image_lut.resize( db_images_indice_range.back() );
|
||||
const int nbr_of_imgs = static_cast<int>( db_images_indice_range.size()-1 );
|
||||
for (int i = 0; i < nbr_of_imgs; ++i)
|
||||
{
|
||||
const unsigned int first_indice = db_images_indice_range[i];
|
||||
const unsigned int last_indice = db_images_indice_range[i+1];
|
||||
std::fill( db_indice_2_image_lut.begin() + first_indice,
|
||||
db_indice_2_image_lut.begin() + last_indice,
|
||||
i );
|
||||
}
|
||||
|
||||
//-- Step 2: build the structure storing the descriptors
|
||||
#if defined(_SIFT_)
|
||||
cv::Ptr<flann::GenericIndex<cvflann::L2<float> > > index;
|
||||
if (load_db_path != String())
|
||||
index = cv::makePtr<flann::GenericIndex<cvflann::L2<float> > >(db_descriptors,
|
||||
cvflann::SavedIndexParams(load_db_path));
|
||||
else
|
||||
index = cv::makePtr<flann::GenericIndex<cvflann::L2<float> > >(db_descriptors,
|
||||
cvflann::KDTreeIndexParams(4));
|
||||
|
||||
#elif defined(_ORB_)
|
||||
cv::Ptr<flann::GenericIndex<cvflann::Hamming<unsigned char> > > index;
|
||||
if (load_db_path != String())
|
||||
index = cv::makePtr<flann::GenericIndex<cvflann::Hamming<unsigned char> > >
|
||||
(db_descriptors, cvflann::SavedIndexParams(load_db_path));
|
||||
else
|
||||
index = cv::makePtr<flann::GenericIndex<cvflann::Hamming<unsigned char> > >
|
||||
(db_descriptors, cvflann::LshIndexParams());
|
||||
#else
|
||||
cout<< "Descriptor not listed. Set the proper FLANN distance for this descriptor" <<endl;
|
||||
return -1;
|
||||
#endif
|
||||
if (save_db_path != String())
|
||||
index->save(save_db_path);
|
||||
|
||||
|
||||
// Return if no query image was set
|
||||
if (img_path == String())
|
||||
return 0;
|
||||
|
||||
//-- Detect the keypoints and compute the descriptors for the query image
|
||||
std::vector<KeyPoint> img_keypoints;
|
||||
Mat img_descriptors;
|
||||
detector->detectAndCompute( img, noArray(), img_keypoints, img_descriptors );
|
||||
|
||||
|
||||
//-- Step 3: retrieve the descriptors in the dataset matching the ones of the query image
|
||||
// /!\ knnSearch doesn't follow OpenCV standards by not initialising empty Mat properties
|
||||
const int knn = 2;
|
||||
Mat indices(img_descriptors.rows, knn, CV_32S);
|
||||
#if defined(_SIFT_)
|
||||
#define DIST_TYPE float
|
||||
Mat dists(img_descriptors.rows, knn, CV_32F);
|
||||
#elif defined(_ORB_)
|
||||
#define DIST_TYPE int
|
||||
Mat dists(img_descriptors.rows, knn, CV_32S);
|
||||
#endif
|
||||
index->knnSearch( img_descriptors, indices, dists, knn, cvflann::SearchParams(32) );
|
||||
|
||||
//-- Filter matches using the Lowe's ratio test
|
||||
const float ratio_thresh = 0.7f;
|
||||
std::vector<DMatch> good_matches; //contains
|
||||
std::vector<unsigned int> matches_per_img_histogram( nbr_of_imgs, 0 );
|
||||
for (int i = 0; i < dists.rows; ++i)
|
||||
{
|
||||
if (dists.at<DIST_TYPE>(i,0) < ratio_thresh * dists.at<DIST_TYPE>(i,1))
|
||||
{
|
||||
const int indice_in_db = indices.at<int>(i,0);
|
||||
DMatch dmatch(i, indice_in_db, db_indice_2_image_lut[indice_in_db],
|
||||
static_cast<float>(dists.at<DIST_TYPE>(i,0)));
|
||||
good_matches.push_back( dmatch );
|
||||
matches_per_img_histogram[ db_indice_2_image_lut[indice_in_db] ]++;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
//-- Step 4: find the dataset image with the highest proportion of matches
|
||||
std::multimap<float, img_info> images_infos;
|
||||
for (int i = 0; i < nbr_of_imgs; ++i)
|
||||
{
|
||||
const unsigned int nbr_of_matches = matches_per_img_histogram[i];
|
||||
if (nbr_of_matches < 4) //we need at leat 4 points for a homography
|
||||
continue;
|
||||
|
||||
const unsigned int nbr_of_kpts = db_images_indice_range[i+1] - db_images_indice_range[i];
|
||||
const float inverse_proportion_of_retrieved_kpts =
|
||||
static_cast<float>(nbr_of_kpts) / static_cast<float>(nbr_of_matches);
|
||||
|
||||
img_info info(i, nbr_of_matches);
|
||||
images_infos.insert( std::pair<float,img_info>(inverse_proportion_of_retrieved_kpts,
|
||||
info) );
|
||||
}
|
||||
|
||||
if (images_infos.begin() == images_infos.end())
|
||||
{
|
||||
cout<<"No good match could be found."<<endl;
|
||||
return 0;
|
||||
}
|
||||
|
||||
//-- if there are several images with a similar proportion of matches,
|
||||
// select the one with the highest number of matches weighted by the
|
||||
// squared ratio of proportions
|
||||
const float best_matches_proportion = images_infos.begin()->first;
|
||||
float new_matches_proportion = best_matches_proportion;
|
||||
img_info best_img = images_infos.begin()->second;
|
||||
|
||||
std::multimap<float, img_info>::iterator it = images_infos.begin();
|
||||
++it;
|
||||
while ((it!=images_infos.end()) && (it->first < 1.1*best_matches_proportion))
|
||||
{
|
||||
const float ratio = new_matches_proportion / it->first;
|
||||
if( it->second.nbr_of_matches * (ratio * ratio) > best_img.nbr_of_matches)
|
||||
{
|
||||
new_matches_proportion = it->first;
|
||||
best_img = it->second;
|
||||
}
|
||||
++it;
|
||||
}
|
||||
|
||||
//-- Step 5: filter goodmatches that belong to the best image match of the dataset
|
||||
std::vector<DMatch> filtered_good_matches;
|
||||
for (std::vector<DMatch>::iterator itr(good_matches.begin()); itr != good_matches.end(); ++itr)
|
||||
{
|
||||
if (itr->imgIdx == best_img.img_index)
|
||||
filtered_good_matches.push_back(*itr);
|
||||
}
|
||||
|
||||
//-- Retrieve the best image match from the dataset
|
||||
Mat db_img = imread( files[best_img.img_index], IMREAD_GRAYSCALE );
|
||||
|
||||
//-- Draw matches
|
||||
Mat img_matches;
|
||||
drawMatches( img, img_keypoints, db_img, db_keypoints, filtered_good_matches, img_matches, Scalar::all(-1),
|
||||
Scalar::all(-1), std::vector<char>(), DrawMatchesFlags::NOT_DRAW_SINGLE_POINTS );
|
||||
|
||||
//-- Show detected matches
|
||||
imshow("Good Matches", img_matches );
|
||||
waitKey();
|
||||
|
||||
return 0;
|
||||
}
|
@ -14,8 +14,8 @@ import argparse
|
||||
import sys
|
||||
|
||||
class DaSiamRPNTracker:
|
||||
#initialization of used values, initial bounding box, used network
|
||||
def __init__(self, im, target_pos, target_sz, net, kernel_r1, kernel_cls1):
|
||||
# Initialization of used values, initial bounding box, used network
|
||||
def __init__(self, net="dasiamrpn_model.onnx", kernel_r1="dasiamrpn_kernel_r1.onnx", kernel_cls1="dasiamrpn_kernel_cls1.onnx"):
|
||||
self.windowing = "cosine"
|
||||
self.exemplar_size = 127
|
||||
self.instance_size = 271
|
||||
@ -28,42 +28,52 @@ class DaSiamRPNTracker:
|
||||
self.penalty_k = 0.055
|
||||
self.window_influence = 0.42
|
||||
self.lr = 0.295
|
||||
self.im_h = im.shape[0]
|
||||
self.im_w = im.shape[1]
|
||||
self.target_pos = target_pos
|
||||
self.target_sz = target_sz
|
||||
self.avg_chans = np.mean(im, axis=(0, 1))
|
||||
self.net = net
|
||||
self.score = []
|
||||
|
||||
if ((self.target_sz[0] * self.target_sz[1]) / float(self.im_h * self.im_w)) < 0.004:
|
||||
raise AssertionError("Initializing BB is too small-try to restart tracker with larger BB")
|
||||
|
||||
self.anchor = self.__generate_anchor()
|
||||
wc_z = self.target_sz[0] + self.context_amount * sum(self.target_sz)
|
||||
hc_z = self.target_sz[1] + self.context_amount * sum(self.target_sz)
|
||||
s_z = round(np.sqrt(wc_z * hc_z))
|
||||
|
||||
z_crop = self.__get_subwindow_tracking(im, self.exemplar_size, s_z)
|
||||
z_crop = z_crop.transpose(2, 0, 1).reshape(1, 3, 127, 127).astype(np.float32)
|
||||
self.net.setInput(z_crop)
|
||||
z_f = self.net.forward('63')
|
||||
kernel_r1.setInput(z_f)
|
||||
r1 = kernel_r1.forward()
|
||||
kernel_cls1.setInput(z_f)
|
||||
cls1 = kernel_cls1.forward()
|
||||
r1 = r1.reshape(20, 256, 4, 4)
|
||||
cls1 = cls1.reshape(10, 256 , 4, 4)
|
||||
self.net.setParam(self.net.getLayerId('65'), 0, r1)
|
||||
self.net.setParam(self.net.getLayerId('68'), 0, cls1)
|
||||
|
||||
if self.windowing == "cosine":
|
||||
self.window = np.outer(np.hanning(self.score_size), np.hanning(self.score_size))
|
||||
elif self.windowing == "uniform":
|
||||
self.window = np.ones((self.score_size, self.score_size))
|
||||
self.window = np.tile(self.window.flatten(), self.anchor_num)
|
||||
# Loading network's and kernel's models
|
||||
self.net = cv.dnn.readNet(net)
|
||||
self.kernel_r1 = cv.dnn.readNet(kernel_r1)
|
||||
self.kernel_cls1 = cv.dnn.readNet(kernel_cls1)
|
||||
|
||||
#creating anchor for tracking bounding box
|
||||
def init(self, im, init_bb):
|
||||
target_pos, target_sz = np.array([init_bb[0], init_bb[1]]), np.array([init_bb[2], init_bb[3]])
|
||||
self.im_h = im.shape[0]
|
||||
self.im_w = im.shape[1]
|
||||
self.target_pos = target_pos
|
||||
self.target_sz = target_sz
|
||||
self.avg_chans = np.mean(im, axis=(0, 1))
|
||||
|
||||
# When we trying to generate ONNX model from the pre-trained .pth model
|
||||
# we are using only one state of the network. In our case used state
|
||||
# with big bounding box, so we were forced to add assertion for
|
||||
# too small bounding boxes - current state of the network can not
|
||||
# work properly with such small bounding boxes
|
||||
if ((self.target_sz[0] * self.target_sz[1]) / float(self.im_h * self.im_w)) < 0.004:
|
||||
raise AssertionError(
|
||||
"Initializing BB is too small-try to restart tracker with larger BB")
|
||||
|
||||
self.anchor = self.__generate_anchor()
|
||||
wc_z = self.target_sz[0] + self.context_amount * sum(self.target_sz)
|
||||
hc_z = self.target_sz[1] + self.context_amount * sum(self.target_sz)
|
||||
s_z = round(np.sqrt(wc_z * hc_z))
|
||||
z_crop = self.__get_subwindow_tracking(im, self.exemplar_size, s_z)
|
||||
z_crop = z_crop.transpose(2, 0, 1).reshape(1, 3, 127, 127).astype(np.float32)
|
||||
self.net.setInput(z_crop)
|
||||
z_f = self.net.forward('63')
|
||||
self.kernel_r1.setInput(z_f)
|
||||
r1 = self.kernel_r1.forward()
|
||||
self.kernel_cls1.setInput(z_f)
|
||||
cls1 = self.kernel_cls1.forward()
|
||||
r1 = r1.reshape(20, 256, 4, 4)
|
||||
cls1 = cls1.reshape(10, 256 , 4, 4)
|
||||
self.net.setParam(self.net.getLayerId('65'), 0, r1)
|
||||
self.net.setParam(self.net.getLayerId('68'), 0, cls1)
|
||||
|
||||
# Creating anchor for tracking bounding box
|
||||
def __generate_anchor(self):
|
||||
self.anchor = np.zeros((self.anchor_num, 4), dtype = np.float32)
|
||||
size = self.total_stride * self.total_stride
|
||||
@ -86,8 +96,8 @@ class DaSiamRPNTracker:
|
||||
self.anchor[:, 0], self.anchor[:, 1] = xx.astype(np.float32), yy.astype(np.float32)
|
||||
return self.anchor
|
||||
|
||||
#track function
|
||||
def track(self, im):
|
||||
# Function for updating tracker state
|
||||
def update(self, im):
|
||||
wc_z = self.target_sz[1] + self.context_amount * sum(self.target_sz)
|
||||
hc_z = self.target_sz[0] + self.context_amount * sum(self.target_sz)
|
||||
s_z = np.sqrt(wc_z * hc_z)
|
||||
@ -96,7 +106,7 @@ class DaSiamRPNTracker:
|
||||
pad = d_search / scale_z
|
||||
s_x = round(s_z + 2 * pad)
|
||||
|
||||
#region preprocessing
|
||||
# Region preprocessing part
|
||||
x_crop = self.__get_subwindow_tracking(im, self.instance_size, s_x)
|
||||
x_crop = x_crop.transpose(2, 0, 1).reshape(1, 3, 271, 271).astype(np.float32)
|
||||
self.score = self.__tracker_eval(x_crop, scale_z)
|
||||
@ -105,7 +115,12 @@ class DaSiamRPNTracker:
|
||||
self.target_sz[0] = max(10, min(self.im_w, self.target_sz[0]))
|
||||
self.target_sz[1] = max(10, min(self.im_h, self.target_sz[1]))
|
||||
|
||||
#update bounding box position
|
||||
cx, cy = self.target_pos
|
||||
w, h = self.target_sz
|
||||
updated_bb = (cx, cy, w, h)
|
||||
return True, updated_bb
|
||||
|
||||
# Function for updating position of the bounding box
|
||||
def __tracker_eval(self, x_crop, scale_z):
|
||||
target_size = self.target_sz * scale_z
|
||||
self.net.setInput(x_crop)
|
||||
@ -160,7 +175,7 @@ class DaSiamRPNTracker:
|
||||
y = e_x / e_x.sum(axis = 0)
|
||||
return y
|
||||
|
||||
#evaluations with cropped image
|
||||
# Reshaping cropped image for using in the model
|
||||
def __get_subwindow_tracking(self, im, model_size, original_sz):
|
||||
im_sz = im.shape
|
||||
c = (original_sz + 1) / 2
|
||||
@ -171,19 +186,20 @@ class DaSiamRPNTracker:
|
||||
left_pad = int(max(0., -context_xmin))
|
||||
top_pad = int(max(0., -context_ymin))
|
||||
right_pad = int(max(0., context_xmax - im_sz[1] + 1))
|
||||
bottom_pad = int(max(0., context_ymax - im_sz[0] + 1))
|
||||
bot_pad = int(max(0., context_ymax - im_sz[0] + 1))
|
||||
context_xmin += left_pad
|
||||
context_xmax += left_pad
|
||||
context_ymin += top_pad
|
||||
context_ymax += top_pad
|
||||
r, c, k = im.shape
|
||||
|
||||
if any([top_pad, bottom_pad, left_pad, right_pad]):
|
||||
te_im = np.zeros((r + top_pad + bottom_pad, c + left_pad + right_pad, k), np.uint8)
|
||||
if any([top_pad, bot_pad, left_pad, right_pad]):
|
||||
te_im = np.zeros((
|
||||
r + top_pad + bot_pad, c + left_pad + right_pad, k), np.uint8)
|
||||
te_im[top_pad:top_pad + r, left_pad:left_pad + c, :] = im
|
||||
if top_pad:
|
||||
te_im[0:top_pad, left_pad:left_pad + c, :] = self.avg_chans
|
||||
if bottom_pad:
|
||||
if bot_pad:
|
||||
te_im[r + top_pad:, left_pad:left_pad + c, :] = self.avg_chans
|
||||
if left_pad:
|
||||
te_im[:, 0:left_pad, :] = self.avg_chans
|
||||
@ -195,23 +211,22 @@ class DaSiamRPNTracker:
|
||||
|
||||
if not np.array_equal(model_size, original_sz):
|
||||
im_patch_original = cv.resize(im_patch_original, (model_size, model_size))
|
||||
|
||||
return im_patch_original
|
||||
|
||||
#function for reading paths, bounding box drawing, showing results
|
||||
# Sample for using DaSiamRPN tracker
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description="Run tracker")
|
||||
parser.add_argument("--input", type=str, help="Full path to input (empty for camera)")
|
||||
parser.add_argument("--net", type=str, default="dasiamrpn_model.onnx", help="Full path to onnx model of net")
|
||||
parser.add_argument("--kernel_r1", type=str, default="dasiamrpn_kernel_r1.onnx", help="Full path to onnx model of kernel_r1")
|
||||
parser.add_argument("--kernel_cls1", type=str, default="dasiamrpn_kernel_cls1.onnx", help="Full path to onnx model of kernel_cls1")
|
||||
parser.add_argument("--input", type=str, help="Full path to input. Do not use if input is camera")
|
||||
args = parser.parse_args()
|
||||
point1 = ()
|
||||
point2 = ()
|
||||
mark = True
|
||||
drawing = False
|
||||
cx, cy, w, h = 0.0, 0.0, 0, 0
|
||||
|
||||
# Function for drawing during videostream
|
||||
def get_bb(event, x, y, flag, param):
|
||||
nonlocal point1, point2, cx, cy, w, h, drawing, mark
|
||||
|
||||
@ -233,12 +248,7 @@ def main():
|
||||
h = abs(point1[1] - point2[1])
|
||||
mark = False
|
||||
|
||||
#loading network`s and kernel`s models
|
||||
net = cv.dnn.readNet(args.net)
|
||||
kernel_r1 = cv.dnn.readNet(args.kernel_r1)
|
||||
kernel_cls1 = cv.dnn.readNet(args.kernel_cls1)
|
||||
|
||||
#initializing bounding box
|
||||
# Creating window for visualization
|
||||
cap = cv.VideoCapture(args.input if args.input else 0)
|
||||
cv.namedWindow("DaSiamRPN")
|
||||
cv.setMouseCallback("DaSiamRPN", get_bb)
|
||||
@ -257,17 +267,17 @@ def main():
|
||||
cv.imshow("DaSiamRPN", twin)
|
||||
cv.waitKey(40)
|
||||
|
||||
target_pos, target_sz = np.array([cx, cy]), np.array([w, h])
|
||||
tracker = DaSiamRPNTracker(frame, target_pos, target_sz, net, kernel_r1, kernel_cls1)
|
||||
init_bb = (cx, cy, w, h)
|
||||
tracker = DaSiamRPNTracker(args.net, args.kernel_r1, args.kernel_cls1)
|
||||
tracker.init(frame, init_bb)
|
||||
|
||||
#tracking loop
|
||||
# Tracking loop
|
||||
while cap.isOpened():
|
||||
has_frame, frame = cap.read()
|
||||
if not has_frame:
|
||||
sys.exit(0)
|
||||
tracker.track(frame)
|
||||
w, h = tracker.target_sz
|
||||
cx, cy = tracker.target_pos
|
||||
_, new_bb = tracker.update(frame)
|
||||
cx, cy, w, h = new_bb
|
||||
cv.rectangle(frame, (int(cx - w // 2), int(cy - h // 2)), (int(cx - w // 2) + int(w), int(cy - h // 2) + int(h)),(0, 255, 255), 3)
|
||||
cv.imshow("DaSiamRPN", frame)
|
||||
key = cv.waitKey(1)
|
||||
|
Loading…
Reference in New Issue
Block a user