mirror of
https://github.com/opencv/opencv.git
synced 2025-06-07 17:44:04 +08:00
Merge pull request #24004 from dkurt:tflite_new_layers
[TFLite] Pack layer and other fixes for SSD from Keras #24004

### Pull Request Readiness Checklist

resolves https://github.com/opencv/opencv/issues/23992

**Merge with extra**: https://github.com/opencv/opencv_extra/pull/1076

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [x] I agree to contribute to the project under Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [x] The PR is proposed to the proper branch
- [x] There is a reference to the original bug report and related work
- [x] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
      Patch to opencv_extra has the same branch name.
- [x] The feature is well documented and sample code can be built with the project CMake
This commit is contained in:
parent
1794cdc03c
commit
e41ba90f17
@ -59,6 +59,7 @@ private:
|
|||||||
void parseUnpooling(const Operator& op, const std::string& opcode, LayerParams& layerParams);
|
void parseUnpooling(const Operator& op, const std::string& opcode, LayerParams& layerParams);
|
||||||
void parseReshape(const Operator& op, const std::string& opcode, LayerParams& layerParams);
|
void parseReshape(const Operator& op, const std::string& opcode, LayerParams& layerParams);
|
||||||
void parseConcat(const Operator& op, const std::string& opcode, LayerParams& layerParams);
|
void parseConcat(const Operator& op, const std::string& opcode, LayerParams& layerParams);
|
||||||
|
void parsePack(const Operator& op, const std::string& opcode, LayerParams& layerParams);
|
||||||
void parseResize(const Operator& op, const std::string& opcode, LayerParams& layerParams);
|
void parseResize(const Operator& op, const std::string& opcode, LayerParams& layerParams);
|
||||||
void parseDeconvolution(const Operator& op, const std::string& opcode, LayerParams& layerParams);
|
void parseDeconvolution(const Operator& op, const std::string& opcode, LayerParams& layerParams);
|
||||||
void parseQuantize(const Operator& op, const std::string& opcode, LayerParams& layerParams);
|
void parseQuantize(const Operator& op, const std::string& opcode, LayerParams& layerParams);
|
||||||
@ -70,6 +71,8 @@ private:
|
|||||||
void parseActivation(const Operator& op, const std::string& opcode, LayerParams& layerParams, bool isFused);
|
void parseActivation(const Operator& op, const std::string& opcode, LayerParams& layerParams, bool isFused);
|
||||||
void addLayer(LayerParams& layerParams, const Operator& op);
|
void addLayer(LayerParams& layerParams, const Operator& op);
|
||||||
int addPermuteLayer(const std::vector<int>& order, const std::string& permName, const std::pair<int, int>& inpId, int dtype);
|
int addPermuteLayer(const std::vector<int>& order, const std::string& permName, const std::pair<int, int>& inpId, int dtype);
|
||||||
|
int addReshapeLayer(const std::vector<int>& shape, int axis, int num_axes,
|
||||||
|
const std::string& name, const std::pair<int, int>& inpId, int dtype);
|
||||||
inline bool isInt8(const Operator& op);
|
inline bool isInt8(const Operator& op);
|
||||||
inline void getQuantParams(const Operator& op, float& inpScale, int& inpZero, float& outScale, int& outZero);
|
inline void getQuantParams(const Operator& op, float& inpScale, int& inpZero, float& outScale, int& outZero);
|
||||||
};
|
};
|
||||||
@ -267,6 +270,7 @@ TFLiteImporter::DispatchMap TFLiteImporter::buildDispatchMap()
|
|||||||
dispatch["PAD"] = &TFLiteImporter::parsePadding;
|
dispatch["PAD"] = &TFLiteImporter::parsePadding;
|
||||||
dispatch["RESHAPE"] = &TFLiteImporter::parseReshape;
|
dispatch["RESHAPE"] = &TFLiteImporter::parseReshape;
|
||||||
dispatch["CONCATENATION"] = &TFLiteImporter::parseConcat;
|
dispatch["CONCATENATION"] = &TFLiteImporter::parseConcat;
|
||||||
|
dispatch["PACK"] = &TFLiteImporter::parsePack;
|
||||||
dispatch["RESIZE_BILINEAR"] = dispatch["RESIZE_NEAREST_NEIGHBOR"] = &TFLiteImporter::parseResize;
|
dispatch["RESIZE_BILINEAR"] = dispatch["RESIZE_NEAREST_NEIGHBOR"] = &TFLiteImporter::parseResize;
|
||||||
dispatch["Convolution2DTransposeBias"] = &TFLiteImporter::parseDeconvolution;
|
dispatch["Convolution2DTransposeBias"] = &TFLiteImporter::parseDeconvolution;
|
||||||
dispatch["QUANTIZE"] = &TFLiteImporter::parseQuantize;
|
dispatch["QUANTIZE"] = &TFLiteImporter::parseQuantize;
|
||||||
@ -596,16 +600,6 @@ void TFLiteImporter::parseUnpooling(const Operator& op, const std::string& opcod
|
|||||||
void TFLiteImporter::parseReshape(const Operator& op, const std::string& opcode, LayerParams& layerParams) {
|
void TFLiteImporter::parseReshape(const Operator& op, const std::string& opcode, LayerParams& layerParams) {
|
||||||
DataLayout inpLayout = layouts[op.inputs()->Get(0)];
|
DataLayout inpLayout = layouts[op.inputs()->Get(0)];
|
||||||
|
|
||||||
if (inpLayout == DNN_LAYOUT_NHWC) {
|
|
||||||
// Permute to NCHW
|
|
||||||
std::vector<int> order = {0, 2, 3, 1};
|
|
||||||
const std::string name = layerParams.name + "/permute";
|
|
||||||
auto inpId = layerIds[op.inputs()->Get(0)];
|
|
||||||
int permId = addPermuteLayer(order, name, inpId, isInt8(op) ? CV_8S : CV_32F); // NCHW -> NHWC
|
|
||||||
layerIds[op.inputs()->Get(0)] = std::make_pair(permId, 0);
|
|
||||||
layouts[op.outputs()->Get(0)] = DNN_LAYOUT_NCHW;
|
|
||||||
}
|
|
||||||
|
|
||||||
layerParams.type = "Reshape";
|
layerParams.type = "Reshape";
|
||||||
std::vector<int> shape;
|
std::vector<int> shape;
|
||||||
if (op.inputs()->size() > 1) {
|
if (op.inputs()->size() > 1) {
|
||||||
@ -615,6 +609,22 @@ void TFLiteImporter::parseReshape(const Operator& op, const std::string& opcode,
|
|||||||
CV_Assert(options);
|
CV_Assert(options);
|
||||||
shape.assign(options->new_shape()->begin(), options->new_shape()->end());
|
shape.assign(options->new_shape()->begin(), options->new_shape()->end());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (inpLayout == DNN_LAYOUT_NHWC) {
|
||||||
|
if (shape.size() == 4) {
|
||||||
|
// Keep the data as is but reorder the requested shape to OpenCV's NCHW order
|
||||||
|
std::swap(shape[2], shape[3]);
|
||||||
|
std::swap(shape[1], shape[2]);
|
||||||
|
} else {
|
||||||
|
// Permute the entire data to NCHW and reshape it to the given shape
|
||||||
|
std::vector<int> order = {0, 2, 3, 1};
|
||||||
|
const std::string name = layerParams.name + "/permute";
|
||||||
|
auto inpId = layerIds[op.inputs()->Get(0)];
|
||||||
|
int permId = addPermuteLayer(order, name, inpId, isInt8(op) ? CV_8S : CV_32F); // NCHW -> NHWC
|
||||||
|
layerIds[op.inputs()->Get(0)] = std::make_pair(permId, 0);
|
||||||
|
layouts[op.outputs()->Get(0)] = DNN_LAYOUT_NCHW;
|
||||||
|
}
|
||||||
|
}
|
||||||
layerParams.set("dim", DictValue::arrayInt<int*>(shape.data(), shape.size()));
|
layerParams.set("dim", DictValue::arrayInt<int*>(shape.data(), shape.size()));
|
||||||
addLayer(layerParams, op);
|
addLayer(layerParams, op);
|
||||||
}
|
}
|
||||||
@ -636,6 +646,47 @@ void TFLiteImporter::parseConcat(const Operator& op, const std::string& opcode,
|
|||||||
parseFusedActivation(op, options->fused_activation_function());
|
parseFusedActivation(op, options->fused_activation_function());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void TFLiteImporter::parsePack(const Operator& op, const std::string& opcode, LayerParams& layerParams) {
|
||||||
|
auto options = reinterpret_cast<const PackOptions*>(op.builtin_options());
|
||||||
|
int axis = options->axis();
|
||||||
|
|
||||||
|
DataLayout inpLayout = layouts[op.inputs()->Get(0)];
|
||||||
|
if (inpLayout == DNN_LAYOUT_NHWC) {
|
||||||
|
// OpenCV works in NCHW data layout. So change the axis correspondingly.
|
||||||
|
axis = normalize_axis(axis, 5); // 5 because Pack adds a new axis so -1 would mean 4
|
||||||
|
static const int remap[] = {0, 1, 3, 4, 2};
|
||||||
|
axis = remap[axis];
|
||||||
|
}
|
||||||
|
|
||||||
|
// Replace Pack layer to Reshape + Concat
|
||||||
|
// Use a set because there are models which replicate single layer data by Pack.
|
||||||
|
std::set<int> op_inputs(op.inputs()->begin(), op.inputs()->end());
|
||||||
|
std::map<int, std::pair<int, int> > originLayerIds;
|
||||||
|
for (int inp : op_inputs) {
|
||||||
|
auto inpId = layerIds[inp];
|
||||||
|
int dims = modelTensors->Get(inp)->shape()->size();
|
||||||
|
|
||||||
|
std::vector<int> shape{1, -1};
|
||||||
|
if (axis == dims) {
|
||||||
|
std::swap(shape[0], shape[1]);
|
||||||
|
}
|
||||||
|
const auto name = modelTensors->Get(inp)->name()->str() + "/reshape";
|
||||||
|
int reshapeId = addReshapeLayer(shape, axis == dims ? dims - 1 : axis, 1,
|
||||||
|
name, inpId, isInt8(op) ? CV_8S : CV_32F);
|
||||||
|
|
||||||
|
originLayerIds[inp] = layerIds[inp];
|
||||||
|
layerIds[inp] = std::make_pair(reshapeId, 0);
|
||||||
|
}
|
||||||
|
layerParams.type = "Concat";
|
||||||
|
layerParams.set("axis", axis);
|
||||||
|
addLayer(layerParams, op);
|
||||||
|
|
||||||
|
// Restore origin layer inputs
|
||||||
|
for (const auto& ids : originLayerIds) {
|
||||||
|
layerIds[ids.first] = ids.second;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void TFLiteImporter::parseResize(const Operator& op, const std::string& opcode, LayerParams& layerParams) {
|
void TFLiteImporter::parseResize(const Operator& op, const std::string& opcode, LayerParams& layerParams) {
|
||||||
layerParams.type = "Resize";
|
layerParams.type = "Resize";
|
||||||
|
|
||||||
@ -666,6 +717,18 @@ int TFLiteImporter::addPermuteLayer(const std::vector<int>& order, const std::st
|
|||||||
return permId;
|
return permId;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int TFLiteImporter::addReshapeLayer(const std::vector<int>& shape, int axis, int num_axes,
|
||||||
|
const std::string& name, const std::pair<int, int>& inpId, int dtype)
|
||||||
|
{
|
||||||
|
LayerParams lp;
|
||||||
|
lp.set("axis", axis);
|
||||||
|
lp.set("dim", DictValue::arrayInt<const int*>(shape.data(), shape.size()));
|
||||||
|
lp.set("num_axes", num_axes);
|
||||||
|
int id = dstNet.addLayer(name, "Reshape", dtype, lp);
|
||||||
|
dstNet.connect(inpId.first, inpId.second, id, 0);
|
||||||
|
return id;
|
||||||
|
}
|
||||||
|
|
||||||
void TFLiteImporter::parseDeconvolution(const Operator& op, const std::string& opcode, LayerParams& layerParams) {
|
void TFLiteImporter::parseDeconvolution(const Operator& op, const std::string& opcode, LayerParams& layerParams) {
|
||||||
layerParams.type = "Deconvolution";
|
layerParams.type = "Deconvolution";
|
||||||
|
|
||||||
@ -771,6 +834,8 @@ void TFLiteImporter::parseDetectionPostProcess(const Operator& op, const std::st
|
|||||||
parameters[keys[i]] = *reinterpret_cast<const uint32_t*>(data + offset + i * 4);
|
parameters[keys[i]] = *reinterpret_cast<const uint32_t*>(data + offset + i * 4);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
parameters["num_classes"] = modelTensors->Get(op.inputs()->Get(1))->shape()->Get(2);
|
||||||
|
|
||||||
layerParams.type = "DetectionOutput";
|
layerParams.type = "DetectionOutput";
|
||||||
layerParams.set("num_classes", parameters["num_classes"]);
|
layerParams.set("num_classes", parameters["num_classes"]);
|
||||||
layerParams.set("share_location", true);
|
layerParams.set("share_location", true);
|
||||||
@ -780,7 +845,6 @@ void TFLiteImporter::parseDetectionPostProcess(const Operator& op, const std::st
|
|||||||
layerParams.set("top_k", parameters["max_detections"]);
|
layerParams.set("top_k", parameters["max_detections"]);
|
||||||
layerParams.set("keep_top_k", parameters["max_detections"]);
|
layerParams.set("keep_top_k", parameters["max_detections"]);
|
||||||
layerParams.set("code_type", "CENTER_SIZE");
|
layerParams.set("code_type", "CENTER_SIZE");
|
||||||
layerParams.set("variance_encoded_in_target", true);
|
|
||||||
layerParams.set("loc_pred_transposed", true);
|
layerParams.set("loc_pred_transposed", true);
|
||||||
|
|
||||||
// Replace third input from tensor to Const layer with the priors
|
// Replace third input from tensor to Const layer with the priors
|
||||||
@ -796,10 +860,27 @@ void TFLiteImporter::parseDetectionPostProcess(const Operator& op, const std::st
|
|||||||
priors.col(2) = priors.col(0) + priors.col(3);
|
priors.col(2) = priors.col(0) + priors.col(3);
|
||||||
priors.col(3) = priors.col(1) + tmp;
|
priors.col(3) = priors.col(1) + tmp;
|
||||||
|
|
||||||
|
float x_scale = *(float*)&parameters["x_scale"];
|
||||||
|
float y_scale = *(float*)&parameters["y_scale"];
|
||||||
|
float w_scale = *(float*)&parameters["w_scale"];
|
||||||
|
float h_scale = *(float*)&parameters["h_scale"];
|
||||||
|
if (x_scale != 1.0f || y_scale != 1.0f || w_scale != 1.0f || h_scale != 1.0f) {
|
||||||
|
int numPriors = priors.rows;
|
||||||
|
priors.resize(numPriors * 2);
|
||||||
|
Mat_<float> scales({1, 4}, {1.f / x_scale, 1.f / y_scale,
|
||||||
|
1.f / w_scale, 1.f / h_scale});
|
||||||
|
repeat(scales, numPriors, 1, priors.rowRange(numPriors, priors.rows));
|
||||||
|
priors = priors.reshape(1, {1, 2, (int)priors.total() / 2});
|
||||||
|
layerParams.set("variance_encoded_in_target", false);
|
||||||
|
} else {
|
||||||
|
priors = priors.reshape(1, {1, 1, (int)priors.total()});
|
||||||
|
layerParams.set("variance_encoded_in_target", true);
|
||||||
|
}
|
||||||
|
|
||||||
LayerParams priorsLP;
|
LayerParams priorsLP;
|
||||||
priorsLP.name = layerParams.name + "/priors";
|
priorsLP.name = layerParams.name + "/priors";
|
||||||
priorsLP.type = "Const";
|
priorsLP.type = "Const";
|
||||||
priorsLP.blobs.resize(1, priors.reshape(1, {1, 1, (int)priors.total()}));
|
priorsLP.blobs.resize(1, priors);
|
||||||
|
|
||||||
int priorsId = dstNet.addLayer(priorsLP.name, priorsLP.type, priorsLP);
|
int priorsId = dstNet.addLayer(priorsLP.name, priorsLP.type, priorsLP);
|
||||||
layerIds[op.inputs()->Get(2)] = std::make_pair(priorsId, 0);
|
layerIds[op.inputs()->Get(2)] = std::make_pair(priorsId, 0);
|
||||||
|
@ -31,9 +31,8 @@ void testInputShapes(const Net& net, const std::vector<Mat>& inps) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void testModel(const std::string& modelName, const Mat& input, double l1 = 1e-5, double lInf = 1e-4)
|
void testModel(Net& net, const std::string& modelName, const Mat& input, double l1 = 1e-5, double lInf = 1e-4)
|
||||||
{
|
{
|
||||||
Net net = readNet(findDataFile("dnn/tflite/" + modelName + ".tflite", false));
|
|
||||||
testInputShapes(net, {input});
|
testInputShapes(net, {input});
|
||||||
net.setInput(input);
|
net.setInput(input);
|
||||||
|
|
||||||
@ -49,6 +48,12 @@ void testModel(const std::string& modelName, const Mat& input, double l1 = 1e-5,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void testModel(const std::string& modelName, const Mat& input, double l1 = 1e-5, double lInf = 1e-4)
|
||||||
|
{
|
||||||
|
Net net = readNet(findDataFile("dnn/tflite/" + modelName + ".tflite", false));
|
||||||
|
testModel(net, modelName, input, l1, lInf);
|
||||||
|
}
|
||||||
|
|
||||||
void testModel(const std::string& modelName, const Size& inpSize, double l1 = 1e-5, double lInf = 1e-4)
|
void testModel(const std::string& modelName, const Size& inpSize, double l1 = 1e-5, double lInf = 1e-4)
|
||||||
{
|
{
|
||||||
Mat input = imread(findDataFile("cv/shared/lena.png"));
|
Mat input = imread(findDataFile("cv/shared/lena.png"));
|
||||||
@ -56,6 +61,13 @@ void testModel(const std::string& modelName, const Size& inpSize, double l1 = 1e
|
|||||||
testModel(modelName, input, l1, lInf);
|
testModel(modelName, input, l1, lInf);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void testLayer(const std::string& modelName, double l1 = 1e-5, double lInf = 1e-4)
|
||||||
|
{
|
||||||
|
Mat inp = blobFromNPY(findDataFile("dnn/tflite/" + modelName + "_inp.npy"));
|
||||||
|
Net net = readNet(findDataFile("dnn/tflite/" + modelName + ".tflite"));
|
||||||
|
testModel(net, modelName, inp, l1, lInf);
|
||||||
|
}
|
||||||
|
|
||||||
// https://google.github.io/mediapipe/solutions/face_mesh
|
// https://google.github.io/mediapipe/solutions/face_mesh
|
||||||
TEST(Test_TFLite, face_landmark)
|
TEST(Test_TFLite, face_landmark)
|
||||||
{
|
{
|
||||||
@ -146,6 +158,10 @@ TEST(Test_TFLite, EfficientDet_int8) {
|
|||||||
normAssertDetections(ref, out, "", 0.5, 0.05, 0.1);
|
normAssertDetections(ref, out, "", 0.5, 0.05, 0.1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Runs the "replicate_by_pack" layer model through testLayer() with default
// tolerances (presumably a model where PACK replicates a single tensor).
TEST(Test_TFLite, replicate_by_pack) {
    const std::string modelName = "replicate_by_pack";
    testLayer(modelName);
}
|
||||||
|
|
||||||
}} // namespace
|
}} // namespace
|
||||||
|
|
||||||
#endif // OPENCV_TEST_DNN_TFLITE
|
#endif // OPENCV_TEST_DNN_TFLITE
|
||||||
|
Loading…
Reference in New Issue
Block a user