Merge pull request #27307 from dkurt:tflite_face_blendshape_model

TFLite fixes for Face Blendshapes V2 #27307

### Pull Request Readiness Checklist

* Scalars support
* Better handling of 1D tensors
* New ops import: SUB, SQRT, DIV, NEG, SQUARED_DIFFERENCE, SUM
* A number of NHWC<->NCHW layout compatibility improvements

resolves #27211

**Merge with extra**: https://github.com/opencv/opencv_extra/pull/1257

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [x] I agree to contribute to the project under Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [x] The PR is proposed to the proper branch
- [x] There is a reference to the original bug report and related work
- [x] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
      Patch to opencv_extra has the same branch name.
- [x] The feature is well documented and sample code can be built with the project CMake
This commit is contained in:
Dmitry Kurtaev 2025-05-19 10:45:18 +03:00 committed by GitHub
parent eae77dae86
commit 1e3ab44cff
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 180 additions and 43 deletions

View File

@ -2015,7 +2015,7 @@ public:
if( weightsMat.empty() )
{
transpose(blobs[0].reshape(1, inpCn), weightsMat);
biasesMat = hasBias() ? blobs[1].reshape(1, outCn) : Mat::zeros(outCn, 1, CV_32F);
biasesMat = hasBias() ? blobs[1] : Mat::zeros(outCn, 1, CV_32F);
}
for (size_t ii = 0; ii < outputs.size(); ii++)
@ -2041,7 +2041,7 @@ public:
Mat convMat = convBlob.rowRange(_Range((g + n * ngroups) * inpGroupCn, inpGroupCn));
Mat wghtMat = weightsMat.colRange(_Range(g * inpGroupCn, inpGroupCn));
Mat curBiasMat = biasesMat.rowRange(_Range(g * outGroupCn, outGroupCn));
Mat curBiasMat = biasesMat.reshape(1, {outCn, 1}).rowRange(_Range(g * outGroupCn, outGroupCn));
//gemm(wghtMat, convMat, 1, colMat, 0, colMat, 0);
MatMulInvoker mminvoker(wghtMat, convMat, colMat, nstripes);

View File

@ -257,8 +257,8 @@ public:
{
const Mat &src = inputs[i];
Mat &dst = outputs[i];
CV_Assert(src.size == dst.size && src.type() == dst.type() &&
src.isContinuous() && dst.isContinuous() && src.type() == CV_32F);
CV_Assert_N(src.size == dst.size, src.type() == dst.type(),
src.isContinuous(), dst.isContinuous(), src.type() == CV_32F);
const int nstripes = getNumThreads();
PBody body(func, src, dst, nstripes);

View File

@ -106,7 +106,7 @@ public:
if (params.has("operation"))
{
String operation = toLowerCase(params.get<String>("operation"));
if (operation == "prod")
if (operation == "prod" || operation == "mul")
op = PROD;
else if (operation == "sum")
op = SUM;

View File

@ -213,6 +213,7 @@ public:
{
reuse(bestBlobPin, lp);
dst = bestBlob.reshape(1, 1).colRange(0, targetTotal).reshape(1, shape);
dst.dims = shape.size();
return;
}
}

View File

@ -72,7 +72,7 @@ private:
void parseSoftmax(const Operator& op, const std::string& opcode, LayerParams& layerParams);
void parseCast(const Operator& op, const std::string& opcode, LayerParams& layerParams);
void parseTranspose(const Operator& op, const std::string& opcode, LayerParams& layerParams);
void parseGlobalPooling(const Operator& op, const std::string& opcode, LayerParams& layerParams);
void parseReduce(const Operator& op, const std::string& opcode, LayerParams& layerParams);
void parseFusedActivation(const Operator& op, ActivationFunctionType activ);
void parseActivation(const Operator& op, const std::string& opcode, LayerParams& layerParams, bool isFused);
@ -81,6 +81,7 @@ private:
int addReshapeLayer(const std::vector<int>& shape, int axis, int num_axes,
const std::string& name, const std::pair<int, int>& inpId, int dtype);
int addFlattenLayer(int axis, int end_axis, const std::string& name, const std::pair<int, int>& inpId, int dtype);
int addConstLayer(const Mat& data, const std::string& name);
inline bool isInt8(const Operator& op);
inline void getQuantParams(const Operator& op, float& inpScale, int& inpZero, float& outScale, int& outZero);
@ -88,9 +89,12 @@ private:
Mat TFLiteImporter::parseTensor(const Tensor& tensor)
{
std::vector<int> shape;
const auto tensor_shape = tensor.shape();
CV_Assert(tensor_shape);
std::vector<int> shape(tensor_shape->begin(), tensor_shape->end());
if (tensor_shape && tensor_shape->size())
shape.assign(tensor_shape->begin(), tensor_shape->end());
else
shape.resize(1, 1);
int bufferIdx = tensor.buffer();
CV_Assert(bufferIdx != 0); // 0th buffer is a no-data buffer
const Buffer* buffer = model->buffers()->Get(bufferIdx);
@ -118,7 +122,11 @@ Mat TFLiteImporter::parseTensor(const Tensor& tensor)
default:
CV_Error(Error::StsNotImplemented, format("Parse tensor with type %s", EnumNameTensorType(tensor.type())));
}
return shape.empty() ? Mat() : Mat(shape, dtype, const_cast<void*>(data));
Mat res = Mat(shape, dtype, const_cast<void*>(data));
// workaround for scalars support
if (!tensor_shape || shape.size() == 1)
res.dims = 1;
return res;
}
TFLiteImporter::TFLiteImporter(Net& dstNet, const char* modelBuffer, size_t bufSize)
@ -237,6 +245,8 @@ void TFLiteImporter::populateNet()
// Dequantize a buffer
Mat dataFP32;
data.convertTo(dataFP32, CV_32F);
// workaround for scalars support
dataFP32.dims = data.dims;
allTensors[op_outputs->Get(0)] = dataFP32;
continue;
}
@ -259,6 +269,11 @@ void TFLiteImporter::populateNet()
}
throw;
}
// Uncomment to finish model build after a specific node
// if (op_outputs->Get(0) == 90)
// {
// break;
// }
}
}
@ -270,7 +285,9 @@ TFLiteImporter::DispatchMap TFLiteImporter::buildDispatchMap()
dispatch["CONV_2D"] = &TFLiteImporter::parseConvolution;
dispatch["DEPTHWISE_CONV_2D"] = &TFLiteImporter::parseDWConvolution;
dispatch["ADD"] = dispatch["MUL"] = &TFLiteImporter::parseEltwise;
dispatch["ADD"] = dispatch["MUL"] = dispatch["SUB"] =
dispatch["SQRT"] = dispatch["DIV"] = dispatch["NEG"] =
dispatch["RSQRT"] = dispatch["SQUARED_DIFFERENCE"] = &TFLiteImporter::parseEltwise;
dispatch["RELU"] = dispatch["PRELU"] = dispatch["HARD_SWISH"] =
dispatch["LOGISTIC"] = dispatch["LEAKY_RELU"] = &TFLiteImporter::parseActivation;
dispatch["MAX_POOL_2D"] = dispatch["AVERAGE_POOL_2D"] = &TFLiteImporter::parsePooling;
@ -290,8 +307,8 @@ TFLiteImporter::DispatchMap TFLiteImporter::buildDispatchMap()
dispatch["CAST"] = &TFLiteImporter::parseCast;
dispatch["TFLite_Detection_PostProcess"] = &TFLiteImporter::parseDetectionPostProcess;
dispatch["TRANSPOSE"] = &TFLiteImporter::parseTranspose;
dispatch["MEAN"] = dispatch["REDUCE_MAX"] = &TFLiteImporter::parseGlobalPooling;
dispatch["STRIDED_SLICE"] = &TFLiteImporter::parseStridedSlice;
dispatch["REDUCE_MAX"] = dispatch["MEAN"] = dispatch["SUM"] = &TFLiteImporter::parseReduce;
return dispatch;
}
@ -374,6 +391,14 @@ void TFLiteImporter::addLayer(LayerParams& layerParams, const Operator& op) {
void TFLiteImporter::parseConvolution(const Operator& op, const std::string& opcode, LayerParams& layerParams) {
layerParams.type = "Convolution";
int inpId = op.inputs()->Get(0);
if (layouts[inpId] == DNN_LAYOUT_UNKNOWN && modelTensors->Get(inpId)->shape()->size() == 4)
{
int permId = addPermuteLayer({0, 3, 1, 2}, layerParams.name + "/permute_input", layerIds[inpId], isInt8(op) ? CV_8S : CV_32F); // NHWC -> NCHW
layerIds[inpId] = std::make_pair(permId, 0);
layouts[op.outputs()->Get(0)] = DNN_LAYOUT_NHWC;
}
auto options = reinterpret_cast<const Conv2DOptions*>(op.builtin_options());
layerParams.set("pad_mode", EnumNamePadding(options->padding()));
layerParams.set("stride_w", options->stride_w());
@ -517,8 +542,9 @@ void TFLiteImporter::parsePadding(const Operator& op, const std::string& opcode,
}
void TFLiteImporter::parseEltwise(const Operator& op, const std::string& opcode, LayerParams& layerParams) {
bool isOpInt8 = isInt8(op);
ActivationFunctionType activ = ActivationFunctionType_NONE;
layerParams.type = "Eltwise";
layerParams.type = isOpInt8 ? "Eltwise" : "NaryEltwise";
if (opcode == "ADD") {
auto options = reinterpret_cast<const AddOptions*>(op.builtin_options());
activ = options->fused_activation_function();
@ -527,12 +553,35 @@ void TFLiteImporter::parseEltwise(const Operator& op, const std::string& opcode,
else if (opcode == "MUL") {
auto options = reinterpret_cast<const MulOptions*>(op.builtin_options());
activ = options->fused_activation_function();
layerParams.set("operation", "prod");
layerParams.set("operation", "mul");
}
else if (opcode == "DIV") {
auto options = reinterpret_cast<const DivOptions*>(op.builtin_options());
activ = options->fused_activation_function();
layerParams.set("operation", "div");
}
else if (opcode == "SUB" && !isOpInt8) {
auto options = reinterpret_cast<const SubOptions*>(op.builtin_options());
activ = options->fused_activation_function();
layerParams.set("operation", "sub");
}
else if (opcode == "NEG") {
layerParams.type = "Scale";
layerParams.blobs.resize(1, Mat(1, 1, CV_32F, Scalar(-1)));
}
else if (opcode == "SQUARED_DIFFERENCE" && !isOpInt8) {
layerParams.set("operation", "sub");
}
else if (opcode == "RSQRT" && !isOpInt8) {
layerParams.type = "Sqrt";
}
else if (opcode == "SQRT" && !isOpInt8) {
layerParams.type = "Sqrt";
} else {
CV_Error(Error::StsNotImplemented, "Unknown opcode for Eltwise layer: " + opcode);
CV_Error(Error::StsNotImplemented, cv::format("DNN/TFLite: Unknown opcode for %s Eltwise layer '%s'", isOpInt8 ? "INT8" : "FP32", opcode.c_str()));
}
if (isInt8(op)) {
if (isOpInt8) {
const Tensor* out = modelTensors->Get(op.outputs()->Get(0));
float outScale = out->quantization()->scale()->Get(0);
int outZero = out->quantization()->zero_point()->Get(0);
@ -559,8 +608,35 @@ void TFLiteImporter::parseEltwise(const Operator& op, const std::string& opcode,
layerParams.set("scales", outScale);
layerParams.set("zeropoints", outZero);
}
// Force all inputs to be in graph, not as blobs
for (int idx : *op.inputs()) {
if (layerIds.find(idx) != layerIds.end()) {
continue; // Output from a different layer
}
Mat blob = allTensors[idx];
if (layouts[op.inputs()->Get(0)] == DNN_LAYOUT_NHWC && blob.dims == 1) {
blob = blob.reshape(1, {1, (int)blob.total(), 1, 1});
}
int constId = addConstLayer(blob, modelTensors->Get(idx)->name()->str());
layerIds[idx] = std::make_pair(constId, 0);
}
addLayer(layerParams, op);
parseFusedActivation(op, activ);
// Layers that split on multiple operations
if (opcode == "SQUARED_DIFFERENCE") {
LayerParams lp;
lp.set("power", 2);
int id = dstNet.addLayerToPrev(layerParams.name + "/square", "Power", isOpInt8 ? CV_8S : CV_32F, lp);
layerIds[op.outputs()->Get(0)] = std::make_pair(id, 0);
}
else if (opcode == "RSQRT") {
LayerParams lp;
int id = dstNet.addLayerToPrev(layerParams.name + "/inv", "Reciprocal", isOpInt8 ? CV_8S : CV_32F, lp);
layerIds[op.outputs()->Get(0)] = std::make_pair(id, 0);
}
}
void TFLiteImporter::parsePooling(const Operator& op, const std::string& opcode, LayerParams& layerParams) {
@ -654,14 +730,35 @@ void TFLiteImporter::parseConcat(const Operator& op, const std::string& opcode,
auto options = reinterpret_cast<const ConcatenationOptions*>(op.builtin_options());
int axis = options->axis();
DataLayout inpLayout = layouts[op.inputs()->Get(0)];
if (inpLayout == DNN_LAYOUT_NHWC) {
// OpenCV works in NCHW data layout. So change the axis correspondingly.
axis = normalize_axis(axis, 4);
static const int remap[] = {0, 2, 3, 1};
axis = remap[axis];
bool hasNHWCInput = false;
for (int idx : *op.inputs()) {
DataLayout inpLayout = layouts[idx];
if (inpLayout == DNN_LAYOUT_NHWC) {
// OpenCV works in NCHW data layout. So change the axis correspondingly.
axis = normalize_axis(axis, 4);
static const int remap[] = {0, 2, 3, 1};
axis = remap[axis];
hasNHWCInput = true;
break;
}
}
layerParams.set("axis", axis);
// Force all inputs to be in graph, not as blobs
for (int idx : *op.inputs()) {
if (layerIds.find(idx) != layerIds.end()) {
continue; // Output from a different layer
}
Mat blob = allTensors[idx];
if (hasNHWCInput && blob.dims == 4)
{
Mat nchwBlob;
transposeND(blob, {0, 3, 1, 2}, nchwBlob);
blob = nchwBlob;
}
int constId = addConstLayer(blob, modelTensors->Get(idx)->name()->str());
layerIds[idx] = std::make_pair(constId, 0);
}
addLayer(layerParams, op);
parseFusedActivation(op, options->fused_activation_function());
}
@ -770,35 +867,38 @@ void TFLiteImporter::parseTranspose(const Operator& op, const std::string& opcod
addLayer(layerParams, op);
}
void TFLiteImporter::parseGlobalPooling(const Operator& op, const std::string& opcode, LayerParams& layerParams)
void TFLiteImporter::parseReduce(const Operator& op, const std::string& opcode, LayerParams& layerParams)
{
layerParams.type = "Pooling";
if(opcode == "MEAN") {
layerParams.set("pool", "ave");
layerParams.type = "Reduce";
if (opcode == "REDUCE_MAX") {
layerParams.set("reduce", "max");
}
else if (opcode == "REDUCE_MAX") {
layerParams.set("pool", "max");
else if (opcode == "SUM") {
layerParams.set("reduce", "sum");
}
else if (opcode == "MEAN") {
layerParams.set("reduce", "mean");
}
else {
CV_Error(Error::StsNotImplemented, "Unsupported pooling " + opcode);
CV_Error(Error::StsNotImplemented, "Unsupported reducing " + opcode);
}
layerParams.set("global_pooling", true);
auto options = op.builtin_options_as_ReducerOptions();
bool keep_dims = options->keep_dims();
layerParams.set("keepdims", options->keep_dims());
if (!keep_dims) {
const auto name = layerParams.name;
layerParams.name += "/global_pooling";
addLayer(layerParams, op);
Mat axes = allTensors[op.inputs()->Get(1)].clone();
CV_CheckTypeEQ(axes.type(), CV_32S, "");
int out = op.outputs()->Get(0);
auto outId = layerIds[out];
int flattenId = addFlattenLayer(1, -1, name, outId, isInt8(op) ? CV_8S : CV_32F);
layerIds[out] = std::make_pair(flattenId, 0);
}
else {
addLayer(layerParams, op);
DataLayout inpLayout = layouts[op.inputs()->Get(0)];
if (inpLayout == DNN_LAYOUT_NHWC) {
static const int remap[] = {0, 2, 3, 1};
// OpenCV works in NCHW data layout. So change the axis correspondingly.
for (int i = 0; i < axes.total(); ++i) {
axes.at<int>(i) = remap[normalize_axis(axes.at<int>(i), 4)];
}
}
layerParams.set("axes", DictValue::arrayInt(axes.ptr<int>(), axes.total()));
addLayer(layerParams, op);
}
int TFLiteImporter::addPermuteLayer(const std::vector<int>& order, const std::string& permName,
@ -833,6 +933,13 @@ int TFLiteImporter::addFlattenLayer(int axis, int end_axis, const std::string& n
return id;
}
int TFLiteImporter::addConstLayer(const Mat& blob, const std::string& name)
{
LayerParams lp;
lp.blobs.push_back(blob.u ? blob : blob.clone()); // some tensors are owned by OpenCV
return dstNet.addLayer(name, "Const", lp);
}
void TFLiteImporter::parseDeconvolution(const Operator& op, const std::string& opcode, LayerParams& layerParams) {
layerParams.type = "Deconvolution";
@ -928,8 +1035,7 @@ void TFLiteImporter::parseStridedSlice(const Operator& op, const std::string& op
int endMask = options->end_mask();
if (options->new_axis_mask())
CV_Error(Error::StsNotImplemented, "New axis during StridedSlice");
if (options->shrink_axis_mask())
CV_Error(Error::StsNotImplemented, "Shrink axis during StridedSlice");
int shrinkMask = options->shrink_axis_mask();
Mat begins = allTensors[op.inputs()->Get(1)];
Mat ends = allTensors[op.inputs()->Get(2)];
@ -958,7 +1064,30 @@ void TFLiteImporter::parseStridedSlice(const Operator& op, const std::string& op
layerParams.set("begin", DictValue::arrayInt((int*)begins.data, begins.total()));
layerParams.set("end", DictValue::arrayInt((int*)ends.data, ends.total()));
layerParams.set("steps", DictValue::arrayInt((int*)strides.data, strides.total()));
int lastShrinkAxis = -1;
for (int axis = 0; axis < num; ++axis)
{
if (shrinkMask & (1 << axis))
lastShrinkAxis = axis;
}
std::string layerName = layerParams.name;
if (lastShrinkAxis != -1)
{
layerParams.name += "/slice";
}
addLayer(layerParams, op);
for (int axis = 0; axis < num; ++axis)
{
if (!(shrinkMask & (1 << axis)))
continue;
std::string name = (axis == lastShrinkAxis) ? layerName : format("%s/shrink_axis_%d", layerName.c_str(), axis);
int layerId = addFlattenLayer(axis, axis + 1, name,
layerIds[op.outputs()->Get(0)], isInt8(op) ? CV_8S : CV_32F);
layerIds[op.inputs()->Get(0)] = std::make_pair(layerId, 0);
}
}
void TFLiteImporter::parseFullyConnected(const Operator& op, const std::string& opcode, LayerParams& layerParams) {

View File

@ -57,6 +57,7 @@ void Test_TFLite::testModel(Net& net, const std::string& modelName, const Mat& i
ASSERT_EQ(outs.size(), outNames.size());
for (int i = 0; i < outNames.size(); ++i) {
std::replace(outNames[i].begin(), outNames[i].end(), ':', '_');
Mat ref = blobFromNPY(findDataFile(format("dnn/tflite/%s_out_%s.npy", modelName.c_str(), outNames[i].c_str())));
// A workaround solution for the following cases due to inconsistent shape definitions.
// The details please see: https://github.com/opencv/opencv/pull/25297#issuecomment-2039081369
@ -276,6 +277,12 @@ TEST_P(Test_TFLite, StridedSlice) {
testLayer("strided_slice");
}
// Regression test for the Face Blendshapes V2 model (resolves #27211).
// Loads a reference input blob from opencv_extra and runs testModel(),
// which compares the network outputs against stored .npy references.
TEST_P(Test_TFLite, face_blendshapes)
{
Mat inp = blobFromNPY(findDataFile("dnn/tflite/face_blendshapes_inp.npy"));
testModel("face_blendshapes", inp);
}
INSTANTIATE_TEST_CASE_P(/**/, Test_TFLite, dnnBackendsAndTargets());
}} // namespace