Merge pull request #27307 from dkurt:tflite_face_blendshape_model

TFLite fixes for Face Blendshapes V2 #27307

### Pull Request Readiness Checklist

* Scalars support
* Better handling of 1D tensors
* New ops import: SUB, SQRT, DIV, NEG, SQUARED_DIFFERENCE, SUM
* A number of NHWC<->NCHW layout compatibility improvements

resolves #27211

**Merge with extra**: https://github.com/opencv/opencv_extra/pull/1257

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [x] I agree to contribute to the project under Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [x] The PR is proposed to the proper branch
- [x] There is a reference to the original bug report and related work
- [x] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
      Patch to opencv_extra has the same branch name.
- [x] The feature is well documented and sample code can be built with the project CMake
This commit is contained in:
Dmitry Kurtaev 2025-05-19 10:45:18 +03:00 committed by GitHub
parent eae77dae86
commit 1e3ab44cff
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 180 additions and 43 deletions

View File

@ -2015,7 +2015,7 @@ public:
if( weightsMat.empty() )
{
transpose(blobs[0].reshape(1, inpCn), weightsMat);
biasesMat = hasBias() ? blobs[1].reshape(1, outCn) : Mat::zeros(outCn, 1, CV_32F);
biasesMat = hasBias() ? blobs[1] : Mat::zeros(outCn, 1, CV_32F);
}
for (size_t ii = 0; ii < outputs.size(); ii++)
@ -2041,7 +2041,7 @@ public:
Mat convMat = convBlob.rowRange(_Range((g + n * ngroups) * inpGroupCn, inpGroupCn));
Mat wghtMat = weightsMat.colRange(_Range(g * inpGroupCn, inpGroupCn));
Mat curBiasMat = biasesMat.rowRange(_Range(g * outGroupCn, outGroupCn));
Mat curBiasMat = biasesMat.reshape(1, {outCn, 1}).rowRange(_Range(g * outGroupCn, outGroupCn));
//gemm(wghtMat, convMat, 1, colMat, 0, colMat, 0);
MatMulInvoker mminvoker(wghtMat, convMat, colMat, nstripes);

View File

@ -257,8 +257,8 @@ public:
{
const Mat &src = inputs[i];
Mat &dst = outputs[i];
CV_Assert(src.size == dst.size && src.type() == dst.type() &&
src.isContinuous() && dst.isContinuous() && src.type() == CV_32F);
CV_Assert_N(src.size == dst.size, src.type() == dst.type(),
src.isContinuous(), dst.isContinuous(), src.type() == CV_32F);
const int nstripes = getNumThreads();
PBody body(func, src, dst, nstripes);

View File

@ -106,7 +106,7 @@ public:
if (params.has("operation"))
{
String operation = toLowerCase(params.get<String>("operation"));
if (operation == "prod")
if (operation == "prod" || operation == "mul")
op = PROD;
else if (operation == "sum")
op = SUM;

View File

@ -213,6 +213,7 @@ public:
{
reuse(bestBlobPin, lp);
dst = bestBlob.reshape(1, 1).colRange(0, targetTotal).reshape(1, shape);
dst.dims = shape.size();
return;
}
}

View File

@ -72,7 +72,7 @@ private:
void parseSoftmax(const Operator& op, const std::string& opcode, LayerParams& layerParams);
void parseCast(const Operator& op, const std::string& opcode, LayerParams& layerParams);
void parseTranspose(const Operator& op, const std::string& opcode, LayerParams& layerParams);
void parseGlobalPooling(const Operator& op, const std::string& opcode, LayerParams& layerParams);
void parseReduce(const Operator& op, const std::string& opcode, LayerParams& layerParams);
void parseFusedActivation(const Operator& op, ActivationFunctionType activ);
void parseActivation(const Operator& op, const std::string& opcode, LayerParams& layerParams, bool isFused);
@ -81,6 +81,7 @@ private:
int addReshapeLayer(const std::vector<int>& shape, int axis, int num_axes,
const std::string& name, const std::pair<int, int>& inpId, int dtype);
int addFlattenLayer(int axis, int end_axis, const std::string& name, const std::pair<int, int>& inpId, int dtype);
int addConstLayer(const Mat& data, const std::string& name);
inline bool isInt8(const Operator& op);
inline void getQuantParams(const Operator& op, float& inpScale, int& inpZero, float& outScale, int& outZero);
@ -88,9 +89,12 @@ private:
Mat TFLiteImporter::parseTensor(const Tensor& tensor)
{
std::vector<int> shape;
const auto tensor_shape = tensor.shape();
CV_Assert(tensor_shape);
std::vector<int> shape(tensor_shape->begin(), tensor_shape->end());
if (tensor_shape && tensor_shape->size())
shape.assign(tensor_shape->begin(), tensor_shape->end());
else
shape.resize(1, 1);
int bufferIdx = tensor.buffer();
CV_Assert(bufferIdx != 0); // 0th buffer is a no-data buffer
const Buffer* buffer = model->buffers()->Get(bufferIdx);
@ -118,7 +122,11 @@ Mat TFLiteImporter::parseTensor(const Tensor& tensor)
default:
CV_Error(Error::StsNotImplemented, format("Parse tensor with type %s", EnumNameTensorType(tensor.type())));
}
return shape.empty() ? Mat() : Mat(shape, dtype, const_cast<void*>(data));
Mat res = Mat(shape, dtype, const_cast<void*>(data));
// workaround for scalars support
if (!tensor_shape || shape.size() == 1)
res.dims = 1;
return res;
}
TFLiteImporter::TFLiteImporter(Net& dstNet, const char* modelBuffer, size_t bufSize)
@ -237,6 +245,8 @@ void TFLiteImporter::populateNet()
// Dequantize a buffer
Mat dataFP32;
data.convertTo(dataFP32, CV_32F);
// workaround for scalars support
dataFP32.dims = data.dims;
allTensors[op_outputs->Get(0)] = dataFP32;
continue;
}
@ -259,6 +269,11 @@ void TFLiteImporter::populateNet()
}
throw;
}
// Uncomment to finish model build after a specific node
// if (op_outputs->Get(0) == 90)
// {
// break;
// }
}
}
@ -270,7 +285,9 @@ TFLiteImporter::DispatchMap TFLiteImporter::buildDispatchMap()
dispatch["CONV_2D"] = &TFLiteImporter::parseConvolution;
dispatch["DEPTHWISE_CONV_2D"] = &TFLiteImporter::parseDWConvolution;
dispatch["ADD"] = dispatch["MUL"] = &TFLiteImporter::parseEltwise;
dispatch["ADD"] = dispatch["MUL"] = dispatch["SUB"] =
dispatch["SQRT"] = dispatch["DIV"] = dispatch["NEG"] =
dispatch["RSQRT"] = dispatch["SQUARED_DIFFERENCE"] = &TFLiteImporter::parseEltwise;
dispatch["RELU"] = dispatch["PRELU"] = dispatch["HARD_SWISH"] =
dispatch["LOGISTIC"] = dispatch["LEAKY_RELU"] = &TFLiteImporter::parseActivation;
dispatch["MAX_POOL_2D"] = dispatch["AVERAGE_POOL_2D"] = &TFLiteImporter::parsePooling;
@ -290,8 +307,8 @@ TFLiteImporter::DispatchMap TFLiteImporter::buildDispatchMap()
dispatch["CAST"] = &TFLiteImporter::parseCast;
dispatch["TFLite_Detection_PostProcess"] = &TFLiteImporter::parseDetectionPostProcess;
dispatch["TRANSPOSE"] = &TFLiteImporter::parseTranspose;
dispatch["MEAN"] = dispatch["REDUCE_MAX"] = &TFLiteImporter::parseGlobalPooling;
dispatch["STRIDED_SLICE"] = &TFLiteImporter::parseStridedSlice;
dispatch["REDUCE_MAX"] = dispatch["MEAN"] = dispatch["SUM"] = &TFLiteImporter::parseReduce;
return dispatch;
}
@ -374,6 +391,14 @@ void TFLiteImporter::addLayer(LayerParams& layerParams, const Operator& op) {
void TFLiteImporter::parseConvolution(const Operator& op, const std::string& opcode, LayerParams& layerParams) {
layerParams.type = "Convolution";
int inpId = op.inputs()->Get(0);
if (layouts[inpId] == DNN_LAYOUT_UNKNOWN && modelTensors->Get(inpId)->shape()->size() == 4)
{
int permId = addPermuteLayer({0, 3, 1, 2}, layerParams.name + "/permute_input", layerIds[inpId], isInt8(op) ? CV_8S : CV_32F); // NHWC -> NCHW
layerIds[inpId] = std::make_pair(permId, 0);
layouts[op.outputs()->Get(0)] = DNN_LAYOUT_NHWC;
}
auto options = reinterpret_cast<const Conv2DOptions*>(op.builtin_options());
layerParams.set("pad_mode", EnumNamePadding(options->padding()));
layerParams.set("stride_w", options->stride_w());
@ -517,8 +542,9 @@ void TFLiteImporter::parsePadding(const Operator& op, const std::string& opcode,
}
void TFLiteImporter::parseEltwise(const Operator& op, const std::string& opcode, LayerParams& layerParams) {
bool isOpInt8 = isInt8(op);
ActivationFunctionType activ = ActivationFunctionType_NONE;
layerParams.type = "Eltwise";
layerParams.type = isOpInt8 ? "Eltwise" : "NaryEltwise";
if (opcode == "ADD") {
auto options = reinterpret_cast<const AddOptions*>(op.builtin_options());
activ = options->fused_activation_function();
@ -527,12 +553,35 @@ void TFLiteImporter::parseEltwise(const Operator& op, const std::string& opcode,
else if (opcode == "MUL") {
auto options = reinterpret_cast<const MulOptions*>(op.builtin_options());
activ = options->fused_activation_function();
layerParams.set("operation", "prod");
layerParams.set("operation", "mul");
}
else if (opcode == "DIV") {
auto options = reinterpret_cast<const DivOptions*>(op.builtin_options());
activ = options->fused_activation_function();
layerParams.set("operation", "div");
}
else if (opcode == "SUB" && !isOpInt8) {
auto options = reinterpret_cast<const SubOptions*>(op.builtin_options());
activ = options->fused_activation_function();
layerParams.set("operation", "sub");
}
else if (opcode == "NEG") {
layerParams.type = "Scale";
layerParams.blobs.resize(1, Mat(1, 1, CV_32F, Scalar(-1)));
}
else if (opcode == "SQUARED_DIFFERENCE" && !isOpInt8) {
layerParams.set("operation", "sub");
}
else if (opcode == "RSQRT" && !isOpInt8) {
layerParams.type = "Sqrt";
}
else if (opcode == "SQRT" && !isOpInt8) {
layerParams.type = "Sqrt";
} else {
CV_Error(Error::StsNotImplemented, "Unknown opcode for Eltwise layer: " + opcode);
CV_Error(Error::StsNotImplemented, cv::format("DNN/TFLite: Unknown opcode for %s Eltwise layer '%s'", isOpInt8 ? "INT8" : "FP32", opcode.c_str()));
}
if (isInt8(op)) {
if (isOpInt8) {
const Tensor* out = modelTensors->Get(op.outputs()->Get(0));
float outScale = out->quantization()->scale()->Get(0);
int outZero = out->quantization()->zero_point()->Get(0);
@ -559,8 +608,35 @@ void TFLiteImporter::parseEltwise(const Operator& op, const std::string& opcode,
layerParams.set("scales", outScale);
layerParams.set("zeropoints", outZero);
}
// Force all inputs to be in graph, not as blobs
for (int idx : *op.inputs()) {
if (layerIds.find(idx) != layerIds.end()) {
continue; // Output from a different layer
}
Mat blob = allTensors[idx];
if (layouts[op.inputs()->Get(0)] == DNN_LAYOUT_NHWC && blob.dims == 1) {
blob = blob.reshape(1, {1, (int)blob.total(), 1, 1});
}
int constId = addConstLayer(blob, modelTensors->Get(idx)->name()->str());
layerIds[idx] = std::make_pair(constId, 0);
}
addLayer(layerParams, op);
parseFusedActivation(op, activ);
// Layers that split on multiple operations
if (opcode == "SQUARED_DIFFERENCE") {
LayerParams lp;
lp.set("power", 2);
int id = dstNet.addLayerToPrev(layerParams.name + "/square", "Power", isOpInt8 ? CV_8S : CV_32F, lp);
layerIds[op.outputs()->Get(0)] = std::make_pair(id, 0);
}
else if (opcode == "RSQRT") {
LayerParams lp;
int id = dstNet.addLayerToPrev(layerParams.name + "/inv", "Reciprocal", isOpInt8 ? CV_8S : CV_32F, lp);
layerIds[op.outputs()->Get(0)] = std::make_pair(id, 0);
}
}
void TFLiteImporter::parsePooling(const Operator& op, const std::string& opcode, LayerParams& layerParams) {
@ -654,14 +730,35 @@ void TFLiteImporter::parseConcat(const Operator& op, const std::string& opcode,
auto options = reinterpret_cast<const ConcatenationOptions*>(op.builtin_options());
int axis = options->axis();
DataLayout inpLayout = layouts[op.inputs()->Get(0)];
if (inpLayout == DNN_LAYOUT_NHWC) {
// OpenCV works in NCHW data layout. So change the axis correspondingly.
axis = normalize_axis(axis, 4);
static const int remap[] = {0, 2, 3, 1};
axis = remap[axis];
bool hasNHWCInput = false;
for (int idx : *op.inputs()) {
DataLayout inpLayout = layouts[idx];
if (inpLayout == DNN_LAYOUT_NHWC) {
// OpenCV works in NCHW data layout. So change the axis correspondingly.
axis = normalize_axis(axis, 4);
static const int remap[] = {0, 2, 3, 1};
axis = remap[axis];
hasNHWCInput = true;
break;
}
}
layerParams.set("axis", axis);
// Force all inputs to be in graph, not as blobs
for (int idx : *op.inputs()) {
if (layerIds.find(idx) != layerIds.end()) {
continue; // Output from a different layer
}
Mat blob = allTensors[idx];
if (hasNHWCInput && blob.dims == 4)
{
Mat nchwBlob;
transposeND(blob, {0, 3, 1, 2}, nchwBlob);
blob = nchwBlob;
}
int constId = addConstLayer(blob, modelTensors->Get(idx)->name()->str());
layerIds[idx] = std::make_pair(constId, 0);
}
addLayer(layerParams, op);
parseFusedActivation(op, options->fused_activation_function());
}
@ -770,35 +867,38 @@ void TFLiteImporter::parseTranspose(const Operator& op, const std::string& opcod
addLayer(layerParams, op);
}
void TFLiteImporter::parseGlobalPooling(const Operator& op, const std::string& opcode, LayerParams& layerParams)
void TFLiteImporter::parseReduce(const Operator& op, const std::string& opcode, LayerParams& layerParams)
{
layerParams.type = "Pooling";
if(opcode == "MEAN") {
layerParams.set("pool", "ave");
layerParams.type = "Reduce";
if (opcode == "REDUCE_MAX") {
layerParams.set("reduce", "max");
}
else if (opcode == "REDUCE_MAX") {
layerParams.set("pool", "max");
else if (opcode == "SUM") {
layerParams.set("reduce", "sum");
}
else if (opcode == "MEAN") {
layerParams.set("reduce", "mean");
}
else {
CV_Error(Error::StsNotImplemented, "Unsupported pooling " + opcode);
CV_Error(Error::StsNotImplemented, "Unsupported reducing " + opcode);
}
layerParams.set("global_pooling", true);
auto options = op.builtin_options_as_ReducerOptions();
bool keep_dims = options->keep_dims();
layerParams.set("keepdims", options->keep_dims());
if (!keep_dims) {
const auto name = layerParams.name;
layerParams.name += "/global_pooling";
addLayer(layerParams, op);
Mat axes = allTensors[op.inputs()->Get(1)].clone();
CV_CheckTypeEQ(axes.type(), CV_32S, "");
int out = op.outputs()->Get(0);
auto outId = layerIds[out];
int flattenId = addFlattenLayer(1, -1, name, outId, isInt8(op) ? CV_8S : CV_32F);
layerIds[out] = std::make_pair(flattenId, 0);
}
else {
addLayer(layerParams, op);
DataLayout inpLayout = layouts[op.inputs()->Get(0)];
if (inpLayout == DNN_LAYOUT_NHWC) {
static const int remap[] = {0, 2, 3, 1};
// OpenCV works in NCHW data layout. So change the axis correspondingly.
for (int i = 0; i < axes.total(); ++i) {
axes.at<int>(i) = remap[normalize_axis(axes.at<int>(i), 4)];
}
}
layerParams.set("axes", DictValue::arrayInt(axes.ptr<int>(), axes.total()));
addLayer(layerParams, op);
}
int TFLiteImporter::addPermuteLayer(const std::vector<int>& order, const std::string& permName,
@ -833,6 +933,13 @@ int TFLiteImporter::addFlattenLayer(int axis, int end_axis, const std::string& n
return id;
}
int TFLiteImporter::addConstLayer(const Mat& blob, const std::string& name)
{
LayerParams lp;
lp.blobs.push_back(blob.u ? blob : blob.clone()); // some tensors are owned by OpenCV
return dstNet.addLayer(name, "Const", lp);
}
void TFLiteImporter::parseDeconvolution(const Operator& op, const std::string& opcode, LayerParams& layerParams) {
layerParams.type = "Deconvolution";
@ -928,8 +1035,7 @@ void TFLiteImporter::parseStridedSlice(const Operator& op, const std::string& op
int endMask = options->end_mask();
if (options->new_axis_mask())
CV_Error(Error::StsNotImplemented, "New axis during StridedSlice");
if (options->shrink_axis_mask())
CV_Error(Error::StsNotImplemented, "Shrink axis during StridedSlice");
int shrinkMask = options->shrink_axis_mask();
Mat begins = allTensors[op.inputs()->Get(1)];
Mat ends = allTensors[op.inputs()->Get(2)];
@ -958,7 +1064,30 @@ void TFLiteImporter::parseStridedSlice(const Operator& op, const std::string& op
layerParams.set("begin", DictValue::arrayInt((int*)begins.data, begins.total()));
layerParams.set("end", DictValue::arrayInt((int*)ends.data, ends.total()));
layerParams.set("steps", DictValue::arrayInt((int*)strides.data, strides.total()));
int lastShrinkAxis = -1;
for (int axis = 0; axis < num; ++axis)
{
if (shrinkMask & (1 << axis))
lastShrinkAxis = axis;
}
std::string layerName = layerParams.name;
if (lastShrinkAxis != -1)
{
layerParams.name += "/slice";
}
addLayer(layerParams, op);
for (int axis = 0; axis < num; ++axis)
{
if (!(shrinkMask & (1 << axis)))
continue;
std::string name = (axis == lastShrinkAxis) ? layerName : format("%s/shrink_axis_%d", layerName.c_str(), axis);
int layerId = addFlattenLayer(axis, axis + 1, name,
layerIds[op.outputs()->Get(0)], isInt8(op) ? CV_8S : CV_32F);
layerIds[op.inputs()->Get(0)] = std::make_pair(layerId, 0);
}
}
void TFLiteImporter::parseFullyConnected(const Operator& op, const std::string& opcode, LayerParams& layerParams) {

View File

@ -57,6 +57,7 @@ void Test_TFLite::testModel(Net& net, const std::string& modelName, const Mat& i
ASSERT_EQ(outs.size(), outNames.size());
for (int i = 0; i < outNames.size(); ++i) {
std::replace(outNames[i].begin(), outNames[i].end(), ':', '_');
Mat ref = blobFromNPY(findDataFile(format("dnn/tflite/%s_out_%s.npy", modelName.c_str(), outNames[i].c_str())));
// A workaround solution for the following cases due to inconsistent shape definitions.
// The details please see: https://github.com/opencv/opencv/pull/25297#issuecomment-2039081369
@ -276,6 +277,12 @@ TEST_P(Test_TFLite, StridedSlice) {
testLayer("strided_slice");
}
// Regression test for the Face Blendshapes V2 model (resolves #27211).
// Loads a reference input blob from opencv_extra and runs testModel(),
// which compares the network outputs against stored .npy references.
TEST_P(Test_TFLite, face_blendshapes)
{
Mat inp = blobFromNPY(findDataFile("dnn/tflite/face_blendshapes_inp.npy"));
testModel("face_blendshapes", inp);
}
INSTANTIATE_TEST_CASE_P(/**/, Test_TFLite, dnnBackendsAndTargets());
}} // namespace