Merge pull request #20725 from mologie:fix-dnn-tf-on-arm

* dnn: fix unaligned memory access crash on armv7

  The getTensorContent function would return a Mat pointing to some member of a Protobuf-encoded message. Protobuf does not make any alignment guarantees, which results in a crash on armv7 when loading models while bit 2 is set in /proc/cpu/alignment (or the relevant kernel feature for alignment compatibility is disabled). Any read attempt from the previously unaligned data member would send SIGBUS.

  As a workaround, this commit makes an aligned copy via existing clone functionality in getTensorContent. The unsafe copy=false option is removed.

  Unfortunately, a rather crude hack in PReLUSubgraph in fact writes(!) to the Protobuf message. We limit ourselves to fixing the alignment issues in this commit, and add getTensorContentRefUnaligned to cover the write case with a safe memcpy. A FIXME marks the issue.

* dnn: reduce amount of .clone() calls

* dnn: update FIXME comment

Co-authored-by: Alexander Alekhin <alexander.a.alekhin@gmail.com>
2025-07-24 14:06:27 +08:00 · 2021-10-06 18:41:05 +02:00 · 2021-10-06 18:41:05 +02:00 · a3d7811f24
commit a3d7811f24
parent 755e0143fb
3 changed files with 42 additions and 5 deletions
--- a/modules/dnn/src/tensorflow/tf_graph_simplifier.cpp
+++ b/modules/dnn/src/tensorflow/tf_graph_simplifier.cpp
@ -19,6 +19,16 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
 using ::google::protobuf::RepeatedField;
 using ::google::protobuf::MapPair;

+static Mat getTensorContentRef_(const tensorflow::TensorProto& tensor);
+static inline
+bool isAlignedMat(const Mat& m)
+{
+    int depth = m.depth();
+    int alignment = CV_ELEM_SIZE1(depth);
+    return (((size_t)m.data) & (alignment - 1)) == 0;
+}
+
+
 class TFNodeWrapper : public ImportNodeWrapper
 {
 public:
@ -719,8 +729,19 @@ public:
    {
        if (!negativeScales)
        {
-            Mat scales = getTensorContent(inputNodes[1]->attr().at("value").tensor(), /*copy*/false);
-            scales *= -1;
+            Mat scalesRef = getTensorContentRef_(inputNodes[1]->attr().at("value").tensor());
+            // FIXME: This breaks the const guarantees of tensor() by writing to scalesRef
+            if (isAlignedMat(scalesRef))
+            {
+                scalesRef *= -1;
+            }
+            else
+            {
+                Mat scales = scalesRef.clone() * -1;
+                CV_Assert(scalesRef.isContinuous());
+                CV_Assert(scales.isContinuous());
+                memcpy(scalesRef.data, scales.data, scales.total() * scales.elemSize());
+            }
        }
    }

@ -832,7 +853,8 @@ void RemoveIdentityOps(tensorflow::GraphDef& net)
    }
 }

-Mat getTensorContent(const tensorflow::TensorProto &tensor, bool copy)
+// NB: returned Mat::data pointer may be unaligned
+Mat getTensorContentRef_(const tensorflow::TensorProto& tensor)
 {
    const std::string& content = tensor.tensor_content();
    Mat m;
@ -904,7 +926,18 @@ Mat getTensorContent(const tensorflow::TensorProto &tensor, bool copy)
            CV_Error(Error::StsError, "Tensor's data type is not supported");
            break;
    }
-    return copy ? m.clone() : m;
+
+    return m;
+}
+
+Mat getTensorContent(const tensorflow::TensorProto& tensor, bool forceCopy)
+{
+    // If necessary clone m to have aligned data pointer
+    Mat m = getTensorContentRef_(tensor);
+    if (forceCopy || !isAlignedMat(m))
+        return m.clone();
+    else
+        return m;
 }

 void releaseTensor(tensorflow::TensorProto* tensor)
--- a/modules/dnn/src/tensorflow/tf_graph_simplifier.hpp
+++ b/modules/dnn/src/tensorflow/tf_graph_simplifier.hpp
@ -21,7 +21,7 @@ void RemoveIdentityOps(tensorflow::GraphDef& net);

 void simplifySubgraphs(tensorflow::GraphDef& net);

-Mat getTensorContent(const tensorflow::TensorProto &tensor, bool copy = true);
+Mat getTensorContent(const tensorflow::TensorProto& tensor, bool forceCopy = true);

 void releaseTensor(tensorflow::TensorProto* tensor);

--- a/modules/dnn/src/tensorflow/tf_importer.cpp
+++ b/modules/dnn/src/tensorflow/tf_importer.cpp
@ -122,8 +122,10 @@ void parseTensor(const tensorflow::TensorProto &tensor, Mat &dstBlob)
    }

    dstBlob.create(shape, CV_32F);
+    CV_Assert(dstBlob.isContinuous());

    Mat tensorContent = getTensorContent(tensor, /*no copy*/false);
+    CV_Assert(tensorContent.isContinuous());
    int size = tensorContent.total();
    CV_Assert(size == (int)dstBlob.total());

@ -2522,8 +2524,10 @@ void TFImporter::kernelFromTensor(const tensorflow::TensorProto &tensor, Mat &ds
    out_c = shape[0]; input_c = shape[1];

    dstBlob.create(shape, CV_32F);
+    CV_Assert(dstBlob.isContinuous());

    Mat tensorContent = getTensorContent(tensor, /*no copy*/false);
+    CV_Assert(tensorContent.isContinuous());
    int size = tensorContent.total();
    CV_Assert(size == (int)dstBlob.total());