Merge pull request #24037 from Abdurrahheem:ash/dev_einsum

Add Support for Einsum Layer #24037

### This PR adds support for the [Einsum Layer](https://pytorch.org/docs/stable/generated/torch.einsum.html) (in progress).

This PR is currently for review only and is not meant to be merged yet. Test cases are located in PR [#1090](https://github.com/opencv/opencv_extra/pull/1090) in opencv_extra.

**DONE**: 
 - [x] 2D-5D GEMM support added (see the usage sketch after the lists below)
 - [x] Matrix transpose support added
 - [x] Reduction type computation, e.g. 'ij->j'
 - [x] Second shape computation during the forward pass

**Next PRs**:
- [ ] Broadcasting reduction "...ii->...i"
- [ ] Add lazy shape deduction. "...ij, ...jk->...ik"
- [ ] Add implicit output computation support. "bij,bjk ->" (output subscripts should be "bik")
- [ ] Add support for CUDA backend 
- [ ] Optimize BatchWiseMultiply

**Later in 5.x version (requires support for 1D matrices)**: 
- [ ] Add 1D vector multiplication support 
- [ ] Inner product "i,i" (problems with 1D shapes)
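
For illustration, a minimal sketch of driving the layer directly through `cv::dnn`, assuming it can be constructed by hand with the same parameters the ONNX importer sets (`equation`, `inputSize`, `outputSize`, `inputShapes<i>`; see `parseEinsum` in the diff below). This is illustrative only, not part of the PR:

```cpp
#include <opencv2/dnn.hpp>
using namespace cv;
using namespace cv::dnn;

int main()
{
    Mat A = Mat::ones(2, 3, CV_32F), B = Mat::ones(3, 4, CV_32F);

    LayerParams lp;
    lp.set("equation", "ij,jk->ik");  // plain GEMM expressed as einsum
    lp.set("inputSize", 2);           // parameter names mirror parseEinsum
    lp.set("outputSize", 1);
    std::vector<int> shapeA = {2, 3}, shapeB = {3, 4};
    lp.set("inputShapes0", DictValue::arrayInt(shapeA.begin(), (int)shapeA.size()));
    lp.set("inputShapes1", DictValue::arrayInt(shapeB.begin(), (int)shapeB.size()));

    Net net;
    net.setInputsNames({"A", "B"});
    int id = net.addLayer("einsum", "Einsum", lp);
    net.connect(0, 0, id, 0);  // first network input  -> layer input 0
    net.connect(0, 1, id, 1);  // second network input -> layer input 1

    net.setInput(A, "A");
    net.setInput(B, "B");
    Mat out = net.forward();   // 2x4 result; every entry equals 3
    return 0;
}
```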

### Pull Request Readiness Checklist

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [x] I agree to contribute to the project under Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [x] The PR is proposed to the proper branch
- [ ] There is a reference to the original bug report and related work
- [x] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
      Patch to opencv_extra has the same branch name.
- [x] The feature is well documented and sample code can be built with the project CMake
Abduragim Shtanchaev, 2023-09-22 12:25:02 +04:00 (committed by GitHub)
parent b51a78d439
commit 865e7cacca
6 changed files with 1234 additions and 3 deletions


@@ -241,6 +241,39 @@ CV__DNN_INLINE_NS_BEGIN
};
/** @brief This function performs array summation based
* on the Einstein summation convention. The function
* allows for concise expressions of various mathematical
* operations using subscripts.
*
* By default, the labels are placed in alphabetical
* order at the end of the output.
* For example:
* if `c = einsum("i,j", a, b)`, then `c[i,j] == a[i]*b[j]`.
* However, if `c = einsum("j,i", a, b)`, then `c[i,j] == a[j]*b[i]`.
* Alternatively, you can control the output order, prevent
* an axis from being summed, or force an axis to be summed,
* by providing indices for the output.
* For example:
* `diag(a)` -> `einsum("ii->i", a)`
* `sum(a, axis=0)` -> `einsum("i...->", a)`
* Subscripts at the beginning and end may be specified
* by putting an ellipsis "..." in the middle.
* For instance, the function `einsum("i...i", a)` takes
* the diagonal of the first and last dimensions of
* the operand, and `einsum("ij...,jk...->ik...")` performs
* the matrix product using the first two indices
* of each operand instead of the last two.
* When there is only one operand, no axes are summed,
* and no output parameter is given, this function returns
* a view into the operand instead of creating a copy.
*/
class CV_EXPORTS EinsumLayer : public Layer
{
public:
static Ptr<EinsumLayer> create(const LayerParams& params);
};
class CV_EXPORTS BaseConvolutionLayer : public Layer
{
public:

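To make the convention above concrete, here is the 'ij->j' reduction from the checklist written out in plain C++ (independent of the layer itself):

```cpp
// einsum("ij->j", a): the index i is dropped from the output,
// so it is summed over.
float a[2][3] = {{1, 2, 3}, {4, 5, 6}};
float c[3] = {0, 0, 0};
for (int i = 0; i < 2; ++i)
    for (int j = 0; j < 3; ++j)
        c[j] += a[i][j];   // c == {5, 7, 9}
```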

@@ -184,6 +184,7 @@ void initializeLayerFactory()
CV_DNN_REGISTER_LAYER_CLASS(LSTM, LSTMLayer);
CV_DNN_REGISTER_LAYER_CLASS(GRU, GRULayer);
CV_DNN_REGISTER_LAYER_CLASS(CumSum, CumSumLayer);
CV_DNN_REGISTER_LAYER_CLASS(Einsum, EinsumLayer);
CV_DNN_REGISTER_LAYER_CLASS(Scatter, ScatterLayer);
CV_DNN_REGISTER_LAYER_CLASS(ScatterND, ScatterNDLayer);
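
The registration above makes the layer constructible from its type string via the layer factory. A minimal sketch (the extra parameters follow what `parseEinsum` stores; the exact set the constructor requires is not shown in this diff):

```cpp
cv::dnn::LayerParams params;
params.set("equation", "ij->ji");  // simple transpose
params.set("inputSize", 1);
params.set("outputSize", 1);
std::vector<int> shape = {3, 4};
params.set("inputShapes0", cv::dnn::DictValue::arrayInt(shape.begin(), (int)shape.size()));

// Factory lookup by the "Einsum" type name registered above.
cv::Ptr<cv::dnn::Layer> layer = cv::dnn::LayerFactory::createLayerInstance("Einsum", params);
```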

(File diff suppressed because it is too large.)


@@ -194,6 +194,7 @@ private:
void parseTile (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
void parseLayerNorm (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
void parseSimpleLayers (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
void parseEinsum (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
// Domain: com.microsoft
// URL: https://github.com/microsoft/onnxruntime/blob/master/docs/ContribOperators.md
@@ -3310,6 +3311,40 @@ void ONNXImporter::parseSimpleLayers(LayerParams& layerParams, const opencv_onnx
addLayer(layerParams, node_proto);
}
void ONNXImporter::parseEinsum(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
{
    std::vector<MatShape> einsumInpShapes;
    for (int j = 0; j < node_proto.input_size(); j++)
    {
        const auto& inputLayerName = node_proto.input(j);
        auto it = outShapes.find(inputLayerName);
        if (it != outShapes.end())
        {
            einsumInpShapes.emplace_back(it->second);
        } else {
            CV_Error(Error::StsAssert, "ERROR: input shape not found");
        }
    }

    CV_CheckFalse(einsumInpShapes.empty(), "ERROR: no input shapes");
    for (int i = 0; i < (int)einsumInpShapes.size(); i++) {
        layerParams.set("inputShapes" + cv::format("%d", i), DictValue::arrayInt(einsumInpShapes[i].begin(), einsumInpShapes[i].size()));
    }

    // Check that the equation is valid
    std::string equation = layerParams.get<std::string>("equation");
    CV_CheckFalse(equation.empty(), "Equation is empty");

    // Save the number of inputs; we need it during layer initialization
    layerParams.set("inputSize", node_proto.input_size());

    // Save the number of outputs; we need it during layer initialization
    layerParams.set("outputSize", node_proto.output_size());

    addLayer(layerParams, node_proto);
}
void ONNXImporter::parseCustomLayer(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
{
const std::string& name = layerParams.name;
@@ -4016,6 +4051,7 @@ void ONNXImporter::buildDispatchMap_ONNX_AI(int opset_version)
dispatch["Sum"] = dispatch["Min"] = dispatch["Max"] = &ONNXImporter::parseElementWise;
dispatch["Where"] = &ONNXImporter::parseElementWise;
dispatch["Range"] = &ONNXImporter::parseRange;
dispatch["Einsum"] = &ONNXImporter::parseEinsum;
std::vector<std::string> simpleLayers{"Acos", "Acosh", "Asin", "Asinh", "Atan", "Atanh", "Ceil", "Celu", "Cos",
"Cosh", "Dropout", "Erf", "Exp", "Floor", "HardSigmoid", "HardSwish",


@@ -111,10 +111,7 @@
"test_dynamicquantizelinear_min_adjusted_expanded",
"test_edge_pad",
"test_einsum_batch_diagonal",
"test_einsum_batch_matmul",
"test_einsum_inner_prod",
"test_einsum_sum",
"test_einsum_transpose",
"test_equal",
"test_equal_bcast",
"test_expand_dim_changed",


@@ -1415,6 +1415,56 @@ TEST_P(Test_ONNX_layers, LSTM_layout_batch)
testONNXModels("lstm_layout_1", npy, 0.005, 0.005, false, false, 3);
}
TEST_P(Test_ONNX_layers, DISABLED_Einsum_1D)
{
testONNXModels("einsum_1d", npy, 0, 0, false, false, 2);
}
TEST_P(Test_ONNX_layers, Einsum_2D)
{
testONNXModels("einsum_2d", npy, 0, 0, false, false, 2);
}
TEST_P(Test_ONNX_layers, Einsum_3D)
{
testONNXModels("einsum_3d", npy, 0, 0, false, false, 2);
}
TEST_P(Test_ONNX_layers, Einsum_4D)
{
testONNXModels("einsum_4d", npy, 0, 0, false, false, 2);
}
TEST_P(Test_ONNX_layers, Einsum_5D)
{
testONNXModels("einsum_5d", npy, 0, 0, false, false, 2);
}
TEST_P(Test_ONNX_layers, DISABLED_Einsum_InnerProduct)
{
testONNXModels("einsum_inner", npy, 0, 0, false, false, 2);
}
TEST_P(Test_ONNX_layers, DISABLED_Einsum_HadamardProduct)
{
testONNXModels("einsum_hadamard", npy, 0, 0, false, false, 2);
}
TEST_P(Test_ONNX_layers, DISABLED_Einsum_Batch_Diagonal)
{
testONNXModels("einsum_batch_diagonal", npy, 0, 0, false, false, 1);
}
TEST_P(Test_ONNX_layers, Einsum_Sum)
{
testONNXModels("einsum_sum", npy, 0, 0, false, false, 1);
}
TEST_P(Test_ONNX_layers, Einsum_transpose)
{
testONNXModels("einsum_transpose", npy, 0, 0, false, false, 1);
}
TEST_P(Test_ONNX_layers, Pad2d_Unfused)
{
testONNXModels("ReflectionPad2d");