Merge pull request #25595 from Abdurrahheem:ash/01D-einsum-test

Add support for scalar and matrix multiplication in einsum #25595

### Pull Request Readiness Checklist

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [x] I agree to contribute to the project under Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [x] The PR is proposed to the proper branch
- [x] There is a reference to the original bug report and related work
- [x] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
      Patch to opencv_extra has the same branch name.
- [x] The feature is well documented and sample code can be built with the project CMake
This commit is contained in:
Abduragim Shtanchaev 2024-05-21 14:36:12 +04:00 committed by GitHub
parent 9238eb2ab2
commit f676cb3c62
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 44 additions and 32 deletions

View File

@ -678,7 +678,7 @@ static void arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst,
"(where arrays have the same size and the same number of channels), "
"nor 'array op scalar', nor 'scalar op array'" );
haveScalar = true;
CV_Assert(type2 == CV_64F && (sz2.height == 1 || sz2.height == 4));
CV_Assert((type2 == CV_64F || type2 == CV_32F) && (sz2.height == 1 || sz2.height == 4));
if (!muldiv)
{

View File

@ -615,11 +615,12 @@ void LayerEinsumImpl::preProcessInputs(InputArrayOfArrays& inputs_arr)
// variable to hold processed version of the original input
MatShape input_dims = shape(input);
if (inputSubscriptIndices.empty()){
if (input_dims.empty()){
homogenizedInputDims.emplace_back(MatShape(numLetterIndices, 1));
++inputIter;
continue;
}
const auto& currSubscriptIndices = inputSubscriptIndices[inputIter];
// There should be subscript index (subscript label) for each dim of the input
@ -877,8 +878,10 @@ void LayerEinsumImpl::processEquation(const std::vector<MatShape>& inputs)
if (lhs_eq_tokens.empty() || (lhs_eq_tokens.size() == 1 && lhs_eq_tokens[0].empty() && lhs_eq == ",") ) {
return;
}
CV_CheckEQ(static_cast<int>(lhs_eq_tokens.size()), num_input_tensors,
"Number of input tensors does not match the number of subscripts in the input equation");
// if we have only one token and two inputs, let's skip the check
if (lhs_eq_tokens.size() > 1)
CV_CheckEQ(static_cast<int>(lhs_eq_tokens.size()), num_input_tensors,
"Number of input tensors does not match the number of subscripts in the input equation");
int inputIdx = 0;
for (const auto& token : lhs_eq_tokens)
@ -1370,7 +1373,9 @@ Mat LayerEinsumImpl::batchwiseMatMul(
}
output = Mat(M, N, reshapedInput1.type());
if (shape(reshapedInput1).empty() && shape(reshapedInput2).empty())
if ((shape(reshapedInput1).empty() && shape(reshapedInput2).empty()) ||
(shape(reshapedInput1).empty() && !shape(reshapedInput2).empty()) ||
(!shape(reshapedInput1).empty() && shape(reshapedInput2).empty()))
{
output = reshapedInput1.mul(reshapedInput2); // fastGemm does not support 0D * 0D multiplication
} else {

View File

@ -824,12 +824,13 @@ INSTANTIATE_TEST_CASE_P(/*nothing*/, Layer_Tile_Test,
std::vector<int>({2, 2})
));
typedef testing::TestWithParam<tuple<std::vector<int>, std::string>> Layer_Einsum_Test;
typedef testing::TestWithParam<tuple<std::vector<int>, std::vector<int>, std::string>> Layer_Einsum_Test;
TEST_P(Layer_Einsum_Test, Accuracy_01D)
{
auto tup = GetParam();
std::vector<int> input_shape = std::get<0>(tup);
std::string equation = std::get<1>(tup);
std::vector<int> input_shape1 = std::get<0>(tup);
std::vector<int> input_shape2 = std::get<1>(tup);
std::string equation = std::get<2>(tup);
LayerParams lp;
lp.type = "Einsum";
@ -837,13 +838,13 @@ TEST_P(Layer_Einsum_Test, Accuracy_01D)
lp.set("equation", equation);
lp.set("inputSize", 2);
lp.set("outputSize", 1);
lp.set("inputShapes0", DictValue::arrayInt(&input_shape[0], input_shape.size()));
lp.set("inputShapes1", DictValue::arrayInt(&input_shape[0], input_shape.size()));
lp.set("inputShapes0", DictValue::arrayInt(&input_shape1[0], input_shape1.size()));
lp.set("inputShapes1", DictValue::arrayInt(&input_shape2[0], input_shape2.size()));
Ptr<Layer> layer = EinsumLayer::create(lp);
cv::Mat input1(input_shape.size(), input_shape.data(), CV_32F);
cv::Mat input2(input_shape.size(), input_shape.data(), CV_32F);
cv::Mat input1(input_shape1.size(), input_shape1.data(), CV_32F);
cv::Mat input2(input_shape2.size(), input_shape2.data(), CV_32F);
cv::randn(input1, 0.0, 1.0); cv::randn(input2, 0.0, 1.0);
std::vector<Mat> inputs = {input1, input2};
@ -854,27 +855,29 @@ TEST_P(Layer_Einsum_Test, Accuracy_01D)
// create output_ref to compare with outputs
cv::Mat output_ref;
int size[] = {1};
if (equation == ",->"){
if(equation == ",->" || equation == "i,->i" || equation == ",i->i" || equation == "ij,->ij"){
output_ref = input1.mul(input2);
}else if (equation == "i, i->i"){
if (equation == ",i->i")
output_ref = output_ref.reshape(1, 1, size);
} else if (equation == "i,i->i"){
output_ref = input1.mul(input2);
} else if (equation == "i, i->"){
} else if (equation == "i,i->"){
output_ref = input1.mul(input2);
cv::Scalar sum = cv::sum(output_ref);
output_ref = cv::Mat(0, nullptr, CV_32F, sum[0]);
} else if (equation == "ij, ij->ij"){
} else if (equation == "ij,ij->ij"){
output_ref = input1.mul(input2);
} else if (equation == "ij, ij->i"){
} else if (equation == "ij,ij->i"){
output_ref = input1.mul(input2);
if (input_shape[0] == 1){
if (input_shape1[0] == 1){
cv::Scalar sum = cv::sum(output_ref);
output_ref = cv::Mat(1, size, CV_32F, sum[0]);
} else if (input_shape[1] == 1){
size[0] = input_shape[0];
} else if (input_shape1[1] == 1){
size[0] = input_shape1[0];
output_ref = output_ref.reshape(1, 1, size);
} else {
cv::reduce(output_ref, output_ref, 1, cv::REDUCE_SUM, CV_32F);
size[0] = input_shape[0];
size[0] = input_shape1[0];
output_ref = output_ref.reshape(1, 1, size);
}
} else {
@ -886,16 +889,20 @@ TEST_P(Layer_Einsum_Test, Accuracy_01D)
}
INSTANTIATE_TEST_CASE_P(/*nothing*/, Layer_Einsum_Test, testing::Values(
std::make_tuple(std::vector<int>({}), std::string(",->")),
std::make_tuple(std::vector<int>({1}), std::string("i, i->i")),
std::make_tuple(std::vector<int>({1}), std::string("i, i->")),
std::make_tuple(std::vector<int>({4}), std::string("i, i->i")),
std::make_tuple(std::vector<int>({4}), std::string("i, i->")),
std::make_tuple(std::vector<int>({1, 4}), std::string("ij, ij->ij")),
std::make_tuple(std::vector<int>({4, 1}), std::string("ij, ij->ij")),
std::make_tuple(std::vector<int>({1, 4}), std::string("ij, ij->i")),
std::make_tuple(std::vector<int>({4, 1}), std::string("ij, ij->i")),
std::make_tuple(std::vector<int>({4, 4}), std::string("ij, ij->i"))
std::make_tuple(std::vector<int>({}), std::vector<int>({}), ",->"),
std::make_tuple(std::vector<int>({1}), std::vector<int>({}), "i,->i"),
std::make_tuple(std::vector<int>({}), std::vector<int>({1}), ",i->i"),
std::make_tuple(std::vector<int>({4, 1}), std::vector<int>({}), "ij,->ij"),
// std::make_tuple(std::vector<int>({}), std::vector<int>({4, 1}), ",ij->ij"), // mul function of arithm_op cannot handle cases with different number of channels
std::make_tuple(std::vector<int>({1}), std::vector<int>({1}), "i,i->i"),
std::make_tuple(std::vector<int>({1}), std::vector<int>({1}), "i,i->"),
std::make_tuple(std::vector<int>({4}), std::vector<int>({4}), "i,i->i"),
std::make_tuple(std::vector<int>({4}), std::vector<int>({4}), "i,i->"),
std::make_tuple(std::vector<int>({1, 4}), std::vector<int>({1, 4}), "ij,ij->ij"),
std::make_tuple(std::vector<int>({4, 1}), std::vector<int>({4, 1}), "ij,ij->ij"),
std::make_tuple(std::vector<int>({1, 4}), std::vector<int>({1, 4}), "ij,ij->i"),
std::make_tuple(std::vector<int>({4, 1}), std::vector<int>({4, 1}), "ij,ij->i"),
std::make_tuple(std::vector<int>({4, 4}), std::vector<int>({4, 4}), "ij,ij->i")
));