diff --git a/modules/dnn/perf/perf_einsum.cpp b/modules/dnn/perf/perf_einsum.cpp
index c3706d3153..bad9d956be 100644
--- a/modules/dnn/perf/perf_einsum.cpp
+++ b/modules/dnn/perf/perf_einsum.cpp
@@ -11,19 +11,16 @@ struct EinsumParams {
     int outputSize;
     std::string equation;
     std::vector<std::vector<int>> einsumInpShapes;
-    EinsumParams(std::string equation_, int inputSize_, int outputSize_, std::vector<std::vector<int>> einsumInpShapes_ = std::vector<std::vector<int>>())
+    EinsumParams(std::string equation_, std::vector<std::vector<int>> einsumInpShapes_ = std::vector<std::vector<int>>())
     {
-        inputSize = inputSize_;
-        outputSize = outputSize_;
+        inputSize = einsumInpShapes_.size();
         equation = equation_;
         einsumInpShapes = einsumInpShapes_;
     }
 };
 
 static inline void PrintTo(const EinsumParams& params, ::std::ostream* os) {
-    (*os) << "Eqiation=" << params.equation << ", "
-          << "InputSize=" << params.inputSize << ", "
-          << "OutputSize=" << params.outputSize << ", ";
+    (*os) << "Equation=" << params.equation << " ";
 
     (*os) << "InputShape={";
     for(int i = 0; i < params.einsumInpShapes.size(); i++)
@@ -41,22 +38,22 @@ static inline void PrintTo(const EinsumParams& params, ::std::ostream* os) {
 // test cases
 static const EinsumParams testEinsumConfigs[] = {
     // TODO: Add tests with one input after ellips merge
-    {"ij, jk -> ik", 2, 1, {{2, 3}, {3, 2}}},
-    {"ij, jk -> ik", 2, 1, {{20, 30}, {30, 20}}},
-    {"ij, jk -> ik", 2, 1, {{113, 127}, {127, 113}}},
+    {"ij, jk -> ik", {{2, 3}, {3, 2}}},
+    {"ij, jk -> ik", {{20, 30}, {30, 20}}},
+    {"ij, jk -> ik", {{113, 127}, {127, 113}}},
 
-    {"imkj, injs -> imnks", 2, 1, {{1, 4, 7, 9}, {1, 5, 9, 8}}},
-    {"imkj, injs -> imnks", 2, 1, {{1, 4, 70, 90}, {1, 5, 90, 80}}},
-    {"imkj, injs -> imnks", 2, 1, {{1, 4, 73, 91}, {1, 5, 91, 57}}},
+    {"imkj, injs -> imnks", {{1, 4, 7, 9}, {1, 5, 9, 8}}},
+    {"imkj, injs -> imnks", {{1, 4, 70, 90}, {1, 5, 90, 80}}},
+    {"imkj, injs -> imnks", {{1, 4, 73, 91}, {1, 5, 91, 57}}},
 
-    {"ij -> i", 1, 1, {{30, 40}}},
-    {"ij -> i", 1, 1, {{113, 374}}},
+    {"ij -> i", {{30, 40}}},
+    {"ij -> i", {{113, 374}}},
 
-    {"...ij -> ...i", 1, 1, {{30, 40}}},
-    {"...ij -> ...i", 1, 1, {{113, 374}}},
+    {"...ij -> ...i", {{30, 40}}},
+    {"...ij -> ...i", {{113, 374}}},
 
-    {"...ij, ...jk -> ...ik", 2, 1, {{40, 50}, {50, 80}}},
-    {"...ij, ...jk -> ...ik", 2, 1, {{47, 51}, {51, 83}}},
+    {"...ij, ...jk -> ...ik", {{40, 50}, {50, 80}}},
+    {"...ij, ...jk -> ...ik", {{47, 51}, {51, 83}}},
 };
 
 class Layer_Einsum: public TestBaseWithParam<EinsumParams> {};
@@ -68,7 +65,7 @@ PERF_TEST_P_(Layer_Einsum, einsum) {
     lp.name = "testEinsum";
     lp.set("equation", params.equation);
     lp.set("inputSize", params.inputSize);
-    lp.set("outputSize", params.outputSize);
+    lp.set("outputSize", 1);
 
     CV_CheckFalse(params.einsumInpShapes.empty(), "ERROR no inputs shapes provided");
 
@@ -79,38 +76,27 @@ PERF_TEST_P_(Layer_Einsum, einsum) {
     Net net;
     std::vector<Mat> inputs;
     std::vector<String> input_names;
-    if (params.inputSize == 1){
+    int id = net.addLayer(lp.name, lp.type, lp);
+    for (int i = 0; i < params.inputSize; ++i) {
 
         // create inputs
-        inputs.emplace_back(Mat(params.einsumInpShapes[0].size(), params.einsumInpShapes[0].data(), CV_32FC1));
+        inputs.emplace_back(Mat(params.einsumInpShapes[i].size(), params.einsumInpShapes[i].data(), CV_32FC1));
 
-        int id = net.addLayerToPrev(lp.name, lp.type, lp);
-        net.connect(0, 0, id, 0);
+        // connect each input to the layer
+        net.connect(0, i, id, i);
 
-        input_names.emplace_back("input1");
-
-    } else {
-
-        // create inputs
-        inputs.emplace_back(Mat(params.einsumInpShapes[0].size(), params.einsumInpShapes[0].data(), CV_32FC1));
-        inputs.emplace_back(Mat(params.einsumInpShapes[1].size(), params.einsumInpShapes[1].data(), CV_32FC1));
-
-        int id = net.addLayerToPrev(lp.name, lp.type, lp);
-        net.connect(0, 0, id, 0);
-        net.connect(0, 1, id, 1);
-
-        input_names.emplace_back("input1");
-        input_names.emplace_back("input2");
+        // create input names dynamically, assuming input naming follows a consistent pattern
+        input_names.emplace_back("input" + std::to_string(i + 1));
     }
 
     //warm up
+    std::vector<Mat> outputs;
     net.setInputsNames(input_names);
     for (int i = 0; i < input_names.size(); i++){
         net.setInput(inputs[i], input_names[i]);
     }
-    Mat out = net.forward();
+    net.forward(outputs, "testEinsum");
 
-    std::vector<Mat> outputs;
     TEST_CYCLE()
     {
         net.forward(outputs, "testEinsum");
diff --git a/modules/dnn/src/layers/einsum_layer.cpp b/modules/dnn/src/layers/einsum_layer.cpp
index baf4297c0e..c7f9aaca06 100644
--- a/modules/dnn/src/layers/einsum_layer.cpp
+++ b/modules/dnn/src/layers/einsum_layer.cpp
@@ -6,6 +6,7 @@
 #include <opencv2/dnn/shape_utils.hpp>
 #include "../precomp.hpp"
 #include "layers_common.hpp"
+#include "cpu_kernels/fast_gemm.hpp"
 
 namespace cv
 {
@@ -32,111 +33,6 @@ static bool IsTransposeReshapeForEinsum(const std::vector<size_t>& perm,
     return true;
 }
 
-static Mat batchwiseMatMul(
-    const Mat& input1,
-    const MatShape& input1ShapeOverride,
-    const Mat& input2,
-    const MatShape& input2ShapeOverride)
-{
-    // Sanity checks before the actual MatMul
-    CV_CheckType(input1.type(), input2.type(), "Data types of the inputs must match for MatMul");
-    CV_CheckEQ(input1ShapeOverride.size(), (size_t) 3, "Only 1 batch dimension is allowed for MatMul");
-    CV_CheckEQ(input2ShapeOverride.size(), (size_t) 3, "Only 1 batch dimension is allowed for MatMul");
-    CV_CheckEQ((size_t) input1ShapeOverride[0], (size_t) input2ShapeOverride[0], "Batch dimension should match for MatMul;");
-    CV_CheckEQ((size_t) input1ShapeOverride[2], (size_t) input2ShapeOverride[1], "Incompatible matrix dimensions for matMul");
-
-    size_t batches = input1ShapeOverride[0];
-    size_t M = input1ShapeOverride[1];
-    size_t K = input1ShapeOverride[2];
-    size_t N = input2ShapeOverride[2];
-
-    std::vector<Mat> output;
-    if (batches > 1)
-    {
-        Mat reshapedInput1 = input1;
-        Mat reshapedInput2 = input2;
-
-        // input1 should of size MxK
-        // check if input1 needs reshape, if need reshape
-        if (input1.size[0] != M || input1.size[1] != K)
-        {
-            int shape[] = {static_cast<int>(batches), static_cast<int>(M), static_cast<int>(K)};
-            reshapedInput1 = input1.reshape(1, 3, shape);
-        }
-
-        // input2 should be of size KxN
-        // check if input2 needs reshape, if needs reshape
-        if (input2.size[0] != K || input2.size[1] != N)
-        {
-            int shape[] = {static_cast<int>(batches), static_cast<int>(K), static_cast<int>(N)};
-            reshapedInput2 = input2.reshape(1, 3, shape);
-        }
-
-        for (size_t i=0; i < batches; i++)
-        {
-            std::vector<cv::Range> ranges1 = {cv::Range(i, i+1)};
-            for (int j = 1; j < reshapedInput1.dims; j++)
-                ranges1.emplace_back(cv::Range::all());
-
-            Mat part1 = reshapedInput1(ranges1);
-            int shape[] = {static_cast<int>(M), static_cast<int>(K)};
-            part1 = part1.reshape(1, sizeof(shape)/sizeof(shape[0]), shape);
-
-            std::vector<cv::Range> ranges2 = {cv::Range(i, i+1)};
-            for (int j = 1; j < reshapedInput2.dims; j++)
-                ranges2.emplace_back(cv::Range::all());
-
-            Mat part2 = reshapedInput2(ranges2);
-            int shape2[] = {static_cast<int>(K), static_cast<int>(N)};
-            part2 = part2.reshape(1, sizeof(shape2)/sizeof(shape2[0]), shape2);
-
-            Mat tmp_output;
-            cv::gemm(part1, part2, 1.0, cv::Mat(), 1.0, tmp_output);
-            int newShape[] = {1, static_cast<int>(M), static_cast<int>(N)};
-            tmp_output = tmp_output.reshape(1, sizeof(newShape)/sizeof(newShape[0]), newShape);
-
-            output.emplace_back(tmp_output);
-        }
-
-    } else {
-
-        Mat reshapedInput1 = input1;
-        Mat reshapedInput2 = input2;
-
-        // input1 should of size MxK
-        // check if input1 needs reshape, if need reshape
-        if (input1.dims > 2 || input1.size[0] != M || input1.size[1] != K)
-        {
-            int shape[] = {static_cast<int>(M), static_cast<int>(K)};
-            reshapedInput1 = input1.reshape(1, 2, shape);
-        }
-
-        // input2 should be of size KxN
-        // check if input2 needs reshape, if needs reshape
-        if (input2.dims > 2 || input2.size[0] != K || input2.size[1] != N)
-        {
-            int shape2[] = {static_cast<int>(K), static_cast<int>(N)};
-            reshapedInput2 = input2.reshape(1, 2, shape2);
-        }
-
-        Mat tmp_output;
-        cv::gemm(reshapedInput1, reshapedInput2, 1.0, cv::Mat(), 1.0, tmp_output);
-
-        int newShape[] = {1, static_cast<int>(M), static_cast<int>(N)};
-        tmp_output = tmp_output.reshape(1, sizeof(newShape)/sizeof(newShape[0]), newShape);
-        output.emplace_back(tmp_output);
-
-    }
-
-    int outputDim[] = {static_cast<int>(output.size()), static_cast<int>(M), static_cast<int>(N)};
-    Mat output_buffer = Mat::zeros(3, outputDim, CV_32F);
-
-    for (size_t i = 0; i < output.size(); i++) {
-        Mat output_slice = output_buffer.row(i);
-        output[i].copyTo(output_slice);
-    }
-    return output_buffer;
-};
 
 static Mat Transpose(
     const Mat& input,
@@ -452,6 +348,8 @@ public:
 
     // The number of dimensions that are encompassed by an "ellipsis" - "...".
     size_t numOfEllipsisDims = 0;
+    // Backend for fastgemm
+    FastGemmOpt opt;
 
     void parseEquation(String equation);
     void processEquation(const std::vector<Mat>& inputs);
@@ -469,7 +367,12 @@ public:
         const MatShape& reduceDims,
        bool isFinalPair
     );
-
+    Mat batchwiseMatMul(
+        const Mat& input1,
+        const MatShape& input1ShapeOverride,
+        const Mat& input2,
+        const MatShape& input2ShapeOverride
+    );
 
     // constructor
     LayerEinsumImpl(const LayerParams& params)
@@ -491,6 +394,7 @@ public:
             einsumInpShapes.emplace_back(shape);
         }
 
+        opt.init();
         // Maintains a mapping between input indices and their corresponding subscript labels for each input
         inputSubscriptIndices.reserve(numInputs);
@@ -1389,6 +1293,112 @@ Mat LayerEinsumImpl::pairwiseOperandProcess(
     return output;
 };
 
+Mat LayerEinsumImpl::batchwiseMatMul(
+    const Mat& input1,
+    const MatShape& input1ShapeOverride,
+    const Mat& input2,
+    const MatShape& input2ShapeOverride)
+{
+
+    // Sanity checks before the actual MatMul
+    CV_CheckType(input1.type(), input2.type(), "Data types of the inputs must match for MatMul");
+    CV_CheckEQ(input1ShapeOverride.size(), (size_t) 3, "Only 1 batch dimension is allowed for MatMul");
+    CV_CheckEQ(input2ShapeOverride.size(), (size_t) 3, "Only 1 batch dimension is allowed for MatMul");
+    CV_CheckEQ((size_t) input1ShapeOverride[0], (size_t) input2ShapeOverride[0], "Batch dimension should match for MatMul;");
+    CV_CheckEQ((size_t) input1ShapeOverride[2], (size_t) input2ShapeOverride[1], "Incompatible matrix dimensions for matMul");
+
+    int batches = input1ShapeOverride[0];
+    int M = input1ShapeOverride[1];
+    int K = input1ShapeOverride[2];
+    int N = input2ShapeOverride[2];
+
+    std::vector<Mat> output;
+    if (batches > 1)
+    {
+        Mat reshapedInput1 = input1;
+        Mat reshapedInput2 = input2;
+
+        // input1 should of size MxK
+        // check if input1 needs reshape, if need reshape
+        if (input1.size[0] != M || input1.size[1] != K)
+        {
+            int shape[] = {batches, M, K};
+            reshapedInput1 = input1.reshape(1, 3, shape);
+        }
+
+        // input2 should be of size KxN
+        // check if input2 needs reshape, if needs reshape
+        if (input2.size[0] != K || input2.size[1] != N)
+        {
+            int shape[] = {batches, K, N};
+            reshapedInput2 = input2.reshape(1, 3, shape);
+        }
+
+        for (size_t i=0; i < batches; i++)
+        {
+            std::vector<cv::Range> ranges1 = {cv::Range(i, i+1)};
+            for (int j = 1; j < reshapedInput1.dims; j++)
+                ranges1.emplace_back(cv::Range::all());
+
+            Mat part1 = reshapedInput1(ranges1);
+            int shape[] = {M, K};
+            part1 = part1.reshape(1, sizeof(shape)/sizeof(shape[0]), shape);
+
+            std::vector<cv::Range> ranges2 = {cv::Range(i, i+1)};
+            for (int j = 1; j < reshapedInput2.dims; j++)
+                ranges2.emplace_back(cv::Range::all());
+
+            Mat part2 = reshapedInput2(ranges2);
+            int shape2[] = {K, N};
+            part2 = part2.reshape(1, sizeof(shape2)/sizeof(shape2[0]), shape2);
+
+            Mat tmp_output(M, N, part1.type());
+            fastGemm(false, false, 1.0, part1, part2, 0.0, tmp_output, opt);
+            int newShape[] = {1, M, N};
+            tmp_output = tmp_output.reshape(1, sizeof(newShape)/sizeof(newShape[0]), newShape);
+
+            output.emplace_back(tmp_output);
+        }
+
+    } else {
+
+        Mat reshapedInput1 = input1;
+        Mat reshapedInput2 = input2;
+
+        // input1 should of size MxK
+        // check if input1 needs reshape, if need reshape
+        if (input1.dims > 2 || input1.size[0] != M || input1.size[1] != K)
+        {
+            int shape[] = {M, K};
+            reshapedInput1 = input1.reshape(1, 2, shape);
+        }
+
+        // input2 should be of size KxN
+        // check if input2 needs reshape, if needs reshape
+        if (input2.dims > 2 || input2.size[0] != K || input2.size[1] != N)
+        {
+            int shape2[] = {K, N};
+            reshapedInput2 = input2.reshape(1, 2, shape2);
+        }
+
+        Mat tmp_output(M, N, reshapedInput1.type());
+        fastGemm(false, false, 1.0, reshapedInput1, reshapedInput2, 0.0, tmp_output, opt);
+
+        int newShape[] = {1, M, N};
+        tmp_output = tmp_output.reshape(1, sizeof(newShape)/sizeof(newShape[0]), newShape);
+        output.emplace_back(tmp_output);
+
+    }
+
+    int outputDim[] = {static_cast<int>(output.size()), M, N};
+    Mat output_buffer = Mat::zeros(3, outputDim, CV_32F);
+
+    for (size_t i = 0; i < output.size(); i++) {
+        Mat output_slice = output_buffer.row(i);
+        output[i].copyTo(output_slice);
+    }
+    return output_buffer;
+};
 
 Ptr<EinsumLayer> EinsumLayer::create(const LayerParams& params)
 {
     return makePtr<LayerEinsumImpl>(params);
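
Note (not part of the patch): the batched product that batchwiseMatMul computes can be reproduced with the public OpenCV API. The standalone sketch below is illustrative only; since fast_gemm.hpp is a private cpu_kernels header, cv::gemm stands in for fastGemm, and all names in it (A, B, C, shapeA, etc.) are local to the example rather than taken from the patch.

    // Sketch: (batches x M x K) * (batches x K x N) -> (batches x M x N),
    // the per-batch contraction batchwiseMatMul performs. cv::gemm is used
    // here in place of the internal fastGemm kernel (assumed equivalent math).
    #include <opencv2/core.hpp>
    #include <iostream>

    int main()
    {
        const int batches = 2, M = 3, K = 4, N = 5;
        int shapeA[] = {batches, M, K};
        int shapeB[] = {batches, K, N};
        int shapeC[] = {batches, M, N};
        cv::Mat A(3, shapeA, CV_32F), B(3, shapeB, CV_32F);
        cv::Mat C = cv::Mat::zeros(3, shapeC, CV_32F);
        A.setTo(cv::Scalar::all(1.0f));  // simple fill so the result is easy to verify
        B.setTo(cv::Scalar::all(0.5f));

        for (int i = 0; i < batches; i++)
        {
            // 2D headers over the i-th batch slice of each tensor; no data is copied
            cv::Mat a(M, K, CV_32F, A.ptr<float>(i));
            cv::Mat b(K, N, CV_32F, B.ptr<float>(i));
            cv::Mat c(M, N, CV_32F, C.ptr<float>(i));
            cv::gemm(a, b, 1.0, cv::Mat(), 0.0, c);  // c = a * b
        }

        // every element of C equals K * 1.0f * 0.5f = 2
        std::cout << "C(0,0,0) = " << C.at<float>(0, 0, 0) << std::endl;
        return 0;
    }

The per-slice loop mirrors the batches > 1 branch of the patch; the patch additionally routes each 2D product through fastGemm with a reusable FastGemmOpt backend (opt.init() in the constructor) instead of allocating through cv::gemm.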