mirror of
https://github.com/opencv/opencv.git
synced 2025-06-07 09:25:45 +08:00
Merge pull request #24509 from Abdurrahheem:ash/dev_einsum_fast_gemm
Fast gemm for einsum #24509

## This PR adds performance tests for the Einsum layer with FastGemm. See below for the results of the performance tests on different inputs.

### Pull Request Readiness Checklist

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [x] I agree to contribute to the project under Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [x] The PR is proposed to the proper branch
- [ ] There is a reference to the original bug report and related work
- [x] There is accuracy test, performance test and test data in opencv_extra repository, if applicable. Patch to opencv_extra has the same branch name.
- [x] The feature is well documented and sample code can be built with the project CMake
This commit is contained in:
parent
83d70b0f36
commit
8c10545d3c
@ -11,19 +11,16 @@ struct EinsumParams {
|
||||
int outputSize;
|
||||
std::string equation;
|
||||
std::vector<MatShape> einsumInpShapes;
|
||||
EinsumParams(std::string equation_, int inputSize_, int outputSize_, std::vector<MatShape> einsumInpShapes_ = std::vector<MatShape>())
|
||||
EinsumParams(std::string equation_, std::vector<MatShape> einsumInpShapes_ = std::vector<MatShape>())
|
||||
{
|
||||
inputSize = inputSize_;
|
||||
outputSize = outputSize_;
|
||||
inputSize = einsumInpShapes_.size();
|
||||
equation = equation_;
|
||||
einsumInpShapes = einsumInpShapes_;
|
||||
}
|
||||
};
|
||||
|
||||
static inline void PrintTo(const EinsumParams& params, ::std::ostream* os) {
|
||||
(*os) << "Eqiation=" << params.equation << ", "
|
||||
<< "InputSize=" << params.inputSize << ", "
|
||||
<< "OutputSize=" << params.outputSize << ", ";
|
||||
(*os) << "Equation=" << params.equation << " ";
|
||||
|
||||
(*os) << "InputShape={";
|
||||
for(int i = 0; i < params.einsumInpShapes.size(); i++)
|
||||
@ -41,22 +38,22 @@ static inline void PrintTo(const EinsumParams& params, ::std::ostream* os) {
|
||||
// test cases
|
||||
static const EinsumParams testEinsumConfigs[] = {
|
||||
// TODO: Add tests with one input after the ellipsis merge
|
||||
{"ij, jk -> ik", 2, 1, {{2, 3}, {3, 2}}},
|
||||
{"ij, jk -> ik", 2, 1, {{20, 30}, {30, 20}}},
|
||||
{"ij, jk -> ik", 2, 1, {{113, 127}, {127, 113}}},
|
||||
{"ij, jk -> ik", {{2, 3}, {3, 2}}},
|
||||
{"ij, jk -> ik", {{20, 30}, {30, 20}}},
|
||||
{"ij, jk -> ik", {{113, 127}, {127, 113}}},
|
||||
|
||||
{"imkj, injs -> imnks", 2, 1, {{1, 4, 7, 9}, {1, 5, 9, 8}}},
|
||||
{"imkj, injs -> imnks", 2, 1, {{1, 4, 70, 90}, {1, 5, 90, 80}}},
|
||||
{"imkj, injs -> imnks", 2, 1, {{1, 4, 73, 91}, {1, 5, 91, 57}}},
|
||||
{"imkj, injs -> imnks", {{1, 4, 7, 9}, {1, 5, 9, 8}}},
|
||||
{"imkj, injs -> imnks", {{1, 4, 70, 90}, {1, 5, 90, 80}}},
|
||||
{"imkj, injs -> imnks", {{1, 4, 73, 91}, {1, 5, 91, 57}}},
|
||||
|
||||
{"ij -> i", 1, 1, {{30, 40}}},
|
||||
{"ij -> i", 1, 1, {{113, 374}}},
|
||||
{"ij -> i", {{30, 40}}},
|
||||
{"ij -> i", {{113, 374}}},
|
||||
|
||||
{"...ij -> ...i", 1, 1, {{30, 40}}},
|
||||
{"...ij -> ...i", 1, 1, {{113, 374}}},
|
||||
{"...ij -> ...i", {{30, 40}}},
|
||||
{"...ij -> ...i", {{113, 374}}},
|
||||
|
||||
{"...ij, ...jk -> ...ik", 2, 1, {{40, 50}, {50, 80}}},
|
||||
{"...ij, ...jk -> ...ik", 2, 1, {{47, 51}, {51, 83}}},
|
||||
{"...ij, ...jk -> ...ik", {{40, 50}, {50, 80}}},
|
||||
{"...ij, ...jk -> ...ik", {{47, 51}, {51, 83}}},
|
||||
};
|
||||
|
||||
// Test fixture for the Einsum layer perf tests; parameterized over EinsumParams.
class Layer_Einsum: public TestBaseWithParam<EinsumParams> {};
|
||||
@ -68,7 +65,7 @@ PERF_TEST_P_(Layer_Einsum, einsum) {
|
||||
lp.name = "testEinsum";
|
||||
lp.set("equation", params.equation);
|
||||
lp.set("inputSize", params.inputSize);
|
||||
lp.set("outputSize", params.outputSize);
|
||||
lp.set("outputSize", 1);
|
||||
|
||||
CV_CheckFalse(params.einsumInpShapes.empty(), "ERROR no inputs shapes provided");
|
||||
|
||||
@ -79,38 +76,27 @@ PERF_TEST_P_(Layer_Einsum, einsum) {
|
||||
Net net;
|
||||
std::vector<Mat> inputs;
|
||||
std::vector<std::string> input_names;
|
||||
if (params.inputSize == 1){
|
||||
int id = net.addLayer(lp.name, lp.type, lp);
|
||||
|
||||
for (int i = 0; i < params.inputSize; ++i) {
|
||||
// create inputs
|
||||
inputs.emplace_back(Mat(params.einsumInpShapes[0].size(), params.einsumInpShapes[0].data(), CV_32FC1));
|
||||
inputs.emplace_back(Mat(params.einsumInpShapes[i].size(), params.einsumInpShapes[i].data(), CV_32FC1));
|
||||
|
||||
int id = net.addLayerToPrev(lp.name, lp.type, lp);
|
||||
net.connect(0, 0, id, 0);
|
||||
// connect each input to the layer
|
||||
net.connect(0, i, id, i);
|
||||
|
||||
input_names.emplace_back("input1");
|
||||
|
||||
} else {
|
||||
|
||||
// create inputs
|
||||
inputs.emplace_back(Mat(params.einsumInpShapes[0].size(), params.einsumInpShapes[0].data(), CV_32FC1));
|
||||
inputs.emplace_back(Mat(params.einsumInpShapes[1].size(), params.einsumInpShapes[1].data(), CV_32FC1));
|
||||
|
||||
int id = net.addLayerToPrev(lp.name, lp.type, lp);
|
||||
net.connect(0, 0, id, 0);
|
||||
net.connect(0, 1, id, 1);
|
||||
|
||||
input_names.emplace_back("input1");
|
||||
input_names.emplace_back("input2");
|
||||
// create input names dynamically, assuming input naming follows a consistent pattern
|
||||
input_names.emplace_back("input" + std::to_string(i + 1));
|
||||
}
|
||||
|
||||
//warm up
|
||||
std::vector<Mat> outputs;
|
||||
net.setInputsNames(input_names);
|
||||
for (int i = 0; i < input_names.size(); i++){
|
||||
net.setInput(inputs[i], input_names[i]);
|
||||
}
|
||||
Mat out = net.forward();
|
||||
net.forward(outputs, "testEinsum");
|
||||
|
||||
std::vector<Mat> outputs;
|
||||
TEST_CYCLE()
|
||||
{
|
||||
net.forward(outputs, "testEinsum");
|
||||
|
@ -6,6 +6,7 @@
|
||||
#include <opencv2/dnn/shape_utils.hpp>
|
||||
#include "../precomp.hpp"
|
||||
#include "layers_common.hpp"
|
||||
#include "cpu_kernels/fast_gemm.hpp"
|
||||
|
||||
namespace cv
|
||||
{
|
||||
@ -32,111 +33,6 @@ static bool IsTransposeReshapeForEinsum(const std::vector<size_t>& perm,
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
 * Batched matrix multiplication: for every batch b, output[b] = input1[b] * input2[b].
 *
 * Both inputs are reinterpreted (without copying) according to their shape
 * overrides, so they must be continuous and hold exactly batches*M*K and
 * batches*K*N elements respectively.
 *
 * @param input1               left operand
 * @param input1ShapeOverride  3-element shape {batches, M, K} to view input1 as
 * @param input2               right operand, same element type as input1
 * @param input2ShapeOverride  3-element shape {batches, K, N} to view input2 as
 * @return a newly allocated 3-D matrix of shape {batches, M, N} (a batch count
 *         of 1 or less is treated as a single batch) with the inputs' element type
 */
static Mat batchwiseMatMul(
    const Mat& input1,
    const MatShape& input1ShapeOverride,
    const Mat& input2,
    const MatShape& input2ShapeOverride)
{
    // Sanity checks before the actual MatMul.
    // CV_CheckTypeEQ compares the two types; CV_CheckType would only evaluate its
    // second argument as a boolean test expression.
    CV_CheckTypeEQ(input1.type(), input2.type(), "Data types of the inputs must match for MatMul");
    CV_CheckEQ(input1ShapeOverride.size(), (size_t) 3, "Only 1 batch dimension is allowed for MatMul");
    CV_CheckEQ(input2ShapeOverride.size(), (size_t) 3, "Only 1 batch dimension is allowed for MatMul");
    CV_CheckEQ((size_t) input1ShapeOverride[0], (size_t) input2ShapeOverride[0], "Batch dimension should match for MatMul;");
    CV_CheckEQ((size_t) input1ShapeOverride[2], (size_t) input2ShapeOverride[1], "Incompatible matrix dimensions for matMul");

    // Keep the dimensions as int: that is the type used by MatShape and Mat::size[].
    const int batches = input1ShapeOverride[0];
    const int M = input1ShapeOverride[1];
    const int K = input1ShapeOverride[2];
    const int N = input2ShapeOverride[2];

    // The single-batch case is just a batched product with one slice.
    const int numBatches = batches > 1 ? batches : 1;

    // View the operands as [numBatches, M, K] and [numBatches, K, N] (header-only reshape).
    int shapeA[] = {numBatches, M, K};
    int shapeB[] = {numBatches, K, N};
    const Mat A = input1.reshape(1, 3, shapeA);
    const Mat B = input2.reshape(1, 3, shapeB);

    // Allocate the output with the inputs' type so that copyTo() below writes into
    // the buffer instead of reallocating a mismatching destination header.
    int outputDim[] = {numBatches, M, N};
    Mat output_buffer = Mat::zeros(3, outputDim, input1.type());

    // Returns a 2-D (rows x cols) header over batch `idx` of a 3-D matrix; no data is copied.
    auto batchSlice = [](const Mat& m, int idx, int rows, int cols) -> Mat
    {
        std::vector<Range> ranges(m.dims, cv::Range::all());
        ranges[0] = cv::Range(idx, idx + 1);
        int shape2d[] = {rows, cols};
        return m(ranges).reshape(1, 2, shape2d);
    };

    for (int i = 0; i < numBatches; i++)
    {
        Mat product;
        cv::gemm(batchSlice(A, i, M, K), batchSlice(B, i, K, N), 1.0, cv::Mat(), 1.0, product);

        // Same size and type as `product`, so the data lands inside output_buffer.
        Mat dst = batchSlice(output_buffer, i, M, N);
        product.copyTo(dst);
    }

    return output_buffer;
}
|
||||
|
||||
static Mat Transpose(
|
||||
const Mat& input,
|
||||
@ -452,6 +348,8 @@ public:
|
||||
// The number of dimensions that are encompassed by an "ellipsis" - "...".
|
||||
size_t numOfEllipsisDims = 0;
|
||||
|
||||
// Backend for fastgemm
|
||||
FastGemmOpt opt;
|
||||
|
||||
void parseEquation(String equation);
|
||||
void processEquation(const std::vector<MatShape>& inputs);
|
||||
@ -469,7 +367,12 @@ public:
|
||||
const MatShape& reduceDims,
|
||||
bool isFinalPair
|
||||
);
|
||||
|
||||
Mat batchwiseMatMul(
|
||||
const Mat& input1,
|
||||
const MatShape& input1ShapeOverride,
|
||||
const Mat& input2,
|
||||
const MatShape& input2ShapeOverride
|
||||
);
|
||||
|
||||
// constructor
|
||||
LayerEinsumImpl(const LayerParams& params)
|
||||
@ -491,6 +394,7 @@ public:
|
||||
einsumInpShapes.emplace_back(shape);
|
||||
}
|
||||
|
||||
opt.init();
|
||||
|
||||
// Maintains a mapping between input indices and their corresponding subscript labels for each input
|
||||
inputSubscriptIndices.reserve(numInputs);
|
||||
@ -1389,6 +1293,112 @@ Mat LayerEinsumImpl::pairwiseOperandProcess(
|
||||
return output;
|
||||
};
|
||||
|
||||
/**
 * Batched matrix multiplication backed by fastGemm: for every batch b,
 * output[b] = input1[b] * input2[b].
 *
 * Both inputs are reinterpreted (without copying) according to their shape
 * overrides, so they must be continuous and hold exactly batches*M*K and
 * batches*K*N elements respectively.
 *
 * @param input1               left operand
 * @param input1ShapeOverride  3-element shape {batches, M, K} to view input1 as
 * @param input2               right operand, same element type as input1
 * @param input2ShapeOverride  3-element shape {batches, K, N} to view input2 as
 * @return a newly allocated 3-D matrix of shape {batches, M, N} (a batch count
 *         of 1 or less is treated as a single batch) with the inputs' element type
 */
Mat LayerEinsumImpl::batchwiseMatMul(
    const Mat& input1,
    const MatShape& input1ShapeOverride,
    const Mat& input2,
    const MatShape& input2ShapeOverride)
{
    // Sanity checks before the actual MatMul.
    // CV_CheckTypeEQ compares the two types; CV_CheckType would only evaluate its
    // second argument as a boolean test expression.
    CV_CheckTypeEQ(input1.type(), input2.type(), "Data types of the inputs must match for MatMul");
    CV_CheckEQ(input1ShapeOverride.size(), (size_t) 3, "Only 1 batch dimension is allowed for MatMul");
    CV_CheckEQ(input2ShapeOverride.size(), (size_t) 3, "Only 1 batch dimension is allowed for MatMul");
    CV_CheckEQ((size_t) input1ShapeOverride[0], (size_t) input2ShapeOverride[0], "Batch dimension should match for MatMul;");
    CV_CheckEQ((size_t) input1ShapeOverride[2], (size_t) input2ShapeOverride[1], "Incompatible matrix dimensions for matMul");

    const int batches = input1ShapeOverride[0];
    const int M = input1ShapeOverride[1];
    const int K = input1ShapeOverride[2];
    const int N = input2ShapeOverride[2];

    // The single-batch case is just a batched product with one slice.
    const int numBatches = batches > 1 ? batches : 1;

    // View the operands as [numBatches, M, K] and [numBatches, K, N] (header-only reshape).
    int shapeA[] = {numBatches, M, K};
    int shapeB[] = {numBatches, K, N};
    const Mat A = input1.reshape(1, 3, shapeA);
    const Mat B = input2.reshape(1, 3, shapeB);

    // Allocate the result once, with the same element type as the operands.
    int outputDim[] = {numBatches, M, N};
    Mat output_buffer = Mat::zeros(3, outputDim, input1.type());

    // Returns a 2-D (rows x cols) header over batch `idx` of a 3-D matrix; no data is copied.
    auto batchSlice = [](const Mat& m, int idx, int rows, int cols) -> Mat
    {
        std::vector<Range> ranges(m.dims, cv::Range::all());
        ranges[0] = cv::Range(idx, idx + 1);
        int shape2d[] = {rows, cols};
        return m(ranges).reshape(1, 2, shape2d);
    };

    for (int i = 0; i < numBatches; i++)
    {
        const Mat part1 = batchSlice(A, i, M, K);
        const Mat part2 = batchSlice(B, i, K, N);

        // `dst` shares memory with output_buffer, so the product is written in place
        // and no per-batch temporary or final copy is needed.
        Mat dst = batchSlice(output_buffer, i, M, N);
        fastGemm(false, false, 1.0, part1, part2, 0.0, dst, opt);
    }

    return output_buffer;
}
|
||||
Ptr<EinsumLayer> EinsumLayer::create(const LayerParams& params)
|
||||
{
|
||||
return makePtr<LayerEinsumImpl>(params);
|
||||
|
Loading…
Reference in New Issue
Block a user