mirror of
https://github.com/opencv/opencv.git
synced 2025-06-07 01:13:28 +08:00
Merge pull request #24037 from Abdurrahheem:ash/dev_einsum
Add Support for Einsum Layer #24037 ### This PR adds support for the [Einsum Layer](https://pytorch.org/docs/stable/generated/torch.einsum.html) (in progress). This PR is currently not to be merged but only reviewed. Test cases are located in PR [#1090](https://github.com/opencv/opencv_extra/pull/1090) in opencv_extra **DONE**: - [x] 2-5D GEMM support added - [x] Matrix transpose support added - [x] Reduction type compute 'ij->j' - [x] 2nd shape computation - during forward **Next PRs**: - [ ] Broadcasting reduction "...ii ->...i" - [ ] Add lazy shape deduction. "...ij, ...jk->...ik" - [ ] Add implicit output computation support. "bij,bjk ->" (output subscripts should be "bik") - [ ] Add support for CUDA backend - [ ] BatchWiseMultiply optimize **Later in 5.x version (requires support for 1D matrices)**: - [ ] Add 1D vector multiplication support - [ ] Inner product "i, i" (problems with 1D shapes) ### Pull Request Readiness Checklist See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [x] I agree to contribute to the project under Apache 2 License. - [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [x] The PR is proposed to the proper branch - [ ] There is a reference to the original bug report and related work - [x] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. - [x] The feature is well documented and sample code can be built with the project CMake
This commit is contained in:
parent
b51a78d439
commit
865e7cacca
@ -241,6 +241,39 @@ CV__DNN_INLINE_NS_BEGIN
|
||||
|
||||
};
|
||||
|
||||
/** @brief This function performs array summation based
|
||||
* on the Einstein summation convention. The function
|
||||
* allows for concise expressions of various mathematical
|
||||
* operations using subscripts.
|
||||
*
|
||||
* By default, the labels are placed in alphabetical
|
||||
* order at the end of the output.
|
||||
* For example:
|
||||
* if `c = einsum("i,j", a, b)`, then `c[i,j] == a[i]*b[j]`.
|
||||
* However, if `c = einsum("j,i", a, b)`, then `c[i,j] = a[j]*b[i]`.
|
||||
* Alternatively, you can control the output order or prevent
|
||||
* an axis from being summed/force an axis to be summed
|
||||
* by providing indices for the output.
|
||||
* For example:
|
||||
* `diag(a)` -> `einsum("ii->i", a)`
|
||||
* `sum(a, axis=0)` -> `einsum("i...->", a)`
|
||||
* Subscripts at the beginning and end may be specified
|
||||
* by putting an ellipsis "..." in the middle.
|
||||
* For instance, the function `einsum("i...i", a)` takes
|
||||
* the diagonal of the first and last dimensions of
|
||||
* the operand, and `einsum("ij...,jk...->ik...")` performs
|
||||
* the matrix product using the first two indices
|
||||
* of each operand instead of the last two.
|
||||
* When there is only one operand, no axes being summed,
|
||||
* and no output parameter, this function returns
|
||||
* a view into the operand instead of creating a copy.
|
||||
*/
|
||||
class CV_EXPORTS EinsumLayer : public Layer
|
||||
{
|
||||
public:
|
||||
static Ptr<EinsumLayer> create(const LayerParams& params);
|
||||
};
|
||||
|
||||
class CV_EXPORTS BaseConvolutionLayer : public Layer
|
||||
{
|
||||
public:
|
||||
|
@ -184,6 +184,7 @@ void initializeLayerFactory()
|
||||
CV_DNN_REGISTER_LAYER_CLASS(LSTM, LSTMLayer);
|
||||
CV_DNN_REGISTER_LAYER_CLASS(GRU, GRULayer);
|
||||
CV_DNN_REGISTER_LAYER_CLASS(CumSum, CumSumLayer);
|
||||
CV_DNN_REGISTER_LAYER_CLASS(Einsum, EinsumLayer);
|
||||
|
||||
CV_DNN_REGISTER_LAYER_CLASS(Scatter, ScatterLayer);
|
||||
CV_DNN_REGISTER_LAYER_CLASS(ScatterND, ScatterNDLayer);
|
||||
|
1114
modules/dnn/src/layers/einsum_layer.cpp
Normal file
1114
modules/dnn/src/layers/einsum_layer.cpp
Normal file
File diff suppressed because it is too large
Load Diff
@ -194,6 +194,7 @@ private:
|
||||
void parseTile (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
|
||||
void parseLayerNorm (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
|
||||
void parseSimpleLayers (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
|
||||
void parseEinsum (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
|
||||
|
||||
// Domain: com.microsoft
|
||||
// URL: https://github.com/microsoft/onnxruntime/blob/master/docs/ContribOperators.md
|
||||
@ -3310,6 +3311,40 @@ void ONNXImporter::parseSimpleLayers(LayerParams& layerParams, const opencv_onnx
|
||||
addLayer(layerParams, node_proto);
|
||||
}
|
||||
|
||||
|
||||
// Parses an ONNX Einsum node into a DNN Einsum layer.
// The layer needs the shapes of all its inputs at creation time (to
// pre-compute the output shape), so every known input shape is serialized
// into the layer params together with the equation string and the
// input/output counts.
void ONNXImporter::parseEinsum(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
{
    // Collect the previously inferred output shape of every input blob.
    std::vector<MatShape> einsumInpShapes;
    for (int j = 0; j < node_proto.input_size(); j++)
    {
        const auto& inputLayerName = node_proto.input(j);
        auto it = outShapes.find(inputLayerName);
        if (it != outShapes.end())
        {
            einsumInpShapes.emplace_back(it->second);
        } else {
            // Shape inference failed upstream for this input; einsum cannot
            // be constructed without it. Report which input is missing.
            CV_Error(Error::StsAssert, "ERROR input shape not found: " + inputLayerName);
        }
    }

    CV_CheckFalse(einsumInpShapes.empty(), "ERROR no inputs shapes");
    // Store each input shape under the keys "inputShapes0", "inputShapes1", ...
    // (size_t loop index avoids a signed/unsigned comparison warning).
    for (size_t i = 0; i < einsumInpShapes.size(); i++) {
        layerParams.set("inputShapes" + cv::format("%d", static_cast<int>(i)),
                        DictValue::arrayInt(einsumInpShapes[i].begin(), einsumInpShapes[i].size()));
    }

    // Check that the equation attribute is present and non-empty.
    std::string equation = layerParams.get<std::string>("equation");
    CV_CheckFalse(equation.empty(), "Equation is empty");

    // Save number of inputs. We need it in layer initialization.
    layerParams.set("inputSize", node_proto.input_size());

    // Save number of outputs. We need it in layer initialization.
    layerParams.set("outputSize", node_proto.output_size());

    addLayer(layerParams, node_proto);
}
|
||||
|
||||
void ONNXImporter::parseCustomLayer(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
|
||||
{
|
||||
const std::string& name = layerParams.name;
|
||||
@ -4016,6 +4051,7 @@ void ONNXImporter::buildDispatchMap_ONNX_AI(int opset_version)
|
||||
dispatch["Sum"] = dispatch["Min"] = dispatch["Max"] = &ONNXImporter::parseElementWise;
|
||||
dispatch["Where"] = &ONNXImporter::parseElementWise;
|
||||
dispatch["Range"] = &ONNXImporter::parseRange;
|
||||
dispatch["Einsum"] = &ONNXImporter::parseEinsum;
|
||||
|
||||
std::vector<std::string> simpleLayers{"Acos", "Acosh", "Asin", "Asinh", "Atan", "Atanh", "Ceil", "Celu", "Cos",
|
||||
"Cosh", "Dropout", "Erf", "Exp", "Floor", "HardSigmoid", "HardSwish",
|
||||
|
@ -111,10 +111,7 @@
|
||||
"test_dynamicquantizelinear_min_adjusted_expanded",
|
||||
"test_edge_pad",
|
||||
"test_einsum_batch_diagonal",
|
||||
"test_einsum_batch_matmul",
|
||||
"test_einsum_inner_prod",
|
||||
"test_einsum_sum",
|
||||
"test_einsum_transpose",
|
||||
"test_equal",
|
||||
"test_equal_bcast",
|
||||
"test_expand_dim_changed",
|
||||
|
@ -1415,6 +1415,56 @@ TEST_P(Test_ONNX_layers, LSTM_layout_batch)
|
||||
testONNXModels("lstm_layout_1", npy, 0.005, 0.005, false, false, 3);
|
||||
}
|
||||
|
||||
// Disabled: 1D einsum requires 1D Mat support — planned for 5.x per the
// original PR notes; TODO confirm before enabling.
TEST_P(Test_ONNX_layers, DISABLED_Einsum_1D)
{
    testONNXModels("einsum_1d", npy, 0, 0, false, false, 2);
}

// Two-operand einsum on 2D inputs (matrix product case).
TEST_P(Test_ONNX_layers, Einsum_2D)
{
    testONNXModels("einsum_2d", npy, 0, 0, false, false, 2);
}

// Two-operand einsum on 3D (batched) inputs.
TEST_P(Test_ONNX_layers, Einsum_3D)
{
    testONNXModels("einsum_3d", npy, 0, 0, false, false, 2);
}

// Two-operand einsum on 4D inputs.
TEST_P(Test_ONNX_layers, Einsum_4D)
{
    testONNXModels("einsum_4d", npy, 0, 0, false, false, 2);
}

// Two-operand einsum on 5D inputs.
TEST_P(Test_ONNX_layers, Einsum_5D)
{
    testONNXModels("einsum_5d", npy, 0, 0, false, false, 2);
}

// Disabled: inner product "i,i" hits 1D-shape limitations — see PR notes;
// TODO confirm before enabling.
TEST_P(Test_ONNX_layers, DISABLED_Einsum_InnerProduct)
{
    testONNXModels("einsum_inner", npy, 0, 0, false, false, 2);
}

// Disabled: element-wise (Hadamard) product variant not yet supported —
// TODO confirm before enabling.
TEST_P(Test_ONNX_layers, DISABLED_Einsum_HadamardProduct)
{
    testONNXModels("einsum_hadamard", npy, 0, 0, false, false, 2);
}

// Disabled: batch diagonal needs broadcasting reduction ("...ii->...i"),
// listed as future work in the PR notes; TODO confirm before enabling.
TEST_P(Test_ONNX_layers, DISABLED_Einsum_Batch_Diagonal)
{
    testONNXModels("einsum_batch_diagonal", npy, 0, 0, false, false, 1);
}

// Single-operand reduction, e.g. "ij->j".
TEST_P(Test_ONNX_layers, Einsum_Sum)
{
    testONNXModels("einsum_sum", npy, 0, 0, false, false, 1);
}

// Single-operand transpose expressed via einsum subscripts.
TEST_P(Test_ONNX_layers, Einsum_transpose)
{
    testONNXModels("einsum_transpose", npy, 0, 0, false, false, 1);
}
|
||||
|
||||
TEST_P(Test_ONNX_layers, Pad2d_Unfused)
|
||||
{
|
||||
testONNXModels("ReflectionPad2d");
|
||||
|
Loading…
Reference in New Issue
Block a user