Merge pull request #24037 from Abdurrahheem:ash/dev_einsum

Add Support for Einsum Layer #24037

### This PR adds support for the [Einsum Layer](https://pytorch.org/docs/stable/generated/torch.einsum.html) (in progress).

This PR is currently for review only and is not meant to be merged yet. Test cases are located in PR [#1090](https://github.com/opencv/opencv_extra/pull/1090) in opencv_extra.

**DONE**: 
 - [x] 2D-5D GEMM support added (see the usage sketch after the lists below)
 - [x] Matrix transpose support added
 - [x] Reduction type computation, e.g. 'ij->j'
 - [x] Second shape computation during the forward pass

**Next PRs**:
- [ ] Broadcasting reduction "...ii->...i"
- [ ] Add lazy shape deduction. "...ij, ...jk->...ik"
- [ ] Add implicit output computation support. "bij,bjk ->" (output subscripts should be "bik")
- [ ] Add support for CUDA backend 
- [ ] Optimize BatchWiseMultiply

**Later in 5.x version (requires support for 1D matrices)**: 
- [ ] Add 1D vector multiplication support 
- [ ] Inner product "i,i" (problems with 1D shapes)
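
For illustration, a minimal sketch of driving the layer directly through `cv::dnn`, assuming it can be constructed by hand with the same parameters the ONNX importer sets (`equation`, `inputSize`, `outputSize`, `inputShapes<i>`; see `parseEinsum` in the diff below). This is illustrative only, not part of the PR:

```cpp
#include <opencv2/dnn.hpp>
using namespace cv;
using namespace cv::dnn;

int main()
{
    Mat A = Mat::ones(2, 3, CV_32F), B = Mat::ones(3, 4, CV_32F);

    LayerParams lp;
    lp.set("equation", "ij,jk->ik");  // plain GEMM expressed as einsum
    lp.set("inputSize", 2);           // parameter names mirror parseEinsum
    lp.set("outputSize", 1);
    std::vector<int> shapeA = {2, 3}, shapeB = {3, 4};
    lp.set("inputShapes0", DictValue::arrayInt(shapeA.begin(), (int)shapeA.size()));
    lp.set("inputShapes1", DictValue::arrayInt(shapeB.begin(), (int)shapeB.size()));

    Net net;
    net.setInputsNames({"A", "B"});
    int id = net.addLayer("einsum", "Einsum", lp);
    net.connect(0, 0, id, 0);  // first network input  -> layer input 0
    net.connect(0, 1, id, 1);  // second network input -> layer input 1

    net.setInput(A, "A");
    net.setInput(B, "B");
    Mat out = net.forward();   // 2x4 result; every entry equals 3
    return 0;
}
```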

### Pull Request Readiness Checklist

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [x] I agree to contribute to the project under Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [x] The PR is proposed to the proper branch
- [ ] There is a reference to the original bug report and related work
- [x] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
      Patch to opencv_extra has the same branch name.
- [x] The feature is well documented and sample code can be built with the project CMake
Abduragim Shtanchaev, 2023-09-22 12:25:02 +04:00 (committed by GitHub)
parent b51a78d439
commit 865e7cacca
6 changed files with 1234 additions and 3 deletions


@@ -241,6 +241,39 @@ CV__DNN_INLINE_NS_BEGIN
};
/** @brief This function performs array summation based
* on the Einstein summation convention. The function
* allows for concise expressions of various mathematical
* operations using subscripts.
*
* By default, the labels are placed in alphabetical
* order at the end of the output.
* For example:
* if `c = einsum("i,j", a, b)`, then `c[i,j] == a[i]*b[j]`.
* However, if `c = einsum("j,i", a, b)`, then `c[i,j] == a[j]*b[i]`.
* Alternatively, you can control the output order, prevent
* an axis from being summed, or force an axis to be summed,
* by providing indices for the output.
* For example:
* `diag(a)` -> `einsum("ii->i", a)`
* `sum(a, axis=0)` -> `einsum("i...->", a)`
* Subscripts at the beginning and end may be specified
* by putting an ellipsis "..." in the middle.
* For instance, the function `einsum("i...i", a)` takes
* the diagonal of the first and last dimensions of
* the operand, and `einsum("ij...,jk...->ik...")` performs
* the matrix product using the first two indices
* of each operand instead of the last two.
* When there is only one operand, no axes are summed,
* and no output parameter is given, this function returns
* a view into the operand instead of creating a copy.
*/
class CV_EXPORTS EinsumLayer : public Layer
{
public:
static Ptr<EinsumLayer> create(const LayerParams& params);
};
class CV_EXPORTS BaseConvolutionLayer : public Layer
{
public:

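To make the convention above concrete, here is the 'ij->j' reduction from the checklist written out in plain C++ (independent of the layer itself):

```cpp
// einsum("ij->j", a): the index i is dropped from the output,
// so it is summed over.
float a[2][3] = {{1, 2, 3}, {4, 5, 6}};
float c[3] = {0, 0, 0};
for (int i = 0; i < 2; ++i)
    for (int j = 0; j < 3; ++j)
        c[j] += a[i][j];   // c == {5, 7, 9}
```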

@@ -184,6 +184,7 @@ void initializeLayerFactory()
CV_DNN_REGISTER_LAYER_CLASS(LSTM, LSTMLayer);
CV_DNN_REGISTER_LAYER_CLASS(GRU, GRULayer);
CV_DNN_REGISTER_LAYER_CLASS(CumSum, CumSumLayer);
CV_DNN_REGISTER_LAYER_CLASS(Einsum, EinsumLayer);
CV_DNN_REGISTER_LAYER_CLASS(Scatter, ScatterLayer);
CV_DNN_REGISTER_LAYER_CLASS(ScatterND, ScatterNDLayer);
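
The registration above makes the layer constructible from its type string via the layer factory. A minimal sketch (the extra parameters follow what `parseEinsum` stores; the exact set the constructor requires is not shown in this diff):

```cpp
cv::dnn::LayerParams params;
params.set("equation", "ij->ji");  // simple transpose
params.set("inputSize", 1);
params.set("outputSize", 1);
std::vector<int> shape = {3, 4};
params.set("inputShapes0", cv::dnn::DictValue::arrayInt(shape.begin(), (int)shape.size()));

// Factory lookup by the "Einsum" type name registered above.
cv::Ptr<cv::dnn::Layer> layer = cv::dnn::LayerFactory::createLayerInstance("Einsum", params);
```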

(File diff suppressed because it is too large.)


@@ -194,6 +194,7 @@ private:
void parseTile (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
void parseLayerNorm (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
void parseSimpleLayers (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
void parseEinsum (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
// Domain: com.microsoft
// URL: https://github.com/microsoft/onnxruntime/blob/master/docs/ContribOperators.md
@@ -3310,6 +3311,40 @@ void ONNXImporter::parseSimpleLayers(LayerParams& layerParams, const opencv_onnx
addLayer(layerParams, node_proto);
}
void ONNXImporter::parseEinsum(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
{
    std::vector<MatShape> einsumInpShapes;
    for (int j = 0; j < node_proto.input_size(); j++)
    {
        const auto& inputLayerName = node_proto.input(j);
        auto it = outShapes.find(inputLayerName);
        if (it != outShapes.end())
        {
            einsumInpShapes.emplace_back(it->second);
        } else {
            CV_Error(Error::StsAssert, "ERROR: input shape not found");
        }
    }

    CV_CheckFalse(einsumInpShapes.empty(), "ERROR: no input shapes");
    for (int i = 0; i < (int)einsumInpShapes.size(); i++) {
        layerParams.set("inputShapes" + cv::format("%d", i), DictValue::arrayInt(einsumInpShapes[i].begin(), einsumInpShapes[i].size()));
    }

    // Check that the equation is valid
    std::string equation = layerParams.get<std::string>("equation");
    CV_CheckFalse(equation.empty(), "Equation is empty");

    // Save the number of inputs; we need it during layer initialization
    layerParams.set("inputSize", node_proto.input_size());

    // Save the number of outputs; we need it during layer initialization
    layerParams.set("outputSize", node_proto.output_size());

    addLayer(layerParams, node_proto);
}
void ONNXImporter::parseCustomLayer(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
{
const std::string& name = layerParams.name;
@@ -4016,6 +4051,7 @@ void ONNXImporter::buildDispatchMap_ONNX_AI(int opset_version)
dispatch["Sum"] = dispatch["Min"] = dispatch["Max"] = &ONNXImporter::parseElementWise;
dispatch["Where"] = &ONNXImporter::parseElementWise;
dispatch["Range"] = &ONNXImporter::parseRange;
dispatch["Einsum"] = &ONNXImporter::parseEinsum;
std::vector<std::string> simpleLayers{"Acos", "Acosh", "Asin", "Asinh", "Atan", "Atanh", "Ceil", "Celu", "Cos",
"Cosh", "Dropout", "Erf", "Exp", "Floor", "HardSigmoid", "HardSwish",


@@ -111,10 +111,7 @@
"test_dynamicquantizelinear_min_adjusted_expanded",
"test_edge_pad",
"test_einsum_batch_diagonal",
"test_einsum_batch_matmul",
"test_einsum_inner_prod",
"test_einsum_sum",
"test_einsum_transpose",
"test_equal",
"test_equal_bcast",
"test_expand_dim_changed",


@@ -1415,6 +1415,56 @@ TEST_P(Test_ONNX_layers, LSTM_layout_batch)
testONNXModels("lstm_layout_1", npy, 0.005, 0.005, false, false, 3);
}
TEST_P(Test_ONNX_layers, DISABLED_Einsum_1D)
{
testONNXModels("einsum_1d", npy, 0, 0, false, false, 2);
}
TEST_P(Test_ONNX_layers, Einsum_2D)
{
testONNXModels("einsum_2d", npy, 0, 0, false, false, 2);
}
TEST_P(Test_ONNX_layers, Einsum_3D)
{
testONNXModels("einsum_3d", npy, 0, 0, false, false, 2);
}
TEST_P(Test_ONNX_layers, Einsum_4D)
{
testONNXModels("einsum_4d", npy, 0, 0, false, false, 2);
}
TEST_P(Test_ONNX_layers, Einsum_5D)
{
testONNXModels("einsum_5d", npy, 0, 0, false, false, 2);
}
TEST_P(Test_ONNX_layers, DISABLED_Einsum_InnerProduct)
{
testONNXModels("einsum_inner", npy, 0, 0, false, false, 2);
}
TEST_P(Test_ONNX_layers, DISABLED_Einsum_HadamardProduct)
{
testONNXModels("einsum_hadamard", npy, 0, 0, false, false, 2);
}
TEST_P(Test_ONNX_layers, DISABLED_Einsum_Batch_Diagonal)
{
testONNXModels("einsum_batch_diagonal", npy, 0, 0, false, false, 1);
}
TEST_P(Test_ONNX_layers, Einsum_Sum)
{
testONNXModels("einsum_sum", npy, 0, 0, false, false, 1);
}
TEST_P(Test_ONNX_layers, Einsum_transpose)
{
testONNXModels("einsum_transpose", npy, 0, 0, false, false, 1);
}
TEST_P(Test_ONNX_layers, Pad2d_Unfused)
{
testONNXModels("ReflectionPad2d");