From 240b245105bdb69b2b624ce24f08373c2b16bfde Mon Sep 17 00:00:00 2001 From: Aser Atawya <47282229+Aser-Abdelfatah@users.noreply.github.com> Date: Wed, 18 Oct 2023 00:41:47 -0700 Subject: [PATCH] Merge pull request #24092 from Aser-Abdelfatah:GSoC_Support_GatherElements_ONNX GSoC Add ONNX Support for GatherElements #24092 Merge with: https://github.com/opencv/opencv_extra/pull/1082 Adds support to the ONNX operator GatherElements [operator docs](https://github.com/onnx/onnx/blob/main/docs/Operators.md#GatherElements) Added tests to opencv_extra at pull request https://github.com/opencv/opencv_extra/pull/1082 ### Pull Request Readiness Checklist See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [x] I agree to contribute to the project under Apache 2 License. - [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [x] The PR is proposed to the proper branch - [x] There is a reference to the original bug report and related work - [x] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. 
- [x] The feature is well documented and sample code can be built with the project CMake --- .../dnn/include/opencv2/dnn/all_layers.hpp | 16 ++ modules/dnn/perf/perf_layer.cpp | 51 ++++++ modules/dnn/src/init.cpp | 1 + .../dnn/src/layers/gather_elements_layer.cpp | 154 ++++++++++++++++++ modules/dnn/src/onnx/onnx_importer.cpp | 49 ++++++ ...ance_layer_filter__vulkan_denylist.inl.hpp | 3 + ..._conformance_layer_parser_denylist.inl.hpp | 3 - modules/dnn/test/test_onnx_importer.cpp | 29 ++++ 8 files changed, 303 insertions(+), 3 deletions(-) create mode 100644 modules/dnn/src/layers/gather_elements_layer.cpp diff --git a/modules/dnn/include/opencv2/dnn/all_layers.hpp b/modules/dnn/include/opencv2/dnn/all_layers.hpp index 587eda102f..0f1518f08c 100644 --- a/modules/dnn/include/opencv2/dnn/all_layers.hpp +++ b/modules/dnn/include/opencv2/dnn/all_layers.hpp @@ -343,6 +343,22 @@ CV__DNN_INLINE_NS_BEGIN static Ptr create(const LayerParams& params); }; + /** @brief GatherElements layer + * GatherElements takes two inputs data and indices of the same rank r >= 1 and an optional attribute axis and works such that: + * output[i][j][k] = data[index[i][j][k]][j][k] if axis = 0 and r = 3 + * output[i][j][k] = data[i][index[i][j][k]][k] if axis = 1 and r = 3 + * output[i][j][k] = data[i][j][index[i][j][k]] if axis = 2 and r = 3 + * + * Gather, on the other hand, takes a data tensor of rank r >= 1, and an indices tensor of rank q, and works such that: + * it gathers the entries along the axis dimension of the input data indexed by indices and concatenates them in an output tensor of rank q + (r - 1) + * e.g. 
If axis = 0, let k = indices[i_{0}, ..., i_{q-1}] then output[i_{0}, ..., i_{q-1}, j_{0}, ..., j_{r-2}] = input[k , j_{0}, ..., j_{r-2}]: + **/ + class CV_EXPORTS GatherElementsLayer : public Layer + { + public: + static Ptr create(const LayerParams& params); + }; + class CV_EXPORTS PoolingLayer : public Layer { public: diff --git a/modules/dnn/perf/perf_layer.cpp b/modules/dnn/perf/perf_layer.cpp index 3020dbea66..dbedc4319b 100644 --- a/modules/dnn/perf/perf_layer.cpp +++ b/modules/dnn/perf/perf_layer.cpp @@ -633,6 +633,56 @@ PERF_TEST_P_(Layer_LayerNormExpanded, DISABLED_LayerNormExpanded) test_layer({N, H ,W}); } +struct Layer_GatherElements : public TestBaseWithParam > +{ + void test_layer(const std::vector& data_shape, const std::vector& indices_shape, int axis = 0) + { + int backendId = get<0>(GetParam()); + int targetId = get<1>(GetParam()); + + Mat data(data_shape, CV_32FC1); + Mat indices(indices_shape, CV_32FC1); + + randu(data, 0.f, 1.f); + randu(indices, 0, data_shape[axis]); + + Net net; + LayerParams lp; + lp.type = "GatherElements"; + lp.name = "testLayer"; + lp.set("axis", axis); + int id = net.addLayerToPrev(lp.name, lp.type, lp); + net.connect(0, 0, id, 0); + net.connect(0, 1, id, 1); + + // warmup + { + std::vector inpNames(3); + inpNames[0] = "data"; + inpNames[1] = "indices"; + net.setInputsNames(inpNames); + net.setInput(data, inpNames[0]); + net.setInput(indices, inpNames[1]); + + net.setPreferableBackend(backendId); + net.setPreferableTarget(targetId); + Mat out = net.forward(); + } + + TEST_CYCLE() + { + Mat res = net.forward(); + } + + SANITY_CHECK_NOTHING(); + } +}; + +PERF_TEST_P_(Layer_GatherElements, GatherElements) +{ + test_layer({2700, 1, 2914}, {2700, 1, 81}, 2); +} + INSTANTIATE_TEST_CASE_P(/**/, Layer_Slice, dnnBackendsAndTargets(false, false)); INSTANTIATE_TEST_CASE_P(/**/, Layer_NaryEltwise, testing::Values(std::make_tuple(DNN_BACKEND_OPENCV, DNN_TARGET_CPU))); #ifdef HAVE_CUDA @@ -642,6 +692,7 @@ INSTANTIATE_TEST_CASE_P(/**/, 
Layer_Scatter, testing::Values(std::make_tuple(DNN INSTANTIATE_TEST_CASE_P(/**/, Layer_ScatterND, testing::Values(std::make_tuple(DNN_BACKEND_OPENCV, DNN_TARGET_CPU))); INSTANTIATE_TEST_CASE_P(/**/, Layer_LayerNorm, testing::Values(std::make_tuple(DNN_BACKEND_OPENCV, DNN_TARGET_CPU))); INSTANTIATE_TEST_CASE_P(/**/, Layer_LayerNormExpanded, testing::Values(std::make_tuple(DNN_BACKEND_OPENCV, DNN_TARGET_CPU))); +INSTANTIATE_TEST_CASE_P(/**/, Layer_GatherElements, testing::Values(std::make_tuple(DNN_BACKEND_OPENCV, DNN_TARGET_CPU))); typedef TestBaseWithParam > > Layer_FullyConnected; diff --git a/modules/dnn/src/init.cpp b/modules/dnn/src/init.cpp index 3183d71f0b..e70d5dad47 100644 --- a/modules/dnn/src/init.cpp +++ b/modules/dnn/src/init.cpp @@ -157,6 +157,7 @@ void initializeLayerFactory() CV_DNN_REGISTER_LAYER_CLASS(Arg, ArgLayer); CV_DNN_REGISTER_LAYER_CLASS(Reciprocal, ReciprocalLayer); CV_DNN_REGISTER_LAYER_CLASS(Gather, GatherLayer); + CV_DNN_REGISTER_LAYER_CLASS(GatherElements, GatherElementsLayer); CV_DNN_REGISTER_LAYER_CLASS(LayerNormalization, LayerNormLayer); CV_DNN_REGISTER_LAYER_CLASS(Expand, ExpandLayer); diff --git a/modules/dnn/src/layers/gather_elements_layer.cpp b/modules/dnn/src/layers/gather_elements_layer.cpp new file mode 100644 index 0000000000..8c68200850 --- /dev/null +++ b/modules/dnn/src/layers/gather_elements_layer.cpp @@ -0,0 +1,154 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. 
+ +#include "../precomp.hpp" +#include + +namespace cv { namespace dnn { + +static inline int calculateOffset(int outer_dim, const MatShape &shape_indices, int axis_skip, const MatStep &step_data) { + int offset = 0; + for (int axis = static_cast(shape_indices.size()) - 2; axis >= 0; axis--) { + int dim = shape_indices[axis]; + if (axis != axis_skip) { + offset += (outer_dim % dim) * step_data[axis]; + } + outer_dim /= dim; + } + return offset; +} + +class GatherElementsLayerImpl CV_FINAL : public GatherElementsLayer +{ +public: + GatherElementsLayerImpl(const LayerParams& params) + { + setParamsFrom(params); + axis = params.get("axis", 0); + } + + virtual bool supportBackend(int backendId) CV_OVERRIDE + { + return backendId == DNN_BACKEND_OPENCV; + } + + virtual bool getMemoryShapes(const std::vector &inputs, + const int requiredOutputs, + std::vector &outputs, + std::vector &internals) const CV_OVERRIDE + { + CV_CheckEQ(inputs.size(), 2ull, "GatherElements: requires two inputs"); + + const auto &data = inputs[0]; + const auto &indices = inputs[1]; + CV_CheckEQ(data.size(), indices.size(), "GatherElements: data and indices should have the same dimension"); + + int normalized_axis = normalize_axis(axis, static_cast(data.size())); + CV_CheckGE(normalized_axis, 0, "GatherElements: axis out of range"); + CV_CheckLT(normalized_axis, static_cast(data.size()), "GatherElements: axis out of range"); + for (size_t i = 0; i < data.size(); i++) { + if (i != normalized_axis) { + CV_CheckEQ(data[i], indices[i], "GatherElements: shape mismatched"); + } + } + + outputs.assign(1, inputs[1]); // shape of output is same as indices + return false; + } + + virtual void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr) CV_OVERRIDE { + std::vector inputs; + inputs_arr.getMatVector(inputs); + + const auto &data = inputs[0]; + axis = normalize_axis(axis, data.dims); + } + + void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, 
OutputArrayOfArrays internals_arr) CV_OVERRIDE + { + CV_TRACE_FUNCTION(); + CV_TRACE_ARG_VALUE(name, "name", name.c_str()); + + std::vector inputs, outputs; + inputs_arr.getMatVector(inputs); + outputs_arr.getMatVector(outputs); + + const Mat& data = inputs[0]; + const Mat& indices = inputs[1]; + Mat& out = outputs[0]; + + typeDispatch(outputs[0].type(), data, indices, out); + } + + template + void forward_impl(const Mat& data_, const Mat& indices_, Mat& out_) + { + const auto *ptr_data = data_.ptr(); + const auto *ptr_indices = indices_.ptr(); + auto *ptr_out = out_.ptr(); + + const auto shape_data = shape(data_); + const auto &step_data = data_.step; + const auto shape_indices = shape(indices_); + + int inner_most_dim = shape_indices.back(); + int axis_dim = shape_data[axis]; + size_t axis_step = static_cast(step_data[axis] / sizeof(T)); + + bool innermost_axis = axis == static_cast(shape_data.size() - 1); + + auto fn = [&](const Range &r) { + for (int i = r.start; i < r.end; i++) { + auto *data = ptr_data + static_cast(calculateOffset(i, shape_indices, axis, step_data) / sizeof(T)); + auto *indices = ptr_indices + i * inner_most_dim; + auto *out = ptr_out + i * inner_most_dim; + + if (innermost_axis) { + for (int j = 0; j < inner_most_dim; j++) { + int index = static_cast((indices[j] + axis_dim)) % axis_dim; // TODO: Check out-of-range index + out[j] = data[index]; + } + } else { + for (int j = 0; j < inner_most_dim; j++) { + int index = static_cast(indices[j] + axis_dim) % axis_dim; // TODO: Check out-of-range index + out[j] = data[index * axis_step + j]; + } + } + } + }; + + int outer_dims = total(shape_indices, 0, shape_indices.size() - 1); + double nstripes = static_cast(outer_dims * inner_most_dim * (1 / 1024.0)); + parallel_for_(Range(0, outer_dims), fn, nstripes); + } + + template + inline void typeDispatch(const int type, Args&&... 
args) + { + switch (type) + { + case CV_8U: + forward_impl(std::forward(args)...); + break; + case CV_32S: + forward_impl(std::forward(args)...); + break; + case CV_32F: + forward_impl(std::forward(args)...); + break; + default: + CV_Error(cv::Error::BadDepth, "DNN/GatherElements: Unsupported type."); + }; + } + +private: + int axis; +}; + +Ptr GatherElementsLayer::create(const LayerParams& params) +{ + return makePtr(params); +} + +}} // namespace cv::dnn diff --git a/modules/dnn/src/onnx/onnx_importer.cpp b/modules/dnn/src/onnx/onnx_importer.cpp index d1bf278133..db78607dca 100644 --- a/modules/dnn/src/onnx/onnx_importer.cpp +++ b/modules/dnn/src/onnx/onnx_importer.cpp @@ -179,6 +179,7 @@ private: void parseCast (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); void parseConstantFill (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); void parseGather (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); + void parseGatherElements (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); void parseConcat (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); void parseResize (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); void parseUpsample (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); @@ -2553,6 +2554,53 @@ void ONNXImporter::parseGather(LayerParams& layerParams, const opencv_onnx::Node addLayer(layerParams, node_proto); } +void ONNXImporter::parseGatherElements(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) +{ + CV_CheckEQ(node_proto.input_size(), 2, "GatherElements: two inputs are required"); + + size_t num_const = 0; + for (size_t i = 0; i < node_proto.input_size(); ++i){ + if (constBlobs.find(node_proto.input(i)) != constBlobs.end()) + ++num_const; + } + + if (num_const == node_proto.input_size()) + { + std::vector inputs, output; + for (size_t i = 0; i < node_proto.input_size(); i++) { + Mat blob = 
getBlob(node_proto, i); + if (i == 1) { // indices, from int32/int64 to float32 for compatibility + blob.convertTo(blob, CV_32F); + } + inputs.push_back(blob); + } + runLayer(layerParams, inputs, output); + CV_Assert(output.size() == 1); + addConstant(node_proto.output(0), output[0]); + return; + } else if (num_const > 0) { + for (size_t i = 0; i < node_proto.input_size(); i++) { + if (constBlobs.find(node_proto.input(i)) != constBlobs.end()) { + Mat blob = getBlob(node_proto, i); + if (i == 1) { // indices, from int32/int64 to float32 for compatibility + blob.convertTo(blob, CV_32F); + } + + LayerParams constParams; + constParams.name = node_proto.input(i); + constParams.type = "Const"; + constParams.blobs.push_back(blob); + + opencv_onnx::NodeProto proto; + proto.add_output(constParams.name); + addLayer(constParams, proto); + } + } + } + + addLayer(layerParams, node_proto); +} + void ONNXImporter::parseConcat(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) { bool hasVariableInps = false; @@ -3901,6 +3949,7 @@ void ONNXImporter::buildDispatchMap_ONNX_AI(int opset_version) dispatch["Cast"] = &ONNXImporter::parseCast; dispatch["ConstantFill"] = dispatch["ConstantOfShape"] = &ONNXImporter::parseConstantFill; dispatch["Gather"] = &ONNXImporter::parseGather; + dispatch["GatherElements"] = &ONNXImporter::parseGatherElements; dispatch["Concat"] = &ONNXImporter::parseConcat; dispatch["Resize"] = &ONNXImporter::parseResize; dispatch["Upsample"] = &ONNXImporter::parseUpsample; diff --git a/modules/dnn/test/test_onnx_conformance_layer_filter__vulkan_denylist.inl.hpp b/modules/dnn/test/test_onnx_conformance_layer_filter__vulkan_denylist.inl.hpp index 6f8d7aef20..f87e16a42f 100644 --- a/modules/dnn/test/test_onnx_conformance_layer_filter__vulkan_denylist.inl.hpp +++ b/modules/dnn/test/test_onnx_conformance_layer_filter__vulkan_denylist.inl.hpp @@ -55,6 +55,9 @@ "test_flatten_negative_axis1", "test_flatten_negative_axis2", "test_flatten_negative_axis4", 
+"test_gather_elements_0", +"test_gather_elements_1", +"test_gather_elements_negative_indices", "test_logsoftmax_default_axis", "test_maxpool_2d_dilations", "test_maxpool_2d_same_lower", diff --git a/modules/dnn/test/test_onnx_conformance_layer_parser_denylist.inl.hpp b/modules/dnn/test/test_onnx_conformance_layer_parser_denylist.inl.hpp index 0556e63c37..34a77a9a2e 100644 --- a/modules/dnn/test/test_onnx_conformance_layer_parser_denylist.inl.hpp +++ b/modules/dnn/test/test_onnx_conformance_layer_parser_denylist.inl.hpp @@ -115,9 +115,6 @@ "test_gather_0", "test_gather_1", "test_gather_2d_indices", -"test_gather_elements_0", -"test_gather_elements_1", -"test_gather_elements_negative_indices", "test_gather_negative_indices", "test_gathernd_example_float32", "test_gathernd_example_int32", diff --git a/modules/dnn/test/test_onnx_importer.cpp b/modules/dnn/test/test_onnx_importer.cpp index 2865a04119..7fe9f8ccda 100644 --- a/modules/dnn/test/test_onnx_importer.cpp +++ b/modules/dnn/test/test_onnx_importer.cpp @@ -9,6 +9,7 @@ #include "test_precomp.hpp" #include "npy_blob.hpp" #include +#include namespace opencv_test { namespace { template @@ -2134,6 +2135,34 @@ TEST_P(Test_ONNX_nets, Alexnet) expectNoFallbacksFromIE(net); } +TEST_P(Test_ONNX_nets, RAFT) +{ + applyTestTag(CV_TEST_TAG_LONG, CV_TEST_TAG_DEBUG_VERYLONG, CV_TEST_TAG_MEMORY_2GB); + + std::string weight_path = _tf("models/optical_flow_estimation_raft_2023aug.onnx", false); + std::string img0_path = findDataFile(std::string("gpu/opticalflow/frame0.png")); + std::string img1_path = findDataFile(std::string("gpu/opticalflow/frame1.png")); + + Size target_size{480, 360}; + auto img0 = imread(img0_path); + auto img1 = imread(img1_path); + auto blob0 = blobFromImage(img0, 1.0, target_size, 0, true); + auto blob1 = blobFromImage(img1, 1.0, target_size, 0, true); + + auto net = readNet(weight_path); + net.setInput(blob0, "0"); + net.setInput(blob1, "1"); + std::vector outnames{"12007", "12006"}; + std::vector outs; + 
net.forward(outs, outnames); + + // output 12006 is not checked to save space in opencv_extra since its ref is > 1MB, + // and output 12006 is calculated from 12007 so checking 12007 is sufficient. + std::string ref_12700_path = _tf("data/output_optical_flow_estimation_raft_2023aug.npy"); + auto ref0 = blobFromNPY(ref_12700_path); + normAssert(ref0, outs[0], "", 1e-5, 1.8e-4); +} + TEST_P(Test_ONNX_nets, Squeezenet) { testONNXModels("squeezenet", pb);