diff --git a/modules/dnn/perf/perf_layer.cpp b/modules/dnn/perf/perf_layer.cpp index 66b5ad62c2..04e7d04153 100644 --- a/modules/dnn/perf/perf_layer.cpp +++ b/modules/dnn/perf/perf_layer.cpp @@ -258,76 +258,71 @@ PERF_TEST_P_(Layer_Slice, FastNeuralStyle_eccv16) test_slice<4>(inputShape, begin, end); } -struct Layer_Scatter : public TestBaseWithParam > -{ - void test_layer(const std::vector& shape, const String reduction = "none", int axis = 0) +using Layer_Scatter = TestBaseWithParam, std::string, int, tuple>>; +PERF_TEST_P_(Layer_Scatter, scatter) { + std::vector shape = get<0>(GetParam()); + std::string reduction = get<1>(GetParam()); + int axis = get<2>(GetParam()); + int backend_id = get<0>(get<3>(GetParam())); + int target_id = get<1>(get<3>(GetParam())); + + Mat data(shape, CV_32FC1); + Mat indices(shape, CV_32FC1); + Mat updates(shape, CV_32FC1); + + randn(data, 0.f, 1.f); + randu(indices, 0, shape[axis]); + randn(updates, 0.f, 1.f); + + indices.convertTo(indices, CV_32SC1, 1, -1); + + Net net; + LayerParams lp; + lp.type = "Scatter"; + lp.name = "testLayer"; + lp.set("reduction", reduction); + lp.set("axis", axis); + + int id = net.addLayerToPrev(lp.name, lp.type, lp); + net.connect(0, 0, id, 0); + net.connect(0, 1, id, 1); + net.connect(0, 2, id, 2); + + // warmup { - int backendId = get<0>(GetParam()); - int targetId = get<1>(GetParam()); + std::vector input_names{"data", "indices", "updates"}; + net.setInputsNames(input_names); + net.setInput(data, input_names[0]); + net.setInput(indices, input_names[1]); + net.setInput(updates, input_names[2]); - Mat data(shape, CV_32FC1); - Mat indices(shape, CV_32FC1); - Mat updates(shape, CV_32FC1); - - Scalar mean = 0.f; - Scalar std = 1.f; - randn(data, mean, std); - randu(indices, 0, shape[axis]); - randn(updates, mean, std); - - indices.convertTo(indices, CV_32SC1, 1, -1); - - Net net; - LayerParams lp; - lp.type = "Scatter"; - lp.name = "testLayer"; - lp.set("reduction", reduction); - lp.set("axis", axis); - - int id = net.addLayerToPrev(lp.name, lp.type, lp); - net.connect(0, 0, id, 0); - net.connect(0, 1, id, 1); - net.connect(0, 2, id, 2); - - // warmup - { - std::vector inpNames(3); - inpNames[0] = "data"; - inpNames[1] = "indices"; - inpNames[2] = "updates"; - net.setInputsNames(inpNames); - net.setInput(data, inpNames[0]); - net.setInput(indices, inpNames[1]); - net.setInput(updates, inpNames[2]); - - net.setPreferableBackend(backendId); - net.setPreferableTarget(targetId); - Mat out = net.forward(); - } - - TEST_CYCLE() - { - Mat res = net.forward(); - } - - SANITY_CHECK_NOTHING(); + net.setPreferableBackend(backend_id); + net.setPreferableTarget(target_id); + Mat out = net.forward(); } - int N = 8; - int C = 256; - int H = 128; - int W = 100; -}; + // perf + TEST_CYCLE() + { + Mat res = net.forward(); + } -PERF_TEST_P_(Layer_Scatter, DISABLED_Scatter) -{ - test_layer({N, C, H, W}); + SANITY_CHECK_NOTHING(); } -PERF_TEST_P_(Layer_Scatter, DISABLED_Scatter_add) -{ - test_layer({N, C, H, W}, "add"); -} +INSTANTIATE_TEST_CASE_P(/**/, Layer_Scatter, Combine( + Values(std::vector{2, 128, 64, 50}), + Values(std::string("none"), std::string("add")), + Values(0), // use Values(0, 1, 2, 3) for more details + dnnBackendsAndTargets(/* withInferenceEngine= */ false, + /* withHalide= */ false, + /* withCpuOCV= */ true, + /* withVkCom= */ false, + /* withCUDA= */ false, + /* withNgraph= */ false, + /* withWebnn= */ false, + /* withCann= */ false) // only test on CPU +)); struct Layer_ScatterND : public TestBaseWithParam > { @@ -800,7 +795,7 @@ INSTANTIATE_TEST_CASE_P(/**/, Layer_NaryEltwise, testing::Values(std::make_tuple #ifdef HAVE_CUDA INSTANTIATE_TEST_CASE_P(CUDA, Layer_NaryEltwise, testing::Values(std::make_tuple(DNN_BACKEND_CUDA, DNN_TARGET_CUDA))); #endif -INSTANTIATE_TEST_CASE_P(/**/, Layer_Scatter, testing::Values(std::make_tuple(DNN_BACKEND_OPENCV, DNN_TARGET_CPU))); +// INSTANTIATE_TEST_CASE_P(/**/, Layer_Scatter, testing::Values(std::make_tuple(DNN_BACKEND_OPENCV, DNN_TARGET_CPU))); INSTANTIATE_TEST_CASE_P(/**/, Layer_ScatterND, testing::Values(std::make_tuple(DNN_BACKEND_OPENCV, DNN_TARGET_CPU))); INSTANTIATE_TEST_CASE_P(/**/, Layer_LayerNorm, testing::Values(std::make_tuple(DNN_BACKEND_OPENCV, DNN_TARGET_CPU))); INSTANTIATE_TEST_CASE_P(/**/, Layer_LayerNormExpanded, testing::Values(std::make_tuple(DNN_BACKEND_OPENCV, DNN_TARGET_CPU))); diff --git a/modules/dnn/src/layers/scatter_layer.cpp b/modules/dnn/src/layers/scatter_layer.cpp index 084eecb03c..3e0ee2affb 100644 --- a/modules/dnn/src/layers/scatter_layer.cpp +++ b/modules/dnn/src/layers/scatter_layer.cpp @@ -81,59 +81,63 @@ public: } template - void forward_impl(const Functor& rd, const Mat& data, const Mat& indices, const Mat& updates, Mat& out) + void forward_impl(const Functor& reduce_operation, const Mat& input_mat, const Mat& indices_mat, const Mat& updates_mat, Mat& output_mat) { - data.copyTo(out); + input_mat.copyTo(output_mat); - const int ndims = data.dims; - const int* shape = data.size.p; - const size_t* step = data.step.p; + const int ndims = input_mat.dims; + const auto &input_mat_shape = shape(input_mat); + std::vector input_mat_step(ndims); - const int* ind_shape = indices.size.p; - const size_t* ind_step = indices.step.p; + const auto &indices_mat_shape = shape(indices_mat); + // const auto &indices_mat_step = indices_mat.step; + std::vector indices_mat_step(ndims); - size_t inp_offset = 0; - size_t ind_offset = 0; - const T* p_index = indices.ptr(); - const T* p_update = updates.ptr(); - T* p_out = out.ptr(); - - size_t total = indices.total(); - - int j, offset_at_idx, index; - size_t t, idx; - for (size_t i = 0; i < total; i++) - { - t = i; - inp_offset = 0; - ind_offset = 0; - int offset_at_axis = 0; - for (j = ndims - 1; j >= 0; j--) - { - idx = t / ind_shape[j]; - offset_at_idx = (int)(t - idx * ind_shape[j]); - ind_offset += offset_at_idx * ind_step[j]; - inp_offset += offset_at_idx * step[j]; - t = idx; - if (j == axis) - { - offset_at_axis = offset_at_idx * step[j]; - } - } - ind_offset /= sizeof(T); - - // get index and overwrite current indices - const T* tmp_p_index = p_index + ind_offset; - index = (int)(*tmp_p_index); - CV_Assert(index < shape[axis] && index > -shape[axis]); - - inp_offset = inp_offset - offset_at_axis + ((index + shape[axis]) % shape[axis]) * step[axis]; - inp_offset /= sizeof(T); - - const T* tmp_p_update = p_update + ind_offset; - T* tmp_p_out = p_out + inp_offset; - *tmp_p_out = rd(*tmp_p_out, *tmp_p_update); + for (int i = 0; i < ndims; i++) { + input_mat_step[i] = static_cast(input_mat.step.p[i] / sizeof(T)); + indices_mat_step[i] = static_cast(indices_mat.step.p[i] / sizeof(T)); } + + const T* indices = indices_mat.ptr(); + const T* updates = updates_mat.ptr(); + T* output = output_mat.ptr(); + + auto fn = [&](const Range &r) { + size_t input_offset = 0, indices_offset = 0; + + int indices_index, index; + size_t axis_offset, tmp_index, j_index; + for (int i = r.start; i < r.end; i++) { + input_offset = 0; + indices_offset = 0; + indices_index = i; + axis_offset = 0; + for (int j = ndims - 1; j >= 0; j--) { + tmp_index = indices_index / indices_mat_shape[j]; + j_index = (size_t)(indices_index - tmp_index * indices_mat_shape[j]); + input_offset += j_index * input_mat_step[j]; + indices_offset += j_index * indices_mat_step[j]; + indices_index = tmp_index; + if (j == axis) { + axis_offset = j_index * input_mat_step[j]; + } + } + + // get index and overwrite current indices + index = static_cast(*(indices + indices_offset)); + index = (index + input_mat_shape[axis]) % input_mat_shape[axis]; + CV_Assert(index < input_mat_shape[axis] && index >= 0); + input_offset = input_offset - axis_offset + index * input_mat_step[axis]; + + const T* update = updates + indices_offset; + T* y = output + input_offset; + *y = reduce_operation(*y, *update); + } + }; + + size_t total = indices_mat.total(); + double nstripes = (size_t)total * ndims * (1 / 1024.0); + parallel_for_(Range(0, total), fn, nstripes); } template