diff --git a/modules/dnn/src/layers/normalize_bbox_layer.cpp b/modules/dnn/src/layers/normalize_bbox_layer.cpp
index 9d2940d6da..43445890f0 100644
--- a/modules/dnn/src/layers/normalize_bbox_layer.cpp
+++ b/modules/dnn/src/layers/normalize_bbox_layer.cpp
@@ -69,11 +69,74 @@ public:
         return true;
     }
 
+#ifdef HAVE_OPENCL
+    bool forward_ocl(InputArrayOfArrays inputs_, OutputArrayOfArrays outputs_, OutputArrayOfArrays internals_)
+    {
+        std::vector<UMat> inputs;
+        std::vector<UMat> outputs;
+        std::vector<UMat> internals;
+
+        inputs_.getUMatVector(inputs);
+        outputs_.getUMatVector(outputs);
+        internals_.getUMatVector(internals);
+
+        CV_Assert(inputs.size() == 1 && outputs.size() == 1);
+        CV_Assert(inputs[0].total() == outputs[0].total());
+
+        const UMat& inp0 = inputs[0];
+        UMat& buffer = internals[0];
+        size_t num = inp0.size[0];
+        size_t channels = inp0.size[1];
+        size_t channelSize = inp0.total() / (num * channels);
+        for (size_t i = 0; i < num; ++i)
+        {
+            MatShape s = shape(channels, channelSize);
+            UMat src = inputs[i].reshape(1, s.size(), &s[0]);
+            UMat dst = outputs[i].reshape(1, s.size(), &s[0]);
+
+            UMat abs_mat;
+            absdiff(src, cv::Scalar::all(0), abs_mat);
+            pow(abs_mat, pnorm, buffer);
+
+            if (acrossSpatial)
+            {
+                // add eps to avoid overflow
+                float absSum = sum(buffer)[0] + epsilon;
+                float norm = pow(absSum, 1.0f / pnorm);
+                multiply(src, 1.0f / norm, dst);
+            }
+
+            if (!blobs.empty())
+            {
+                // scale the output
+                Mat scale = blobs[0];
+                if (scale.total() == 1)
+                {
+                    // _scale: 1 x 1
+                    multiply(dst, scale.at<float>(0, 0), dst);
+                }
+                else
+                {
+                    // _scale: _channels x 1
+                    CV_Assert(scale.total() == channels);
+                    repeat(scale, 1, dst.cols, buffer);
+                    multiply(dst, buffer, dst);
+                }
+            }
+        }
+        return true;
+    }
+#endif
+
     void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
     {
         CV_TRACE_FUNCTION();
         CV_TRACE_ARG_VALUE(name, "name", name.c_str());
 
+        CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) &&
+                   OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
+                   forward_ocl(inputs_arr, outputs_arr, internals_arr))
+
         Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
     }
 
diff --git a/modules/dnn/test/test_torch_importer.cpp b/modules/dnn/test/test_torch_importer.cpp
index cbac6c5525..8ae0ca2a48 100644
--- a/modules/dnn/test/test_torch_importer.cpp
+++ b/modules/dnn/test/test_torch_importer.cpp
@@ -225,6 +225,11 @@ TEST(Torch_Importer, net_normalize)
 {
     runTorchNet("net_normalize", DNN_TARGET_CPU, "", false, true);
 }
 
+OCL_TEST(Torch_Importer, net_normalize)
+{
+    runTorchNet("net_normalize", DNN_TARGET_OPENCL, "", false, true);
+}
+
 TEST(Torch_Importer, net_padding)
 {
     runTorchNet("net_padding", DNN_TARGET_CPU, "", false, true);
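
Reviewer note (illustration only, not part of the patch): when acrossSpatial is true, the OpenCL branch above divides every element by (sum(|x|^pnorm) + epsilon)^(1/pnorm), the same L-p normalization as the CPU path. Below is a minimal host-side sketch of that computation using the ordinary cv::Mat API; the helper name normalizeAcrossSpatial is invented here for illustration and does not exist in OpenCV.

    #include <opencv2/core.hpp>
    #include <cmath>

    // Illustrative helper (hypothetical, not part of OpenCV): across-spatial
    // L-p normalization of a single (channels x spatialSize) matrix, mirroring
    // what forward_ocl computes when acrossSpatial == true and no scale blob
    // is present.
    static void normalizeAcrossSpatial(const cv::Mat& src, cv::Mat& dst,
                                       float pnorm, float epsilon)
    {
        cv::Mat absMat, powMat;
        cv::absdiff(src, cv::Scalar::all(0), absMat);        // |x|
        cv::pow(absMat, pnorm, powMat);                       // |x|^p
        float absSum = (float)cv::sum(powMat)[0] + epsilon;   // sum(|x|^p) + eps
        float norm = std::pow(absSum, 1.0f / pnorm);          // (sum + eps)^(1/p)
        cv::multiply(src, 1.0f / norm, dst);                  // x / norm
    }

The optional scale blob in the patch is applied afterwards, either as a single scalar or broadcast per channel via repeat().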