From 09fa7587258a8f9085255a27e8a787a2a383d96d Mon Sep 17 00:00:00 2001
From: Dmitry Kurtaev
Date: Tue, 4 Sep 2018 10:55:54 +0300
Subject: [PATCH] Replace Darknet's Reorg to permute layer

---
 modules/dnn/src/layers/permute_layer.cpp     |  36 ++----
 modules/dnn/src/layers/reorg_layer.cpp       | 117 +++++++++---------
 .../dnn/src/layers/shuffle_channel_layer.cpp |  29 +++++
 modules/dnn/src/opencl/reorg.cl              |  70 -----------
 modules/dnn/test/test_layers.cpp             |  12 +-
 5 files changed, 108 insertions(+), 156 deletions(-)
 delete mode 100644 modules/dnn/src/opencl/reorg.cl

diff --git a/modules/dnn/src/layers/permute_layer.cpp b/modules/dnn/src/layers/permute_layer.cpp
index a8fe9dd861..65e4f049e3 100644
--- a/modules/dnn/src/layers/permute_layer.cpp
+++ b/modules/dnn/src/layers/permute_layer.cpp
@@ -57,23 +57,6 @@ namespace dnn
 class PermuteLayerImpl CV_FINAL : public PermuteLayer
 {
 public:
-    void checkCurrentOrder(int currentOrder)
-    {
-        if(currentOrder < 0 || currentOrder > 3)
-        {
-            CV_Error(
-                     Error::StsBadArg,
-                     "Orders of dimensions in Permute layer parameter"
-                     "must be in [0...3] interval");
-        }
-
-        if(std::find(_order.begin(), _order.end(), currentOrder) != _order.end())
-        {
-            CV_Error(Error::StsBadArg,
-                     "Permute layer parameter contains duplicated orders.");
-        }
-    }
-
     void checkNeedForPermutation()
     {
         _needsPermute = false;
@@ -96,19 +79,22 @@ public:
         }
 
         DictValue paramOrder = params.get("order");
-        if(paramOrder.size() > 4)
-        {
-            CV_Error(
-                     Error::StsBadArg,
-                     "Too many (> 4) orders of dimensions in Permute layer");
-        }
-
         _numAxes = paramOrder.size();
 
         for (size_t i = 0; i < _numAxes; i++)
         {
             int currentOrder = paramOrder.get<int>(i);
-            checkCurrentOrder(currentOrder);
+            if (currentOrder < 0 || currentOrder > _numAxes)
+            {
+                CV_Error(Error::StsBadArg,
+                         format("Orders of dimensions in Permute layer parameter"
+                                "must be in [0...%d]", _numAxes - 1));
+            }
+            if (std::find(_order.begin(), _order.end(), currentOrder) != _order.end())
+            {
+                CV_Error(Error::StsBadArg,
+                         "Permute layer parameter contains duplicated orders.");
+            }
             _order.push_back(currentOrder);
         }
 
diff --git a/modules/dnn/src/layers/reorg_layer.cpp b/modules/dnn/src/layers/reorg_layer.cpp
index c0defb36d2..6f0d55cd2f 100644
--- a/modules/dnn/src/layers/reorg_layer.cpp
+++ b/modules/dnn/src/layers/reorg_layer.cpp
@@ -85,6 +85,54 @@ public:
         return false;
     }
 
+    virtual void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr) CV_OVERRIDE
+    {
+        std::vector<Mat> inputs, outputs;
+        inputs_arr.getMatVector(inputs);
+        outputs_arr.getMatVector(outputs);
+
+        Mat inp = inputs[0];
+        Mat out = outputs[0];
+        int batchSize = inp.size[0];
+
+        LayerParams permParams;
+        if (batchSize == 1)
+        {
+            int order[] = {1, 3, 0, 2};
+            permParams.set("order", DictValue::arrayInt(&order[0], 4));
+
+            permuteInpShape.resize(4);
+            permuteInpShape[0] = inp.size[1] * inp.size[2] / (reorgStride * reorgStride);  // (channels*height)/(r*r)
+            permuteInpShape[1] = reorgStride;
+            permuteInpShape[2] = inp.size[3];  // width
+            permuteInpShape[3] = reorgStride;
+
+            permuteOutShape.resize(4);
+            for (int i = 0; i < 4; ++i)
+                permuteOutShape[i] = permuteInpShape[order[i]];
+        }
+        else
+        {
+            int order[] = {0, 2, 4, 1, 3};
+            permParams.set("order", DictValue::arrayInt(&order[0], 5));
+
+            permuteInpShape.resize(5);
+            permuteInpShape[0] = batchSize;
+            permuteInpShape[1] = inp.size[1] * inp.size[2] / (reorgStride * reorgStride);  // (channels*height)/(r*r)
+            permuteInpShape[2] = reorgStride;
+            permuteInpShape[3] = inp.size[3];  // width
+            permuteInpShape[4] = reorgStride;
+
+            permuteOutShape.resize(5);
+            for (int i = 0; i < 5; ++i)
+                permuteOutShape[i] = permuteInpShape[order[i]];
+        }
+        permute = PermuteLayer::create(permParams);
+
+        std::vector<Mat> permuteInputs(1, inp.reshape(1, permuteInpShape));
+        std::vector<Mat> permuteOutputs(1, out.reshape(1, permuteOutShape));
+        permute->finalize(permuteInputs, permuteOutputs);
+    }
+
     virtual bool supportBackend(int backendId) CV_OVERRIDE
     {
         return backendId == DNN_BACKEND_OPENCV ||
                backendId == DNN_BACKEND_INFERENCE_ENGINE;
@@ -96,39 +144,13 @@ public:
         std::vector<UMat> inputs;
         std::vector<UMat> outputs;
 
-        bool use_half = (inps.depth() == CV_16S);
         inps.getUMatVector(inputs);
         outs.getUMatVector(outputs);
 
-        String buildopt= format("-DDtype=%s ", use_half ? "half" : "float");
-
-        for (size_t i = 0; i < inputs.size(); i++)
-        {
-            ocl::Kernel kernel("reorg", ocl::dnn::reorg_oclsrc, buildopt);
-            if (kernel.empty())
-                return false;
-
-            UMat& srcBlob = inputs[i];
-            UMat& dstBlob = outputs[0];
-
-            int batch_size = srcBlob.size[0];
-            int channels = srcBlob.size[1];
-            int height = srcBlob.size[2];
-            int width = srcBlob.size[3];
-
-            size_t nthreads = batch_size * channels * height * width;
-
-            kernel.set(0, (int)nthreads);
-            kernel.set(1, ocl::KernelArg::PtrReadOnly(srcBlob));
-            kernel.set(2, (int)channels);
-            kernel.set(3, (int)height);
-            kernel.set(4, (int)width);
-            kernel.set(5, (int)reorgStride);
-            kernel.set(6, ocl::KernelArg::PtrWriteOnly(dstBlob));
-
-            if (!kernel.run(1, &nthreads, NULL, false))
-                return false;
-        }
+        inputs[0] = inputs[0].reshape(1, permuteInpShape.size(), &permuteInpShape[0]);
+        outputs[0] = outputs[0].reshape(1, permuteOutShape.size(), &permuteOutShape[0]);
+        permute->preferableTarget = preferableTarget;
+        permute->forward(inputs, outputs, internals);
         return true;
     }
 #endif
@@ -152,34 +174,9 @@ public:
         inputs_arr.getMatVector(inputs);
         outputs_arr.getMatVector(outputs);
 
-        for (size_t i = 0; i < inputs.size(); i++)
-        {
-            Mat srcBlob = inputs[i];
-            MatShape inputShape = shape(srcBlob), outShape = shape(outputs[i]);
-            float *dstData = outputs[0].ptr<float>();
-            const float *srcData = srcBlob.ptr<float>();
-
-            int channels = inputShape[1], height = inputShape[2], width = inputShape[3];
-            int sample_size = channels*height*width;
-            int batch_size = inputShape[0];
-
-            int out_c = channels / (reorgStride*reorgStride);
-            for (int b = 0; b < batch_size; ++b) {
-                for (int k = 0; k < channels; ++k) {
-                    for (int j = 0; j < height; ++j) {
-                        for (int i = 0; i < width; ++i) {
-                            int out_index = i + width*(j + height*k);
-                            int c2 = k % out_c;
-                            int offset = k / out_c;
-                            int w2 = i*reorgStride + offset % reorgStride;
-                            int h2 = j*reorgStride + offset / reorgStride;
-                            int in_index = w2 + width*reorgStride*(h2 + height*reorgStride*c2);
-                            dstData[b*sample_size + out_index] = srcData[b*sample_size + in_index];
-                        }
-                    }
-                }
-            }
-        }
+        inputs[0] = inputs[0].reshape(1, permuteInpShape);
+        outputs[0] = outputs[0].reshape(1, permuteOutShape);
+        permute->forward(inputs, outputs, internals_arr);
     }
 
     virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
@@ -208,6 +205,10 @@ public:
         }
         return flops;
     }
+
+private:
+    Ptr<PermuteLayer> permute;
+    std::vector<int> permuteInpShape, permuteOutShape;
 };
 
 Ptr<ReorgLayer> ReorgLayer::create(const LayerParams& params)
diff --git a/modules/dnn/src/layers/shuffle_channel_layer.cpp b/modules/dnn/src/layers/shuffle_channel_layer.cpp
index 67fb489f84..c4c04786b1 100644
--- a/modules/dnn/src/layers/shuffle_channel_layer.cpp
+++ b/modules/dnn/src/layers/shuffle_channel_layer.cpp
@@ -62,11 +62,40 @@ public:
         }
     }
 
+#ifdef HAVE_OPENCL
+    bool forward_ocl(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
+    {
+        std::vector<UMat> inputs;
+        std::vector<UMat> outputs;
+
+        inps.getUMatVector(inputs);
+        outs.getUMatVector(outputs);
+
+        if (inputs[0].u != outputs[0].u)
+        {
+            if (!permute.empty())
+            {
+                inputs[0] = inputs[0].reshape(1, permuteInpShape.size(), &permuteInpShape[0]);
+                outputs[0] = outputs[0].reshape(1, permuteOutShape.size(), &permuteOutShape[0]);
+                permute->preferableTarget = preferableTarget;
+                permute->forward(inputs, outputs, internals);
+            }
+            else
+                inputs[0].copyTo(outputs[0]);
+        }
+        return true;
+    }
+#endif
+
     void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
     {
         CV_TRACE_FUNCTION();
         CV_TRACE_ARG_VALUE(name, "name", name.c_str());
 
+        CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) &&
+                   OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
+                   forward_ocl(inputs_arr, outputs_arr, internals_arr))
+
         if (inputs_arr.depth() == CV_16S)
         {
             forward_fallback(inputs_arr, outputs_arr, internals_arr);
diff --git a/modules/dnn/src/opencl/reorg.cl b/modules/dnn/src/opencl/reorg.cl
deleted file mode 100644
index 7802239ad7..0000000000
--- a/modules/dnn/src/opencl/reorg.cl
+++ /dev/null
@@ -1,70 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (c) 2016-2017 Fabian David Tschopp, all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#if defined(cl_khr_fp16)
-#pragma OPENCL EXTENSION cl_khr_fp16 : enable
-#endif
-
-__kernel void reorg(const int count,
-                    __global const Dtype* src,
-                    const int channels,
-                    const int height,
-                    const int width,
-                    const int reorgStride,
-                    __global Dtype* dst)
-{
-    for (int index = get_global_id(0); index < count; index += get_global_size(0))
-    {
-        int sample_size = channels*height*width;
-        int b = index/sample_size;
-        int new_index = index%sample_size;
-        int k = new_index / (height * width);
-        int j = (new_index - (k * height * width)) / width;
-        int i = new_index % width;
-        int out_c = channels / (reorgStride*reorgStride);
-        int c2 = k % out_c;
-        int offset = k / out_c;
-        int w2 = i*reorgStride + offset % reorgStride;
-        int h2 = j*reorgStride + offset / reorgStride;
-        int in_index = w2 + width*reorgStride*(h2 + height*reorgStride*c2);
-        dst[index] = src[b*sample_size + in_index];
-    }
-}
diff --git a/modules/dnn/test/test_layers.cpp b/modules/dnn/test/test_layers.cpp
index 14c6f55f40..be0e37e294 100644
--- a/modules/dnn/test/test_layers.cpp
+++ b/modules/dnn/test/test_layers.cpp
@@ -1288,13 +1288,15 @@ TEST(Layer_Test_PoolingIndices, Accuracy)
     normAssert(indices, outputs[1].reshape(1, 5));
 }
 
-typedef testing::TestWithParam<tuple<Vec4i, int> > Layer_Test_ShuffleChannel;
+typedef testing::TestWithParam<tuple<Vec4i, int, tuple<Backend, Target> > > Layer_Test_ShuffleChannel;
 TEST_P(Layer_Test_ShuffleChannel, Accuracy)
 {
     Vec4i inpShapeVec = get<0>(GetParam());
     int group = get<1>(GetParam());
     ASSERT_EQ(inpShapeVec[1] % group, 0);
     const int groupSize = inpShapeVec[1] / group;
+    int backendId = get<0>(get<2>(GetParam()));
+    int targetId = get<1>(get<2>(GetParam()));
 
     Net net;
     LayerParams lp;
@@ -1308,21 +1310,25 @@ TEST_P(Layer_Test_ShuffleChannel, Accuracy)
     randu(inp, 0, 255);
 
     net.setInput(inp);
+    net.setPreferableBackend(backendId);
+    net.setPreferableTarget(targetId);
     Mat out = net.forward();
 
+    double l1 = (targetId == DNN_TARGET_OPENCL_FP16) ? 5e-2 : 1e-5;
+    double lInf = (targetId == DNN_TARGET_OPENCL_FP16) ? 7e-2 : 1e-4;
     for (int n = 0; n < inpShapeVec[0]; ++n)
    {
         for (int c = 0; c < inpShapeVec[1]; ++c)
        {
             Mat outChannel = getPlane(out, n, c);
             Mat inpChannel = getPlane(inp, n, groupSize * (c % group) + c / group);
-            normAssert(outChannel, inpChannel);
+            normAssert(outChannel, inpChannel, "", l1, lInf);
         }
     }
 }
 
 INSTANTIATE_TEST_CASE_P(/**/, Layer_Test_ShuffleChannel, Combine(
 /*input shape*/  Values(Vec4i(1, 6, 5, 7), Vec4i(3, 12, 1, 4)),
-/*group*/        Values(1, 2, 3, 6)
+/*group*/        Values(1, 2, 3, 6), dnnBackendsAndTargets(/*with IE*/ false)
 ));
 
 // Check if relu is not fused to convolution if we requested it's output
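For reference, the change above relies on Darknet's reorg (stride r) being a pure permutation of the input elements: for a single image, reshaping the CHW blob to {C*H/(r*r), r, W, r} and permuting the axes with order {1, 3, 0, 2} reproduces exactly the mapping of the reference loop that this commit deletes from ReorgLayerImpl::forward. The standalone C++ sketch below is not part of the patch; the 64x26x26 blob and stride 2 are arbitrary illustration values (a YOLOv2-sized reorg input). It runs both formulations over the same buffer and compares the results:

// Standalone check of the reorg == reshape + permute equivalence used by this commit.
#include <cstdio>
#include <vector>

int main()
{
    const int C = 64, H = 26, W = 26, r = 2;   // illustrative reorg input, stride 2
    const int total = C * H * W;

    std::vector<float> src(total), dstLoop(total), dstPermute(total);
    for (int i = 0; i < total; ++i)
        src[i] = (float)i;

    // 1. Mapping copied from the CPU reference loop removed by this commit (batch = 1).
    const int out_c = C / (r * r);
    for (int k = 0; k < C; ++k)
        for (int j = 0; j < H; ++j)
            for (int i = 0; i < W; ++i)
            {
                int out_index = i + W * (j + H * k);
                int c2 = k % out_c;
                int offset = k / out_c;
                int w2 = i * r + offset % r;
                int h2 = j * r + offset / r;
                int in_index = w2 + W * r * (h2 + H * r * c2);
                dstLoop[out_index] = src[in_index];
            }

    // 2. The same data movement written as a reshape to {C*H/(r*r), r, W, r}
    //    followed by a transpose with order {1, 3, 0, 2}: out(s, t, a, x) = in(a, s, x, t).
    const int A = C * H / (r * r);
    int dst = 0;
    for (int s = 0; s < r; ++s)
        for (int t = 0; t < r; ++t)
            for (int a = 0; a < A; ++a)
                for (int x = 0; x < W; ++x)
                    dstPermute[dst++] = src[t + r * (x + W * (s + r * a))];

    int mismatches = 0;
    for (int i = 0; i < total; ++i)
        mismatches += (dstLoop[i] != dstPermute[i]);
    std::printf("%d mismatches out of %d elements\n", mismatches, total);
    return mismatches == 0 ? 0 : 1;
}

The same reasoning carries over to the batched case, where finalize() keeps the batch dimension in front and uses the 5-axis order {0, 2, 4, 1, 3}.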