From 2124361ff7c61f8e8693d7ac249552aacb0ae16f Mon Sep 17 00:00:00 2001 From: Li Peng Date: Tue, 16 Jan 2018 21:54:32 +0800 Subject: [PATCH] ocl support for Deconvolution layer Signed-off-by: Li Peng --- modules/dnn/src/layers/convolution_layer.cpp | 99 ++++++++++++++ modules/dnn/src/opencl/col2im.cl | 129 +++++++++++-------- modules/dnn/test/test_layers.cpp | 5 + modules/dnn/test/test_tf_importer.cpp | 5 + modules/dnn/test/test_torch_importer.cpp | 5 + 5 files changed, 187 insertions(+), 56 deletions(-) diff --git a/modules/dnn/src/layers/convolution_layer.cpp b/modules/dnn/src/layers/convolution_layer.cpp index 7abde1397a..e2ae78cf83 100644 --- a/modules/dnn/src/layers/convolution_layer.cpp +++ b/modules/dnn/src/layers/convolution_layer.cpp @@ -46,6 +46,7 @@ #include "opencv2/core/hal/hal.hpp" #include "opencv2/core/hal/intrin.hpp" #include +#include "opencl_kernels_dnn.hpp" #ifdef HAVE_OPENCL using namespace cv::dnn::ocl4dnn; @@ -1051,6 +1052,8 @@ class DeConvolutionLayerImpl : public BaseConvolutionLayerImpl { public: Mat weightsMat, biasesMat; + UMat umat_weights; + UMat umat_biases; MatShape computeColRowShape(const MatShape &inpShape, const MatShape &outShape) const { @@ -1341,11 +1344,107 @@ public: } }; +#ifdef HAVE_OPENCL + bool forward_ocl(InputArrayOfArrays inputs_, OutputArrayOfArrays outputs_, OutputArrayOfArrays internals_) + { + std::vector inputs; + std::vector outputs; + std::vector internals; + + inputs_.getUMatVector(inputs); + outputs_.getUMatVector(outputs); + internals_.getUMatVector(internals); + + int outCn = numOutput; + int inpCn = inputs[0].size[1]; + + if (is1x1()) + return false; + + if (umat_weights.empty()) + { + transpose(blobs[0].reshape(1, inpCn), umat_weights); + umat_biases = hasBias() ? blobs[1].reshape(1, outCn).getUMat(ACCESS_READ) : + UMat::zeros(outCn, 1, CV_32F); + } + + String buildopt = format("-DT=%s ", ocl::typeToStr(inputs[0].type())); + buildopt += format("-DPAD_H=%d -DPAD_W=%d -DKERNEL_H=%d -DKERNEL_W=%d -DSTRIDE_H=%d -DSTRIDE_W=%d ", + pad.height, pad.width, kernel.height, kernel.width, stride.height, stride.width); + + for (size_t ii = 0; ii < outputs.size(); ii++) + { + int ngroups = outCn / blobs[0].size[1]; + int inpGroupCn = inpCn / ngroups; + int outGroupCn = blobs[0].size[1]; + const UMat& inp = inputs[ii]; + UMat& out = outputs[ii]; + int numImg = inp.size[0]; + int inpH = inp.size[2], inpW = inp.size[3]; + int outH = out.size[2], outW = out.size[3]; + + MatShape inpshape = shape(numImg*inpCn, inpH*inpW); + MatShape outshape = shape(numImg*outCn, outH*outW); + UMat convBlob = inputs[ii].reshape(1, inpshape.size(), &inpshape[0]); + UMat decnBlob = out.reshape(1, outshape.size(), &outshape[0]); + int rows = internals[0].rows / ngroups; + + for (int n = 0; n < numImg; n++) + { + for (int g = 0; g < ngroups; g++) + { + UMat colMat = internals[0].rowRange(_Range(g * rows, rows)); + UMat convMat = convBlob.rowRange(_Range((g + n * ngroups) * inpGroupCn, inpGroupCn)); + UMat wghtMat = umat_weights.colRange(_Range(g * inpGroupCn, inpGroupCn)); + gemm(wghtMat, convMat, 1, noArray(), 0, colMat, 0); + } + + for (int g = 0; g < ngroups; g++) + { + int total = outGroupCn * decnBlob.cols; + int index = 0; + int height_col = (outH + 2 * pad.height - kernel.height) / stride.height + 1; + int width_col = (outW + 2 * pad.width - kernel.width) / stride.width + 1; + int coeff_h = (1 - stride.height * kernel.width * height_col) * width_col; + int coeff_w = (1 - stride.width * height_col * width_col); + + ocl::Kernel k("col2im", ocl::dnn::col2im_oclsrc, buildopt); + k.set(index++, total); + k.set(index++, ocl::KernelArg::PtrReadOnly(internals[0])); + k.set(index++, (int)(g * rows * internals[0].cols)); + k.set(index++, outGroupCn); + k.set(index++, outH); + k.set(index++, outW); + k.set(index++, height_col); + k.set(index++, width_col); + k.set(index++, coeff_h); + k.set(index++, coeff_w); + k.set(index++, ocl::KernelArg::PtrReadOnly(umat_biases)); + k.set(index++, (int)(g * outGroupCn * umat_biases.cols)); + k.set(index++, ocl::KernelArg::PtrWriteOnly(decnBlob)); + k.set(index++, (int)((g + n * ngroups) * outGroupCn * decnBlob.cols)); + + size_t global[] = { (size_t)total }; + bool ret = k.run(1, global, NULL, false); + if (!ret) + return false; + } + } + } + + return true; + } +#endif + void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) { CV_TRACE_FUNCTION(); CV_TRACE_ARG_VALUE(name, "name", name.c_str()); + CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) && + OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()), + forward_ocl(inputs_arr, outputs_arr, internals_arr)) + Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr); } diff --git a/modules/dnn/src/opencl/col2im.cl b/modules/dnn/src/opencl/col2im.cl index 30d4664df8..f2ca95154c 100644 --- a/modules/dnn/src/opencl/col2im.cl +++ b/modules/dnn/src/opencl/col2im.cl @@ -1,62 +1,79 @@ -/************************************************************************************* - * Copyright (c) 2015, Advanced Micro Devices, Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without modification, - * are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation and/or - * other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, - * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, - * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - **************************************************************************************/ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2017, Intel Corporation, all rights reserved. +// Copyright (c) 2016-2017 Fabian David Tschopp, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ -__kernel void col2im(const int n, __global const T* data_col, const int col_offset, - const int height, const int width, const int channels, - const int patch_h, const int patch_w, - const int pad_h, const int pad_w, - const int stride_h, const int stride_w, - const int height_col, const int width_col, - __global T* data_im, const int img_offset) +__kernel void col2im(const int n, __global const T* data_col, + const int data_col_offset, + const int channels, + const int height, const int width, + const int height_col, const int width_col, + const int coeff_h, const int coeff_w, + __global const T* biasvec, + const int bias_offset, + __global T* data_im, + const int data_im_offset) { - data_col = data_col + col_offset; - data_im = data_im + img_offset; - int index = get_global_id(0); - if(index < n) { - T val = 0; - int w = index % width + pad_w; - int h = (index / width) % height + pad_h; - int c = index / (width * height); + data_col = data_col + data_col_offset; + biasvec = biasvec + bias_offset; + data_im = data_im + data_im_offset; + int index = get_global_id(0); - // compute the start and end of the output - int w_col_start = (w < patch_w) ? 0 : (w - patch_w) / stride_w + 1; - int w_col_end = min(w / stride_w + 1, width_col); - int h_col_start = (h < patch_h) ? 0 : (h - patch_h) / stride_h + 1; - int h_col_end = min(h / stride_h + 1, height_col); + if(index < n) + { + T val = 0.f; + int w = index % width + PAD_W; + int h = (index / width) % height + PAD_H; + int c = index / (width * height); + int h_col_start = (h < KERNEL_H) ? 0 : (h - KERNEL_H) / STRIDE_H + 1; + int h_col_end = min(h / STRIDE_H + 1, height_col); + int plane_size_col = height_col * width_col; + int offset = (c * KERNEL_H * KERNEL_W + h * KERNEL_W + w) * plane_size_col; - // equivalent implementation - int offset = - (c * patch_h * patch_w + h * patch_w + w) * height_col * width_col; - int coeff_h_col = (1 - stride_h * patch_w * height_col) * width_col; - int coeff_w_col = (1 - stride_w * height_col * width_col); - for (int h_col = h_col_start; h_col < h_col_end; ++h_col) { - for (int w_col = w_col_start; w_col < w_col_end; ++w_col) { - val += data_col[offset + h_col * coeff_h_col + w_col * coeff_w_col]; - } + int w_col_start = (w < KERNEL_W) ? 0 : (w - KERNEL_W) / STRIDE_W + 1; + int w_col_end = min(w / STRIDE_W + 1, width_col); + + for (int h_col = h_col_start; h_col < h_col_end; ++h_col) + for (int w_col = w_col_start; w_col < w_col_end; ++w_col) + val += data_col[offset + h_col * coeff_h + w_col * coeff_w]; + + data_im[index] = val + biasvec[c]; } - data_im[index] = val; - } } diff --git a/modules/dnn/test/test_layers.cpp b/modules/dnn/test/test_layers.cpp index d88f01d380..0f90b42bfd 100644 --- a/modules/dnn/test/test_layers.cpp +++ b/modules/dnn/test/test_layers.cpp @@ -167,6 +167,11 @@ TEST(Layer_Test_DeConvolution, Accuracy) testLayerUsingCaffeModels("layer_deconvolution", DNN_TARGET_CPU, true, false); } +OCL_TEST(Layer_Test_DeConvolution, Accuracy) +{ + testLayerUsingCaffeModels("layer_deconvolution", DNN_TARGET_OPENCL, true, false); +} + TEST(Layer_Test_InnerProduct, Accuracy) { testLayerUsingCaffeModels("layer_inner_product", DNN_TARGET_CPU, true); diff --git a/modules/dnn/test/test_tf_importer.cpp b/modules/dnn/test/test_tf_importer.cpp index bde5760bfc..8cf471df53 100644 --- a/modules/dnn/test/test_tf_importer.cpp +++ b/modules/dnn/test/test_tf_importer.cpp @@ -171,6 +171,11 @@ TEST(Test_TensorFlow, deconvolution) runTensorFlowNet("deconvolution"); } +OCL_TEST(Test_TensorFlow, deconvolution) +{ + runTensorFlowNet("deconvolution", DNN_TARGET_OPENCL); +} + TEST(Test_TensorFlow, matmul) { runTensorFlowNet("matmul"); diff --git a/modules/dnn/test/test_torch_importer.cpp b/modules/dnn/test/test_torch_importer.cpp index f7471dd144..60bc3fedbe 100644 --- a/modules/dnn/test/test_torch_importer.cpp +++ b/modules/dnn/test/test_torch_importer.cpp @@ -165,6 +165,11 @@ TEST(Torch_Importer, run_deconv) runTorchNet("net_deconv"); } +OCL_TEST(Torch_Importer, run_deconv) +{ + runTorchNet("net_deconv", DNN_TARGET_OPENCL); +} + TEST(Torch_Importer, run_batch_norm) { runTorchNet("net_batch_norm", DNN_TARGET_CPU, "", false, true);