From 910d7dab1f056ccd6531c8615e4f50e0c1c1c76d Mon Sep 17 00:00:00 2001 From: Li Peng Date: Tue, 5 Dec 2017 23:17:34 +0800 Subject: [PATCH 1/3] prior box layer ocl implementation Signed-off-by: Li Peng --- modules/dnn/src/layers/prior_box_layer.cpp | 106 +++++++++++++++ modules/dnn/src/opencl/prior_box.cl | 148 +++++++++++++++++++++ 2 files changed, 254 insertions(+) create mode 100644 modules/dnn/src/opencl/prior_box.cl diff --git a/modules/dnn/src/layers/prior_box_layer.cpp b/modules/dnn/src/layers/prior_box_layer.cpp index 5fc852a82e..575ac5eb9c 100644 --- a/modules/dnn/src/layers/prior_box_layer.cpp +++ b/modules/dnn/src/layers/prior_box_layer.cpp @@ -45,6 +45,7 @@ #include #include #include +#include "opencl_kernels_dnn.hpp" namespace cv { @@ -270,11 +271,108 @@ public: return false; } +#ifdef HAVE_OPENCL + bool forward_ocl(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals) + { + std::vector inputs; + std::vector outputs; + + inps.getUMatVector(inputs); + outs.getUMatVector(outputs); + + int _layerWidth = inputs[0].size[3]; + int _layerHeight = inputs[0].size[2]; + + int _imageWidth = inputs[1].size[3]; + int _imageHeight = inputs[1].size[2]; + + float stepX, stepY; + if (_stepX == 0 || _stepY == 0) + { + stepX = static_cast(_imageWidth) / _layerWidth; + stepY = static_cast(_imageHeight) / _layerHeight; + } else { + stepX = _stepX; + stepY = _stepY; + } + + if (umat_offsetsX.empty()) + { + Mat offsetsX(1, _offsetsX.size(), CV_32FC1, &_offsetsX[0]); + Mat offsetsY(1, _offsetsX.size(), CV_32FC1, &_offsetsY[0]); + Mat aspectRatios(1, _aspectRatios.size(), CV_32FC1, &_aspectRatios[0]); + Mat variance(1, _variance.size(), CV_32FC1, &_variance[0]); + + offsetsX.copyTo(umat_offsetsX); + offsetsY.copyTo(umat_offsetsY); + aspectRatios.copyTo(umat_aspectRatios); + variance.copyTo(umat_variance); + + int real_numPriors = _numPriors / pow(2, _offsetsX.size() - 1); + umat_scales = UMat(1, &real_numPriors, CV_32F, 1.0f); + } + + size_t nthreads 
= _layerHeight * _layerWidth; + + ocl::Kernel kernel("prior_box", ocl::dnn::prior_box_oclsrc); + kernel.set(0, (int)nthreads); + kernel.set(1, (float)stepX); + kernel.set(2, (float)stepY); + kernel.set(3, (float)_minSize); + kernel.set(4, (float)_maxSize); + kernel.set(5, ocl::KernelArg::PtrReadOnly(umat_offsetsX)); + kernel.set(6, ocl::KernelArg::PtrReadOnly(umat_offsetsY)); + kernel.set(7, (int)_offsetsX.size()); + kernel.set(8, ocl::KernelArg::PtrReadOnly(umat_aspectRatios)); + kernel.set(9, (int)_aspectRatios.size()); + kernel.set(10, ocl::KernelArg::PtrReadOnly(umat_scales)); + kernel.set(11, ocl::KernelArg::PtrWriteOnly(outputs[0])); + kernel.set(12, (int)_layerHeight); + kernel.set(13, (int)_layerWidth); + kernel.set(14, (int)_imageHeight); + kernel.set(15, (int)_imageWidth); + kernel.run(1, &nthreads, NULL, false); + + // clip the prior's coordinate such that it is within [0, 1] + if (_clip) + { + Mat mat = outputs[0].getMat(ACCESS_READ); + int aspect_count = (_maxSize > 0) ? 1 : 0; + int offset = nthreads * 4 * _offsetsX.size() * (1 + aspect_count + _aspectRatios.size()); + float* outputPtr = mat.ptr() + offset; + int _outChannelSize = _layerHeight * _layerWidth * _numPriors * 4; + for (size_t d = 0; d < _outChannelSize; ++d) + { + outputPtr[d] = std::min(std::max(outputPtr[d], 0.), 1.); + } + } + + // set the variance.
+ { + ocl::Kernel kernel("set_variance", ocl::dnn::prior_box_oclsrc); + int offset = total(shape(outputs[0]), 2); + size_t nthreads = _layerHeight * _layerWidth * _numPriors; + kernel.set(0, (int)nthreads); + kernel.set(1, (int)offset); + kernel.set(2, (int)_variance.size()); + kernel.set(3, ocl::KernelArg::PtrReadOnly(umat_variance)); + kernel.set(4, ocl::KernelArg::PtrWriteOnly(outputs[0])); + if (!kernel.run(1, &nthreads, NULL, false)) + return false; + } + return true; + } +#endif + void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) { CV_TRACE_FUNCTION(); CV_TRACE_ARG_VALUE(name, "name", name.c_str()); + CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) && + OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()), + forward_ocl(inputs_arr, outputs_arr, internals_arr)) + Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr); } @@ -441,6 +539,14 @@ private: std::vector _offsetsX; std::vector _offsetsY; +#ifdef HAVE_OPENCL + UMat umat_offsetsX; + UMat umat_offsetsY; + UMat umat_aspectRatios; + UMat umat_scales; + UMat umat_variance; +#endif + bool _flip; bool _clip; bool _explicitSizes; diff --git a/modules/dnn/src/opencl/prior_box.cl b/modules/dnn/src/opencl/prior_box.cl new file mode 100644 index 0000000000..660ccb64d5 --- /dev/null +++ b/modules/dnn/src/opencl/prior_box.cl @@ -0,0 +1,148 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (c) 2016-2017 Fabian David Tschopp, all rights reserved. +// Third party copyrights are property of their respective owners. 
+// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. 
+// +//M*/ + +#define Dtype float +#define Dtype4 float4 + +__kernel void prior_box(const int nthreads, + const Dtype stepX, + const Dtype stepY, + const Dtype _minSize, + const Dtype _maxSize, + __global const Dtype* _offsetsX, + __global const Dtype* _offsetsY, + const int offsetsX_size, + __global const Dtype* _aspectRatios, + const int aspectRatios_size, + __global const Dtype* scales, + __global Dtype* dst, + const int _layerHeight, + const int _layerWidth, + const int imgHeight, + const int imgWidth) +{ + for (int index = get_global_id(0); index < nthreads; index += get_global_size(0)) + { + int w = index % _layerWidth; + int h = index / _layerWidth; + __global Dtype* outputPtr; + int aspect_count = (_maxSize > 0) ? 1 : 0; + outputPtr = dst + index * 4 * offsetsX_size * (1 + aspect_count + aspectRatios_size); + + Dtype _boxWidth, _boxHeight; + Dtype4 vec; + _boxWidth = _boxHeight = _minSize * scales[0]; + for (int i = 0; i < offsetsX_size; ++i) + { + float center_x = (w + _offsetsX[i]) * stepX; + float center_y = (h + _offsetsY[i]) * stepY; + + vec.x = (center_x - _boxWidth * 0.5f) / imgWidth; // xmin + vec.y = (center_y - _boxHeight * 0.5f) / imgHeight; // ymin + vec.z = (center_x + _boxWidth * 0.5f) / imgWidth; // xmax + vec.w = (center_y + _boxHeight * 0.5f) / imgHeight; // ymax + vstore4(vec, 0, outputPtr); + + outputPtr += 4; + } + + if (_maxSize > 0) + { + _boxWidth = _boxHeight = native_sqrt(_minSize * _maxSize) * scales[1]; + + for (int i = 0; i < offsetsX_size; ++i) + { + float center_x = (w + _offsetsX[i]) * stepX; + float center_y = (h + _offsetsY[i]) * stepY; + + vec.x = (center_x - _boxWidth * 0.5f) / imgWidth; // xmin + vec.y = (center_y - _boxHeight * 0.5f) / imgHeight; // ymin + vec.z = (center_x + _boxWidth * 0.5f) / imgWidth; // xmax + vec.w = (center_y + _boxHeight * 0.5f) / imgHeight; // ymax + vstore4(vec, 0, outputPtr); + + outputPtr += 4; + } + } + + for (int r = 0; r < aspectRatios_size; ++r) + { + float ar = 
native_sqrt(_aspectRatios[r]); + float scale = scales[(_maxSize > 0 ? 2 : 1) + r]; + + _boxWidth = _minSize * ar * scale; + _boxHeight = _minSize / ar * scale; + + for (int i = 0; i < offsetsX_size; ++i) + { + float center_x = (w + _offsetsX[i]) * stepX; + float center_y = (h + _offsetsY[i]) * stepY; + + vec.x = (center_x - _boxWidth * 0.5f) / imgWidth; // xmin + vec.y = (center_y - _boxHeight * 0.5f) / imgHeight; // ymin + vec.z = (center_x + _boxWidth * 0.5f) / imgWidth; // xmax + vec.w = (center_y + _boxHeight * 0.5f) / imgHeight; // ymax + vstore4(vec, 0, outputPtr); + + outputPtr += 4; + } + } + } +} + +__kernel void set_variance(const int nthreads, + const int offset, + const int variance_size, + __global const Dtype* variance, + __global Dtype* dst) +{ + for (int index = get_global_id(0); index < nthreads; index += get_global_size(0)) + { + Dtype4 var_vec; + + if (variance_size == 1) + var_vec = (Dtype4)(variance[0]); + else + var_vec = vload4(0, variance); + + vstore4(var_vec, 0, dst + offset + index * 4); + } +} From 436d7e4eaf4315139e81fda29db316c4ef81eb04 Mon Sep 17 00:00:00 2001 From: Li Peng Date: Tue, 19 Dec 2017 17:59:13 +0800 Subject: [PATCH 2/3] add depthwise convolution kernel Signed-off-by: Li Peng --- modules/dnn/src/ocl4dnn/include/ocl4dnn.hpp | 5 + .../src/ocl4dnn/src/ocl4dnn_conv_spatial.cpp | 100 +++++++++++++++++- modules/dnn/src/opencl/conv_layer_spatial.cl | 59 ++++++++++- 3 files changed, 160 insertions(+), 4 deletions(-) diff --git a/modules/dnn/src/ocl4dnn/include/ocl4dnn.hpp b/modules/dnn/src/ocl4dnn/include/ocl4dnn.hpp index b137896bbe..f9a74ae4e7 100644 --- a/modules/dnn/src/ocl4dnn/include/ocl4dnn.hpp +++ b/modules/dnn/src/ocl4dnn/include/ocl4dnn.hpp @@ -215,6 +215,9 @@ class OCL4DNNConvSpatial bool createGEMMLikeConvKernel(int32_t blockWidth, int32_t blockHeight, int32_t blockDepth); + bool createDWConvKernel(int32_t blockWidth, + int32_t blockHeight, + int32_t blockDepth); void CreateSubBuffer(const UMat& buffer, UMat& 
sub_buffer, int32_t offset, int32_t size, bool write_only); bool convolve(const UMat &bottom, UMat &top, @@ -282,6 +285,8 @@ class OCL4DNNConvSpatial int32_t M_; bool tuned_; + bool dwconv_; + std::string key_, key_sanitized_; std::string short_key_; std::string kernel_name_; diff --git a/modules/dnn/src/ocl4dnn/src/ocl4dnn_conv_spatial.cpp b/modules/dnn/src/ocl4dnn/src/ocl4dnn_conv_spatial.cpp index 6a305558eb..ae188f763b 100644 --- a/modules/dnn/src/ocl4dnn/src/ocl4dnn_conv_spatial.cpp +++ b/modules/dnn/src/ocl4dnn/src/ocl4dnn_conv_spatial.cpp @@ -103,6 +103,7 @@ OCL4DNNConvSpatial::OCL4DNNConvSpatial(OCL4DNNConvConfig config) top_dim_ = num_output_ * output_w_ * output_h_; cache_path_ = utils::getConfigurationParameterString("OPENCV_OCL4DNN_CONFIG_PATH", ""); + dwconv_ = (num_output_ == channels_ && channels_ == group_); use_cache_path_ = false; if (!cache_path_.empty()) @@ -203,7 +204,8 @@ void OCL4DNNConvSpatial::collectCommonInformation() typedef enum { KERNEL_TYPE_INTEL_IDLF = 2, KERNEL_TYPE_BASIC = 4, - KERNEL_TYPE_GEMM_LIKE = 5 + KERNEL_TYPE_GEMM_LIKE = 5, + KERNEL_TYPE_DWCONV = 6 } ocl4dnnConvSpatialKernelType_t; template @@ -313,6 +315,7 @@ void OCL4DNNConvSpatial::setupKernelDetails(int32_t kernelType, if (clOptionSupport("-cl-no-subgroup-ifp")) options_ << " -cl-no-subgroup-ifp "; + addDef("KERNEL_GEMM_LIKE"); addDef("INPUT_DEPTH", channels_); addDef("WIDTH1", M_); addDef("OUT_PADDING_LEFT", 0); @@ -329,6 +332,28 @@ void OCL4DNNConvSpatial::setupKernelDetails(int32_t kernelType, setFusionDefine(fused_activ_, fused_eltwise_); src_ = ocl::dnn::conv_layer_spatial_oclsrc; } + else if (kernelType == KERNEL_TYPE_DWCONV) + { + kernelUKey = generateSpecificKey(KERNEL_TYPE_DWCONV, blockM, blockK, blockN); + kernel_name_ = "DWCONV_"; + kernel_name_ += kernelUKey.c_str(); + + options_ << " -cl-fast-relaxed-math "; + if (clOptionSupport("-cl-no-subgroup-ifp")) + options_ << " -cl-no-subgroup-ifp "; + + addDef("KERNEL_DWCONV"); + addDef("KERNEL_SIZE", kernel_w_ * 
kernel_h_); + addDef("KERNEL_W", kernel_w_); + addDef("KERNEL_H", kernel_h_); + addDef("APPLY_BIAS", bias_term_); + addDef("OUTPUT_Z", num_output_ * num_); + addDef("CHANNELS", num_output_); + setFusionDefine(fused_activ_, fused_eltwise_); + + options_ << " -D DWCONV=" << kernel_name_; + src_ = cv::ocl::dnn::conv_layer_spatial_oclsrc; + } } template @@ -906,6 +931,33 @@ bool OCL4DNNConvSpatial::convolve(const UMat &bottom, UMat &top, return false; } } + } else if (config->kernelType == KERNEL_TYPE_DWCONV) { + ocl::Kernel kernel(config->kernelName.c_str(), program); + if (kernel.empty()) + return false; + + cl_uint argIdx = 0; + setFusionArg(fused_activ_, fused_eltwise_, kernel, argIdx); + kernel.set(argIdx++, ocl::KernelArg::PtrReadOnly(bottom)); + kernel.set(argIdx++, ocl::KernelArg::PtrReadOnly(weight)); + if (bias_term_) + kernel.set(argIdx++, ocl::KernelArg::PtrReadOnly(bias)); + kernel.set(argIdx++, ocl::KernelArg::PtrWriteOnly(top)); + kernel.set(argIdx++, (uint16_t)width_); + kernel.set(argIdx++, (uint16_t)height_); + kernel.set(argIdx++, (uint16_t)output_w_); + kernel.set(argIdx++, (uint16_t)output_h_); + + size_t global_size[3]; + global_size[0] = output_w_; + global_size[1] = output_h_; + global_size[2] = num_output_ * num_; + + if (!kernel.run(3, global_size, NULL, false)) + { + std::cout << "DWCONV kernel run failed." 
<< std::endl; + return false; + } } else { for (int32_t n = 0; n < numImages; ++n) { for (int32_t g = 0; g < group_; ++g) { @@ -1222,6 +1274,39 @@ bool OCL4DNNConvSpatial::createIDLFKernel(int32_t blockWidth, return false; } +template<> +bool OCL4DNNConvSpatial::createDWConvKernel(int32_t blockWidth, + int32_t blockHeight, + int32_t blockDepth) +{ + if (!dwconv_) + return false; + + int workItemOutput[3] = { 1, 1, 1 }; + size_t local_size[3] = { 1, 1, 1 }; + size_t global_size[3]; + global_size[0] = divUp(output_w_, workItemOutput[0]); + global_size[1] = divUp(output_h_, workItemOutput[1]); + global_size[2] = divUp(M_ * num_, workItemOutput[2]); + + kernelType_ = KERNEL_TYPE_DWCONV; + blockM_ = blockWidth; + blockK_ = blockHeight; + blockN_ = blockDepth; + + setupKernel(); + + ocl::Program program = compileKernel(); + if (program.ptr()) + { + kernelQueue.push_back(makePtr(kernel_name_, &global_size[0], &local_size[0], + &workItemOutput[0], false, KERNEL_TYPE_DWCONV)); + return true; + } + else + return false; +} + template<> bool OCL4DNNConvSpatial::createConvolutionKernel(int32_t kernelType, int32_t blockWidth, @@ -1238,6 +1323,8 @@ bool OCL4DNNConvSpatial::createConvolutionKernel(int32_t kernelType, return createBasicKernel(blockWidth, blockHeight, blockDepth); else if (kernelType == KERNEL_TYPE_GEMM_LIKE) return createGEMMLikeConvKernel(blockWidth, blockHeight, blockDepth); + else if (kernelType == KERNEL_TYPE_DWCONV) + return createDWConvKernel(blockWidth, blockHeight, blockDepth); else CV_Assert(0 && "Internal error"); return false; @@ -1246,7 +1333,16 @@ bool OCL4DNNConvSpatial::createConvolutionKernel(int32_t kernelType, template<> void OCL4DNNConvSpatial::generateTunerItems(std::vector< cv::Ptr > &tunerItems) { - if (ocl::Device::getDefault().intelSubgroupsSupport()) { + if (ocl::Device::getDefault().intelSubgroupsSupport()) + { + //depth_wise kernels + if (dwconv_) + { + tunerItems.push_back(makePtr(KERNEL_TYPE_DWCONV, 1, 1, 1)); + if (group_ > 8) + 
return; + } + /* IDLF kernels are using Intel specific extension which make them intel only. */ // Generates static key_ diff --git a/modules/dnn/src/opencl/conv_layer_spatial.cl b/modules/dnn/src/opencl/conv_layer_spatial.cl index 91066bdbfd..2457cf7677 100644 --- a/modules/dnn/src/opencl/conv_layer_spatial.cl +++ b/modules/dnn/src/opencl/conv_layer_spatial.cl @@ -383,7 +383,7 @@ convolve_simd( } } -#else // KERNEL_GEMM_LIKE +#elif defined KERNEL_GEMM_LIKE #if APPLY_BIAS // Dtype bias[4]; @@ -1501,4 +1501,59 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS) INTERLEAVED_SIMD16_OUTPUT(dst, out_offset, 0); } #endif -#endif // KERNEL_BASIC/IDLF/GEMM_LIKE + +#elif defined KERNEL_DWCONV + +__kernel void DWCONV( + ELTWISE_DATA_ARG + NEGATIVE_SLOPE_ARG + __global Dtype* image_data, + __global Dtype* kernel_data, + BIAS_KERNEL_ARG + __global Dtype* convolved_image, + const ushort input_width, + const ushort input_height, + const ushort output_width, + const ushort output_height) { + + const int outputX = get_global_id(0); + const int outputY = get_global_id(1); + const int outputZ = get_global_id(2); + if(outputX < output_width && outputY < output_height) + { + Dtype sum = 0.; + + const int org_y = outputY * STRIDE_Y - INPUT_PAD_H; + const int org_x = outputX * STRIDE_X - INPUT_PAD_W; + const int currentKernelOffset = KERNEL_SIZE*(outputZ%CHANNELS); + const int biasIndex=outputZ%CHANNELS; + const int local_image_offset = org_y*input_width + org_x; + const int imageSize = input_width*input_height; + + __global Dtype* image_dataPtrFloat = (image_data + (imageSize*outputZ + local_image_offset)); + __global Dtype* kernel_dataPtrFloat = (kernel_data + (currentKernelOffset)); + + for(int y = 0; y < KERNEL_H; y++) + { + for(int x = 0; x < KERNEL_W; x++) + { + if(!(org_y + y * DILATION_Y >= 0 && org_y + y * DILATION_Y < input_height && org_x + x * DILATION_X >= 0 && org_x + x * DILATION_X < input_width)) + { + continue; + } + sum += image_dataPtrFloat[x * DILATION_X] * 
kernel_dataPtrFloat[x]; + } + image_dataPtrFloat += input_width * DILATION_Y; + kernel_dataPtrFloat += KERNEL_W; + } + + #if APPLY_BIAS + int offset = outputZ*output_height*output_width + outputY*output_width + outputX; + ACTIVATION_FUNCTION(convolved_image, offset, sum + biases_base[biasIndex], biasIndex); + #else + int offset = outputZ*output_height*output_width + outputY*output_width + outputX; + ACTIVATION_FUNCTION(convolved_image, offset, sum, biasIndex); + #endif + } +} +#endif // KERNEL_BASIC/IDLF/GEMM_LIKE/DWCONV From 3b84acfc48426dc5f6177cf9638edba2ef9872d9 Mon Sep 17 00:00:00 2001 From: Li Peng Date: Tue, 19 Dec 2017 16:59:46 +0800 Subject: [PATCH 3/3] add ocl accuracy test for tf mobilenet ssd Signed-off-by: Li Peng --- modules/dnn/test/test_tf_importer.cpp | 39 +++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/modules/dnn/test/test_tf_importer.cpp b/modules/dnn/test/test_tf_importer.cpp index fbb90a7241..1badf74ab7 100644 --- a/modules/dnn/test/test_tf_importer.cpp +++ b/modules/dnn/test/test_tf_importer.cpp @@ -11,6 +11,8 @@ Test for Tensorflow models loading #include "test_precomp.hpp" #include "npy_blob.hpp" +#include +#include namespace cvtest { @@ -219,6 +221,43 @@ TEST(Test_TensorFlow, MobileNet_SSD) normAssert(target[2].reshape(1, 1), output[2].reshape(1, 1), "", 4e-5, 1e-2); } +OCL_TEST(Test_TensorFlow, MobileNet_SSD) +{ + std::string netPath = findDataFile("dnn/ssd_mobilenet_v1_coco.pb", false); + std::string netConfig = findDataFile("dnn/ssd_mobilenet_v1_coco.pbtxt", false); + std::string imgPath = findDataFile("dnn/street.png", false); + + Mat inp; + resize(imread(imgPath), inp, Size(300, 300)); + inp = blobFromImage(inp, 1.0f / 127.5, Size(), Scalar(127.5, 127.5, 127.5), true); + + std::vector outNames(3); + outNames[0] = "concat"; + outNames[1] = "concat_1"; + outNames[2] = "detection_out"; + + std::vector target(outNames.size()); + for (int i = 0; i < outNames.size(); ++i) + { + std::string path = 
findDataFile("dnn/tensorflow/ssd_mobilenet_v1_coco." + outNames[i] + ".npy", false); + target[i] = blobFromNPY(path); + } + + Net net = readNetFromTensorflow(netPath, netConfig); + + net.setPreferableBackend(DNN_BACKEND_DEFAULT); + net.setPreferableTarget(DNN_TARGET_OPENCL); + + net.setInput(inp); + + std::vector output; + net.forward(output, outNames); + + normAssert(target[0].reshape(1, 1), output[0].reshape(1, 1)); + normAssert(target[1].reshape(1, 1), output[1].reshape(1, 1), "", 1e-5, 2e-4); + normAssert(target[2].reshape(1, 1), output[2].reshape(1, 1), "", 4e-5, 1e-2); +} + TEST(Test_TensorFlow, lstm) { runTensorFlowNet("lstm", true);