mirror of
https://github.com/opencv/opencv.git
synced 2025-06-07 17:44:04 +08:00
Merge pull request #10341 from pengli:dnn
This commit is contained in:
commit
dcdd6af5a8
@ -45,6 +45,7 @@
|
||||
#include <float.h>
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
#include "opencl_kernels_dnn.hpp"
|
||||
|
||||
namespace cv
|
||||
{
|
||||
@ -270,11 +271,108 @@ public:
|
||||
return false;
|
||||
}
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
bool forward_ocl(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
|
||||
{
|
||||
std::vector<UMat> inputs;
|
||||
std::vector<UMat> outputs;
|
||||
|
||||
inps.getUMatVector(inputs);
|
||||
outs.getUMatVector(outputs);
|
||||
|
||||
int _layerWidth = inputs[0].size[3];
|
||||
int _layerHeight = inputs[0].size[2];
|
||||
|
||||
int _imageWidth = inputs[1].size[3];
|
||||
int _imageHeight = inputs[1].size[2];
|
||||
|
||||
float stepX, stepY;
|
||||
if (_stepX == 0 || _stepY == 0)
|
||||
{
|
||||
stepX = static_cast<float>(_imageWidth) / _layerWidth;
|
||||
stepY = static_cast<float>(_imageHeight) / _layerHeight;
|
||||
} else {
|
||||
stepX = _stepX;
|
||||
stepY = _stepY;
|
||||
}
|
||||
|
||||
if (umat_offsetsX.empty())
|
||||
{
|
||||
Mat offsetsX(1, _offsetsX.size(), CV_32FC1, &_offsetsX[0]);
|
||||
Mat offsetsY(1, _offsetsX.size(), CV_32FC1, &_offsetsY[0]);
|
||||
Mat aspectRatios(1, _aspectRatios.size(), CV_32FC1, &_aspectRatios[0]);
|
||||
Mat variance(1, _variance.size(), CV_32FC1, &_variance[0]);
|
||||
|
||||
offsetsX.copyTo(umat_offsetsX);
|
||||
offsetsY.copyTo(umat_offsetsY);
|
||||
aspectRatios.copyTo(umat_aspectRatios);
|
||||
variance.copyTo(umat_variance);
|
||||
|
||||
int real_numPriors = _numPriors / pow(2, _offsetsX.size() - 1);
|
||||
umat_scales = UMat(1, &real_numPriors, CV_32F, 1.0f);
|
||||
}
|
||||
|
||||
size_t nthreads = _layerHeight * _layerWidth;
|
||||
|
||||
ocl::Kernel kernel("prior_box", ocl::dnn::prior_box_oclsrc);
|
||||
kernel.set(0, (int)nthreads);
|
||||
kernel.set(1, (float)stepX);
|
||||
kernel.set(2, (float)stepY);
|
||||
kernel.set(3, (float)_minSize);
|
||||
kernel.set(4, (float)_maxSize);
|
||||
kernel.set(5, ocl::KernelArg::PtrReadOnly(umat_offsetsX));
|
||||
kernel.set(6, ocl::KernelArg::PtrReadOnly(umat_offsetsY));
|
||||
kernel.set(7, (int)_offsetsX.size());
|
||||
kernel.set(8, ocl::KernelArg::PtrReadOnly(umat_aspectRatios));
|
||||
kernel.set(9, (int)_aspectRatios.size());
|
||||
kernel.set(10, ocl::KernelArg::PtrReadOnly(umat_scales));
|
||||
kernel.set(11, ocl::KernelArg::PtrWriteOnly(outputs[0]));
|
||||
kernel.set(12, (int)_layerHeight);
|
||||
kernel.set(13, (int)_layerWidth);
|
||||
kernel.set(14, (int)_imageHeight);
|
||||
kernel.set(15, (int)_imageWidth);
|
||||
kernel.run(1, &nthreads, NULL, false);
|
||||
|
||||
// clip the prior's coordidate such that it is within [0, 1]
|
||||
if (_clip)
|
||||
{
|
||||
Mat mat = outputs[0].getMat(ACCESS_READ);
|
||||
int aspect_count = (_maxSize > 0) ? 1 : 0;
|
||||
int offset = nthreads * 4 * _offsetsX.size() * (1 + aspect_count + _aspectRatios.size());
|
||||
float* outputPtr = mat.ptr<float>() + offset;
|
||||
int _outChannelSize = _layerHeight * _layerWidth * _numPriors * 4;
|
||||
for (size_t d = 0; d < _outChannelSize; ++d)
|
||||
{
|
||||
outputPtr[d] = std::min<float>(std::max<float>(outputPtr[d], 0.), 1.);
|
||||
}
|
||||
}
|
||||
|
||||
// set the variance.
|
||||
{
|
||||
ocl::Kernel kernel("set_variance", ocl::dnn::prior_box_oclsrc);
|
||||
int offset = total(shape(outputs[0]), 2);
|
||||
size_t nthreads = _layerHeight * _layerWidth * _numPriors;
|
||||
kernel.set(0, (int)nthreads);
|
||||
kernel.set(1, (int)offset);
|
||||
kernel.set(2, (int)_variance.size());
|
||||
kernel.set(3, ocl::KernelArg::PtrReadOnly(umat_variance));
|
||||
kernel.set(4, ocl::KernelArg::PtrWriteOnly(outputs[0]));
|
||||
if (!kernel.run(1, &nthreads, NULL, false))
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
#endif
|
||||
|
||||
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
|
||||
{
|
||||
CV_TRACE_FUNCTION();
|
||||
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||
|
||||
CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) &&
|
||||
OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
|
||||
forward_ocl(inputs_arr, outputs_arr, internals_arr))
|
||||
|
||||
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||
}
|
||||
|
||||
@ -441,6 +539,14 @@ private:
|
||||
std::vector<float> _offsetsX;
|
||||
std::vector<float> _offsetsY;
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
UMat umat_offsetsX;
|
||||
UMat umat_offsetsY;
|
||||
UMat umat_aspectRatios;
|
||||
UMat umat_scales;
|
||||
UMat umat_variance;
|
||||
#endif
|
||||
|
||||
bool _flip;
|
||||
bool _clip;
|
||||
bool _explicitSizes;
|
||||
|
@ -215,6 +215,9 @@ class OCL4DNNConvSpatial
|
||||
bool createGEMMLikeConvKernel(int32_t blockWidth,
|
||||
int32_t blockHeight,
|
||||
int32_t blockDepth);
|
||||
bool createDWConvKernel(int32_t blockWidth,
|
||||
int32_t blockHeight,
|
||||
int32_t blockDepth);
|
||||
void CreateSubBuffer(const UMat& buffer, UMat& sub_buffer,
|
||||
int32_t offset, int32_t size, bool write_only);
|
||||
bool convolve(const UMat &bottom, UMat &top,
|
||||
@ -282,6 +285,8 @@ class OCL4DNNConvSpatial
|
||||
int32_t M_;
|
||||
|
||||
bool tuned_;
|
||||
bool dwconv_;
|
||||
|
||||
std::string key_, key_sanitized_;
|
||||
std::string short_key_;
|
||||
std::string kernel_name_;
|
||||
|
@ -103,6 +103,7 @@ OCL4DNNConvSpatial<Dtype>::OCL4DNNConvSpatial(OCL4DNNConvConfig config)
|
||||
top_dim_ = num_output_ * output_w_ * output_h_;
|
||||
|
||||
cache_path_ = utils::getConfigurationParameterString("OPENCV_OCL4DNN_CONFIG_PATH", "");
|
||||
dwconv_ = (num_output_ == channels_ && channels_ == group_);
|
||||
|
||||
use_cache_path_ = false;
|
||||
if (!cache_path_.empty())
|
||||
@ -203,7 +204,8 @@ void OCL4DNNConvSpatial<Dtype>::collectCommonInformation()
|
||||
typedef enum {
|
||||
KERNEL_TYPE_INTEL_IDLF = 2,
|
||||
KERNEL_TYPE_BASIC = 4,
|
||||
KERNEL_TYPE_GEMM_LIKE = 5
|
||||
KERNEL_TYPE_GEMM_LIKE = 5,
|
||||
KERNEL_TYPE_DWCONV = 6
|
||||
} ocl4dnnConvSpatialKernelType_t;
|
||||
|
||||
template<typename Dtype>
|
||||
@ -313,6 +315,7 @@ void OCL4DNNConvSpatial<Dtype>::setupKernelDetails(int32_t kernelType,
|
||||
if (clOptionSupport("-cl-no-subgroup-ifp"))
|
||||
options_ << " -cl-no-subgroup-ifp ";
|
||||
|
||||
addDef("KERNEL_GEMM_LIKE");
|
||||
addDef("INPUT_DEPTH", channels_);
|
||||
addDef("WIDTH1", M_);
|
||||
addDef("OUT_PADDING_LEFT", 0);
|
||||
@ -329,6 +332,28 @@ void OCL4DNNConvSpatial<Dtype>::setupKernelDetails(int32_t kernelType,
|
||||
setFusionDefine(fused_activ_, fused_eltwise_);
|
||||
src_ = ocl::dnn::conv_layer_spatial_oclsrc;
|
||||
}
|
||||
else if (kernelType == KERNEL_TYPE_DWCONV)
|
||||
{
|
||||
kernelUKey = generateSpecificKey(KERNEL_TYPE_DWCONV, blockM, blockK, blockN);
|
||||
kernel_name_ = "DWCONV_";
|
||||
kernel_name_ += kernelUKey.c_str();
|
||||
|
||||
options_ << " -cl-fast-relaxed-math ";
|
||||
if (clOptionSupport("-cl-no-subgroup-ifp"))
|
||||
options_ << " -cl-no-subgroup-ifp ";
|
||||
|
||||
addDef("KERNEL_DWCONV");
|
||||
addDef("KERNEL_SIZE", kernel_w_ * kernel_h_);
|
||||
addDef("KERNEL_W", kernel_w_);
|
||||
addDef("KERNEL_H", kernel_h_);
|
||||
addDef("APPLY_BIAS", bias_term_);
|
||||
addDef("OUTPUT_Z", num_output_ * num_);
|
||||
addDef("CHANNELS", num_output_);
|
||||
setFusionDefine(fused_activ_, fused_eltwise_);
|
||||
|
||||
options_ << " -D DWCONV=" << kernel_name_;
|
||||
src_ = cv::ocl::dnn::conv_layer_spatial_oclsrc;
|
||||
}
|
||||
}
|
||||
|
||||
template<typename Dtype>
|
||||
@ -906,6 +931,33 @@ bool OCL4DNNConvSpatial<float>::convolve(const UMat &bottom, UMat &top,
|
||||
return false;
|
||||
}
|
||||
}
|
||||
} else if (config->kernelType == KERNEL_TYPE_DWCONV) {
|
||||
ocl::Kernel kernel(config->kernelName.c_str(), program);
|
||||
if (kernel.empty())
|
||||
return false;
|
||||
|
||||
cl_uint argIdx = 0;
|
||||
setFusionArg(fused_activ_, fused_eltwise_, kernel, argIdx);
|
||||
kernel.set(argIdx++, ocl::KernelArg::PtrReadOnly(bottom));
|
||||
kernel.set(argIdx++, ocl::KernelArg::PtrReadOnly(weight));
|
||||
if (bias_term_)
|
||||
kernel.set(argIdx++, ocl::KernelArg::PtrReadOnly(bias));
|
||||
kernel.set(argIdx++, ocl::KernelArg::PtrWriteOnly(top));
|
||||
kernel.set(argIdx++, (uint16_t)width_);
|
||||
kernel.set(argIdx++, (uint16_t)height_);
|
||||
kernel.set(argIdx++, (uint16_t)output_w_);
|
||||
kernel.set(argIdx++, (uint16_t)output_h_);
|
||||
|
||||
size_t global_size[3];
|
||||
global_size[0] = output_w_;
|
||||
global_size[1] = output_h_;
|
||||
global_size[2] = num_output_ * num_;
|
||||
|
||||
if (!kernel.run(3, global_size, NULL, false))
|
||||
{
|
||||
std::cout << "DWCONV kernel run failed." << std::endl;
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
for (int32_t n = 0; n < numImages; ++n) {
|
||||
for (int32_t g = 0; g < group_; ++g) {
|
||||
@ -1222,6 +1274,39 @@ bool OCL4DNNConvSpatial<float>::createIDLFKernel(int32_t blockWidth,
|
||||
return false;
|
||||
}
|
||||
|
||||
template<>
|
||||
bool OCL4DNNConvSpatial<float>::createDWConvKernel(int32_t blockWidth,
|
||||
int32_t blockHeight,
|
||||
int32_t blockDepth)
|
||||
{
|
||||
if (!dwconv_)
|
||||
return false;
|
||||
|
||||
int workItemOutput[3] = { 1, 1, 1 };
|
||||
size_t local_size[3] = { 1, 1, 1 };
|
||||
size_t global_size[3];
|
||||
global_size[0] = divUp(output_w_, workItemOutput[0]);
|
||||
global_size[1] = divUp(output_h_, workItemOutput[1]);
|
||||
global_size[2] = divUp(M_ * num_, workItemOutput[2]);
|
||||
|
||||
kernelType_ = KERNEL_TYPE_DWCONV;
|
||||
blockM_ = blockWidth;
|
||||
blockK_ = blockHeight;
|
||||
blockN_ = blockDepth;
|
||||
|
||||
setupKernel();
|
||||
|
||||
ocl::Program program = compileKernel();
|
||||
if (program.ptr())
|
||||
{
|
||||
kernelQueue.push_back(makePtr<kernelConfig>(kernel_name_, &global_size[0], &local_size[0],
|
||||
&workItemOutput[0], false, KERNEL_TYPE_DWCONV));
|
||||
return true;
|
||||
}
|
||||
else
|
||||
return false;
|
||||
}
|
||||
|
||||
template<>
|
||||
bool OCL4DNNConvSpatial<float>::createConvolutionKernel(int32_t kernelType,
|
||||
int32_t blockWidth,
|
||||
@ -1238,6 +1323,8 @@ bool OCL4DNNConvSpatial<float>::createConvolutionKernel(int32_t kernelType,
|
||||
return createBasicKernel(blockWidth, blockHeight, blockDepth);
|
||||
else if (kernelType == KERNEL_TYPE_GEMM_LIKE)
|
||||
return createGEMMLikeConvKernel(blockWidth, blockHeight, blockDepth);
|
||||
else if (kernelType == KERNEL_TYPE_DWCONV)
|
||||
return createDWConvKernel(blockWidth, blockHeight, blockDepth);
|
||||
else
|
||||
CV_Assert(0 && "Internal error");
|
||||
return false;
|
||||
@ -1246,7 +1333,16 @@ bool OCL4DNNConvSpatial<float>::createConvolutionKernel(int32_t kernelType,
|
||||
template<>
|
||||
void OCL4DNNConvSpatial<float>::generateTunerItems(std::vector< cv::Ptr<tunerParam> > &tunerItems)
|
||||
{
|
||||
if (ocl::Device::getDefault().intelSubgroupsSupport()) {
|
||||
if (ocl::Device::getDefault().intelSubgroupsSupport())
|
||||
{
|
||||
//depth_wise kernels
|
||||
if (dwconv_)
|
||||
{
|
||||
tunerItems.push_back(makePtr<tunerParam>(KERNEL_TYPE_DWCONV, 1, 1, 1));
|
||||
if (group_ > 8)
|
||||
return;
|
||||
}
|
||||
|
||||
/* IDLF kernels are using Intel specific extension which make
|
||||
them intel only. */
|
||||
// Generates static key_
|
||||
|
@ -383,7 +383,7 @@ convolve_simd(
|
||||
}
|
||||
}
|
||||
|
||||
#else // KERNEL_GEMM_LIKE
|
||||
#elif defined KERNEL_GEMM_LIKE
|
||||
|
||||
#if APPLY_BIAS
|
||||
// Dtype bias[4];
|
||||
@ -1501,4 +1501,59 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
|
||||
INTERLEAVED_SIMD16_OUTPUT(dst, out_offset, 0);
|
||||
}
|
||||
#endif
|
||||
#endif // KERNEL_BASIC/IDLF/GEMM_LIKE
|
||||
|
||||
#elif defined KERNEL_DWCONV
|
||||
|
||||
__kernel void DWCONV(
|
||||
ELTWISE_DATA_ARG
|
||||
NEGATIVE_SLOPE_ARG
|
||||
__global Dtype* image_data,
|
||||
__global Dtype* kernel_data,
|
||||
BIAS_KERNEL_ARG
|
||||
__global Dtype* convolved_image,
|
||||
const ushort input_width,
|
||||
const ushort input_height,
|
||||
const ushort output_width,
|
||||
const ushort output_height) {
|
||||
|
||||
const int outputX = get_global_id(0);
|
||||
const int outputY = get_global_id(1);
|
||||
const int outputZ = get_global_id(2);
|
||||
if(outputX < output_width && outputY < output_height)
|
||||
{
|
||||
Dtype sum = 0.;
|
||||
|
||||
const int org_y = outputY * STRIDE_Y - INPUT_PAD_H;
|
||||
const int org_x = outputX * STRIDE_X - INPUT_PAD_W;
|
||||
const int currentKernelOffset = KERNEL_SIZE*(outputZ%CHANNELS);
|
||||
const int biasIndex=outputZ%CHANNELS;
|
||||
const int local_image_offset = org_y*input_width + org_x;
|
||||
const int imageSize = input_width*input_height;
|
||||
|
||||
__global Dtype* image_dataPtrFloat = (image_data + (imageSize*outputZ + local_image_offset));
|
||||
__global Dtype* kernel_dataPtrFloat = (kernel_data + (currentKernelOffset));
|
||||
|
||||
for(int y = 0; y < KERNEL_H; y++)
|
||||
{
|
||||
for(int x = 0; x < KERNEL_W; x++)
|
||||
{
|
||||
if(!(org_y + y * DILATION_Y >= 0 && org_y + y * DILATION_Y < input_height && org_x + x * DILATION_X >= 0 && org_x + x * DILATION_X < input_width))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
sum += image_dataPtrFloat[x * DILATION_X] * kernel_dataPtrFloat[x];
|
||||
}
|
||||
image_dataPtrFloat += input_width * DILATION_Y;
|
||||
kernel_dataPtrFloat += KERNEL_W;
|
||||
}
|
||||
|
||||
#if APPLY_BIAS
|
||||
int offset = outputZ*output_height*output_width + outputY*output_width + outputX;
|
||||
ACTIVATION_FUNCTION(convolved_image, offset, sum + biases_base[biasIndex], biasIndex);
|
||||
#else
|
||||
int offset = outputZ*output_height*output_width + outputY*output_width + outputX;
|
||||
ACTIVATION_FUNCTION(convolved_image, offset, sum, biasIndex);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
#endif // KERNEL_BASIC/IDLF/GEMM_LIKE/DWCONV
|
||||
|
148
modules/dnn/src/opencl/prior_box.cl
Normal file
148
modules/dnn/src/opencl/prior_box.cl
Normal file
@ -0,0 +1,148 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (c) 2016-2017 Fabian David Tschopp, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#define Dtype float
|
||||
#define Dtype4 float4
|
||||
|
||||
__kernel void prior_box(const int nthreads,
|
||||
const Dtype stepX,
|
||||
const Dtype stepY,
|
||||
const Dtype _minSize,
|
||||
const Dtype _maxSize,
|
||||
__global const Dtype* _offsetsX,
|
||||
__global const Dtype* _offsetsY,
|
||||
const int offsetsX_size,
|
||||
__global const Dtype* _aspectRatios,
|
||||
const int aspectRatios_size,
|
||||
__global const Dtype* scales,
|
||||
__global Dtype* dst,
|
||||
const int _layerHeight,
|
||||
const int _layerWidth,
|
||||
const int imgHeight,
|
||||
const int imgWidth)
|
||||
{
|
||||
for (int index = get_global_id(0); index < nthreads; index += get_global_size(0))
|
||||
{
|
||||
int w = index % _layerWidth;
|
||||
int h = index / _layerWidth;
|
||||
__global Dtype* outputPtr;
|
||||
int aspect_count = (_maxSize > 0) ? 1 : 0;
|
||||
outputPtr = dst + index * 4 * offsetsX_size * (1 + aspect_count + aspectRatios_size);
|
||||
|
||||
Dtype _boxWidth, _boxHeight;
|
||||
Dtype4 vec;
|
||||
_boxWidth = _boxHeight = _minSize * scales[0];
|
||||
for (int i = 0; i < offsetsX_size; ++i)
|
||||
{
|
||||
float center_x = (w + _offsetsX[i]) * stepX;
|
||||
float center_y = (h + _offsetsY[i]) * stepY;
|
||||
|
||||
vec.x = (center_x - _boxWidth * 0.5f) / imgWidth; // xmin
|
||||
vec.y = (center_y - _boxHeight * 0.5f) / imgHeight; // ymin
|
||||
vec.z = (center_x + _boxWidth * 0.5f) / imgWidth; // xmax
|
||||
vec.w = (center_y + _boxHeight * 0.5f) / imgHeight; // ymax
|
||||
vstore4(vec, 0, outputPtr);
|
||||
|
||||
outputPtr += 4;
|
||||
}
|
||||
|
||||
if (_maxSize > 0)
|
||||
{
|
||||
_boxWidth = _boxHeight = native_sqrt(_minSize * _maxSize) * scales[1];
|
||||
|
||||
for (int i = 0; i < offsetsX_size; ++i)
|
||||
{
|
||||
float center_x = (w + _offsetsX[i]) * stepX;
|
||||
float center_y = (h + _offsetsY[i]) * stepY;
|
||||
|
||||
vec.x = (center_x - _boxWidth * 0.5f) / imgWidth; // xmin
|
||||
vec.y = (center_y - _boxHeight * 0.5f) / imgHeight; // ymin
|
||||
vec.z = (center_x + _boxWidth * 0.5f) / imgWidth; // xmax
|
||||
vec.w = (center_y + _boxHeight * 0.5f) / imgHeight; // ymax
|
||||
vstore4(vec, 0, outputPtr);
|
||||
|
||||
outputPtr += 4;
|
||||
}
|
||||
}
|
||||
|
||||
for (int r = 0; r < aspectRatios_size; ++r)
|
||||
{
|
||||
float ar = native_sqrt(_aspectRatios[r]);
|
||||
float scale = scales[(_maxSize > 0 ? 2 : 1) + r];
|
||||
|
||||
_boxWidth = _minSize * ar * scale;
|
||||
_boxHeight = _minSize / ar * scale;
|
||||
|
||||
for (int i = 0; i < offsetsX_size; ++i)
|
||||
{
|
||||
float center_x = (w + _offsetsX[i]) * stepX;
|
||||
float center_y = (h + _offsetsY[i]) * stepY;
|
||||
|
||||
vec.x = (center_x - _boxWidth * 0.5f) / imgWidth; // xmin
|
||||
vec.y = (center_y - _boxHeight * 0.5f) / imgHeight; // ymin
|
||||
vec.z = (center_x + _boxWidth * 0.5f) / imgWidth; // xmax
|
||||
vec.w = (center_y + _boxHeight * 0.5f) / imgHeight; // ymax
|
||||
vstore4(vec, 0, outputPtr);
|
||||
|
||||
outputPtr += 4;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void set_variance(const int nthreads,
|
||||
const int offset,
|
||||
const int variance_size,
|
||||
__global const Dtype* variance,
|
||||
__global Dtype* dst)
|
||||
{
|
||||
for (int index = get_global_id(0); index < nthreads; index += get_global_size(0))
|
||||
{
|
||||
Dtype4 var_vec;
|
||||
|
||||
if (variance_size == 1)
|
||||
var_vec = (Dtype4)(variance[0]);
|
||||
else
|
||||
var_vec = vload4(0, variance);
|
||||
|
||||
vstore4(var_vec, 0, dst + offset + index * 4);
|
||||
}
|
||||
}
|
@ -11,6 +11,8 @@ Test for Tensorflow models loading
|
||||
|
||||
#include "test_precomp.hpp"
|
||||
#include "npy_blob.hpp"
|
||||
#include <opencv2/core/ocl.hpp>
|
||||
#include <opencv2/ts/ocl_test.hpp>
|
||||
|
||||
namespace cvtest
|
||||
{
|
||||
@ -219,6 +221,43 @@ TEST(Test_TensorFlow, MobileNet_SSD)
|
||||
normAssert(target[2].reshape(1, 1), output[2].reshape(1, 1), "", 4e-5, 1e-2);
|
||||
}
|
||||
|
||||
OCL_TEST(Test_TensorFlow, MobileNet_SSD)
|
||||
{
|
||||
std::string netPath = findDataFile("dnn/ssd_mobilenet_v1_coco.pb", false);
|
||||
std::string netConfig = findDataFile("dnn/ssd_mobilenet_v1_coco.pbtxt", false);
|
||||
std::string imgPath = findDataFile("dnn/street.png", false);
|
||||
|
||||
Mat inp;
|
||||
resize(imread(imgPath), inp, Size(300, 300));
|
||||
inp = blobFromImage(inp, 1.0f / 127.5, Size(), Scalar(127.5, 127.5, 127.5), true);
|
||||
|
||||
std::vector<String> outNames(3);
|
||||
outNames[0] = "concat";
|
||||
outNames[1] = "concat_1";
|
||||
outNames[2] = "detection_out";
|
||||
|
||||
std::vector<Mat> target(outNames.size());
|
||||
for (int i = 0; i < outNames.size(); ++i)
|
||||
{
|
||||
std::string path = findDataFile("dnn/tensorflow/ssd_mobilenet_v1_coco." + outNames[i] + ".npy", false);
|
||||
target[i] = blobFromNPY(path);
|
||||
}
|
||||
|
||||
Net net = readNetFromTensorflow(netPath, netConfig);
|
||||
|
||||
net.setPreferableBackend(DNN_BACKEND_DEFAULT);
|
||||
net.setPreferableTarget(DNN_TARGET_OPENCL);
|
||||
|
||||
net.setInput(inp);
|
||||
|
||||
std::vector<Mat> output;
|
||||
net.forward(output, outNames);
|
||||
|
||||
normAssert(target[0].reshape(1, 1), output[0].reshape(1, 1));
|
||||
normAssert(target[1].reshape(1, 1), output[1].reshape(1, 1), "", 1e-5, 2e-4);
|
||||
normAssert(target[2].reshape(1, 1), output[2].reshape(1, 1), "", 4e-5, 1e-2);
|
||||
}
|
||||
|
||||
TEST(Test_TensorFlow, lstm)
|
||||
{
|
||||
runTensorFlowNet("lstm", true);
|
||||
|
Loading…
Reference in New Issue
Block a user