mirror of
https://github.com/opencv/opencv.git
synced 2025-08-06 14:36:36 +08:00
prior box layer ocl implementation
Signed-off-by: Li Peng <peng.li@intel.com>
This commit is contained in:
parent
cac4a7e5b5
commit
910d7dab1f
@ -45,6 +45,7 @@
|
||||
#include <float.h>
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
#include "opencl_kernels_dnn.hpp"
|
||||
|
||||
namespace cv
|
||||
{
|
||||
@ -270,11 +271,108 @@ public:
|
||||
return false;
|
||||
}
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
bool forward_ocl(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
|
||||
{
|
||||
std::vector<UMat> inputs;
|
||||
std::vector<UMat> outputs;
|
||||
|
||||
inps.getUMatVector(inputs);
|
||||
outs.getUMatVector(outputs);
|
||||
|
||||
int _layerWidth = inputs[0].size[3];
|
||||
int _layerHeight = inputs[0].size[2];
|
||||
|
||||
int _imageWidth = inputs[1].size[3];
|
||||
int _imageHeight = inputs[1].size[2];
|
||||
|
||||
float stepX, stepY;
|
||||
if (_stepX == 0 || _stepY == 0)
|
||||
{
|
||||
stepX = static_cast<float>(_imageWidth) / _layerWidth;
|
||||
stepY = static_cast<float>(_imageHeight) / _layerHeight;
|
||||
} else {
|
||||
stepX = _stepX;
|
||||
stepY = _stepY;
|
||||
}
|
||||
|
||||
if (umat_offsetsX.empty())
|
||||
{
|
||||
Mat offsetsX(1, _offsetsX.size(), CV_32FC1, &_offsetsX[0]);
|
||||
Mat offsetsY(1, _offsetsX.size(), CV_32FC1, &_offsetsY[0]);
|
||||
Mat aspectRatios(1, _aspectRatios.size(), CV_32FC1, &_aspectRatios[0]);
|
||||
Mat variance(1, _variance.size(), CV_32FC1, &_variance[0]);
|
||||
|
||||
offsetsX.copyTo(umat_offsetsX);
|
||||
offsetsY.copyTo(umat_offsetsY);
|
||||
aspectRatios.copyTo(umat_aspectRatios);
|
||||
variance.copyTo(umat_variance);
|
||||
|
||||
int real_numPriors = _numPriors / pow(2, _offsetsX.size() - 1);
|
||||
umat_scales = UMat(1, &real_numPriors, CV_32F, 1.0f);
|
||||
}
|
||||
|
||||
size_t nthreads = _layerHeight * _layerWidth;
|
||||
|
||||
ocl::Kernel kernel("prior_box", ocl::dnn::prior_box_oclsrc);
|
||||
kernel.set(0, (int)nthreads);
|
||||
kernel.set(1, (float)stepX);
|
||||
kernel.set(2, (float)stepY);
|
||||
kernel.set(3, (float)_minSize);
|
||||
kernel.set(4, (float)_maxSize);
|
||||
kernel.set(5, ocl::KernelArg::PtrReadOnly(umat_offsetsX));
|
||||
kernel.set(6, ocl::KernelArg::PtrReadOnly(umat_offsetsY));
|
||||
kernel.set(7, (int)_offsetsX.size());
|
||||
kernel.set(8, ocl::KernelArg::PtrReadOnly(umat_aspectRatios));
|
||||
kernel.set(9, (int)_aspectRatios.size());
|
||||
kernel.set(10, ocl::KernelArg::PtrReadOnly(umat_scales));
|
||||
kernel.set(11, ocl::KernelArg::PtrWriteOnly(outputs[0]));
|
||||
kernel.set(12, (int)_layerHeight);
|
||||
kernel.set(13, (int)_layerWidth);
|
||||
kernel.set(14, (int)_imageHeight);
|
||||
kernel.set(15, (int)_imageWidth);
|
||||
kernel.run(1, &nthreads, NULL, false);
|
||||
|
||||
// clip the prior's coordidate such that it is within [0, 1]
|
||||
if (_clip)
|
||||
{
|
||||
Mat mat = outputs[0].getMat(ACCESS_READ);
|
||||
int aspect_count = (_maxSize > 0) ? 1 : 0;
|
||||
int offset = nthreads * 4 * _offsetsX.size() * (1 + aspect_count + _aspectRatios.size());
|
||||
float* outputPtr = mat.ptr<float>() + offset;
|
||||
int _outChannelSize = _layerHeight * _layerWidth * _numPriors * 4;
|
||||
for (size_t d = 0; d < _outChannelSize; ++d)
|
||||
{
|
||||
outputPtr[d] = std::min<float>(std::max<float>(outputPtr[d], 0.), 1.);
|
||||
}
|
||||
}
|
||||
|
||||
// set the variance.
|
||||
{
|
||||
ocl::Kernel kernel("set_variance", ocl::dnn::prior_box_oclsrc);
|
||||
int offset = total(shape(outputs[0]), 2);
|
||||
size_t nthreads = _layerHeight * _layerWidth * _numPriors;
|
||||
kernel.set(0, (int)nthreads);
|
||||
kernel.set(1, (int)offset);
|
||||
kernel.set(2, (int)_variance.size());
|
||||
kernel.set(3, ocl::KernelArg::PtrReadOnly(umat_variance));
|
||||
kernel.set(4, ocl::KernelArg::PtrWriteOnly(outputs[0]));
|
||||
if (!kernel.run(1, &nthreads, NULL, false))
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
#endif
|
||||
|
||||
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
|
||||
{
|
||||
CV_TRACE_FUNCTION();
|
||||
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||
|
||||
CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) &&
|
||||
OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
|
||||
forward_ocl(inputs_arr, outputs_arr, internals_arr))
|
||||
|
||||
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||
}
|
||||
|
||||
@ -441,6 +539,14 @@ private:
|
||||
std::vector<float> _offsetsX;
|
||||
std::vector<float> _offsetsY;
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
UMat umat_offsetsX;
|
||||
UMat umat_offsetsY;
|
||||
UMat umat_aspectRatios;
|
||||
UMat umat_scales;
|
||||
UMat umat_variance;
|
||||
#endif
|
||||
|
||||
bool _flip;
|
||||
bool _clip;
|
||||
bool _explicitSizes;
|
||||
|
148
modules/dnn/src/opencl/prior_box.cl
Normal file
148
modules/dnn/src/opencl/prior_box.cl
Normal file
@ -0,0 +1,148 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (c) 2016-2017 Fabian David Tschopp, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#define Dtype float
|
||||
#define Dtype4 float4
|
||||
|
||||
__kernel void prior_box(const int nthreads,
|
||||
const Dtype stepX,
|
||||
const Dtype stepY,
|
||||
const Dtype _minSize,
|
||||
const Dtype _maxSize,
|
||||
__global const Dtype* _offsetsX,
|
||||
__global const Dtype* _offsetsY,
|
||||
const int offsetsX_size,
|
||||
__global const Dtype* _aspectRatios,
|
||||
const int aspectRatios_size,
|
||||
__global const Dtype* scales,
|
||||
__global Dtype* dst,
|
||||
const int _layerHeight,
|
||||
const int _layerWidth,
|
||||
const int imgHeight,
|
||||
const int imgWidth)
|
||||
{
|
||||
for (int index = get_global_id(0); index < nthreads; index += get_global_size(0))
|
||||
{
|
||||
int w = index % _layerWidth;
|
||||
int h = index / _layerWidth;
|
||||
__global Dtype* outputPtr;
|
||||
int aspect_count = (_maxSize > 0) ? 1 : 0;
|
||||
outputPtr = dst + index * 4 * offsetsX_size * (1 + aspect_count + aspectRatios_size);
|
||||
|
||||
Dtype _boxWidth, _boxHeight;
|
||||
Dtype4 vec;
|
||||
_boxWidth = _boxHeight = _minSize * scales[0];
|
||||
for (int i = 0; i < offsetsX_size; ++i)
|
||||
{
|
||||
float center_x = (w + _offsetsX[i]) * stepX;
|
||||
float center_y = (h + _offsetsY[i]) * stepY;
|
||||
|
||||
vec.x = (center_x - _boxWidth * 0.5f) / imgWidth; // xmin
|
||||
vec.y = (center_y - _boxHeight * 0.5f) / imgHeight; // ymin
|
||||
vec.z = (center_x + _boxWidth * 0.5f) / imgWidth; // xmax
|
||||
vec.w = (center_y + _boxHeight * 0.5f) / imgHeight; // ymax
|
||||
vstore4(vec, 0, outputPtr);
|
||||
|
||||
outputPtr += 4;
|
||||
}
|
||||
|
||||
if (_maxSize > 0)
|
||||
{
|
||||
_boxWidth = _boxHeight = native_sqrt(_minSize * _maxSize) * scales[1];
|
||||
|
||||
for (int i = 0; i < offsetsX_size; ++i)
|
||||
{
|
||||
float center_x = (w + _offsetsX[i]) * stepX;
|
||||
float center_y = (h + _offsetsY[i]) * stepY;
|
||||
|
||||
vec.x = (center_x - _boxWidth * 0.5f) / imgWidth; // xmin
|
||||
vec.y = (center_y - _boxHeight * 0.5f) / imgHeight; // ymin
|
||||
vec.z = (center_x + _boxWidth * 0.5f) / imgWidth; // xmax
|
||||
vec.w = (center_y + _boxHeight * 0.5f) / imgHeight; // ymax
|
||||
vstore4(vec, 0, outputPtr);
|
||||
|
||||
outputPtr += 4;
|
||||
}
|
||||
}
|
||||
|
||||
for (int r = 0; r < aspectRatios_size; ++r)
|
||||
{
|
||||
float ar = native_sqrt(_aspectRatios[r]);
|
||||
float scale = scales[(_maxSize > 0 ? 2 : 1) + r];
|
||||
|
||||
_boxWidth = _minSize * ar * scale;
|
||||
_boxHeight = _minSize / ar * scale;
|
||||
|
||||
for (int i = 0; i < offsetsX_size; ++i)
|
||||
{
|
||||
float center_x = (w + _offsetsX[i]) * stepX;
|
||||
float center_y = (h + _offsetsY[i]) * stepY;
|
||||
|
||||
vec.x = (center_x - _boxWidth * 0.5f) / imgWidth; // xmin
|
||||
vec.y = (center_y - _boxHeight * 0.5f) / imgHeight; // ymin
|
||||
vec.z = (center_x + _boxWidth * 0.5f) / imgWidth; // xmax
|
||||
vec.w = (center_y + _boxHeight * 0.5f) / imgHeight; // ymax
|
||||
vstore4(vec, 0, outputPtr);
|
||||
|
||||
outputPtr += 4;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void set_variance(const int nthreads,
|
||||
const int offset,
|
||||
const int variance_size,
|
||||
__global const Dtype* variance,
|
||||
__global Dtype* dst)
|
||||
{
|
||||
for (int index = get_global_id(0); index < nthreads; index += get_global_size(0))
|
||||
{
|
||||
Dtype4 var_vec;
|
||||
|
||||
if (variance_size == 1)
|
||||
var_vec = (Dtype4)(variance[0]);
|
||||
else
|
||||
var_vec = vload4(0, variance);
|
||||
|
||||
vstore4(var_vec, 0, dst + offset + index * 4);
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user