mirror of
https://github.com/opencv/opencv.git
synced 2025-01-18 14:13:15 +08:00
Use only absolute prior boxes explicit sizes. Remove scales attributes. (#10874)
* Use only absolute prior boxes explicit sizes. Remove scales attributes. * Simplified PriorBox layer forward pass
This commit is contained in:
parent
88b689bcf1
commit
8b4871a28d
@ -179,7 +179,6 @@ public:
|
||||
}
|
||||
|
||||
PriorBoxLayerImpl(const LayerParams &params)
|
||||
: _boxWidth(0), _boxHeight(0)
|
||||
{
|
||||
setParamsFrom(params);
|
||||
_minSize = getParameter<float>(params, "min_size", 0, false, 0);
|
||||
@ -187,37 +186,56 @@ public:
|
||||
_clip = getParameter<bool>(params, "clip", 0, false, true);
|
||||
_bboxesNormalized = getParameter<bool>(params, "normalized_bbox", 0, false, true);
|
||||
|
||||
_scales.clear();
|
||||
_aspectRatios.clear();
|
||||
|
||||
getAspectRatios(params);
|
||||
getVariance(params);
|
||||
getParams("scales", params, &_scales);
|
||||
getParams("width", params, &_widths);
|
||||
getParams("height", params, &_heights);
|
||||
_explicitSizes = !_widths.empty();
|
||||
CV_Assert(_widths.size() == _heights.size());
|
||||
|
||||
if (_explicitSizes)
|
||||
{
|
||||
CV_Assert(_aspectRatios.empty(), !params.has("min_size"), !params.has("max_size"));
|
||||
_numPriors = _widths.size();
|
||||
}
|
||||
else
|
||||
{
|
||||
CV_Assert(!_aspectRatios.empty(), _minSize > 0);
|
||||
_numPriors = _aspectRatios.size() + 1; // + 1 for an aspect ratio 1.0
|
||||
}
|
||||
|
||||
_maxSize = -1;
|
||||
if (params.has("max_size"))
|
||||
{
|
||||
_maxSize = params.get("max_size").get<float>(0);
|
||||
CV_Assert(_maxSize > _minSize);
|
||||
|
||||
_numPriors += 1;
|
||||
}
|
||||
|
||||
std::vector<float> widths, heights;
|
||||
getParams("width", params, &widths);
|
||||
getParams("height", params, &heights);
|
||||
_explicitSizes = !widths.empty();
|
||||
CV_Assert(widths.size() == heights.size());
|
||||
|
||||
if (_explicitSizes)
|
||||
{
|
||||
CV_Assert(_aspectRatios.empty(), !params.has("min_size"), !params.has("max_size"));
|
||||
_boxWidths = widths;
|
||||
_boxHeights = heights;
|
||||
}
|
||||
else
|
||||
{
|
||||
CV_Assert(!_aspectRatios.empty(), _minSize > 0);
|
||||
_boxWidths.resize(1 + (_maxSize > 0 ? 1 : 0) + _aspectRatios.size());
|
||||
_boxHeights.resize(_boxWidths.size());
|
||||
_boxWidths[0] = _boxHeights[0] = _minSize;
|
||||
|
||||
int i = 1;
|
||||
if (_maxSize > 0)
|
||||
{
|
||||
// second prior: aspect_ratio = 1, size = sqrt(min_size * max_size)
|
||||
_boxWidths[i] = _boxHeights[i] = sqrt(_minSize * _maxSize);
|
||||
i += 1;
|
||||
}
|
||||
|
||||
// rest of priors
|
||||
for (size_t r = 0; r < _aspectRatios.size(); ++r)
|
||||
{
|
||||
float arSqrt = sqrt(_aspectRatios[r]);
|
||||
_boxWidths[i + r] = _minSize * arSqrt;
|
||||
_boxHeights[i + r] = _minSize / arSqrt;
|
||||
}
|
||||
}
|
||||
CV_Assert(_boxWidths.size() == _boxHeights.size());
|
||||
_numPriors = _boxWidths.size();
|
||||
|
||||
if (params.has("step_h") || params.has("step_w")) {
|
||||
CV_Assert(!params.has("step"));
|
||||
_stepY = getParameter<float>(params, "step_h");
|
||||
@ -252,8 +270,7 @@ public:
|
||||
virtual bool supportBackend(int backendId)
|
||||
{
|
||||
return backendId == DNN_BACKEND_DEFAULT ||
|
||||
backendId == DNN_BACKEND_INFERENCE_ENGINE && haveInfEngine() &&
|
||||
_scales.empty() && !_explicitSizes;
|
||||
backendId == DNN_BACKEND_INFERENCE_ENGINE && haveInfEngine() && !_explicitSizes;
|
||||
}
|
||||
|
||||
bool getMemoryShapes(const std::vector<MatShape> &inputs,
|
||||
@ -307,27 +324,16 @@ public:
|
||||
if (umat_offsetsX.empty())
|
||||
{
|
||||
Mat offsetsX(1, _offsetsX.size(), CV_32FC1, &_offsetsX[0]);
|
||||
Mat offsetsY(1, _offsetsX.size(), CV_32FC1, &_offsetsY[0]);
|
||||
Mat aspectRatios(1, _aspectRatios.size(), CV_32FC1, &_aspectRatios[0]);
|
||||
Mat offsetsY(1, _offsetsY.size(), CV_32FC1, &_offsetsY[0]);
|
||||
Mat variance(1, _variance.size(), CV_32FC1, &_variance[0]);
|
||||
Mat widths(1, _boxWidths.size(), CV_32FC1, &_boxWidths[0]);
|
||||
Mat heights(1, _boxHeights.size(), CV_32FC1, &_boxHeights[0]);
|
||||
|
||||
offsetsX.copyTo(umat_offsetsX);
|
||||
offsetsY.copyTo(umat_offsetsY);
|
||||
aspectRatios.copyTo(umat_aspectRatios);
|
||||
variance.copyTo(umat_variance);
|
||||
|
||||
int real_numPriors = _numPriors >> (_offsetsX.size() - 1);
|
||||
if (_scales.empty())
|
||||
{
|
||||
_scales.resize(real_numPriors, 1.0f);
|
||||
umat_scales = UMat(1, &real_numPriors, CV_32F, 1.0f);
|
||||
}
|
||||
else
|
||||
{
|
||||
CV_Assert(_scales.size() == real_numPriors);
|
||||
Mat scales(1, _scales.size(), CV_32FC1, &_scales[0]);
|
||||
scales.copyTo(umat_scales);
|
||||
}
|
||||
widths.copyTo(umat_widths);
|
||||
heights.copyTo(umat_heights);
|
||||
}
|
||||
|
||||
size_t nthreads = _layerHeight * _layerWidth;
|
||||
@ -336,19 +342,17 @@ public:
|
||||
kernel.set(0, (int)nthreads);
|
||||
kernel.set(1, (float)stepX);
|
||||
kernel.set(2, (float)stepY);
|
||||
kernel.set(3, (float)_minSize);
|
||||
kernel.set(4, (float)_maxSize);
|
||||
kernel.set(5, ocl::KernelArg::PtrReadOnly(umat_offsetsX));
|
||||
kernel.set(6, ocl::KernelArg::PtrReadOnly(umat_offsetsY));
|
||||
kernel.set(7, (int)_offsetsX.size());
|
||||
kernel.set(8, ocl::KernelArg::PtrReadOnly(umat_aspectRatios));
|
||||
kernel.set(9, (int)_aspectRatios.size());
|
||||
kernel.set(10, ocl::KernelArg::PtrReadOnly(umat_scales));
|
||||
kernel.set(11, ocl::KernelArg::PtrWriteOnly(outputs[0]));
|
||||
kernel.set(12, (int)_layerHeight);
|
||||
kernel.set(13, (int)_layerWidth);
|
||||
kernel.set(14, (int)_imageHeight);
|
||||
kernel.set(15, (int)_imageWidth);
|
||||
kernel.set(3, ocl::KernelArg::PtrReadOnly(umat_offsetsX));
|
||||
kernel.set(4, ocl::KernelArg::PtrReadOnly(umat_offsetsY));
|
||||
kernel.set(5, (int)_offsetsX.size());
|
||||
kernel.set(6, ocl::KernelArg::PtrReadOnly(umat_widths));
|
||||
kernel.set(7, ocl::KernelArg::PtrReadOnly(umat_heights));
|
||||
kernel.set(8, (int)_boxWidths.size());
|
||||
kernel.set(9, ocl::KernelArg::PtrWriteOnly(outputs[0]));
|
||||
kernel.set(10, (int)_layerHeight);
|
||||
kernel.set(11, (int)_layerWidth);
|
||||
kernel.set(12, (int)_imageHeight);
|
||||
kernel.set(13, (int)_imageWidth);
|
||||
kernel.run(1, &nthreads, NULL, false);
|
||||
|
||||
// clip the prior's coordinate such that it is within [0, 1]
|
||||
@ -401,12 +405,6 @@ public:
|
||||
|
||||
CV_Assert(inputs.size() == 2);
|
||||
|
||||
size_t real_numPriors = _numPriors >> (_offsetsX.size() - 1);
|
||||
if (_scales.empty())
|
||||
_scales.resize(real_numPriors, 1.0f);
|
||||
else
|
||||
CV_Assert(_scales.size() == real_numPriors);
|
||||
|
||||
int _layerWidth = inputs[0]->size[3];
|
||||
int _layerHeight = inputs[0]->size[2];
|
||||
|
||||
@ -425,72 +423,15 @@ public:
|
||||
int _outChannelSize = _layerHeight * _layerWidth * _numPriors * 4;
|
||||
|
||||
float* outputPtr = outputs[0].ptr<float>();
|
||||
float _boxWidth, _boxHeight;
|
||||
for (size_t h = 0; h < _layerHeight; ++h)
|
||||
{
|
||||
for (size_t w = 0; w < _layerWidth; ++w)
|
||||
{
|
||||
// first prior: aspect_ratio = 1, size = min_size
|
||||
if (_explicitSizes)
|
||||
for (size_t i = 0; i < _boxWidths.size(); ++i)
|
||||
{
|
||||
_boxWidth = _widths[0] * _scales[0];
|
||||
_boxHeight = _heights[0] * _scales[0];
|
||||
if (_bboxesNormalized)
|
||||
{
|
||||
_boxWidth *= _imageWidth;
|
||||
_boxHeight *= _imageHeight;
|
||||
}
|
||||
}
|
||||
else
|
||||
_boxWidth = _boxHeight = _minSize * _scales[0];
|
||||
|
||||
for (int i = 0; i < _offsetsX.size(); ++i)
|
||||
{
|
||||
float center_x = (w + _offsetsX[i]) * stepX;
|
||||
float center_y = (h + _offsetsY[i]) * stepY;
|
||||
outputPtr = addPrior(center_x, center_y, _boxWidth, _boxHeight, _imageWidth,
|
||||
_imageHeight, _bboxesNormalized, outputPtr);
|
||||
}
|
||||
if (_maxSize > 0)
|
||||
{
|
||||
// second prior: aspect_ratio = 1, size = sqrt(min_size * max_size)
|
||||
_boxWidth = _boxHeight = sqrt(_minSize * _maxSize) * _scales[1];
|
||||
for (int i = 0; i < _offsetsX.size(); ++i)
|
||||
{
|
||||
float center_x = (w + _offsetsX[i]) * stepX;
|
||||
float center_y = (h + _offsetsY[i]) * stepY;
|
||||
outputPtr = addPrior(center_x, center_y, _boxWidth, _boxHeight, _imageWidth,
|
||||
_imageHeight, _bboxesNormalized, outputPtr);
|
||||
}
|
||||
}
|
||||
|
||||
// rest of priors
|
||||
CV_Assert(_aspectRatios.empty() || (_maxSize > 0 ? 2 : 1) + _aspectRatios.size() == _scales.size());
|
||||
for (size_t r = 0; r < _aspectRatios.size(); ++r)
|
||||
{
|
||||
float ar = _aspectRatios[r];
|
||||
float scale = _scales[(_maxSize > 0 ? 2 : 1) + r];
|
||||
_boxWidth = _minSize * sqrt(ar) * scale;
|
||||
_boxHeight = _minSize / sqrt(ar) * scale;
|
||||
for (int i = 0; i < _offsetsX.size(); ++i)
|
||||
{
|
||||
float center_x = (w + _offsetsX[i]) * stepX;
|
||||
float center_y = (h + _offsetsY[i]) * stepY;
|
||||
outputPtr = addPrior(center_x, center_y, _boxWidth, _boxHeight, _imageWidth,
|
||||
_imageHeight, _bboxesNormalized, outputPtr);
|
||||
}
|
||||
}
|
||||
|
||||
// rest of sizes
|
||||
CV_Assert(_widths.empty() || _widths.size() == _scales.size());
|
||||
for (size_t i = 1; i < _widths.size(); ++i)
|
||||
{
|
||||
_boxWidth = _widths[i] * _scales[i];
|
||||
_boxHeight = _heights[i] * _scales[i];
|
||||
if (_bboxesNormalized)
|
||||
{
|
||||
_boxWidth *= _imageWidth;
|
||||
_boxHeight *= _imageHeight;
|
||||
}
|
||||
_boxWidth = _boxWidths[i];
|
||||
_boxHeight = _boxHeights[i];
|
||||
for (int j = 0; j < _offsetsX.size(); ++j)
|
||||
{
|
||||
float center_x = (w + _offsetsX[j]) * stepX;
|
||||
@ -591,24 +532,21 @@ private:
|
||||
float _minSize;
|
||||
float _maxSize;
|
||||
|
||||
float _boxWidth;
|
||||
float _boxHeight;
|
||||
|
||||
float _stepX, _stepY;
|
||||
|
||||
std::vector<float> _aspectRatios;
|
||||
std::vector<float> _variance;
|
||||
std::vector<float> _scales;
|
||||
std::vector<float> _widths;
|
||||
std::vector<float> _heights;
|
||||
std::vector<float> _offsetsX;
|
||||
std::vector<float> _offsetsY;
|
||||
// Precomputed final widths and heights based on aspect ratios or explicit sizes.
|
||||
std::vector<float> _boxWidths;
|
||||
std::vector<float> _boxHeights;
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
UMat umat_offsetsX;
|
||||
UMat umat_offsetsY;
|
||||
UMat umat_aspectRatios;
|
||||
UMat umat_scales;
|
||||
UMat umat_widths;
|
||||
UMat umat_heights;
|
||||
UMat umat_variance;
|
||||
#endif
|
||||
|
||||
|
@ -45,14 +45,12 @@
|
||||
__kernel void prior_box(const int nthreads,
|
||||
const Dtype stepX,
|
||||
const Dtype stepY,
|
||||
const Dtype _minSize,
|
||||
const Dtype _maxSize,
|
||||
__global const Dtype* _offsetsX,
|
||||
__global const Dtype* _offsetsY,
|
||||
const int offsetsX_size,
|
||||
__global const Dtype* _aspectRatios,
|
||||
const int aspectRatios_size,
|
||||
__global const Dtype* scales,
|
||||
__global const Dtype* _widths,
|
||||
__global const Dtype* _heights,
|
||||
const int widths_size,
|
||||
__global Dtype* dst,
|
||||
const int _layerHeight,
|
||||
const int _layerWidth,
|
||||
@ -64,57 +62,19 @@ __kernel void prior_box(const int nthreads,
|
||||
int w = index % _layerWidth;
|
||||
int h = index / _layerWidth;
|
||||
__global Dtype* outputPtr;
|
||||
int aspect_count = (_maxSize > 0) ? 1 : 0;
|
||||
outputPtr = dst + index * 4 * offsetsX_size * (1 + aspect_count + aspectRatios_size);
|
||||
|
||||
outputPtr = dst + index * 4 * offsetsX_size * widths_size;
|
||||
|
||||
Dtype _boxWidth, _boxHeight;
|
||||
Dtype4 vec;
|
||||
_boxWidth = _boxHeight = _minSize * scales[0];
|
||||
for (int i = 0; i < offsetsX_size; ++i)
|
||||
for (int i = 0; i < widths_size; ++i)
|
||||
{
|
||||
float center_x = (w + _offsetsX[i]) * stepX;
|
||||
float center_y = (h + _offsetsY[i]) * stepY;
|
||||
|
||||
vec.x = (center_x - _boxWidth * 0.5f) / imgWidth; // xmin
|
||||
vec.y = (center_y - _boxHeight * 0.5f) / imgHeight; // ymin
|
||||
vec.z = (center_x + _boxWidth * 0.5f) / imgWidth; // xmax
|
||||
vec.w = (center_y + _boxHeight * 0.5f) / imgHeight; // ymax
|
||||
vstore4(vec, 0, outputPtr);
|
||||
|
||||
outputPtr += 4;
|
||||
}
|
||||
|
||||
if (_maxSize > 0)
|
||||
{
|
||||
_boxWidth = _boxHeight = native_sqrt(_minSize * _maxSize) * scales[1];
|
||||
|
||||
for (int i = 0; i < offsetsX_size; ++i)
|
||||
_boxWidth = _widths[i];
|
||||
_boxHeight = _heights[i];
|
||||
for (int j = 0; j < offsetsX_size; ++j)
|
||||
{
|
||||
float center_x = (w + _offsetsX[i]) * stepX;
|
||||
float center_y = (h + _offsetsY[i]) * stepY;
|
||||
|
||||
vec.x = (center_x - _boxWidth * 0.5f) / imgWidth; // xmin
|
||||
vec.y = (center_y - _boxHeight * 0.5f) / imgHeight; // ymin
|
||||
vec.z = (center_x + _boxWidth * 0.5f) / imgWidth; // xmax
|
||||
vec.w = (center_y + _boxHeight * 0.5f) / imgHeight; // ymax
|
||||
vstore4(vec, 0, outputPtr);
|
||||
|
||||
outputPtr += 4;
|
||||
}
|
||||
}
|
||||
|
||||
for (int r = 0; r < aspectRatios_size; ++r)
|
||||
{
|
||||
float ar = native_sqrt(_aspectRatios[r]);
|
||||
float scale = scales[(_maxSize > 0 ? 2 : 1) + r];
|
||||
|
||||
_boxWidth = _minSize * ar * scale;
|
||||
_boxHeight = _minSize / ar * scale;
|
||||
|
||||
for (int i = 0; i < offsetsX_size; ++i)
|
||||
{
|
||||
float center_x = (w + _offsetsX[i]) * stepX;
|
||||
float center_y = (h + _offsetsY[i]) * stepY;
|
||||
float center_x = (w + _offsetsX[j]) * stepX;
|
||||
float center_y = (h + _offsetsY[j]) * stepY;
|
||||
|
||||
vec.x = (center_x - _boxWidth * 0.5f) / imgWidth; // xmin
|
||||
vec.y = (center_y - _boxHeight * 0.5f) / imgHeight; // ymin
|
||||
|
@ -26,6 +26,8 @@ parser.add_argument('--max_scale', default=0.95, type=float, help='Hyper-paramet
|
||||
parser.add_argument('--num_layers', default=6, type=int, help='Hyper-parameter of ssd_anchor_generator from config file.')
|
||||
parser.add_argument('--aspect_ratios', default=[1.0, 2.0, 0.5, 3.0, 0.333], type=float, nargs='+',
|
||||
help='Hyper-parameter of ssd_anchor_generator from config file.')
|
||||
parser.add_argument('--image_width', default=300, type=int, help='Training images width.')
|
||||
parser.add_argument('--image_height', default=300, type=int, help='Training images height.')
|
||||
args = parser.parse_args()
|
||||
|
||||
# Nodes that should be kept.
|
||||
@ -192,7 +194,6 @@ for i in range(args.num_layers):
|
||||
|
||||
text_format.Merge('b: false', priorBox.attr["flip"])
|
||||
text_format.Merge('b: false', priorBox.attr["clip"])
|
||||
text_format.Merge('b: true', priorBox.attr["normalized_bbox"])
|
||||
|
||||
if i == 0:
|
||||
widths = [args.min_scale * 0.5, args.min_scale * sqrt(2.0), args.min_scale * sqrt(0.5)]
|
||||
@ -203,6 +204,8 @@ for i in range(args.num_layers):
|
||||
|
||||
widths += [sqrt(scales[i] * scales[i + 1])]
|
||||
heights += [sqrt(scales[i] * scales[i + 1])]
|
||||
widths = [w * args.image_width for w in widths]
|
||||
heights = [h * args.image_height for h in heights]
|
||||
text_format.Merge(tensorMsg(widths), priorBox.attr["width"])
|
||||
text_format.Merge(tensorMsg(heights), priorBox.attr["height"])
|
||||
text_format.Merge(tensorMsg([0.1, 0.1, 0.2, 0.2]), priorBox.attr["variance"])
|
||||
|
Loading…
Reference in New Issue
Block a user