diff --git a/modules/dnn/src/layers/prior_box_layer.cpp b/modules/dnn/src/layers/prior_box_layer.cpp index 45f48e7c32..848e1921be 100644 --- a/modules/dnn/src/layers/prior_box_layer.cpp +++ b/modules/dnn/src/layers/prior_box_layer.cpp @@ -179,7 +179,6 @@ public: } PriorBoxLayerImpl(const LayerParams &params) - : _boxWidth(0), _boxHeight(0) { setParamsFrom(params); _minSize = getParameter(params, "min_size", 0, false, 0); @@ -187,37 +186,56 @@ public: _clip = getParameter(params, "clip", 0, false, true); _bboxesNormalized = getParameter(params, "normalized_bbox", 0, false, true); - _scales.clear(); _aspectRatios.clear(); getAspectRatios(params); getVariance(params); - getParams("scales", params, &_scales); - getParams("width", params, &_widths); - getParams("height", params, &_heights); - _explicitSizes = !_widths.empty(); - CV_Assert(_widths.size() == _heights.size()); - - if (_explicitSizes) - { - CV_Assert(_aspectRatios.empty(), !params.has("min_size"), !params.has("max_size")); - _numPriors = _widths.size(); - } - else - { - CV_Assert(!_aspectRatios.empty(), _minSize > 0); - _numPriors = _aspectRatios.size() + 1; // + 1 for an aspect ratio 1.0 - } _maxSize = -1; if (params.has("max_size")) { _maxSize = params.get("max_size").get(0); CV_Assert(_maxSize > _minSize); - - _numPriors += 1; } + std::vector widths, heights; + getParams("width", params, &widths); + getParams("height", params, &heights); + _explicitSizes = !widths.empty(); + CV_Assert(widths.size() == heights.size()); + + if (_explicitSizes) + { + CV_Assert(_aspectRatios.empty(), !params.has("min_size"), !params.has("max_size")); + _boxWidths = widths; + _boxHeights = heights; + } + else + { + CV_Assert(!_aspectRatios.empty(), _minSize > 0); + _boxWidths.resize(1 + (_maxSize > 0 ? 
1 : 0) + _aspectRatios.size()); + _boxHeights.resize(_boxWidths.size()); + _boxWidths[0] = _boxHeights[0] = _minSize; + + int i = 1; + if (_maxSize > 0) + { + // second prior: aspect_ratio = 1, size = sqrt(min_size * max_size) + _boxWidths[i] = _boxHeights[i] = sqrt(_minSize * _maxSize); + i += 1; + } + + // rest of priors + for (size_t r = 0; r < _aspectRatios.size(); ++r) + { + float arSqrt = sqrt(_aspectRatios[r]); + _boxWidths[i + r] = _minSize * arSqrt; + _boxHeights[i + r] = _minSize / arSqrt; + } + } + CV_Assert(_boxWidths.size() == _boxHeights.size()); + _numPriors = _boxWidths.size(); + if (params.has("step_h") || params.has("step_w")) { CV_Assert(!params.has("step")); _stepY = getParameter(params, "step_h"); @@ -252,8 +270,7 @@ public: virtual bool supportBackend(int backendId) { return backendId == DNN_BACKEND_DEFAULT || - backendId == DNN_BACKEND_INFERENCE_ENGINE && haveInfEngine() && - _scales.empty() && !_explicitSizes; + backendId == DNN_BACKEND_INFERENCE_ENGINE && haveInfEngine() && !_explicitSizes; } bool getMemoryShapes(const std::vector &inputs, @@ -307,27 +324,16 @@ public: if (umat_offsetsX.empty()) { Mat offsetsX(1, _offsetsX.size(), CV_32FC1, &_offsetsX[0]); - Mat offsetsY(1, _offsetsX.size(), CV_32FC1, &_offsetsY[0]); - Mat aspectRatios(1, _aspectRatios.size(), CV_32FC1, &_aspectRatios[0]); + Mat offsetsY(1, _offsetsY.size(), CV_32FC1, &_offsetsY[0]); Mat variance(1, _variance.size(), CV_32FC1, &_variance[0]); + Mat widths(1, _boxWidths.size(), CV_32FC1, &_boxWidths[0]); + Mat heights(1, _boxHeights.size(), CV_32FC1, &_boxHeights[0]); offsetsX.copyTo(umat_offsetsX); offsetsY.copyTo(umat_offsetsY); - aspectRatios.copyTo(umat_aspectRatios); variance.copyTo(umat_variance); - - int real_numPriors = _numPriors >> (_offsetsX.size() - 1); - if (_scales.empty()) - { - _scales.resize(real_numPriors, 1.0f); - umat_scales = UMat(1, &real_numPriors, CV_32F, 1.0f); - } - else - { - CV_Assert(_scales.size() == real_numPriors); - Mat scales(1, 
_scales.size(), CV_32FC1, &_scales[0]); - scales.copyTo(umat_scales); - } + widths.copyTo(umat_widths); + heights.copyTo(umat_heights); } size_t nthreads = _layerHeight * _layerWidth; @@ -336,19 +342,17 @@ public: kernel.set(0, (int)nthreads); kernel.set(1, (float)stepX); kernel.set(2, (float)stepY); - kernel.set(3, (float)_minSize); - kernel.set(4, (float)_maxSize); - kernel.set(5, ocl::KernelArg::PtrReadOnly(umat_offsetsX)); - kernel.set(6, ocl::KernelArg::PtrReadOnly(umat_offsetsY)); - kernel.set(7, (int)_offsetsX.size()); - kernel.set(8, ocl::KernelArg::PtrReadOnly(umat_aspectRatios)); - kernel.set(9, (int)_aspectRatios.size()); - kernel.set(10, ocl::KernelArg::PtrReadOnly(umat_scales)); - kernel.set(11, ocl::KernelArg::PtrWriteOnly(outputs[0])); - kernel.set(12, (int)_layerHeight); - kernel.set(13, (int)_layerWidth); - kernel.set(14, (int)_imageHeight); - kernel.set(15, (int)_imageWidth); + kernel.set(3, ocl::KernelArg::PtrReadOnly(umat_offsetsX)); + kernel.set(4, ocl::KernelArg::PtrReadOnly(umat_offsetsY)); + kernel.set(5, (int)_offsetsX.size()); + kernel.set(6, ocl::KernelArg::PtrReadOnly(umat_widths)); + kernel.set(7, ocl::KernelArg::PtrReadOnly(umat_heights)); + kernel.set(8, (int)_boxWidths.size()); + kernel.set(9, ocl::KernelArg::PtrWriteOnly(outputs[0])); + kernel.set(10, (int)_layerHeight); + kernel.set(11, (int)_layerWidth); + kernel.set(12, (int)_imageHeight); + kernel.set(13, (int)_imageWidth); kernel.run(1, &nthreads, NULL, false); // clip the prior's coordidate such that it is within [0, 1] @@ -401,12 +405,6 @@ public: CV_Assert(inputs.size() == 2); - size_t real_numPriors = _numPriors >> (_offsetsX.size() - 1); - if (_scales.empty()) - _scales.resize(real_numPriors, 1.0f); - else - CV_Assert(_scales.size() == real_numPriors); - int _layerWidth = inputs[0]->size[3]; int _layerHeight = inputs[0]->size[2]; @@ -425,72 +423,15 @@ public: int _outChannelSize = _layerHeight * _layerWidth * _numPriors * 4; float* outputPtr = outputs[0].ptr(); + float 
_boxWidth, _boxHeight; for (size_t h = 0; h < _layerHeight; ++h) { for (size_t w = 0; w < _layerWidth; ++w) { - // first prior: aspect_ratio = 1, size = min_size - if (_explicitSizes) + for (size_t i = 0; i < _boxWidths.size(); ++i) { - _boxWidth = _widths[0] * _scales[0]; - _boxHeight = _heights[0] * _scales[0]; - if (_bboxesNormalized) - { - _boxWidth *= _imageWidth; - _boxHeight *= _imageHeight; - } - } - else - _boxWidth = _boxHeight = _minSize * _scales[0]; - - for (int i = 0; i < _offsetsX.size(); ++i) - { - float center_x = (w + _offsetsX[i]) * stepX; - float center_y = (h + _offsetsY[i]) * stepY; - outputPtr = addPrior(center_x, center_y, _boxWidth, _boxHeight, _imageWidth, - _imageHeight, _bboxesNormalized, outputPtr); - } - if (_maxSize > 0) - { - // second prior: aspect_ratio = 1, size = sqrt(min_size * max_size) - _boxWidth = _boxHeight = sqrt(_minSize * _maxSize) * _scales[1]; - for (int i = 0; i < _offsetsX.size(); ++i) - { - float center_x = (w + _offsetsX[i]) * stepX; - float center_y = (h + _offsetsY[i]) * stepY; - outputPtr = addPrior(center_x, center_y, _boxWidth, _boxHeight, _imageWidth, - _imageHeight, _bboxesNormalized, outputPtr); - } - } - - // rest of priors - CV_Assert(_aspectRatios.empty() || (_maxSize > 0 ? 2 : 1) + _aspectRatios.size() == _scales.size()); - for (size_t r = 0; r < _aspectRatios.size(); ++r) - { - float ar = _aspectRatios[r]; - float scale = _scales[(_maxSize > 0 ? 
2 : 1) + r]; - _boxWidth = _minSize * sqrt(ar) * scale; - _boxHeight = _minSize / sqrt(ar) * scale; - for (int i = 0; i < _offsetsX.size(); ++i) - { - float center_x = (w + _offsetsX[i]) * stepX; - float center_y = (h + _offsetsY[i]) * stepY; - outputPtr = addPrior(center_x, center_y, _boxWidth, _boxHeight, _imageWidth, - _imageHeight, _bboxesNormalized, outputPtr); - } - } - - // rest of sizes - CV_Assert(_widths.empty() || _widths.size() == _scales.size()); - for (size_t i = 1; i < _widths.size(); ++i) - { - _boxWidth = _widths[i] * _scales[i]; - _boxHeight = _heights[i] * _scales[i]; - if (_bboxesNormalized) - { - _boxWidth *= _imageWidth; - _boxHeight *= _imageHeight; - } + _boxWidth = _boxWidths[i]; + _boxHeight = _boxHeights[i]; for (int j = 0; j < _offsetsX.size(); ++j) { float center_x = (w + _offsetsX[j]) * stepX; @@ -591,24 +532,21 @@ private: float _minSize; float _maxSize; - float _boxWidth; - float _boxHeight; - float _stepX, _stepY; std::vector _aspectRatios; std::vector _variance; - std::vector _scales; - std::vector _widths; - std::vector _heights; std::vector _offsetsX; std::vector _offsetsY; + // Precomputed final widths and heights based on aspect ratios or explicit sizes. 
+ std::vector _boxWidths; + std::vector _boxHeights; #ifdef HAVE_OPENCL UMat umat_offsetsX; UMat umat_offsetsY; - UMat umat_aspectRatios; - UMat umat_scales; + UMat umat_widths; + UMat umat_heights; UMat umat_variance; #endif diff --git a/modules/dnn/src/opencl/prior_box.cl b/modules/dnn/src/opencl/prior_box.cl index 660ccb64d5..c51cd43830 100644 --- a/modules/dnn/src/opencl/prior_box.cl +++ b/modules/dnn/src/opencl/prior_box.cl @@ -45,14 +45,12 @@ __kernel void prior_box(const int nthreads, const Dtype stepX, const Dtype stepY, - const Dtype _minSize, - const Dtype _maxSize, __global const Dtype* _offsetsX, __global const Dtype* _offsetsY, const int offsetsX_size, - __global const Dtype* _aspectRatios, - const int aspectRatios_size, - __global const Dtype* scales, + __global const Dtype* _widths, + __global const Dtype* _heights, + const int widths_size, __global Dtype* dst, const int _layerHeight, const int _layerWidth, @@ -64,57 +62,19 @@ __kernel void prior_box(const int nthreads, int w = index % _layerWidth; int h = index / _layerWidth; __global Dtype* outputPtr; - int aspect_count = (_maxSize > 0) ? 
1 : 0; - outputPtr = dst + index * 4 * offsetsX_size * (1 + aspect_count + aspectRatios_size); + + outputPtr = dst + index * 4 * offsetsX_size * widths_size; Dtype _boxWidth, _boxHeight; Dtype4 vec; - _boxWidth = _boxHeight = _minSize * scales[0]; - for (int i = 0; i < offsetsX_size; ++i) + for (int i = 0; i < widths_size; ++i) { - float center_x = (w + _offsetsX[i]) * stepX; - float center_y = (h + _offsetsY[i]) * stepY; - - vec.x = (center_x - _boxWidth * 0.5f) / imgWidth; // xmin - vec.y = (center_y - _boxHeight * 0.5f) / imgHeight; // ymin - vec.z = (center_x + _boxWidth * 0.5f) / imgWidth; // xmax - vec.w = (center_y + _boxHeight * 0.5f) / imgHeight; // ymax - vstore4(vec, 0, outputPtr); - - outputPtr += 4; - } - - if (_maxSize > 0) - { - _boxWidth = _boxHeight = native_sqrt(_minSize * _maxSize) * scales[1]; - - for (int i = 0; i < offsetsX_size; ++i) + _boxWidth = _widths[i]; + _boxHeight = _heights[i]; + for (int j = 0; j < offsetsX_size; ++j) { - float center_x = (w + _offsetsX[i]) * stepX; - float center_y = (h + _offsetsY[i]) * stepY; - - vec.x = (center_x - _boxWidth * 0.5f) / imgWidth; // xmin - vec.y = (center_y - _boxHeight * 0.5f) / imgHeight; // ymin - vec.z = (center_x + _boxWidth * 0.5f) / imgWidth; // xmax - vec.w = (center_y + _boxHeight * 0.5f) / imgHeight; // ymax - vstore4(vec, 0, outputPtr); - - outputPtr += 4; - } - } - - for (int r = 0; r < aspectRatios_size; ++r) - { - float ar = native_sqrt(_aspectRatios[r]); - float scale = scales[(_maxSize > 0 ? 
2 : 1) + r]; - - _boxWidth = _minSize * ar * scale; - _boxHeight = _minSize / ar * scale; - - for (int i = 0; i < offsetsX_size; ++i) - { - float center_x = (w + _offsetsX[i]) * stepX; - float center_y = (h + _offsetsY[i]) * stepY; + float center_x = (w + _offsetsX[j]) * stepX; + float center_y = (h + _offsetsY[j]) * stepY; vec.x = (center_x - _boxWidth * 0.5f) / imgWidth; // xmin vec.y = (center_y - _boxHeight * 0.5f) / imgHeight; // ymin diff --git a/samples/dnn/tf_text_graph_ssd.py b/samples/dnn/tf_text_graph_ssd.py index 50fdc2ca3a..f4f10668a9 100644 --- a/samples/dnn/tf_text_graph_ssd.py +++ b/samples/dnn/tf_text_graph_ssd.py @@ -26,6 +26,8 @@ parser.add_argument('--max_scale', default=0.95, type=float, help='Hyper-paramet parser.add_argument('--num_layers', default=6, type=int, help='Hyper-parameter of ssd_anchor_generator from config file.') parser.add_argument('--aspect_ratios', default=[1.0, 2.0, 0.5, 3.0, 0.333], type=float, nargs='+', help='Hyper-parameter of ssd_anchor_generator from config file.') +parser.add_argument('--image_width', default=300, type=int, help='Training images width.') +parser.add_argument('--image_height', default=300, type=int, help='Training images height.') args = parser.parse_args() # Nodes that should be kept. 
@@ -192,7 +194,6 @@ for i in range(args.num_layers): text_format.Merge('b: false', priorBox.attr["flip"]) text_format.Merge('b: false', priorBox.attr["clip"]) - text_format.Merge('b: true', priorBox.attr["normalized_bbox"]) if i == 0: widths = [args.min_scale * 0.5, args.min_scale * sqrt(2.0), args.min_scale * sqrt(0.5)] @@ -203,6 +204,8 @@ for i in range(args.num_layers): widths += [sqrt(scales[i] * scales[i + 1])] heights += [sqrt(scales[i] * scales[i + 1])] + widths = [w * args.image_width for w in widths] + heights = [h * args.image_height for h in heights] text_format.Merge(tensorMsg(widths), priorBox.attr["width"]) text_format.Merge(tensorMsg(heights), priorBox.attr["height"]) text_format.Merge(tensorMsg([0.1, 0.1, 0.2, 0.2]), priorBox.attr["variance"])