diff --git a/modules/dnn/perf/perf_utils.cpp b/modules/dnn/perf/perf_utils.cpp new file mode 100644 index 0000000000..1ab544b3eb --- /dev/null +++ b/modules/dnn/perf/perf_utils.cpp @@ -0,0 +1,66 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2017, Intel Corporation, all rights reserved. +// Third party copyrights are property of their respective owners. + +#include "perf_precomp.hpp" + +namespace opencv_test { + +using Utils_blobFromImage = TestBaseWithParam>; +PERF_TEST_P_(Utils_blobFromImage, HWC_TO_NCHW) { + std::vector input_shape = GetParam(); + + Mat input(input_shape, CV_32FC3); + randu(input, -10.0f, 10.f); + + TEST_CYCLE() { + Mat blob = blobFromImage(input); + } + + SANITY_CHECK_NOTHING(); +} + +INSTANTIATE_TEST_CASE_P(/**/, Utils_blobFromImage, + Values(std::vector{ 32, 32}, + std::vector{ 64, 64}, + std::vector{ 128, 128}, + std::vector{ 256, 256}, + std::vector{ 512, 512}, + std::vector{1024, 1024}, + std::vector{2048, 2048}) +); + +using Utils_blobFromImages = TestBaseWithParam>; +PERF_TEST_P_(Utils_blobFromImages, HWC_TO_NCHW) { + std::vector input_shape = GetParam(); + int batch = input_shape.front(); + std::vector input_shape_no_batch(input_shape.begin()+1, input_shape.end()); + + std::vector inputs; + for (int i = 0; i < batch; i++) { + Mat input(input_shape_no_batch, CV_32FC3); + randu(input, -10.0f, 10.f); + inputs.push_back(input); + } + + TEST_CYCLE() { + Mat blobs = blobFromImages(inputs); + } + + SANITY_CHECK_NOTHING(); +} + +INSTANTIATE_TEST_CASE_P(/**/, Utils_blobFromImages, + Values(std::vector{16, 32, 32}, + std::vector{16, 64, 64}, + std::vector{16, 128, 128}, + std::vector{16, 256, 256}, + std::vector{16, 512, 512}, + std::vector{16, 1024, 1024}, + std::vector{16, 2048, 2048}) +); + +} diff --git a/modules/dnn/src/dnn_utils.cpp b/modules/dnn/src/dnn_utils.cpp index 3ef75fad36..87561d0e50 100644 --- a/modules/dnn/src/dnn_utils.cpp +++ b/modules/dnn/src/dnn_utils.cpp @@ -126,6 +126,111 @@ Mat blobFromImagesWithParams(InputArrayOfArrays images, const Image2BlobParams& return blob; } +template +void blobFromImagesNCHWImpl(const std::vector& images, Mat& blob_, const Image2BlobParams& param) +{ + int w = images[0].cols; + int h = images[0].rows; + int wh = w * h; + int nch = images[0].channels(); + CV_Assert(nch == 1 || nch == 3 || nch == 4); + int sz[] = { (int)images.size(), nch, h, w}; + blob_.create(4, sz, param.ddepth); + + for (size_t k = 0; k < images.size(); ++k) + { + CV_Assert(images[k].depth() == images[0].depth()); + CV_Assert(images[k].channels() == images[0].channels()); + CV_Assert(images[k].size() == images[0].size()); + + Tout* p_blob = blob_.ptr() + k * nch * wh; + Tout* p_blob_r = p_blob; + Tout* p_blob_g = p_blob + wh; + Tout* p_blob_b = p_blob + 2 * wh; + Tout* p_blob_a = p_blob + 3 * wh; + + if (param.swapRB) + std::swap(p_blob_r, p_blob_b); + + for (size_t i = 0; i < h; ++i) + { + const Tinp* p_img_row = images[k].ptr(i); + + if (nch == 1) + { + for (size_t j = 0; j < w; ++j) + { + p_blob[i * w + j] = p_img_row[j]; + } + } + else if (nch == 3) + { + for (size_t j = 0; j < w; ++j) + { + p_blob_r[i * w + j] = p_img_row[j * 3 ]; + p_blob_g[i * w + j] = p_img_row[j * 3 + 1]; + p_blob_b[i * w + j] = p_img_row[j * 3 + 2]; + } + } + else // if (nch == 4) + { + for (size_t j = 0; j < w; ++j) + { + p_blob_r[i * w + j] = p_img_row[j * 4 ]; + p_blob_g[i * w + j] = p_img_row[j * 4 + 1]; + p_blob_b[i * w + j] = p_img_row[j * 4 + 2]; + p_blob_a[i * w + j] = p_img_row[j * 4 + 3]; + } + } + } + } + + if (param.mean == Scalar() && param.scalefactor == Scalar::all(1.0)) + return; + CV_CheckTypeEQ(param.ddepth, CV_32F, "Scaling and mean substraction is supported only for CV_32F blob depth"); + + for (size_t k = 0; k < images.size(); ++k) + { + for (size_t ch = 0; ch < nch; ++ch) + { + float cur_mean = param.mean[ch]; + float cur_scale = param.scalefactor[ch]; + Tout* p_blob = blob_.ptr() + k * nch * wh + ch * wh; + for (size_t i = 0; i < wh; ++i) + { + p_blob[i] = (p_blob[i] - cur_mean) * cur_scale; + } + } + } +} + +template +void blobFromImagesNCHW(const std::vector& images, Mat& blob_, const Image2BlobParams& param) +{ + if (images[0].depth() == CV_8U) + blobFromImagesNCHWImpl(images, blob_, param); + else if (images[0].depth() == CV_8S) + blobFromImagesNCHWImpl(images, blob_, param); + else if (images[0].depth() == CV_16U) + blobFromImagesNCHWImpl(images, blob_, param); + else if (images[0].depth() == CV_16S) + blobFromImagesNCHWImpl(images, blob_, param); + else if (images[0].depth() == CV_32S) + blobFromImagesNCHWImpl(images, blob_, param); + else if (images[0].depth() == CV_32F) + blobFromImagesNCHWImpl(images, blob_, param); + else if (images[0].depth() == CV_64F) + blobFromImagesNCHWImpl(images, blob_, param); + else + CV_Error(Error::BadDepth, "Unsupported input image depth for blobFromImagesNCHW"); +} + +template +void blobFromImagesNCHW(const std::vector& images, UMat& blob_, const Image2BlobParams& param) +{ + CV_Error(Error::StsNotImplemented, ""); +} + template void blobFromImagesWithParamsImpl(InputArrayOfArrays images_, Tmat& blob_, const Image2BlobParams& param) { @@ -154,19 +259,6 @@ void blobFromImagesWithParamsImpl(InputArrayOfArrays images_, Tmat& blob_, const Scalar scalefactor = param.scalefactor; Scalar mean = param.mean; - if (param.swapRB) - { - if (nch > 2) - { - std::swap(mean[0], mean[2]); - std::swap(scalefactor[0], scalefactor[2]); - } - else - { - CV_LOG_WARNING(NULL, "Red/blue color swapping requires at least three image channels."); - } - } - for (size_t i = 0; i < images.size(); i++) { Size imgSize = images[i].size(); @@ -203,18 +295,35 @@ void blobFromImagesWithParamsImpl(InputArrayOfArrays images_, Tmat& blob_, const resize(images[i], images[i], size, 0, 0, INTER_LINEAR); } } - - if (images[i].depth() == CV_8U && param.ddepth == CV_32F) - images[i].convertTo(images[i], CV_32F); - - subtract(images[i], mean, images[i]); - multiply(images[i], scalefactor, images[i]); } size_t nimages = images.size(); Tmat image0 = images[0]; CV_Assert(image0.dims == 2); + if (std::is_same::value && param.datalayout == DNN_LAYOUT_NCHW) + { + // Fast implementation for HWC cv::Mat images -> NCHW cv::Mat blob + if (param.ddepth == CV_8U) + blobFromImagesNCHW(images, blob_, param); + else + blobFromImagesNCHW(images, blob_, param); + return; + } + + if (param.swapRB) + { + if (nch > 2) + { + std::swap(mean[0], mean[2]); + std::swap(scalefactor[0], scalefactor[2]); + } + else + { + CV_LOG_WARNING(NULL, "Red/blue color swapping requires at least three image channels."); + } + } + if (param.datalayout == DNN_LAYOUT_NCHW) { if (nch == 3 || nch == 4) @@ -225,7 +334,14 @@ void blobFromImagesWithParamsImpl(InputArrayOfArrays images_, Tmat& blob_, const for (size_t i = 0; i < nimages; i++) { - const Tmat& image = images[i]; + Tmat& image = images[i]; + if (image.depth() == CV_8U && param.ddepth == CV_32F) + image.convertTo(image, CV_32F); + if (mean != Scalar()) + subtract(image, mean, image); + if (scalefactor != Scalar::all(1.0)) + multiply(image, scalefactor, image); + CV_Assert(image.depth() == blob_.depth()); nch = image.channels(); CV_Assert(image.dims == 2 && (nch == 3 || nch == 4)); @@ -250,7 +366,14 @@ void blobFromImagesWithParamsImpl(InputArrayOfArrays images_, Tmat& blob_, const for (size_t i = 0; i < nimages; i++) { - const Tmat& image = images[i]; + Tmat& image = images[i]; + if (image.depth() == CV_8U && param.ddepth == CV_32F) + image.convertTo(image, CV_32F); + if (mean != Scalar()) + subtract(image, mean, image); + if (scalefactor != Scalar::all(1.0)) + multiply(image, scalefactor, image); + CV_Assert(image.depth() == blob_.depth()); nch = image.channels(); CV_Assert(image.dims == 2 && (nch == 1)); @@ -269,7 +392,14 @@ void blobFromImagesWithParamsImpl(InputArrayOfArrays images_, Tmat& blob_, const int subMatType = CV_MAKETYPE(param.ddepth, nch); for (size_t i = 0; i < nimages; i++) { - const Tmat& image = images[i]; + Tmat& image = images[i]; + if (image.depth() == CV_8U && param.ddepth == CV_32F) + image.convertTo(image, CV_32F); + if (mean != Scalar()) + subtract(image, mean, image); + if (scalefactor != Scalar::all(1.0)) + multiply(image, scalefactor, image); + CV_Assert(image.depth() == blob_.depth()); CV_Assert(image.channels() == image0.channels()); CV_Assert(image.size() == image0.size());