mirror of
https://github.com/opencv/opencv.git
synced 2025-06-12 20:42:53 +08:00
Merge pull request #26127 from alexlyulkov:al/blob-from-images
Faster implementation of blobFromImages for cpu nchw output #26127

Faster implementation of blobFromImage and blobFromImages for the HWC cv::Mat images -> NCHW cv::Mat case.

Running time on my PC, in ms:

**blobFromImage**
```
image size       old      new      speed-up
32x32x3          0.008    0.002    4.0x
64x64x3          0.021    0.009    2.3x
128x128x3        0.164    0.037    4.4x
256x256x3        0.728    0.158    4.6x
512x512x3        3.310    0.628    5.2x
1024x1024x3      14.503   3.124    4.6x
2048x2048x3      61.647   28.049   2.2x
```

**blobFromImages**
```
image size       old        new       speed-up
16x32x32x3       0.122      0.041     3.0x
16x64x64x3       0.790      0.165     4.8x
16x128x128x3     3.313      0.652     5.1x
16x256x256x3     13.495     3.127     4.3x
16x512x512x3     58.795     28.127    2.1x
16x1024x1024x3   251.135    121.955   2.1x
16x2048x2048x3   1023.570   487.188   2.1x
```

### Pull Request Readiness Checklist

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [x] I agree to contribute to the project under Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [x] The PR is proposed to the proper branch
- [ ] There is a reference to the original bug report and related work
- [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable. Patch to opencv_extra has the same branch name.
- [x] The feature is well documented and sample code can be built with the project CMake
This commit is contained in:
parent
d9a139f9e8
commit
aa52dafc90
66
modules/dnn/perf/perf_utils.cpp
Normal file
66
modules/dnn/perf/perf_utils.cpp
Normal file
@ -0,0 +1,66 @@
|
|||||||
|
// This file is part of OpenCV project.
|
||||||
|
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||||
|
// of this distribution and at http://opencv.org/license.html.
|
||||||
|
//
|
||||||
|
// Copyright (C) 2017, Intel Corporation, all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
|
||||||
|
#include "perf_precomp.hpp"
|
||||||
|
|
||||||
|
namespace opencv_test {
|
||||||
|
|
||||||
|
using Utils_blobFromImage = TestBaseWithParam<std::vector<int>>;
|
||||||
|
PERF_TEST_P_(Utils_blobFromImage, HWC_TO_NCHW) {
|
||||||
|
std::vector<int> input_shape = GetParam();
|
||||||
|
|
||||||
|
Mat input(input_shape, CV_32FC3);
|
||||||
|
randu(input, -10.0f, 10.f);
|
||||||
|
|
||||||
|
TEST_CYCLE() {
|
||||||
|
Mat blob = blobFromImage(input);
|
||||||
|
}
|
||||||
|
|
||||||
|
SANITY_CHECK_NOTHING();
|
||||||
|
}
|
||||||
|
|
||||||
|
INSTANTIATE_TEST_CASE_P(/**/, Utils_blobFromImage,
|
||||||
|
Values(std::vector<int>{ 32, 32},
|
||||||
|
std::vector<int>{ 64, 64},
|
||||||
|
std::vector<int>{ 128, 128},
|
||||||
|
std::vector<int>{ 256, 256},
|
||||||
|
std::vector<int>{ 512, 512},
|
||||||
|
std::vector<int>{1024, 1024},
|
||||||
|
std::vector<int>{2048, 2048})
|
||||||
|
);
|
||||||
|
|
||||||
|
using Utils_blobFromImages = TestBaseWithParam<std::vector<int>>;
|
||||||
|
PERF_TEST_P_(Utils_blobFromImages, HWC_TO_NCHW) {
|
||||||
|
std::vector<int> input_shape = GetParam();
|
||||||
|
int batch = input_shape.front();
|
||||||
|
std::vector<int> input_shape_no_batch(input_shape.begin()+1, input_shape.end());
|
||||||
|
|
||||||
|
std::vector<Mat> inputs;
|
||||||
|
for (int i = 0; i < batch; i++) {
|
||||||
|
Mat input(input_shape_no_batch, CV_32FC3);
|
||||||
|
randu(input, -10.0f, 10.f);
|
||||||
|
inputs.push_back(input);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_CYCLE() {
|
||||||
|
Mat blobs = blobFromImages(inputs);
|
||||||
|
}
|
||||||
|
|
||||||
|
SANITY_CHECK_NOTHING();
|
||||||
|
}
|
||||||
|
|
||||||
|
INSTANTIATE_TEST_CASE_P(/**/, Utils_blobFromImages,
|
||||||
|
Values(std::vector<int>{16, 32, 32},
|
||||||
|
std::vector<int>{16, 64, 64},
|
||||||
|
std::vector<int>{16, 128, 128},
|
||||||
|
std::vector<int>{16, 256, 256},
|
||||||
|
std::vector<int>{16, 512, 512},
|
||||||
|
std::vector<int>{16, 1024, 1024},
|
||||||
|
std::vector<int>{16, 2048, 2048})
|
||||||
|
);
|
||||||
|
|
||||||
|
}
|
@ -126,6 +126,111 @@ Mat blobFromImagesWithParams(InputArrayOfArrays images, const Image2BlobParams&
|
|||||||
return blob;
|
return blob;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template<typename Tinp, typename Tout>
|
||||||
|
void blobFromImagesNCHWImpl(const std::vector<Mat>& images, Mat& blob_, const Image2BlobParams& param)
|
||||||
|
{
|
||||||
|
int w = images[0].cols;
|
||||||
|
int h = images[0].rows;
|
||||||
|
int wh = w * h;
|
||||||
|
int nch = images[0].channels();
|
||||||
|
CV_Assert(nch == 1 || nch == 3 || nch == 4);
|
||||||
|
int sz[] = { (int)images.size(), nch, h, w};
|
||||||
|
blob_.create(4, sz, param.ddepth);
|
||||||
|
|
||||||
|
for (size_t k = 0; k < images.size(); ++k)
|
||||||
|
{
|
||||||
|
CV_Assert(images[k].depth() == images[0].depth());
|
||||||
|
CV_Assert(images[k].channels() == images[0].channels());
|
||||||
|
CV_Assert(images[k].size() == images[0].size());
|
||||||
|
|
||||||
|
Tout* p_blob = blob_.ptr<Tout>() + k * nch * wh;
|
||||||
|
Tout* p_blob_r = p_blob;
|
||||||
|
Tout* p_blob_g = p_blob + wh;
|
||||||
|
Tout* p_blob_b = p_blob + 2 * wh;
|
||||||
|
Tout* p_blob_a = p_blob + 3 * wh;
|
||||||
|
|
||||||
|
if (param.swapRB)
|
||||||
|
std::swap(p_blob_r, p_blob_b);
|
||||||
|
|
||||||
|
for (size_t i = 0; i < h; ++i)
|
||||||
|
{
|
||||||
|
const Tinp* p_img_row = images[k].ptr<Tinp>(i);
|
||||||
|
|
||||||
|
if (nch == 1)
|
||||||
|
{
|
||||||
|
for (size_t j = 0; j < w; ++j)
|
||||||
|
{
|
||||||
|
p_blob[i * w + j] = p_img_row[j];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (nch == 3)
|
||||||
|
{
|
||||||
|
for (size_t j = 0; j < w; ++j)
|
||||||
|
{
|
||||||
|
p_blob_r[i * w + j] = p_img_row[j * 3 ];
|
||||||
|
p_blob_g[i * w + j] = p_img_row[j * 3 + 1];
|
||||||
|
p_blob_b[i * w + j] = p_img_row[j * 3 + 2];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else // if (nch == 4)
|
||||||
|
{
|
||||||
|
for (size_t j = 0; j < w; ++j)
|
||||||
|
{
|
||||||
|
p_blob_r[i * w + j] = p_img_row[j * 4 ];
|
||||||
|
p_blob_g[i * w + j] = p_img_row[j * 4 + 1];
|
||||||
|
p_blob_b[i * w + j] = p_img_row[j * 4 + 2];
|
||||||
|
p_blob_a[i * w + j] = p_img_row[j * 4 + 3];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (param.mean == Scalar() && param.scalefactor == Scalar::all(1.0))
|
||||||
|
return;
|
||||||
|
CV_CheckTypeEQ(param.ddepth, CV_32F, "Scaling and mean substraction is supported only for CV_32F blob depth");
|
||||||
|
|
||||||
|
for (size_t k = 0; k < images.size(); ++k)
|
||||||
|
{
|
||||||
|
for (size_t ch = 0; ch < nch; ++ch)
|
||||||
|
{
|
||||||
|
float cur_mean = param.mean[ch];
|
||||||
|
float cur_scale = param.scalefactor[ch];
|
||||||
|
Tout* p_blob = blob_.ptr<Tout>() + k * nch * wh + ch * wh;
|
||||||
|
for (size_t i = 0; i < wh; ++i)
|
||||||
|
{
|
||||||
|
p_blob[i] = (p_blob[i] - cur_mean) * cur_scale;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename Tout>
|
||||||
|
void blobFromImagesNCHW(const std::vector<Mat>& images, Mat& blob_, const Image2BlobParams& param)
|
||||||
|
{
|
||||||
|
if (images[0].depth() == CV_8U)
|
||||||
|
blobFromImagesNCHWImpl<uint8_t, Tout>(images, blob_, param);
|
||||||
|
else if (images[0].depth() == CV_8S)
|
||||||
|
blobFromImagesNCHWImpl<int8_t, Tout>(images, blob_, param);
|
||||||
|
else if (images[0].depth() == CV_16U)
|
||||||
|
blobFromImagesNCHWImpl<uint16_t, Tout>(images, blob_, param);
|
||||||
|
else if (images[0].depth() == CV_16S)
|
||||||
|
blobFromImagesNCHWImpl<int16_t, Tout>(images, blob_, param);
|
||||||
|
else if (images[0].depth() == CV_32S)
|
||||||
|
blobFromImagesNCHWImpl<int32_t, Tout>(images, blob_, param);
|
||||||
|
else if (images[0].depth() == CV_32F)
|
||||||
|
blobFromImagesNCHWImpl<float, Tout>(images, blob_, param);
|
||||||
|
else if (images[0].depth() == CV_64F)
|
||||||
|
blobFromImagesNCHWImpl<double, Tout>(images, blob_, param);
|
||||||
|
else
|
||||||
|
CV_Error(Error::BadDepth, "Unsupported input image depth for blobFromImagesNCHW");
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename Tout>
|
||||||
|
void blobFromImagesNCHW(const std::vector<UMat>& images, UMat& blob_, const Image2BlobParams& param)
|
||||||
|
{
|
||||||
|
CV_Error(Error::StsNotImplemented, "");
|
||||||
|
}
|
||||||
|
|
||||||
template<class Tmat>
|
template<class Tmat>
|
||||||
void blobFromImagesWithParamsImpl(InputArrayOfArrays images_, Tmat& blob_, const Image2BlobParams& param)
|
void blobFromImagesWithParamsImpl(InputArrayOfArrays images_, Tmat& blob_, const Image2BlobParams& param)
|
||||||
{
|
{
|
||||||
@ -154,19 +259,6 @@ void blobFromImagesWithParamsImpl(InputArrayOfArrays images_, Tmat& blob_, const
|
|||||||
Scalar scalefactor = param.scalefactor;
|
Scalar scalefactor = param.scalefactor;
|
||||||
Scalar mean = param.mean;
|
Scalar mean = param.mean;
|
||||||
|
|
||||||
if (param.swapRB)
|
|
||||||
{
|
|
||||||
if (nch > 2)
|
|
||||||
{
|
|
||||||
std::swap(mean[0], mean[2]);
|
|
||||||
std::swap(scalefactor[0], scalefactor[2]);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
CV_LOG_WARNING(NULL, "Red/blue color swapping requires at least three image channels.");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
for (size_t i = 0; i < images.size(); i++)
|
for (size_t i = 0; i < images.size(); i++)
|
||||||
{
|
{
|
||||||
Size imgSize = images[i].size();
|
Size imgSize = images[i].size();
|
||||||
@ -203,18 +295,35 @@ void blobFromImagesWithParamsImpl(InputArrayOfArrays images_, Tmat& blob_, const
|
|||||||
resize(images[i], images[i], size, 0, 0, INTER_LINEAR);
|
resize(images[i], images[i], size, 0, 0, INTER_LINEAR);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (images[i].depth() == CV_8U && param.ddepth == CV_32F)
|
|
||||||
images[i].convertTo(images[i], CV_32F);
|
|
||||||
|
|
||||||
subtract(images[i], mean, images[i]);
|
|
||||||
multiply(images[i], scalefactor, images[i]);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t nimages = images.size();
|
size_t nimages = images.size();
|
||||||
Tmat image0 = images[0];
|
Tmat image0 = images[0];
|
||||||
CV_Assert(image0.dims == 2);
|
CV_Assert(image0.dims == 2);
|
||||||
|
|
||||||
|
if (std::is_same<Tmat, Mat>::value && param.datalayout == DNN_LAYOUT_NCHW)
|
||||||
|
{
|
||||||
|
// Fast implementation for HWC cv::Mat images -> NCHW cv::Mat blob
|
||||||
|
if (param.ddepth == CV_8U)
|
||||||
|
blobFromImagesNCHW<uint8_t>(images, blob_, param);
|
||||||
|
else
|
||||||
|
blobFromImagesNCHW<float>(images, blob_, param);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (param.swapRB)
|
||||||
|
{
|
||||||
|
if (nch > 2)
|
||||||
|
{
|
||||||
|
std::swap(mean[0], mean[2]);
|
||||||
|
std::swap(scalefactor[0], scalefactor[2]);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
CV_LOG_WARNING(NULL, "Red/blue color swapping requires at least three image channels.");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (param.datalayout == DNN_LAYOUT_NCHW)
|
if (param.datalayout == DNN_LAYOUT_NCHW)
|
||||||
{
|
{
|
||||||
if (nch == 3 || nch == 4)
|
if (nch == 3 || nch == 4)
|
||||||
@ -225,7 +334,14 @@ void blobFromImagesWithParamsImpl(InputArrayOfArrays images_, Tmat& blob_, const
|
|||||||
|
|
||||||
for (size_t i = 0; i < nimages; i++)
|
for (size_t i = 0; i < nimages; i++)
|
||||||
{
|
{
|
||||||
const Tmat& image = images[i];
|
Tmat& image = images[i];
|
||||||
|
if (image.depth() == CV_8U && param.ddepth == CV_32F)
|
||||||
|
image.convertTo(image, CV_32F);
|
||||||
|
if (mean != Scalar())
|
||||||
|
subtract(image, mean, image);
|
||||||
|
if (scalefactor != Scalar::all(1.0))
|
||||||
|
multiply(image, scalefactor, image);
|
||||||
|
|
||||||
CV_Assert(image.depth() == blob_.depth());
|
CV_Assert(image.depth() == blob_.depth());
|
||||||
nch = image.channels();
|
nch = image.channels();
|
||||||
CV_Assert(image.dims == 2 && (nch == 3 || nch == 4));
|
CV_Assert(image.dims == 2 && (nch == 3 || nch == 4));
|
||||||
@ -250,7 +366,14 @@ void blobFromImagesWithParamsImpl(InputArrayOfArrays images_, Tmat& blob_, const
|
|||||||
|
|
||||||
for (size_t i = 0; i < nimages; i++)
|
for (size_t i = 0; i < nimages; i++)
|
||||||
{
|
{
|
||||||
const Tmat& image = images[i];
|
Tmat& image = images[i];
|
||||||
|
if (image.depth() == CV_8U && param.ddepth == CV_32F)
|
||||||
|
image.convertTo(image, CV_32F);
|
||||||
|
if (mean != Scalar())
|
||||||
|
subtract(image, mean, image);
|
||||||
|
if (scalefactor != Scalar::all(1.0))
|
||||||
|
multiply(image, scalefactor, image);
|
||||||
|
|
||||||
CV_Assert(image.depth() == blob_.depth());
|
CV_Assert(image.depth() == blob_.depth());
|
||||||
nch = image.channels();
|
nch = image.channels();
|
||||||
CV_Assert(image.dims == 2 && (nch == 1));
|
CV_Assert(image.dims == 2 && (nch == 1));
|
||||||
@ -269,7 +392,14 @@ void blobFromImagesWithParamsImpl(InputArrayOfArrays images_, Tmat& blob_, const
|
|||||||
int subMatType = CV_MAKETYPE(param.ddepth, nch);
|
int subMatType = CV_MAKETYPE(param.ddepth, nch);
|
||||||
for (size_t i = 0; i < nimages; i++)
|
for (size_t i = 0; i < nimages; i++)
|
||||||
{
|
{
|
||||||
const Tmat& image = images[i];
|
Tmat& image = images[i];
|
||||||
|
if (image.depth() == CV_8U && param.ddepth == CV_32F)
|
||||||
|
image.convertTo(image, CV_32F);
|
||||||
|
if (mean != Scalar())
|
||||||
|
subtract(image, mean, image);
|
||||||
|
if (scalefactor != Scalar::all(1.0))
|
||||||
|
multiply(image, scalefactor, image);
|
||||||
|
|
||||||
CV_Assert(image.depth() == blob_.depth());
|
CV_Assert(image.depth() == blob_.depth());
|
||||||
CV_Assert(image.channels() == image0.channels());
|
CV_Assert(image.channels() == image0.channels());
|
||||||
CV_Assert(image.size() == image0.size());
|
CV_Assert(image.size() == image0.size());
|
||||||
|
Loading…
Reference in New Issue
Block a user