Merge pull request #22750 from zihaomu:improve_blobFromImage

DNN: Add New API blobFromImageParam #22750

The purpose of this PR:

1. Add a new API, `blobFromImageWithParams`, that extends the `blobFromImage` API. It supports different data layouts (NCHW or NHWC) and letterbox preprocessing. A minimal usage sketch is shown below.
2. ~~`blobFromImage` can output `CV_16F`~~
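
For reference, a minimal C++ usage sketch of the new API (the names and parameters are taken from the header diff below; the input path and the concrete values are illustrative):

    #include <opencv2/dnn.hpp>
    #include <opencv2/imgcodecs.hpp>

    using namespace cv;
    using namespace cv::dnn;

    int main()
    {
        Mat img = imread("input.jpg");                    // illustrative input path

        Image2BlobParams param;
        param.scalefactor = Scalar::all(1.0 / 255.0);     // applied as (input - mean) * scalefactor
        param.size        = Size(640, 640);               // spatial size of the output blob
        param.mean        = Scalar(0, 0, 0);              // subtracted per channel before scaling
        param.swapRB      = true;                         // swap the B and R channels (BGR -> RGB)
        param.ddepth      = CV_32F;                       // output depth: CV_32F or CV_8U
        param.datalayout  = DNN_LAYOUT_NHWC;              // NCHW (default) or NHWC
        param.paddingmode = DNN_PMODE_LETTERBOX;          // keep aspect ratio, pad the borders

        Mat blob = blobFromImageWithParams(img, param);   // 4D blob, here [1, 640, 640, 3]
        return 0;
    }

Note from the implementation below: scaling and mean subtraction are rejected when `ddepth` is `CV_8U`.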

### Pull Request Readiness Checklist

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [x] I agree to contribute to the project under Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [x] The PR is proposed to the proper branch
- [ ] There is a reference to the original bug report and related work
- [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
      Patch to opencv_extra has the same branch name.
- [ ] The feature is well documented and sample code can be built with the project CMake
Zihao Mu 2023-04-22 00:10:17 +08:00 committed by GitHub
parent 810096c276
commit 601778e0e6
6 changed files with 408 additions and 152 deletions


@@ -108,6 +108,21 @@ CV__DNN_INLINE_NS_BEGIN
     DNN_TARGET_NPU,
 };

+/**
+ * @brief Enum of data layout for model inference.
+ * @see Image2BlobParams
+ */
+enum DataLayout
+{
+    DNN_LAYOUT_UNKNOWN = 0,
+    DNN_LAYOUT_ND = 1,      //!< OpenCV data layout for 2D data.
+    DNN_LAYOUT_NCHW = 2,    //!< OpenCV data layout for 4D data.
+    DNN_LAYOUT_NCDHW = 3,   //!< OpenCV data layout for 5D data.
+    DNN_LAYOUT_NHWC = 4,    //!< TensorFlow-like data layout for 4D data.
+    DNN_LAYOUT_NDHWC = 5,   //!< TensorFlow-like data layout for 5D data.
+    DNN_LAYOUT_PLANAR = 6,  //!< TensorFlow-like data layout; should only be used for TF or TFLite model parsing.
+};
+
 CV_EXPORTS std::vector< std::pair<Backend, Target> > getAvailableBackends();
 CV_EXPORTS_W std::vector<Target> getAvailableTargets(dnn::Backend be);
@@ -1111,10 +1126,10 @@ CV__DNN_INLINE_NS_BEGIN
 /** @brief Creates 4-dimensional blob from image. Optionally resizes and crops @p image from center,
  *  subtract @p mean values, scales values by @p scalefactor, swap Blue and Red channels.
  *  @param image input image (with 1-, 3- or 4-channels).
- *  @param scalefactor multiplier for @p images values.
  *  @param size spatial size for output image
  *  @param mean scalar with mean values which are subtracted from channels. Values are intended
  *  to be in (mean-R, mean-G, mean-B) order if @p image has BGR ordering and @p swapRB is true.
+ *  @param scalefactor multiplier for @p image values.
  *  @param swapRB flag which indicates that swap first and last channels
  *  in 3-channel image is necessary.
  *  @param crop flag which indicates whether image will be cropped after resize or not
@@ -1123,6 +1138,9 @@ CV__DNN_INLINE_NS_BEGIN
  *  dimension in @p size and another one is equal or larger. Then, crop from the center is performed.
  *  If @p crop is false, direct resize without cropping and preserving aspect ratio is performed.
  *  @returns 4-dimensional Mat with NCHW dimensions order.
+ *
+ *  @note
+ *  The order and usage of `scalefactor` and `mean` are (input - mean) * scalefactor.
  */
 CV_EXPORTS_W Mat blobFromImage(InputArray image, double scalefactor=1.0, const Size& size = Size(),
                                const Scalar& mean = Scalar(), bool swapRB=false, bool crop=false,
@@ -1153,6 +1171,9 @@ CV__DNN_INLINE_NS_BEGIN
  *  dimension in @p size and another one is equal or larger. Then, crop from the center is performed.
  *  If @p crop is false, direct resize without cropping and preserving aspect ratio is performed.
  *  @returns 4-dimensional Mat with NCHW dimensions order.
+ *
+ *  @note
+ *  The order and usage of `scalefactor` and `mean` are (input - mean) * scalefactor.
  */
 CV_EXPORTS_W Mat blobFromImages(InputArrayOfArrays images, double scalefactor=1.0,
                                 Size size = Size(), const Scalar& mean = Scalar(), bool swapRB=false, bool crop=false,
@@ -1167,6 +1188,74 @@ CV__DNN_INLINE_NS_BEGIN
                                 const Scalar& mean = Scalar(), bool swapRB=false, bool crop=false,
                                 int ddepth=CV_32F);

+/**
+ * @brief Enum of image processing modes.
+ * It facilitates special pre-processing requirements of DNN models.
+ * For example, the letterbox preprocessing is often used by the YOLO series of models.
+ * @see Image2BlobParams
+ */
+enum ImagePaddingMode
+{
+    DNN_PMODE_NULL = 0,        //!< Default. Resize to the required input size without extra processing.
+    DNN_PMODE_CROP_CENTER = 1, //!< Image will be cropped after resize.
+    DNN_PMODE_LETTERBOX = 2,   //!< Resize image to the desired size while preserving the aspect ratio of the original image.
+};
+
+/** @brief Processing params of image to blob.
+ *
+ *  It includes all possible image processing operations and corresponding parameters.
+ *
+ *  @see blobFromImageWithParams
+ *
+ *  @note
+ *  The order and usage of `scalefactor` and `mean` are (input - mean) * scalefactor.
+ *  The order and usage of `scalefactor`, `size`, `mean`, `swapRB`, and `ddepth` are consistent
+ *  with the function of @ref blobFromImage.
+ */
+struct CV_EXPORTS_W_SIMPLE Image2BlobParams
+{
+    CV_WRAP Image2BlobParams();
+    CV_WRAP Image2BlobParams(const Scalar& scalefactor, const Size& size = Size(), const Scalar& mean = Scalar(),
+                             bool swapRB = false, int ddepth = CV_32F, DataLayout datalayout = DNN_LAYOUT_NCHW,
+                             ImagePaddingMode mode = DNN_PMODE_NULL);
+
+    CV_PROP_RW Scalar scalefactor;           //!< Scale multiplier for input image values.
+    CV_PROP_RW Size size;                    //!< Spatial size for output image.
+    CV_PROP_RW Scalar mean;                  //!< Scalar with mean values which are subtracted from channels.
+    CV_PROP_RW bool swapRB;                  //!< Flag which indicates that swap first and last channels.
+    CV_PROP_RW int ddepth;                   //!< Depth of output blob. Choose CV_32F or CV_8U.
+    CV_PROP_RW DataLayout datalayout;        //!< Order of output dimensions. Choose DNN_LAYOUT_NCHW or DNN_LAYOUT_NHWC.
+    CV_PROP_RW ImagePaddingMode paddingmode; //!< Image padding mode. @see ImagePaddingMode.
+};
+
+/** @brief Creates 4-dimensional blob from image with the given params.
+ *
+ *  @details This function is an extension of @ref blobFromImage to meet more image preprocessing needs.
+ *  Given the input image and preprocessing parameters, the function outputs the blob.
+ *
+ *  @param image input image (with 1-, 3- or 4-channels).
+ *  @param param struct of Image2BlobParams, which contains all the parameters needed to process the image into a blob.
+ *  @return 4-dimensional Mat.
+ */
+CV_EXPORTS_W Mat blobFromImageWithParams(InputArray image, const Image2BlobParams& param = Image2BlobParams());
+
+/** @overload */
+CV_EXPORTS_W void blobFromImageWithParams(InputArray image, OutputArray blob, const Image2BlobParams& param = Image2BlobParams());
+
+/** @brief Creates 4-dimensional blob from a series of images with the given params.
+ *
+ *  @details This function is an extension of @ref blobFromImages to meet more image preprocessing needs.
+ *  Given the input images and preprocessing parameters, the function outputs the blob.
+ *
+ *  @param images input images (all with 1-, 3- or 4-channels).
+ *  @param param struct of Image2BlobParams, which contains all the parameters needed to process the images into a blob.
+ *  @returns 4-dimensional Mat.
+ */
+CV_EXPORTS_W Mat blobFromImagesWithParams(InputArrayOfArrays images, const Image2BlobParams& param = Image2BlobParams());
+
+/** @overload */
+CV_EXPORTS_W void blobFromImagesWithParams(InputArrayOfArrays images, OutputArray blob, const Image2BlobParams& param = Image2BlobParams());
+
 /** @brief Parse a 4D blob and output the images it contains as 2D arrays through a simpler data structure
  *  (std::vector<cv::Mat>).
  *  @param[in] blob_ 4 dimensional array (images, channels, height, width) in floating point precision (CV_32F) from
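
A note on the letterbox mode introduced above: DNN_PMODE_LETTERBOX scales the image by the smaller of the two width/height ratios and pads the remainder with a constant border, as the blob-building source below shows. A standalone sketch of the same geometry (not the library code itself; the names here are illustrative):

    #include <algorithm>

    // Letterbox geometry: scale by the smaller ratio, then pad the short side.
    // Mirrors the DNN_PMODE_LETTERBOX branch in the implementation below.
    struct LetterboxGeometry { int rw, rh, top, bottom, left, right; };

    LetterboxGeometry letterbox(int srcW, int srcH, int dstW, int dstH)
    {
        float r = std::min(dstW / (float)srcW, dstH / (float)srcH);
        int rw = int(srcW * r);   // resized width, <= dstW
        int rh = int(srcH * r);   // resized height, <= dstH
        int top = (dstH - rh) / 2;
        int left = (dstW - rw) / 2;
        return {rw, rh, top, dstH - top - rh, left, dstW - left - rw};
    }
    // e.g. a 20x40 (width x height) image fitted into a 20x20 target: r = 0.5,
    // resized to 10x20, padded by 5 pixels on the left and right
    // (cf. the letter_box test at the bottom of this PR).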


@@ -119,7 +119,7 @@ class dnn_test(NewOpenCVTests):
             inp = np.random.standard_normal([1, 2, 10, 11]).astype(np.float32)
             net.setInput(inp)
             net.forward()
-        except BaseException as e:
+        except BaseException:
             return False
         return True
@@ -153,6 +153,41 @@ class dnn_test(NewOpenCVTests):
         target = target.transpose(2, 0, 1).reshape(1, 3, height, width)  # to NCHW
         normAssert(self, blob, target)

+    def test_blobFromImageWithParams(self):
+        np.random.seed(324)
+        width = 6
+        height = 7
+        stddev = np.array([0.2, 0.3, 0.4])
+        scalefactor = 1.0 / 127.5 * stddev
+        mean = (10, 20, 30)
+
+        # Test argument names.
+        img = np.random.randint(0, 255, [4, 5, 3]).astype(np.uint8)
+
+        param = cv.dnn.Image2BlobParams()
+        param.scalefactor = scalefactor
+        param.size = (6, 7)
+        param.mean = mean
+        param.swapRB = True
+        param.datalayout = cv.dnn.DNN_LAYOUT_NHWC
+
+        blob = cv.dnn.blobFromImageWithParams(img, param)
+        blob_args = cv.dnn.blobFromImageWithParams(img, cv.dnn.Image2BlobParams(scalefactor=scalefactor, size=(6, 7), mean=mean,
+                                                   swapRB=True, datalayout=cv.dnn.DNN_LAYOUT_NHWC))
+        normAssert(self, blob, blob_args)
+
+        target2 = cv.resize(img, (width, height), interpolation=cv.INTER_LINEAR).astype(np.float32)
+        target2 = target2[:, :, [2, 1, 0]]  # BGR2RGB
+        target2[:, :, 0] -= mean[0]
+        target2[:, :, 1] -= mean[1]
+        target2[:, :, 2] -= mean[2]
+
+        target2[:, :, 0] *= scalefactor[0]
+        target2[:, :, 1] *= scalefactor[1]
+        target2[:, :, 2] *= scalefactor[2]
+        target2 = target2.reshape(1, height, width, 3)  # to NHWC
+
+        normAssert(self, blob, target2)
+
     def test_model(self):
         img_path = self.find_dnn_file("dnn/street.png")

@@ -11,8 +11,17 @@ namespace cv {
 namespace dnn {
 CV__DNN_INLINE_NS_BEGIN

+Image2BlobParams::Image2BlobParams():scalefactor(Scalar::all(1.0)), size(Size()), mean(Scalar()), swapRB(false), ddepth(CV_32F),
+                                     datalayout(DNN_LAYOUT_NCHW), paddingmode(DNN_PMODE_NULL)
+{}
+
+Image2BlobParams::Image2BlobParams(const Scalar& scalefactor_, const Size& size_, const Scalar& mean_, bool swapRB_,
+                                   int ddepth_, DataLayout datalayout_, ImagePaddingMode mode_):
+    scalefactor(scalefactor_), size(size_), mean(mean_), swapRB(swapRB_), ddepth(ddepth_),
+    datalayout(datalayout_), paddingmode(mode_)
+{}
+
-Mat blobFromImage(InputArray image, double scalefactor, const Size& size,
+Mat blobFromImage(InputArray image, const double scalefactor, const Size& size,
                   const Scalar& mean, bool swapRB, bool crop, int ddepth)
 {
     CV_TRACE_FUNCTION();
@@ -42,16 +51,55 @@ void blobFromImages(InputArrayOfArrays images_, OutputArray blob_, double scalef
                     Size size, const Scalar& mean_, bool swapRB, bool crop, int ddepth)
 {
     CV_TRACE_FUNCTION();
-    CV_CheckType(ddepth, ddepth == CV_32F || ddepth == CV_8U, "Blob depth should be CV_32F or CV_8U");
-    if (ddepth == CV_8U)
-    {
-        CV_CheckEQ(scalefactor, 1.0, "Scaling is not supported for CV_8U blob depth");
-        CV_Assert(mean_ == Scalar() && "Mean subtraction is not supported for CV_8U blob depth");
-    }
+    Image2BlobParams param(Scalar::all(scalefactor), size, mean_, swapRB, ddepth);
+    if (crop)
+        param.paddingmode = DNN_PMODE_CROP_CENTER;
+    blobFromImagesWithParams(images_, blob_, param);
+}
+
+Mat blobFromImageWithParams(InputArray image, const Image2BlobParams& param)
+{
+    CV_TRACE_FUNCTION();
+    Mat blob;
+    blobFromImageWithParams(image, blob, param);
+    return blob;
+}
+
+void blobFromImageWithParams(InputArray image, OutputArray blob, const Image2BlobParams& param)
+{
+    CV_TRACE_FUNCTION();
+    std::vector<Mat> images(1, image.getMat());
+    blobFromImagesWithParams(images, blob, param);
+}
+
+Mat blobFromImagesWithParams(InputArrayOfArrays images, const Image2BlobParams& param)
+{
+    CV_TRACE_FUNCTION();
+    Mat blob;
+    blobFromImagesWithParams(images, blob, param);
+    return blob;
+}
+
+void blobFromImagesWithParams(InputArrayOfArrays images_, OutputArray blob_, const Image2BlobParams& param)
+{
+    CV_TRACE_FUNCTION();
+    CV_CheckType(param.ddepth, param.ddepth == CV_32F || param.ddepth == CV_8U,
+                 "Blob depth should be CV_32F or CV_8U");
+    Size size = param.size;
     std::vector<Mat> images;
     images_.getMatVector(images);
     CV_Assert(!images.empty());
+
+    int nch = images[0].channels();
+    Scalar scalefactor = param.scalefactor;
+
+    if (param.ddepth == CV_8U)
+    {
+        CV_Assert(scalefactor == Scalar::all(1.0) && "Scaling is not supported for CV_8U blob depth");
+        CV_Assert(param.mean == Scalar() && "Mean subtraction is not supported for CV_8U blob depth");
+    }
+
     for (size_t i = 0; i < images.size(); i++)
     {
         Size imgSize = images[i].size();
@@ -59,73 +107,122 @@ void blobFromImages(InputArrayOfArrays images_, OutputArray blob_, double scalef
             size = imgSize;
         if (size != imgSize)
         {
-            if (crop)
+            if (param.paddingmode == DNN_PMODE_CROP_CENTER)
             {
                 float resizeFactor = std::max(size.width / (float)imgSize.width,
                                               size.height / (float)imgSize.height);
                 resize(images[i], images[i], Size(), resizeFactor, resizeFactor, INTER_LINEAR);
                 Rect crop(Point(0.5 * (images[i].cols - size.width),
                                 0.5 * (images[i].rows - size.height)),
                           size);
                 images[i] = images[i](crop);
             }
             else
-                resize(images[i], images[i], size, 0, 0, INTER_LINEAR);
+            {
+                if (param.paddingmode == DNN_PMODE_LETTERBOX)
+                {
+                    float resizeFactor = std::min(size.width / (float)imgSize.width,
+                                                  size.height / (float)imgSize.height);
+                    int rh = int(imgSize.height * resizeFactor);
+                    int rw = int(imgSize.width * resizeFactor);
+                    resize(images[i], images[i], Size(rw, rh), INTER_LINEAR);
+
+                    int top = (size.height - rh) / 2;
+                    int bottom = size.height - top - rh;
+                    int left = (size.width - rw) / 2;
+                    int right = size.width - left - rw;
+                    copyMakeBorder(images[i], images[i], top, bottom, left, right, BORDER_CONSTANT);
+                }
+                else
+                    resize(images[i], images[i], size, 0, 0, INTER_LINEAR);
+            }
         }

-        if (images[i].depth() == CV_8U && ddepth == CV_32F)
-            images[i].convertTo(images[i], CV_32F);
-        Scalar mean = mean_;
-        if (swapRB)
+        Scalar mean = param.mean;
+        if (param.swapRB)
+        {
             std::swap(mean[0], mean[2]);
+            std::swap(scalefactor[0], scalefactor[2]);
+        }
+
+        if (images[i].depth() == CV_8U && param.ddepth == CV_32F)
+            images[i].convertTo(images[i], CV_32F);

         images[i] -= mean;
-        images[i] *= scalefactor;
+        multiply(images[i], scalefactor, images[i]);
     }

     size_t nimages = images.size();
     Mat image0 = images[0];
-    int nch = image0.channels();
     CV_Assert(image0.dims == 2);

-    if (nch == 3 || nch == 4)
-    {
-        int sz[] = { (int)nimages, nch, image0.rows, image0.cols };
-        blob_.create(4, sz, ddepth);
-        Mat blob = blob_.getMat();
-        Mat ch[4];
-        for (size_t i = 0; i < nimages; i++)
-        {
-            const Mat& image = images[i];
-            CV_Assert(image.depth() == blob_.depth());
-            nch = image.channels();
-            CV_Assert(image.dims == 2 && (nch == 3 || nch == 4));
-            CV_Assert(image.size() == image0.size());
-            for (int j = 0; j < nch; j++)
-                ch[j] = Mat(image.rows, image.cols, ddepth, blob.ptr((int)i, j));
-            if (swapRB)
-                std::swap(ch[0], ch[2]);
-            split(image, ch);
-        }
-    }
-    else
-    {
-        CV_Assert(nch == 1);
-        int sz[] = { (int)nimages, 1, image0.rows, image0.cols };
-        blob_.create(4, sz, ddepth);
-        Mat blob = blob_.getMat();
-        for (size_t i = 0; i < nimages; i++)
-        {
-            const Mat& image = images[i];
-            CV_Assert(image.depth() == blob_.depth());
-            nch = image.channels();
-            CV_Assert(image.dims == 2 && (nch == 1));
-            CV_Assert(image.size() == image0.size());
-            image.copyTo(Mat(image.rows, image.cols, ddepth, blob.ptr((int)i, 0)));
-        }
-    }
+    if (param.datalayout == DNN_LAYOUT_NCHW)
+    {
+        if (nch == 3 || nch == 4)
+        {
+            int sz[] = { (int)nimages, nch, image0.rows, image0.cols };
+            blob_.create(4, sz, param.ddepth);
+            Mat blob = blob_.getMat();
+            Mat ch[4];
+
+            for (size_t i = 0; i < nimages; i++)
+            {
+                const Mat& image = images[i];
+                CV_Assert(image.depth() == blob_.depth());
+                nch = image.channels();
+                CV_Assert(image.dims == 2 && (nch == 3 || nch == 4));
+                CV_Assert(image.size() == image0.size());
+
+                for (int j = 0; j < nch; j++)
+                    ch[j] = Mat(image.rows, image.cols, param.ddepth, blob.ptr((int)i, j));
+                if (param.swapRB)
+                    std::swap(ch[0], ch[2]);
+                split(image, ch);
+            }
+        }
+        else
+        {
+            CV_Assert(nch == 1);
+            int sz[] = { (int)nimages, 1, image0.rows, image0.cols };
+            blob_.create(4, sz, param.ddepth);
+            Mat blob = blob_.getMat();
+
+            for (size_t i = 0; i < nimages; i++)
+            {
+                const Mat& image = images[i];
+                CV_Assert(image.depth() == blob_.depth());
+                nch = image.channels();
+                CV_Assert(image.dims == 2 && (nch == 1));
+                CV_Assert(image.size() == image0.size());
+
+                image.copyTo(Mat(image.rows, image.cols, param.ddepth, blob.ptr((int)i, 0)));
+            }
+        }
+    }
+    else if (param.datalayout == DNN_LAYOUT_NHWC)
+    {
+        int sz[] = { (int)nimages, image0.rows, image0.cols, nch };
+        blob_.create(4, sz, param.ddepth);
+        Mat blob = blob_.getMat();
+        int subMatType = CV_MAKETYPE(param.ddepth, nch);
+        for (size_t i = 0; i < nimages; i++)
+        {
+            const Mat& image = images[i];
+            CV_Assert(image.depth() == blob_.depth());
+            CV_Assert(image.channels() == image0.channels());
+            CV_Assert(image.size() == image0.size());
+
+            if (param.swapRB)
+            {
+                Mat tmpRB;
+                cvtColor(image, tmpRB, COLOR_BGR2RGB);
+                tmpRB.copyTo(Mat(tmpRB.rows, tmpRB.cols, subMatType, blob.ptr((int)i, 0)));
+            }
+            else
+                image.copyTo(Mat(image.rows, image.cols, subMatType, blob.ptr((int)i, 0)));
+        }
+    }
+    else
+        CV_Error(Error::StsUnsupportedFormat, "Unsupported data layout in blobFromImagesWithParams function.");
 }

 void imagesFromBlob(const cv::Mat& blob_, OutputArrayOfArrays images_)


@@ -64,16 +64,6 @@ static int toNCDHW(int idx)
     else return (5 + idx) % 4 + 1;
 }

-// This values are used to indicate layer output's data layout where it's possible.
-enum DataLayout
-{
-    DATA_LAYOUT_NHWC,
-    DATA_LAYOUT_NCHW,
-    DATA_LAYOUT_NDHWC,
-    DATA_LAYOUT_UNKNOWN,
-    DATA_LAYOUT_PLANAR  // 2-dimensional outputs (matmul, flatten, reshape to 2d)
-};
-
 typedef std::vector<std::pair<String, int> > StrIntVector;

 struct Pin
@@ -276,15 +266,15 @@ static DataLayout getDataLayout(const tensorflow::NodeDef& layer)
     {
         std::string format = getLayerAttr(layer, "data_format").s();
         if (format == "NHWC" || format == "channels_last")
-            return DATA_LAYOUT_NHWC;
+            return DNN_LAYOUT_NHWC;
         else if (format == "NCHW" || format == "channels_first")
-            return DATA_LAYOUT_NCHW;
+            return DNN_LAYOUT_NCHW;
         else if (format == "NDHWC")
-            return DATA_LAYOUT_NDHWC;
+            return DNN_LAYOUT_NDHWC;
         else
             CV_Error(Error::StsParseError, "Unknown data_format value: " + format);
     }
-    return DATA_LAYOUT_UNKNOWN;
+    return DNN_LAYOUT_UNKNOWN;
 }

 static inline std::string getNodeName(const std::string& tensorName)

@@ -299,7 +289,7 @@ DataLayout getDataLayout(
 )
 {
     std::map<String, DataLayout>::const_iterator it = data_layouts.find(getNodeName(layerName));
-    return it != data_layouts.end() ? it->second : DATA_LAYOUT_UNKNOWN;
+    return it != data_layouts.end() ? it->second : DNN_LAYOUT_UNKNOWN;
 }

 static
@@ -325,11 +315,11 @@ void setStrides(LayerParams &layerParams, const tensorflow::NodeDef &layer)
     const tensorflow::AttrValue& val = getLayerAttr(layer, "strides");
     int dimX, dimY, dimC, dimD;
     int layout = getDataLayout(layer);
-    if (layout == DATA_LAYOUT_NCHW)
+    if (layout == DNN_LAYOUT_NCHW)
     {
         dimC = 1; dimY = 2; dimX = 3;
     }
-    else if (layout == DATA_LAYOUT_NDHWC)
+    else if (layout == DNN_LAYOUT_NDHWC)
     {
         dimD = 1; dimY = 2; dimX = 3; dimC = 4;
     }

@@ -340,7 +330,7 @@ void setStrides(LayerParams &layerParams, const tensorflow::NodeDef &layer)
     if (!(val.list().i_size() == 4 || val.list().i_size() == 5) ||
         val.list().i(0) != 1 || val.list().i(dimC) != 1)
         CV_Error(Error::StsError, "Unsupported strides");
-    if (layout == DATA_LAYOUT_NDHWC) {
+    if (layout == DNN_LAYOUT_NDHWC) {
         int strides[] = {static_cast<int>(val.list().i(dimD)),
                          static_cast<int>(val.list().i(dimY)),
                          static_cast<int>(val.list().i(dimX))};

@@ -375,11 +365,11 @@ void setKSize(LayerParams &layerParams, const tensorflow::NodeDef &layer)
     const tensorflow::AttrValue& val = getLayerAttr(layer, "ksize");
     int dimX, dimY, dimC, dimD;
     int layout = getDataLayout(layer);
-    if (layout == DATA_LAYOUT_NCHW)
+    if (layout == DNN_LAYOUT_NCHW)
     {
         dimC = 1; dimY = 2; dimX = 3;
     }
-    else if (layout == DATA_LAYOUT_NDHWC)
+    else if (layout == DNN_LAYOUT_NDHWC)
     {
         dimD = 1; dimY = 2; dimX = 3; dimC = 4;
     }

@@ -391,7 +381,7 @@ void setKSize(LayerParams &layerParams, const tensorflow::NodeDef &layer)
         val.list().i(0) != 1 || val.list().i(dimC) != 1)
         CV_Error(Error::StsError, "Unsupported ksize");
-    if (layout == DATA_LAYOUT_NDHWC) {
+    if (layout == DNN_LAYOUT_NDHWC) {
         int kernel[] = {static_cast<int>(val.list().i(dimD)),
                         static_cast<int>(val.list().i(dimY)),
                         static_cast<int>(val.list().i(dimX))};
@@ -438,7 +428,7 @@ bool getExplicitPadding(LayerParams &layerParams, const tensorflow::NodeDef &lay
         pads[i] = protoPads.list().i(i);
     }

-    if (getDataLayout(layer) != DATA_LAYOUT_NCHW)
+    if (getDataLayout(layer) != DNN_LAYOUT_NCHW)
     {
         CV_LOG_DEBUG(NULL, "DNN/TF: Data format " << getLayerAttr(layer, "data_format").s() << ", assuming NHWC.");
         // Perhaps, we have NHWC padding dimensions order.

@@ -903,8 +893,8 @@ void TFImporter::parseConvolution(tensorflow::GraphDef& net, const tensorflow::N
     connect(layer_id, dstNet, parsePin(input), id, 0);

-    if (getDataLayout(name, data_layouts) == DATA_LAYOUT_UNKNOWN)
-        data_layouts[name] = DATA_LAYOUT_NHWC;
+    if (getDataLayout(name, data_layouts) == DNN_LAYOUT_UNKNOWN)
+        data_layouts[name] = DNN_LAYOUT_NHWC;
 }

 // "BiasAdd" "Add" "AddV2" "Sub" "AddN"

@@ -1072,7 +1062,7 @@ void TFImporter::parseMatMul(tensorflow::GraphDef& net, const tensorflow::NodeDe
     // one input only
     int input_blob_index = kernel_blob_index == 0 ? 1 : 0;
     connect(layer_id, dstNet, parsePin(layer.input(input_blob_index)), id, 0);
-    data_layouts[name] = DATA_LAYOUT_PLANAR;
+    data_layouts[name] = DNN_LAYOUT_PLANAR;
 }

 void TFImporter::parseReshape(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams)
@@ -1100,7 +1090,7 @@ void TFImporter::parseReshape(tensorflow::GraphDef& net, const tensorflow::NodeD
         bool changedType{false};

-        if (inpLayout == DATA_LAYOUT_NHWC)
+        if (inpLayout == DNN_LAYOUT_NHWC)
         {
             if (newShapeSize >= 2 || newShape.at<int>(1) == 1)
             {

@@ -1108,11 +1098,11 @@ void TFImporter::parseReshape(tensorflow::GraphDef& net, const tensorflow::NodeD
                 addPermuteLayer(order, name + "/nhwc", inpId);
                 if (newShapeSize < 4)
                 {
-                    inpLayout = DATA_LAYOUT_NCHW;
+                    inpLayout = DNN_LAYOUT_NCHW;
                 }
                 else
                 {
-                    inpLayout = DATA_LAYOUT_NHWC;
+                    inpLayout = DNN_LAYOUT_NHWC;
                     changedType = newShapeSize == 4 && !hasSwap;
                 }
             }

@@ -1128,17 +1118,17 @@ void TFImporter::parseReshape(tensorflow::GraphDef& net, const tensorflow::NodeD
             connect(layer_id, dstNet, inpId, id, 0);
             inpId = Pin(setName);

-            if ((inpLayout == DATA_LAYOUT_NHWC || inpLayout == DATA_LAYOUT_UNKNOWN || inpLayout == DATA_LAYOUT_PLANAR) &&
+            if ((inpLayout == DNN_LAYOUT_NHWC || inpLayout == DNN_LAYOUT_UNKNOWN || inpLayout == DNN_LAYOUT_PLANAR) &&
                 newShapeSize == 4 && !hasSwap)
             {
                 int order[] = {0, 3, 1, 2};  // Transform back to OpenCV's NCHW.
                 setName = changedType ? name : name + "/nchw";
                 addPermuteLayer(order, setName, inpId);
-                inpLayout = DATA_LAYOUT_NCHW;
+                inpLayout = DNN_LAYOUT_NCHW;
             }

-            data_layouts[name] = newShapeSize == 2 ? DATA_LAYOUT_PLANAR : inpLayout;
+            data_layouts[name] = newShapeSize == 2 ? DNN_LAYOUT_PLANAR : inpLayout;
         }
         else
         {

@@ -1206,13 +1196,13 @@ void TFImporter::parseExpandDims(tensorflow::GraphDef& net, const tensorflow::No
         addPermuteLayer(order, name + "/nhwc", inpId);

         // Convert shape From OpenCV's NCHW to NHWC.
-        if(inpLayout == DATA_LAYOUT_NHWC)
+        if(inpLayout == DNN_LAYOUT_NHWC)
         {
             std::swap(outShape[1], outShape[2]);
             std::swap(outShape[2], outShape[3]);
         }
     }

-    if(inpLayout == DATA_LAYOUT_NHWC || inpLayout == DATA_LAYOUT_NCHW)
+    if(inpLayout == DNN_LAYOUT_NHWC || inpLayout == DNN_LAYOUT_NCHW)
     {
         // toNCHW
         axis = (axis != 0)?(axis % outShapeSize + 1):0;

@@ -1221,13 +1211,13 @@ void TFImporter::parseExpandDims(tensorflow::GraphDef& net, const tensorflow::No
     // After ExpendDims, 5-dim data will become 6-dim data, and OpenCV retains 6-dim data as original data layout.
     // Convert OpenCV's NCDHW to NDHWC first.
-    if (inpShape.size() == 5 && (inpLayout == DATA_LAYOUT_NDHWC || inpLayout == DATA_LAYOUT_UNKNOWN))
+    if (inpShape.size() == 5 && (inpLayout == DNN_LAYOUT_NDHWC || inpLayout == DNN_LAYOUT_UNKNOWN))
     {
         int order[] = {0, 2, 3, 4, 1};  // From OpenCV's NCDHW to NDHWC.
         addPermuteLayer(order, name + "/ndhwc", inpId, 5);

         // Convert shape From OpenCV's NCDHW to NDHWC.
-        if(inpLayout == DATA_LAYOUT_NDHWC)
+        if(inpLayout == DNN_LAYOUT_NDHWC)
         {
             std::swap(outShape[1], outShape[2]);
             std::swap(outShape[2], outShape[3]);

@@ -1239,7 +1229,7 @@ void TFImporter::parseExpandDims(tensorflow::GraphDef& net, const tensorflow::No
     outShapeSize += 1;

     // From OpenCV's NCDHW to NDHWC.
-    if((inpLayout != DATA_LAYOUT_NHWC && inpLayout != DATA_LAYOUT_NCHW) && outShapeSize == 5)
+    if((inpLayout != DNN_LAYOUT_NHWC && inpLayout != DNN_LAYOUT_NCHW) && outShapeSize == 5)
     {
         for(int i = 1; i < outShapeSize - 1; i++)
         {

@@ -1255,11 +1245,11 @@ void TFImporter::parseExpandDims(tensorflow::GraphDef& net, const tensorflow::No
     if(outShapeSize == 5)
     {
-        data_layouts[name] = DATA_LAYOUT_NDHWC;
+        data_layouts[name] = DNN_LAYOUT_NDHWC;
     }
     else if(outShapeSize == 4)
     {
-        data_layouts[name] = DATA_LAYOUT_NCHW;
+        data_layouts[name] = DNN_LAYOUT_NCHW;
     }
     else
     {
@@ -1320,7 +1310,7 @@ void TFImporter::parseFlatten(tensorflow::GraphDef& net, const tensorflow::NodeD
         layerParams.set("axis", start);
         layerParams.set("end_axis", end);
     }
-    if (inpLayout == DATA_LAYOUT_NHWC)
+    if (inpLayout == DNN_LAYOUT_NHWC)
     {
         LayerParams permLP;
         int order[] = {0, 2, 3, 1};  // From OpenCV's NCHW to NHWC.

@@ -1336,7 +1326,7 @@ void TFImporter::parseFlatten(tensorflow::GraphDef& net, const tensorflow::NodeD
     int id = dstNet.addLayer(name, "Flatten", layerParams);
     layer_id[name] = id;
     connect(layer_id, dstNet, inpId, id, 0);
-    data_layouts[name] = DATA_LAYOUT_PLANAR;
+    data_layouts[name] = DNN_LAYOUT_PLANAR;
 }

 void TFImporter::parseTranspose(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams)

@@ -1354,19 +1344,19 @@ void TFImporter::parseTranspose(tensorflow::GraphDef& net, const tensorflow::Nod
     // keep NCHW layout this way.
     int inpLayout = getDataLayout(layer.input(0), data_layouts);
     std::string type = "Identity";

-    if (inpLayout == DATA_LAYOUT_NHWC)
+    if (inpLayout == DNN_LAYOUT_NHWC)
     {
         if (permData[0] == 0 && permData[1] == 3 && permData[2] == 1 && permData[3] == 2)
         {
             // in TensorFlow: NHWC->NCHW
             // in OpenCV: NCHW->NCHW
-            data_layouts[name] = DATA_LAYOUT_NCHW;
+            data_layouts[name] = DNN_LAYOUT_NCHW;
         }
         else if (permData[0] == 0 && permData[1] == 1 && permData[2] == 2 && permData[3] == 3)
         {
             // in TensorFlow: NHWC->NHWC
             // in OpenCV: NCHW->NCHW
-            data_layouts[name] = DATA_LAYOUT_NHWC;
+            data_layouts[name] = DNN_LAYOUT_NHWC;
         }
         else if (permData[0] == 0 && permData[1] == 3 && permData[2] == 2 && permData[3] == 1)
         {

@@ -1374,25 +1364,25 @@ void TFImporter::parseTranspose(tensorflow::GraphDef& net, const tensorflow::Nod
             // in OpenCV: NCHW->NCWH
             int permData[] = {0, 1, 3, 2};
             layerParams.set("order", DictValue::arrayInt<int*>(permData, perm.total()));
-            data_layouts[name] = DATA_LAYOUT_NCHW;  // we keep track NCHW because channels position only matters
+            data_layouts[name] = DNN_LAYOUT_NCHW;  // we keep track NCHW because channels position only matters
             type = "Permute";
         }
         else
             CV_Error(Error::StsParseError, "Only NHWC <-> NCHW permutations are allowed.");
     }
-    else if (inpLayout == DATA_LAYOUT_NCHW)
+    else if (inpLayout == DNN_LAYOUT_NCHW)
     {
         if (permData[0] == 0 && permData[1] == 2 && permData[2] == 3 && permData[3] == 1)
         {
             // in TensorFlow: NCHW->NHWC
             // in OpenCV: NCHW->NCHW
-            data_layouts[name] = DATA_LAYOUT_NHWC;
+            data_layouts[name] = DNN_LAYOUT_NHWC;
         }
         else if (permData[0] == 0 && permData[1] == 1 && permData[2] == 2 && permData[3] == 3)
         {
             // in TensorFlow: NCHW->NCHW
             // in OpenCV: NCHW->NCHW
-            data_layouts[name] = DATA_LAYOUT_NCHW;
+            data_layouts[name] = DNN_LAYOUT_NCHW;
         }
         else
             CV_Error(Error::StsParseError, "Only NHWC <-> NCHW permutations are allowed.");

@@ -1410,7 +1400,7 @@ void TFImporter::parseTranspose(tensorflow::GraphDef& net, const tensorflow::Nod
         // one input only
         connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
-        data_layouts[name] = DATA_LAYOUT_UNKNOWN;
+        data_layouts[name] = DNN_LAYOUT_UNKNOWN;
     }
 }
@@ -1456,9 +1446,9 @@ void TFImporter::parseConcat(tensorflow::GraphDef& net, const tensorflow::NodeDe
     int axisId = (type == "Concat" ? 0 : num_inputs - 1);
     int axis = getConstBlob(layer, value_id, axisId).int_val().Get(0);

-    if (getDataLayout(name, data_layouts) == DATA_LAYOUT_NHWC)
+    if (getDataLayout(name, data_layouts) == DNN_LAYOUT_NHWC)
         axis = toNCHW(axis);
-    else if (getDataLayout(name, data_layouts) == DATA_LAYOUT_NDHWC)
+    else if (getDataLayout(name, data_layouts) == DNN_LAYOUT_NDHWC)
         axis = toNCDHW(axis);
     layerParams.set("axis", axis);

@@ -1585,7 +1575,7 @@ void TFImporter::parsePlaceholder(tensorflow::GraphDef& net, const tensorflow::N
         MatShape dims(shape.dim_size());
         for (int i = 0; i < dims.size(); ++i)
             dims[i] = shape.dim(i).size();
-        if (dims.size() == 4 && predictedLayout == DATA_LAYOUT_NHWC)
+        if (dims.size() == 4 && predictedLayout == DNN_LAYOUT_NHWC)
         {
             std::swap(dims[1], dims[3]);  // NHWC->NCWH
             std::swap(dims[2], dims[3]);  // NCWH->NCHW

@@ -1593,7 +1583,7 @@ void TFImporter::parsePlaceholder(tensorflow::GraphDef& net, const tensorflow::N
                 dims[0] = 1;
         }
-        if (dims.size() == 5 && predictedLayout == DATA_LAYOUT_NDHWC)
+        if (dims.size() == 5 && predictedLayout == DNN_LAYOUT_NDHWC)
         {
             std::swap(dims[3], dims[4]);  // NDHWC->NDHCW
             std::swap(dims[2], dims[3]);  // NDHCW->NDCHW

@@ -1624,7 +1614,7 @@ void TFImporter::parseSplit(tensorflow::GraphDef& net, const tensorflow::NodeDef
     // num_split
     // 1st blob is dims tensor
     int axis = getConstBlob(layer, value_id, 0).int_val().Get(0);
-    if (getDataLayout(name, data_layouts) == DATA_LAYOUT_NHWC)
+    if (getDataLayout(name, data_layouts) == DNN_LAYOUT_NHWC)
         axis = toNCHW(axis);
     layerParams.set("axis", axis);
@@ -1654,7 +1644,7 @@ void TFImporter::parseSlice(tensorflow::GraphDef& net, const tensorflow::NodeDef
     CV_CheckTypeEQ(begins.type(), CV_32SC1, "");
     CV_CheckTypeEQ(sizes.type(), CV_32SC1, "");

-    if (begins.total() == 4 && getDataLayout(name, data_layouts) == DATA_LAYOUT_NHWC)
+    if (begins.total() == 4 && getDataLayout(name, data_layouts) == DNN_LAYOUT_NHWC)
     {
         // Swap NHWC parameters' order to NCHW.
         std::swap(*begins.ptr<int32_t>(0, 2), *begins.ptr<int32_t>(0, 3));

@@ -1695,7 +1685,7 @@ void TFImporter::parseStridedSlice(tensorflow::GraphDef& net, const tensorflow::
             CV_Error(Error::StsNotImplemented,
                      format("StridedSlice with stride %d", strides.at<int>(i)));
     }
-    if (begins.total() == 4 && getDataLayout(name, data_layouts) == DATA_LAYOUT_NHWC)
+    if (begins.total() == 4 && getDataLayout(name, data_layouts) == DNN_LAYOUT_NHWC)
     {
         // Swap NHWC parameters' order to NCHW.
         std::swap(begins.at<int>(2), begins.at<int>(3));

@@ -2029,7 +2019,7 @@ void TFImporter::parseConv2DBackpropInput(tensorflow::GraphDef& net, const tenso
     const int strideY = layerParams.get<int>("stride_h");
     const int strideX = layerParams.get<int>("stride_w");
     Mat outShape = getTensorContent(getConstBlob(layer, value_id, 0));
-    int shift = (getDataLayout(layer) == DATA_LAYOUT_NCHW);
+    int shift = (getDataLayout(layer) == DNN_LAYOUT_NCHW);
     const int outH = outShape.at<int>(1 + shift) + begs[2] - ends[2];
     const int outW = outShape.at<int>(2 + shift) + begs[3] - ends[3];
     if (layerParams.get<String>("pad_mode") == "SAME")

@@ -2141,7 +2131,7 @@ void TFImporter::parseBlockLSTM(tensorflow::GraphDef& net, const tensorflow::Nod
     // one input only
     connect(layer_id, dstNet, parsePin(layer.input(1)), id, 0);
-    data_layouts[name] = DATA_LAYOUT_UNKNOWN;
+    data_layouts[name] = DNN_LAYOUT_UNKNOWN;
 }

 // "ResizeNearestNeighbor" "ResizeBilinear" "FusedResizeAndPadConv2D"
@@ -2239,7 +2229,7 @@ void TFImporter::parseL2Normalize(tensorflow::GraphDef& net, const tensorflow::N
     CV_Assert(reductionIndices.type() == CV_32SC1);

     const int numAxes = reductionIndices.total();
-    if (getDataLayout(name, data_layouts) == DATA_LAYOUT_NHWC)
+    if (getDataLayout(name, data_layouts) == DNN_LAYOUT_NHWC)
         for (int i = 0; i < numAxes; ++i)
             reductionIndices.at<int>(i) = toNCHW(reductionIndices.at<int>(i));

@@ -2292,7 +2282,7 @@ void TFImporter::parsePriorBox(tensorflow::GraphDef& net, const tensorflow::Node
     layer_id[name] = id;
     connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
     connect(layer_id, dstNet, parsePin(layer.input(1)), id, 1);
-    data_layouts[name] = DATA_LAYOUT_UNKNOWN;
+    data_layouts[name] = DNN_LAYOUT_UNKNOWN;
 }

 void TFImporter::parseSoftmax(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams)

@@ -2417,7 +2407,7 @@ void TFImporter::parseMean(tensorflow::GraphDef& net, const tensorflow::NodeDef&
         if (!keepDims)
         {
-            if (layout == DATA_LAYOUT_NHWC)
+            if (layout == DNN_LAYOUT_NHWC)
             {
                 LayerParams permLP;
                 int order[] = {0, 2, 3, 1};  // From OpenCV's NCHW to NHWC.

@@ -2539,7 +2529,7 @@ void TFImporter::parseMean(tensorflow::GraphDef& net, const tensorflow::NodeDef&
             int flattenId = dstNet.addLayer(flattenName, "Flatten", flattenLp);
             layer_id[flattenName] = flattenId;
             connect(layer_id, dstNet, Pin(poolingName), flattenId, 0);
-            data_layouts[name] = DATA_LAYOUT_PLANAR;
+            data_layouts[name] = DNN_LAYOUT_PLANAR;
         }
     }
 }

@@ -2562,7 +2552,7 @@ void TFImporter::parsePack(tensorflow::GraphDef& net, const tensorflow::NodeDef&
     if (dim != 0)
         CV_Error(Error::StsNotImplemented, "Unsupported mode of pack operation.");

-    data_layouts[name] = DATA_LAYOUT_UNKNOWN;
+    data_layouts[name] = DNN_LAYOUT_UNKNOWN;

     CV_Assert(hasLayerAttr(layer, "N"));
     int num = (int)getLayerAttr(layer, "N").i();
@@ -2959,11 +2949,11 @@ static void addConstNodes(tensorflow::GraphDef& net, std::map<String, int>& cons
 }

 // If all inputs of specific layer have the same data layout we can say that
-// this layer's output has this data layout too. Returns DATA_LAYOUT_UNKNOWN otherwise.
+// this layer's output has this data layout too. Returns DNN_LAYOUT_UNKNOWN otherwise.
 DataLayout TFImporter::predictOutputDataLayout(const tensorflow::NodeDef& layer)
 {
     DataLayout layout = getDataLayout(layer);
-    if (layout != DATA_LAYOUT_UNKNOWN)
+    if (layout != DNN_LAYOUT_UNKNOWN)
     {
         CV_LOG_DEBUG(NULL, "DNN/TF: predictOutputDataLayout(" << layer.name() << " @ " << layer.op() << ") => " << (int)layout << " (from attrs)");
         return layout;

@@ -2975,17 +2965,17 @@ DataLayout TFImporter::predictOutputDataLayout(const tensorflow::NodeDef& layer)
         std::map<String, DataLayout>::const_iterator it = data_layouts.find(getNodeName(layer.input(i)));
         if (it != data_layouts.end())
         {
-            if (layout != DATA_LAYOUT_UNKNOWN)
+            if (layout != DNN_LAYOUT_UNKNOWN)
             {
-                if (it->second != layout && it->second != DATA_LAYOUT_UNKNOWN)
-                    return DATA_LAYOUT_UNKNOWN;
+                if (it->second != layout && it->second != DNN_LAYOUT_UNKNOWN)
+                    return DNN_LAYOUT_UNKNOWN;
             }
             else
                 layout = it->second;
         }
     }

-    if (layout != DATA_LAYOUT_UNKNOWN)
+    if (layout != DNN_LAYOUT_UNKNOWN)
     {
         CV_LOG_DEBUG(NULL, "DNN/TF: predictOutputDataLayout(" << layer.name() << " @ " << layer.op() << ") => " << (int)layout << " (from inputs)");
         return layout;

@@ -3061,14 +3051,14 @@ void TFImporter::populateNet()
             std::map<String, DataLayout>::iterator it = data_layouts.find(name);
             if (it != data_layouts.end())
             {
-                if (layout != DATA_LAYOUT_UNKNOWN)
+                if (layout != DNN_LAYOUT_UNKNOWN)
                 {
-                    if (it->second == DATA_LAYOUT_UNKNOWN)
+                    if (it->second == DNN_LAYOUT_UNKNOWN)
                         it->second = layout;
                     else if (it->second != layout)
                     {
-                        it->second = DATA_LAYOUT_UNKNOWN;
-                        layout = DATA_LAYOUT_UNKNOWN;
+                        it->second = DNN_LAYOUT_UNKNOWN;
+                        layout = DNN_LAYOUT_UNKNOWN;
                     }
                 }
                 else

@@ -3084,12 +3074,12 @@ void TFImporter::populateNet()
                 it = data_layouts.find(name);
                 if (it != data_layouts.end())
                 {
-                    if (layout != DATA_LAYOUT_UNKNOWN)
+                    if (layout != DNN_LAYOUT_UNKNOWN)
                     {
-                        if (it->second == DATA_LAYOUT_UNKNOWN)
+                        if (it->second == DNN_LAYOUT_UNKNOWN)
                             it->second = layout;
                         else if (it->second != layout)
-                            it->second = DATA_LAYOUT_UNKNOWN;
+                            it->second = DNN_LAYOUT_UNKNOWN;
                    }
                }
                else


@@ -22,18 +22,6 @@ CV__DNN_INLINE_NS_BEGIN
 using namespace opencv_tflite;

-// This values are used to indicate layer output's data layout where it's possible.
-// Approach is similar to TensorFlow importer but TFLite models do not have explicit
-// layout field "data_format". So we consider that all 4D inputs are in NHWC data layout.
-enum DataLayout
-{
-    DATA_LAYOUT_NHWC,
-    DATA_LAYOUT_NCHW,
-    DATA_LAYOUT_NDHWC,
-    DATA_LAYOUT_UNKNOWN,
-    DATA_LAYOUT_PLANAR  // 2-dimensional outputs (matmul, flatten, reshape to 2d)
-};
-
 class TFLiteImporter {
 public:
     TFLiteImporter(Net& net, const char* modelBuffer, size_t bufSize);

@@ -139,10 +127,10 @@ DataLayout estimateLayout(const Tensor& t)
     const auto t_shape = t.shape();
     CV_Assert(t_shape);
     switch (t_shape->size()) {
-    case 5: return DATA_LAYOUT_NDHWC;
-    case 4: return DATA_LAYOUT_NHWC;
-    case 2: return DATA_LAYOUT_PLANAR;
-    default: return DATA_LAYOUT_UNKNOWN;
+    case 5: return DNN_LAYOUT_NDHWC;
+    case 4: return DNN_LAYOUT_NHWC;
+    case 2: return DNN_LAYOUT_PLANAR;
+    default: return DNN_LAYOUT_UNKNOWN;
     }
 }

@@ -161,7 +149,7 @@ void TFLiteImporter::populateNet()
     CV_Assert(opCodes);
     CV_Assert(modelTensors);
-    layouts.resize(modelTensors->size(), DATA_LAYOUT_UNKNOWN);
+    layouts.resize(modelTensors->size(), DNN_LAYOUT_UNKNOWN);
     size_t subgraph_inputs_size = subgraph_inputs->size();
     std::vector<std::string> inputsNames(subgraph_inputs_size);
     std::vector<MatShape> inputsShapes(subgraph_inputs_size);

@@ -177,7 +165,7 @@ void TFLiteImporter::populateNet()
         // Keep info about origin inputs names and shapes
        inputsNames[i] = tensor->name()->str();
        std::vector<int> shape(tensor->shape()->begin(), tensor->shape()->end());
-        if (layouts[idx] == DATA_LAYOUT_NHWC) {
+        if (layouts[idx] == DNN_LAYOUT_NHWC) {
            CV_CheckEQ(shape.size(), (size_t)4, "");
            std::swap(shape[2], shape[3]);
            std::swap(shape[1], shape[2]);

@@ -257,14 +245,14 @@ void TFLiteImporter::populateNet()
         // Predict output layout. Some layer-specific parsers may set them explicitly.
         // Otherwise, propagate input layout.
-        if (layouts[op_outputs->Get(0)] == DATA_LAYOUT_UNKNOWN) {
-            DataLayout predictedLayout = DATA_LAYOUT_UNKNOWN;
+        if (layouts[op_outputs->Get(0)] == DNN_LAYOUT_UNKNOWN) {
+            DataLayout predictedLayout = DNN_LAYOUT_UNKNOWN;
             for (auto layout : inpLayouts) {
-                if (layout != DATA_LAYOUT_UNKNOWN) {
-                    if (predictedLayout == DATA_LAYOUT_UNKNOWN)
+                if (layout != DNN_LAYOUT_UNKNOWN) {
+                    if (predictedLayout == DNN_LAYOUT_UNKNOWN)
                         predictedLayout = layout;
                     else if (predictedLayout != layout) {
-                        predictedLayout = DATA_LAYOUT_UNKNOWN;
+                        predictedLayout = DNN_LAYOUT_UNKNOWN;
                         break;
                     }
                 }

@@ -491,11 +479,11 @@ void TFLiteImporter::parseUnpooling(const Operator& op, const std::string& opcod
 void TFLiteImporter::parseReshape(const Operator& op, const std::string& opcode, LayerParams& layerParams) {
     DataLayout inpLayout = layouts[op.inputs()->Get(0)];
-    if (inpLayout == DATA_LAYOUT_NHWC) {
+    if (inpLayout == DNN_LAYOUT_NHWC) {
         // Permute to NCHW
         int permId = addPermuteLayer({0, 2, 3, 1}, layerParams.name + "/permute", layerIds[op.inputs()->Get(0)]);  // NCHW -> NHWC
         layerIds[op.inputs()->Get(0)] = std::make_pair(permId, 0);
-        layouts[op.outputs()->Get(0)] = DATA_LAYOUT_NCHW;
+        layouts[op.outputs()->Get(0)] = DNN_LAYOUT_NCHW;
     }

     layerParams.type = "Reshape";

@@ -514,7 +502,7 @@ void TFLiteImporter::parseConcat(const Operator& op, const std::string& opcode,
     int axis = options->axis();

     DataLayout inpLayout = layouts[op.inputs()->Get(0)];
-    if (inpLayout == DATA_LAYOUT_NHWC) {
+    if (inpLayout == DNN_LAYOUT_NHWC) {
         // OpenCV works in NCHW data layout. So change the axis correspondingly.
         axis = normalize_axis(axis, 4);
         static const int remap[] = {0, 2, 3, 1};


@@ -63,6 +63,63 @@ TEST(imagesFromBlob, Regression)
     }
 }

+TEST(blobFromImageWithParams_4ch, NHWC_scalar_scale)
+{
+    Mat img(10, 10, CV_8UC4, cv::Scalar(0, 1, 2, 3));
+    std::vector<double> factorVec = {0.1, 0.2, 0.3, 0.4};
+
+    Scalar scalefactor(factorVec[0], factorVec[1], factorVec[2], factorVec[3]);
+
+    Image2BlobParams param;
+    param.scalefactor = scalefactor;
+    param.datalayout = DNN_LAYOUT_NHWC;
+
+    Mat blob = dnn::blobFromImageWithParams(img, param);  // [1, 10, 10, 4]
+    float* blobPtr = blob.ptr<float>(0);
+
+    std::vector<float> targetVec = {(float)factorVec[0] * 0, (float)factorVec[1] * 1,
+                                    (float)factorVec[2] * 2, (float)factorVec[3] * 3};  // target values
+    for (int hi = 0; hi < 10; hi++)
+    {
+        for (int wi = 0; wi < 10; wi++)
+        {
+            float* hwPtr = blobPtr + hi * 10 * 4 + wi * 4;
+
+            // Check equality.
+            EXPECT_NEAR(hwPtr[0], targetVec[0], 1e-5);
+            EXPECT_NEAR(hwPtr[1], targetVec[1], 1e-5);
+            EXPECT_NEAR(hwPtr[2], targetVec[2], 1e-5);
+            EXPECT_NEAR(hwPtr[3], targetVec[3], 1e-5);
+        }
+    }
+}
+
+TEST(blobFromImageWithParams_4ch, letter_box)
+{
+    Mat img(40, 20, CV_8UC4, cv::Scalar(0, 1, 2, 3));
+
+    // Construct the target mat.
+    Mat targetCh[4];
+    // The letterbox pads zeros at the left and right of the output blob.
+    // After the letterbox, every row has the same values, shown in valVec.
+    std::vector<uint8_t> valVec = {0,0,0,0,0, 1,1,1,1,1,1,1,1,1,1, 0,0,0,0,0};
+    Mat rowM(1, 20, CV_8UC1, valVec.data());
+
+    for (int i = 0; i < 4; i++)
+    {
+        targetCh[i] = rowM * i;
+    }
+
+    Mat targetImg;
+    merge(targetCh, 4, targetImg);
+    Size targetSize(20, 20);
+
+    Image2BlobParams param;
+    param.size = targetSize;
+    param.paddingmode = DNN_PMODE_LETTERBOX;
+    Mat blob = dnn::blobFromImageWithParams(img, param);
+
+    Mat targetBlob = dnn::blobFromImage(targetImg, 1.0, targetSize);  // only converts data from uint8 to float32
+    EXPECT_EQ(0, cvtest::norm(targetBlob, blob, NORM_INF));
+}
+
 TEST(readNet, Regression)
 {
     Net net = readNet(findDataFile("dnn/squeezenet_v1.1.prototxt"),