Mirror of https://github.com/opencv/opencv.git (synced 2024-11-25 03:30:34 +08:00)
Merge pull request #22750 from zihaomu:improve_blobFromImage
DNN: Add New API blobFromImageParam #22750

The purpose of this PR:
1. Add a new API `blobFromImageParam` that extends the `blobFromImage` API. It supports different data layouts (NCHW or NHWC) and letterbox preprocessing.
2. ~~`blobFromImage` can output `CV_16F`~~

### Pull Request Readiness Checklist

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [x] I agree to contribute to the project under Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on code under GPL or another license that is incompatible with OpenCV
- [x] The PR is proposed to the proper branch
- [ ] There is a reference to the original bug report and related work
- [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable. Patch to opencv_extra has the same branch name.
- [ ] The feature is well documented and sample code can be built with the project CMake
This commit is contained in:
parent 810096c276
commit 601778e0e6
@@ -108,6 +108,21 @@ CV__DNN_INLINE_NS_BEGIN
         DNN_TARGET_NPU,
     };

+    /**
+     * @brief Enum of data layout for model inference.
+     * @see Image2BlobParams
+     */
+    enum DataLayout
+    {
+        DNN_LAYOUT_UNKNOWN = 0,
+        DNN_LAYOUT_ND = 1,      //!< OpenCV data layout for 2D data.
+        DNN_LAYOUT_NCHW = 2,    //!< OpenCV data layout for 4D data.
+        DNN_LAYOUT_NCDHW = 3,   //!< OpenCV data layout for 5D data.
+        DNN_LAYOUT_NHWC = 4,    //!< Tensorflow-like data layout for 4D data.
+        DNN_LAYOUT_NDHWC = 5,   //!< Tensorflow-like data layout for 5D data.
+        DNN_LAYOUT_PLANAR = 6,  //!< Tensorflow-like data layout, it should only be used at tf or tflite model parsing.
+    };
+
     CV_EXPORTS std::vector< std::pair<Backend, Target> > getAvailableBackends();
     CV_EXPORTS_W std::vector<Target> getAvailableTargets(dnn::Backend be);

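In practice the two 4D layouts differ only in the order of the blob dimensions: for a single H x W image with C channels, `DNN_LAYOUT_NCHW` produces a blob of shape {1, C, H, W}, while `DNN_LAYOUT_NHWC` produces {1, H, W, C}. A minimal sketch, assuming the `blobFromImageWithParams` API declared later in this patch; the 5x4 image is a made-up example:

```cpp
#include <opencv2/dnn.hpp>

int main()
{
    // Hypothetical 5x4, 3-channel image (rows = 5, cols = 4).
    cv::Mat img(5, 4, CV_8UC3, cv::Scalar(0, 0, 0));

    cv::dnn::Image2BlobParams p;
    p.datalayout = cv::dnn::DNN_LAYOUT_NCHW;
    cv::Mat nchw = cv::dnn::blobFromImageWithParams(img, p);  // shape {1, 3, 5, 4}

    p.datalayout = cv::dnn::DNN_LAYOUT_NHWC;
    cv::Mat nhwc = cv::dnn::blobFromImageWithParams(img, p);  // shape {1, 5, 4, 3}

    CV_Assert(nchw.size[1] == 3 && nchw.size[2] == 5 && nchw.size[3] == 4);
    CV_Assert(nhwc.size[1] == 5 && nhwc.size[2] == 4 && nhwc.size[3] == 3);
    return 0;
}
```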
@@ -1111,10 +1126,10 @@ CV__DNN_INLINE_NS_BEGIN
     /** @brief Creates 4-dimensional blob from image. Optionally resizes and crops @p image from center,
      *  subtract @p mean values, scales values by @p scalefactor, swap Blue and Red channels.
      *  @param image input image (with 1-, 3- or 4-channels).
+     *  @param scalefactor multiplier for @p images values.
      *  @param size spatial size for output image
      *  @param mean scalar with mean values which are subtracted from channels. Values are intended
      *  to be in (mean-R, mean-G, mean-B) order if @p image has BGR ordering and @p swapRB is true.
-     *  @param scalefactor multiplier for @p image values.
      *  @param swapRB flag which indicates that swap first and last channels
      *  in 3-channel image is necessary.
      *  @param crop flag which indicates whether image will be cropped after resize or not
@@ -1123,6 +1138,9 @@ CV__DNN_INLINE_NS_BEGIN
      *  dimension in @p size and another one is equal or larger. Then, crop from the center is performed.
      *  If @p crop is false, direct resize without cropping and preserving aspect ratio is performed.
      *  @returns 4-dimensional Mat with NCHW dimensions order.
+     *
+     *  @note
+     *  The order and usage of `scalefactor` and `mean` are (input - mean) * scalefactor.
      */
     CV_EXPORTS_W Mat blobFromImage(InputArray image, double scalefactor=1.0, const Size& size = Size(),
                                    const Scalar& mean = Scalar(), bool swapRB=false, bool crop=false,
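A tiny sketch of the documented order, (input - mean) * scalefactor, applied to a single pixel; the mean and scalefactor values below are arbitrary example numbers, not values taken from the patch:

```cpp
#include <cmath>
#include <opencv2/core.hpp>
#include <opencv2/dnn.hpp>

int main()
{
    // One BGR pixel with B = 200; mean and scalefactor are arbitrary example values.
    cv::Mat img(1, 1, CV_8UC3, cv::Scalar(200, 100, 50));
    double scalefactor = 1.0 / 255.0;
    cv::Scalar mean(104, 117, 123);

    cv::Mat blob = cv::dnn::blobFromImage(img, scalefactor, cv::Size(), mean);

    // Channel 0 (B): mean is subtracted first, then the result is scaled:
    // (200 - 104) / 255 ~= 0.3765
    CV_Assert(std::abs(blob.ptr<float>(0, 0)[0] - (200 - 104) / 255.0f) < 1e-5);
    return 0;
}
```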
@@ -1153,6 +1171,9 @@ CV__DNN_INLINE_NS_BEGIN
      *  dimension in @p size and another one is equal or larger. Then, crop from the center is performed.
      *  If @p crop is false, direct resize without cropping and preserving aspect ratio is performed.
      *  @returns 4-dimensional Mat with NCHW dimensions order.
+     *
+     *  @note
+     *  The order and usage of `scalefactor` and `mean` are (input - mean) * scalefactor.
      */
     CV_EXPORTS_W Mat blobFromImages(InputArrayOfArrays images, double scalefactor=1.0,
                                     Size size = Size(), const Scalar& mean = Scalar(), bool swapRB=false, bool crop=false,
@@ -1167,6 +1188,74 @@ CV__DNN_INLINE_NS_BEGIN
                                     const Scalar& mean = Scalar(), bool swapRB=false, bool crop=false,
                                     int ddepth=CV_32F);

+    /**
+     * @brief Enum of image processing mode.
+     * To facilitate the specialization pre-processing requirements of the dnn model.
+     * For example, the `letter box` often used in the Yolo series of models.
+     * @see Image2BlobParams
+     */
+    enum ImagePaddingMode
+    {
+        DNN_PMODE_NULL = 0,        // !< Default. Resize to required input size without extra processing.
+        DNN_PMODE_CROP_CENTER = 1, // !< Image will be cropped after resize.
+        DNN_PMODE_LETTERBOX = 2,   // !< Resize image to the desired size while preserving the aspect ratio of original image.
+    };
+
+    /** @brief Processing params of image to blob.
+     *
+     * It includes all possible image processing operations and corresponding parameters.
+     *
+     * @see blobFromImageWithParams
+     *
+     * @note
+     * The order and usage of `scalefactor` and `mean` are (input - mean) * scalefactor.
+     * The order and usage of `scalefactor`, `size`, `mean`, `swapRB`, and `ddepth` are consistent
+     * with the function of @ref blobFromImage.
+     */
+    struct CV_EXPORTS_W_SIMPLE Image2BlobParams
+    {
+        CV_WRAP Image2BlobParams();
+        CV_WRAP Image2BlobParams(const Scalar& scalefactor, const Size& size = Size(), const Scalar& mean = Scalar(),
+                                 bool swapRB = false, int ddepth = CV_32F, DataLayout datalayout = DNN_LAYOUT_NCHW,
+                                 ImagePaddingMode mode = DNN_PMODE_NULL);
+
+        CV_PROP_RW Scalar scalefactor;           //!< scalefactor multiplier for input image values.
+        CV_PROP_RW Size size;                    //!< Spatial size for output image.
+        CV_PROP_RW Scalar mean;                  //!< Scalar with mean values which are subtracted from channels.
+        CV_PROP_RW bool swapRB;                  //!< Flag which indicates that swap first and last channels
+        CV_PROP_RW int ddepth;                   //!< Depth of output blob. Choose CV_32F or CV_8U.
+        CV_PROP_RW DataLayout datalayout;        //!< Order of output dimensions. Choose DNN_LAYOUT_NCHW or DNN_LAYOUT_NHWC.
+        CV_PROP_RW ImagePaddingMode paddingmode; //!< Image padding mode. @see ImagePaddingMode.
+    };
+
+    /** @brief Creates 4-dimensional blob from image with given params.
+     *
+     *  @details This function is an extension of @ref blobFromImage to meet more image preprocess needs.
+     *  Given input image and preprocessing parameters, and function outputs the blob.
+     *
+     *  @param image input image (all with 1-, 3- or 4-channels).
+     *  @param param struct of Image2BlobParams, contains all parameters needed by processing of image to blob.
+     *  @return 4-dimensional Mat.
+     */
+    CV_EXPORTS_W Mat blobFromImageWithParams(InputArray image, const Image2BlobParams& param = Image2BlobParams());
+
+    /** @overload */
+    CV_EXPORTS_W void blobFromImageWithParams(InputArray image, OutputArray blob, const Image2BlobParams& param = Image2BlobParams());
+
+    /** @brief Creates 4-dimensional blob from series of images with given params.
+     *
+     *  @details This function is an extension of @ref blobFromImages to meet more image preprocess needs.
+     *  Given input image and preprocessing parameters, and function outputs the blob.
+     *
+     *  @param images input image (all with 1-, 3- or 4-channels).
+     *  @param param struct of Image2BlobParams, contains all parameters needed by processing of image to blob.
+     *  @returns 4-dimensional Mat.
+     */
+    CV_EXPORTS_W Mat blobFromImagesWithParams(InputArrayOfArrays images, const Image2BlobParams& param = Image2BlobParams());
+
+    /** @overload */
+    CV_EXPORTS_W void blobFromImagesWithParams(InputArrayOfArrays images, OutputArray blob, const Image2BlobParams& param = Image2BlobParams());
+
     /** @brief Parse a 4D blob and output the images it contains as 2D arrays through a simpler data structure
      *  (std::vector<cv::Mat>).
      *  @param[in] blob_ 4 dimensional array (images, channels, height, width) in floating point precision (CV_32F) from
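A minimal usage sketch of the API declared above, letterboxing an image into an NHWC blob; the file name and the 640x640 target size are placeholders chosen for the example, not values taken from the patch:

```cpp
#include <opencv2/dnn.hpp>
#include <opencv2/imgcodecs.hpp>

int main()
{
    cv::Mat img = cv::imread("input.jpg");  // hypothetical input image
    if (img.empty())
        return 1;

    cv::dnn::Image2BlobParams param;
    param.scalefactor = cv::Scalar::all(1.0 / 255.0); // scale values to [0, 1]
    param.size = cv::Size(640, 640);                  // e.g. a YOLO-style input size
    param.swapRB = true;                              // BGR -> RGB
    param.datalayout = cv::dnn::DNN_LAYOUT_NHWC;      // NHWC instead of the default NCHW
    param.paddingmode = cv::dnn::DNN_PMODE_LETTERBOX; // keep aspect ratio, pad the rest

    cv::Mat blob = cv::dnn::blobFromImageWithParams(img, param);
    // With DNN_LAYOUT_NHWC the blob shape is {1, 640, 640, channels}.
    CV_Assert(blob.dims == 4 && blob.size[3] == img.channels());
    return 0;
}
```

The same `Image2BlobParams` object can also be passed to `blobFromImagesWithParams` to build a batched blob from several images.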
@@ -119,7 +119,7 @@ class dnn_test(NewOpenCVTests):
             inp = np.random.standard_normal([1, 2, 10, 11]).astype(np.float32)
             net.setInput(inp)
             net.forward()
-        except BaseException as e:
+        except BaseException:
             return False
         return True

@@ -153,6 +153,41 @@ class dnn_test(NewOpenCVTests):
         target = target.transpose(2, 0, 1).reshape(1, 3, height, width)  # to NCHW
         normAssert(self, blob, target)

+    def test_blobFromImageWithParams(self):
+        np.random.seed(324)
+
+        width = 6
+        height = 7
+        stddev = np.array([0.2, 0.3, 0.4])
+        scalefactor = 1.0/127.5 * stddev
+        mean = (10, 20, 30)
+
+        # Test arguments names.
+        img = np.random.randint(0, 255, [4, 5, 3]).astype(np.uint8)
+
+        param = cv.dnn.Image2BlobParams()
+        param.scalefactor = scalefactor
+        param.size = (6, 7)
+        param.mean = mean
+        param.swapRB=True
+        param.datalayout = cv.dnn.DNN_LAYOUT_NHWC
+
+        blob = cv.dnn.blobFromImageWithParams(img, param)
+        blob_args = cv.dnn.blobFromImageWithParams(img, cv.dnn.Image2BlobParams(scalefactor=scalefactor, size=(6, 7), mean=mean,
+                                                   swapRB=True, datalayout=cv.dnn.DNN_LAYOUT_NHWC))
+        normAssert(self, blob, blob_args)
+
+        target2 = cv.resize(img, (width, height), interpolation=cv.INTER_LINEAR).astype(np.float32)
+        target2 = target2[:,:,[2, 1, 0]]  # BGR2RGB
+        target2[:,:,0] -= mean[0]
+        target2[:,:,1] -= mean[1]
+        target2[:,:,2] -= mean[2]
+
+        target2[:,:,0] *= scalefactor[0]
+        target2[:,:,1] *= scalefactor[1]
+        target2[:,:,2] *= scalefactor[2]
+        target2 = target2.reshape(1, height, width, 3)  # to NHWC
+        normAssert(self, blob, target2)
+
     def test_model(self):
         img_path = self.find_dnn_file("dnn/street.png")
@@ -11,8 +11,17 @@ namespace cv {
 namespace dnn {
 CV__DNN_INLINE_NS_BEGIN

-Mat blobFromImage(InputArray image, double scalefactor, const Size& size,
+Image2BlobParams::Image2BlobParams():scalefactor(Scalar::all(1.0)), size(Size()), mean(Scalar()), swapRB(false), ddepth(CV_32F),
+                                     datalayout(DNN_LAYOUT_NCHW), paddingmode(DNN_PMODE_NULL)
+{}
+
+Image2BlobParams::Image2BlobParams(const Scalar& scalefactor_, const Size& size_, const Scalar& mean_, bool swapRB_,
+                                   int ddepth_, DataLayout datalayout_, ImagePaddingMode mode_):
+    scalefactor(scalefactor_), size(size_), mean(mean_), swapRB(swapRB_), ddepth(ddepth_),
+    datalayout(datalayout_), paddingmode(mode_)
+{}
+
+Mat blobFromImage(InputArray image, const double scalefactor, const Size& size,
                   const Scalar& mean, bool swapRB, bool crop, int ddepth)
 {
     CV_TRACE_FUNCTION();
@@ -42,16 +51,55 @@ void blobFromImages(InputArrayOfArrays images_, OutputArray blob_, double scalef
                     Size size, const Scalar& mean_, bool swapRB, bool crop, int ddepth)
 {
     CV_TRACE_FUNCTION();
-    CV_CheckType(ddepth, ddepth == CV_32F || ddepth == CV_8U, "Blob depth should be CV_32F or CV_8U");
-    if (ddepth == CV_8U)
-    {
-        CV_CheckEQ(scalefactor, 1.0, "Scaling is not supported for CV_8U blob depth");
-        CV_Assert(mean_ == Scalar() && "Mean subtraction is not supported for CV_8U blob depth");
-    }
+    Image2BlobParams param(Scalar::all(scalefactor), size, mean_, swapRB, ddepth);
+    if (crop)
+        param.paddingmode = DNN_PMODE_CROP_CENTER;
+    blobFromImagesWithParams(images_, blob_, param);
+}
+
+Mat blobFromImageWithParams(InputArray image, const Image2BlobParams& param)
+{
+    CV_TRACE_FUNCTION();
+    Mat blob;
+    blobFromImageWithParams(image, blob, param);
+    return blob;
+}
+
+void blobFromImageWithParams(InputArray image, OutputArray blob, const Image2BlobParams& param)
+{
+    CV_TRACE_FUNCTION();
+    std::vector<Mat> images(1, image.getMat());
+    blobFromImagesWithParams(images, blob, param);
+}
+
+Mat blobFromImagesWithParams(InputArrayOfArrays images, const Image2BlobParams& param)
+{
+    CV_TRACE_FUNCTION();
+    Mat blob;
+    blobFromImagesWithParams(images, blob, param);
+    return blob;
+}
+
+void blobFromImagesWithParams(InputArrayOfArrays images_, OutputArray blob_, const Image2BlobParams& param)
+{
+    CV_TRACE_FUNCTION();
+    CV_CheckType(param.ddepth, param.ddepth == CV_32F || param.ddepth == CV_8U,
+                 "Blob depth should be CV_32F or CV_8U");
+
+    Size size = param.size;
     std::vector<Mat> images;
     images_.getMatVector(images);
     CV_Assert(!images.empty());
+
+    int nch = images[0].channels();
+    Scalar scalefactor = param.scalefactor;
+
+    if (param.ddepth == CV_8U)
+    {
+        CV_Assert(scalefactor == Scalar::all(1.0) && "Scaling is not supported for CV_8U blob depth");
+        CV_Assert(param.mean == Scalar() && "Mean subtraction is not supported for CV_8U blob depth");
+    }
+
     for (size_t i = 0; i < images.size(); i++)
     {
         Size imgSize = images[i].size();
@@ -59,73 +107,122 @@ void blobFromImages(InputArrayOfArrays images_, OutputArray blob_, double scalef
             size = imgSize;
         if (size != imgSize)
         {
-            if (crop)
+            if (param.paddingmode == DNN_PMODE_CROP_CENTER)
             {
                 float resizeFactor = std::max(size.width / (float)imgSize.width,
                                               size.height / (float)imgSize.height);
                 resize(images[i], images[i], Size(), resizeFactor, resizeFactor, INTER_LINEAR);
                 Rect crop(Point(0.5 * (images[i].cols - size.width),
                                 0.5 * (images[i].rows - size.height)),
                           size);
                 images[i] = images[i](crop);
             }
             else
-                resize(images[i], images[i], size, 0, 0, INTER_LINEAR);
+            {
+                if (param.paddingmode == DNN_PMODE_LETTERBOX)
+                {
+                    float resizeFactor = std::min(size.width / (float)imgSize.width,
+                                                  size.height / (float)imgSize.height);
+                    int rh = int(imgSize.height * resizeFactor);
+                    int rw = int(imgSize.width * resizeFactor);
+                    resize(images[i], images[i], Size(rw, rh), INTER_LINEAR);
+
+                    int top = (size.height - rh)/2;
+                    int bottom = size.height - top - rh;
+                    int left = (size.width - rw)/2;
+                    int right = size.width - left - rw;
+                    copyMakeBorder(images[i], images[i], top, bottom, left, right, BORDER_CONSTANT);
+                }
+                else
+                    resize(images[i], images[i], size, 0, 0, INTER_LINEAR);
+            }
         }
-        if (images[i].depth() == CV_8U && ddepth == CV_32F)
-            images[i].convertTo(images[i], CV_32F);
-        Scalar mean = mean_;
-        if (swapRB)
+
+        Scalar mean = param.mean;
+        if (param.swapRB)
+        {
             std::swap(mean[0], mean[2]);
+            std::swap(scalefactor[0], scalefactor[2]);
+        }
+
+        if (images[i].depth() == CV_8U && param.ddepth == CV_32F)
+            images[i].convertTo(images[i], CV_32F);
+
         images[i] -= mean;
-        images[i] *= scalefactor;
+        multiply(images[i], scalefactor, images[i]);
     }

     size_t nimages = images.size();
     Mat image0 = images[0];
-    int nch = image0.channels();
     CV_Assert(image0.dims == 2);
-    if (nch == 3 || nch == 4)
-    {
-        int sz[] = { (int)nimages, nch, image0.rows, image0.cols };
-        blob_.create(4, sz, ddepth);
-        Mat blob = blob_.getMat();
-        Mat ch[4];

+    if (param.datalayout == DNN_LAYOUT_NCHW)
+    {
+        if (nch == 3 || nch == 4)
+        {
+            int sz[] = { (int)nimages, nch, image0.rows, image0.cols };
+            blob_.create(4, sz, param.ddepth);
+            Mat blob = blob_.getMat();
+            Mat ch[4];
+
+            for (size_t i = 0; i < nimages; i++)
+            {
+                const Mat& image = images[i];
+                CV_Assert(image.depth() == blob_.depth());
+                nch = image.channels();
+                CV_Assert(image.dims == 2 && (nch == 3 || nch == 4));
+                CV_Assert(image.size() == image0.size());
+
+                for (int j = 0; j < nch; j++)
+                    ch[j] = Mat(image.rows, image.cols, param.ddepth, blob.ptr((int)i, j));
+                if (param.swapRB)
+                    std::swap(ch[0], ch[2]);
+                split(image, ch);
+            }
+        }
+        else
+        {
+            CV_Assert(nch == 1);
+            int sz[] = { (int)nimages, 1, image0.rows, image0.cols };
+            blob_.create(4, sz, param.ddepth);
+            Mat blob = blob_.getMat();
+
+            for (size_t i = 0; i < nimages; i++)
+            {
+                const Mat& image = images[i];
+                CV_Assert(image.depth() == blob_.depth());
+                nch = image.channels();
+                CV_Assert(image.dims == 2 && (nch == 1));
+                CV_Assert(image.size() == image0.size());
+
+                image.copyTo(Mat(image.rows, image.cols, param.ddepth, blob.ptr((int)i, 0)));
+            }
+        }
+    }
+    else if (param.datalayout == DNN_LAYOUT_NHWC)
+    {
+        int sz[] = { (int)nimages, image0.rows, image0.cols, nch};
+        blob_.create(4, sz, param.ddepth);
+        Mat blob = blob_.getMat();
+        int subMatType = CV_MAKETYPE(param.ddepth, nch);
         for (size_t i = 0; i < nimages; i++)
         {
             const Mat& image = images[i];
             CV_Assert(image.depth() == blob_.depth());
-            nch = image.channels();
-            CV_Assert(image.dims == 2 && (nch == 3 || nch == 4));
+            CV_Assert(image.channels() == image0.channels());
             CV_Assert(image.size() == image0.size());

-            for (int j = 0; j < nch; j++)
-                ch[j] = Mat(image.rows, image.cols, ddepth, blob.ptr((int)i, j));
-            if (swapRB)
-                std::swap(ch[0], ch[2]);
-            split(image, ch);
+            if (param.swapRB)
+            {
+                Mat tmpRB;
+                cvtColor(image, tmpRB, COLOR_BGR2RGB);
+                tmpRB.copyTo(Mat(tmpRB.rows, tmpRB.cols, subMatType, blob.ptr((int)i, 0)));
+            }
+            else
+                image.copyTo(Mat(image.rows, image.cols, subMatType, blob.ptr((int)i, 0)));
         }
     }
     else
-    {
-        CV_Assert(nch == 1);
-        int sz[] = { (int)nimages, 1, image0.rows, image0.cols };
-        blob_.create(4, sz, ddepth);
-        Mat blob = blob_.getMat();
-
-        for (size_t i = 0; i < nimages; i++)
-        {
-            const Mat& image = images[i];
-            CV_Assert(image.depth() == blob_.depth());
-            nch = image.channels();
-            CV_Assert(image.dims == 2 && (nch == 1));
-            CV_Assert(image.size() == image0.size());
-
-            image.copyTo(Mat(image.rows, image.cols, ddepth, blob.ptr((int)i, 0)));
-        }
-    }
+        CV_Error(Error::StsUnsupportedFormat, "Unsupported data layout in blobFromImagesWithParams function.");
 }

 void imagesFromBlob(const cv::Mat& blob_, OutputArrayOfArrays images_)
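To make the `DNN_PMODE_LETTERBOX` branch above concrete, here is a standalone sketch of the same geometry with made-up numbers (a 1280x720 frame letterboxed into 640x640): the image is scaled by the smaller of the two ratios so it fits entirely inside the target, and the remaining rows and columns are filled by `copyMakeBorder` with a constant border.

```cpp
#include <algorithm>
#include <cstdio>

int main()
{
    // Hypothetical example: 1280x720 frame letterboxed into a 640x640 input.
    int srcW = 1280, srcH = 720;
    int dstW = 640,  dstH = 640;

    float resizeFactor = std::min(dstW / (float)srcW, dstH / (float)srcH); // 0.5
    int rw = int(srcW * resizeFactor);   // 640
    int rh = int(srcH * resizeFactor);   // 360

    int top    = (dstH - rh) / 2;        // 140
    int bottom = dstH - top - rh;        // 140
    int left   = (dstW - rw) / 2;        // 0
    int right  = dstW - left - rw;       // 0

    // The resized 640x360 image gets 140 padded rows above and below,
    // matching what the patch passes to copyMakeBorder(..., BORDER_CONSTANT).
    std::printf("resized %dx%d, pad t=%d b=%d l=%d r=%d\n", rw, rh, top, bottom, left, right);
    return 0;
}
```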
@@ -64,16 +64,6 @@ static int toNCDHW(int idx)
     else return (5 + idx) % 4 + 1;
 }

-// This values are used to indicate layer output's data layout where it's possible.
-enum DataLayout
-{
-    DATA_LAYOUT_NHWC,
-    DATA_LAYOUT_NCHW,
-    DATA_LAYOUT_NDHWC,
-    DATA_LAYOUT_UNKNOWN,
-    DATA_LAYOUT_PLANAR // 2-dimensional outputs (matmul, flatten, reshape to 2d)
-};
-
 typedef std::vector<std::pair<String, int> > StrIntVector;

 struct Pin
@@ -276,15 +266,15 @@ static DataLayout getDataLayout(const tensorflow::NodeDef& layer)
-            return DATA_LAYOUT_NHWC;
+            return DNN_LAYOUT_NHWC;
-            return DATA_LAYOUT_NCHW;
+            return DNN_LAYOUT_NCHW;
-            return DATA_LAYOUT_NDHWC;
+            return DNN_LAYOUT_NDHWC;
-    return DATA_LAYOUT_UNKNOWN;
+    return DNN_LAYOUT_UNKNOWN;
@@ -299,7 +289,7 @@ DataLayout getDataLayout(
-    return it != data_layouts.end() ? it->second : DATA_LAYOUT_UNKNOWN;
+    return it != data_layouts.end() ? it->second : DNN_LAYOUT_UNKNOWN;
@@ -325,11 +315,11 @@ void setStrides(LayerParams &layerParams, const tensorflow::NodeDef &layer)
-    if (layout == DATA_LAYOUT_NCHW)
+    if (layout == DNN_LAYOUT_NCHW)
-    else if (layout == DATA_LAYOUT_NDHWC)
+    else if (layout == DNN_LAYOUT_NDHWC)
@@ -340,7 +330,7 @@ void setStrides(LayerParams &layerParams, const tensorflow::NodeDef &layer)
-    if (layout == DATA_LAYOUT_NDHWC) {
+    if (layout == DNN_LAYOUT_NDHWC) {
@@ -375,11 +365,11 @@ void setKSize(LayerParams &layerParams, const tensorflow::NodeDef &layer)
-    if (layout == DATA_LAYOUT_NCHW)
+    if (layout == DNN_LAYOUT_NCHW)
-    else if (layout == DATA_LAYOUT_NDHWC)
+    else if (layout == DNN_LAYOUT_NDHWC)
@@ -391,7 +381,7 @@ void setKSize(LayerParams &layerParams, const tensorflow::NodeDef &layer)
-    if (layout == DATA_LAYOUT_NDHWC) {
+    if (layout == DNN_LAYOUT_NDHWC) {
@@ -438,7 +428,7 @@ bool getExplicitPadding(LayerParams &layerParams, const tensorflow::NodeDef &lay
-    if (getDataLayout(layer) != DATA_LAYOUT_NCHW)
+    if (getDataLayout(layer) != DNN_LAYOUT_NCHW)
@@ -903,8 +893,8 @@ void TFImporter::parseConvolution(tensorflow::GraphDef& net, const tensorflow::N
-    if (getDataLayout(name, data_layouts) == DATA_LAYOUT_UNKNOWN)
+    if (getDataLayout(name, data_layouts) == DNN_LAYOUT_UNKNOWN)
-        data_layouts[name] = DATA_LAYOUT_NHWC;
+        data_layouts[name] = DNN_LAYOUT_NHWC;
@@ -1072,7 +1062,7 @@ void TFImporter::parseMatMul(tensorflow::GraphDef& net, const tensorflow::NodeDe
-    data_layouts[name] = DATA_LAYOUT_PLANAR;
+    data_layouts[name] = DNN_LAYOUT_PLANAR;
@@ -1100,7 +1090,7 @@ void TFImporter::parseReshape(tensorflow::GraphDef& net, const tensorflow::NodeD
-        if (inpLayout == DATA_LAYOUT_NHWC)
+        if (inpLayout == DNN_LAYOUT_NHWC)
@@ -1108,11 +1098,11 @@ void TFImporter::parseReshape(tensorflow::GraphDef& net, const tensorflow::NodeD
-                inpLayout = DATA_LAYOUT_NCHW;
+                inpLayout = DNN_LAYOUT_NCHW;
-                inpLayout = DATA_LAYOUT_NHWC;
+                inpLayout = DNN_LAYOUT_NHWC;
@@ -1128,17 +1118,17 @@ void TFImporter::parseReshape(tensorflow::GraphDef& net, const tensorflow::NodeD
-        if ((inpLayout == DATA_LAYOUT_NHWC || inpLayout == DATA_LAYOUT_UNKNOWN || inpLayout == DATA_LAYOUT_PLANAR) &&
+        if ((inpLayout == DNN_LAYOUT_NHWC || inpLayout == DNN_LAYOUT_UNKNOWN || inpLayout == DNN_LAYOUT_PLANAR) &&
-            inpLayout = DATA_LAYOUT_NCHW;
+            inpLayout = DNN_LAYOUT_NCHW;
-        data_layouts[name] = newShapeSize == 2 ? DATA_LAYOUT_PLANAR : inpLayout;
+        data_layouts[name] = newShapeSize == 2 ? DNN_LAYOUT_PLANAR : inpLayout;
@@ -1206,13 +1196,13 @@ void TFImporter::parseExpandDims(tensorflow::GraphDef& net, const tensorflow::No
-        if(inpLayout == DATA_LAYOUT_NHWC)
+        if(inpLayout == DNN_LAYOUT_NHWC)
-    if(inpLayout == DATA_LAYOUT_NHWC || inpLayout == DATA_LAYOUT_NCHW)
+    if(inpLayout == DNN_LAYOUT_NHWC || inpLayout == DNN_LAYOUT_NCHW)
@@ -1221,13 +1211,13 @@ void TFImporter::parseExpandDims(tensorflow::GraphDef& net, const tensorflow::No
-    if (inpShape.size() == 5 && (inpLayout == DATA_LAYOUT_NDHWC || inpLayout == DATA_LAYOUT_UNKNOWN))
+    if (inpShape.size() == 5 && (inpLayout == DNN_LAYOUT_NDHWC || inpLayout == DNN_LAYOUT_UNKNOWN))
-        if(inpLayout == DATA_LAYOUT_NDHWC)
+        if(inpLayout == DNN_LAYOUT_NDHWC)
@@ -1239,7 +1229,7 @@ void TFImporter::parseExpandDims(tensorflow::GraphDef& net, const tensorflow::No
-    if((inpLayout != DATA_LAYOUT_NHWC && inpLayout != DATA_LAYOUT_NCHW) && outShapeSize == 5)
+    if((inpLayout != DNN_LAYOUT_NHWC && inpLayout != DNN_LAYOUT_NCHW) && outShapeSize == 5)
@@ -1255,11 +1245,11 @@ void TFImporter::parseExpandDims(tensorflow::GraphDef& net, const tensorflow::No
-        data_layouts[name] = DATA_LAYOUT_NDHWC;
+        data_layouts[name] = DNN_LAYOUT_NDHWC;
-        data_layouts[name] = DATA_LAYOUT_NCHW;
+        data_layouts[name] = DNN_LAYOUT_NCHW;
@@ -1320,7 +1310,7 @@ void TFImporter::parseFlatten(tensorflow::GraphDef& net, const tensorflow::NodeD
-    if (inpLayout == DATA_LAYOUT_NHWC)
+    if (inpLayout == DNN_LAYOUT_NHWC)
@@ -1336,7 +1326,7 @@ void TFImporter::parseFlatten(tensorflow::GraphDef& net, const tensorflow::NodeD
-    data_layouts[name] = DATA_LAYOUT_PLANAR;
+    data_layouts[name] = DNN_LAYOUT_PLANAR;
@@ -1354,19 +1344,19 @@ void TFImporter::parseTranspose(tensorflow::GraphDef& net, const tensorflow::Nod
-    if (inpLayout == DATA_LAYOUT_NHWC)
+    if (inpLayout == DNN_LAYOUT_NHWC)
-            data_layouts[name] = DATA_LAYOUT_NCHW;
+            data_layouts[name] = DNN_LAYOUT_NCHW;
-            data_layouts[name] = DATA_LAYOUT_NHWC;
+            data_layouts[name] = DNN_LAYOUT_NHWC;
@@ -1374,25 +1364,25 @@ void TFImporter::parseTranspose(tensorflow::GraphDef& net, const tensorflow::Nod
-            data_layouts[name] = DATA_LAYOUT_NCHW; // we keep track NCHW because channels position only matters
+            data_layouts[name] = DNN_LAYOUT_NCHW; // we keep track NCHW because channels position only matters
-    else if (inpLayout == DATA_LAYOUT_NCHW)
+    else if (inpLayout == DNN_LAYOUT_NCHW)
-            data_layouts[name] = DATA_LAYOUT_NHWC;
+            data_layouts[name] = DNN_LAYOUT_NHWC;
-            data_layouts[name] = DATA_LAYOUT_NCHW;
+            data_layouts[name] = DNN_LAYOUT_NCHW;
@@ -1410,7 +1400,7 @@ void TFImporter::parseTranspose(tensorflow::GraphDef& net, const tensorflow::Nod
-        data_layouts[name] = DATA_LAYOUT_UNKNOWN;
+        data_layouts[name] = DNN_LAYOUT_UNKNOWN;
@@ -1456,9 +1446,9 @@ void TFImporter::parseConcat(tensorflow::GraphDef& net, const tensorflow::NodeDe
-    if (getDataLayout(name, data_layouts) == DATA_LAYOUT_NHWC)
+    if (getDataLayout(name, data_layouts) == DNN_LAYOUT_NHWC)
-    else if (getDataLayout(name, data_layouts) == DATA_LAYOUT_NDHWC)
+    else if (getDataLayout(name, data_layouts) == DNN_LAYOUT_NDHWC)
@@ -1585,7 +1575,7 @@ void TFImporter::parsePlaceholder(tensorflow::GraphDef& net, const tensorflow::N
-        if (dims.size() == 4 && predictedLayout == DATA_LAYOUT_NHWC)
+        if (dims.size() == 4 && predictedLayout == DNN_LAYOUT_NHWC)
@@ -1593,7 +1583,7 @@ void TFImporter::parsePlaceholder(tensorflow::GraphDef& net, const tensorflow::N
-        if (dims.size() == 5 && predictedLayout == DATA_LAYOUT_NDHWC)
+        if (dims.size() == 5 && predictedLayout == DNN_LAYOUT_NDHWC)
@@ -1624,7 +1614,7 @@ void TFImporter::parseSplit(tensorflow::GraphDef& net, const tensorflow::NodeDef
-    if (getDataLayout(name, data_layouts) == DATA_LAYOUT_NHWC)
+    if (getDataLayout(name, data_layouts) == DNN_LAYOUT_NHWC)
@@ -1654,7 +1644,7 @@ void TFImporter::parseSlice(tensorflow::GraphDef& net, const tensorflow::NodeDef
-    if (begins.total() == 4 && getDataLayout(name, data_layouts) == DATA_LAYOUT_NHWC)
+    if (begins.total() == 4 && getDataLayout(name, data_layouts) == DNN_LAYOUT_NHWC)
@@ -1695,7 +1685,7 @@ void TFImporter::parseStridedSlice(tensorflow::GraphDef& net, const tensorflow::
-    if (begins.total() == 4 && getDataLayout(name, data_layouts) == DATA_LAYOUT_NHWC)
+    if (begins.total() == 4 && getDataLayout(name, data_layouts) == DNN_LAYOUT_NHWC)
@@ -2029,7 +2019,7 @@ void TFImporter::parseConv2DBackpropInput(tensorflow::GraphDef& net, const tenso
-    int shift = (getDataLayout(layer) == DATA_LAYOUT_NCHW);
+    int shift = (getDataLayout(layer) == DNN_LAYOUT_NCHW);
@@ -2141,7 +2131,7 @@ void TFImporter::parseBlockLSTM(tensorflow::GraphDef& net, const tensorflow::Nod
-    data_layouts[name] = DATA_LAYOUT_UNKNOWN;
+    data_layouts[name] = DNN_LAYOUT_UNKNOWN;
@@ -2239,7 +2229,7 @@ void TFImporter::parseL2Normalize(tensorflow::GraphDef& net, const tensorflow::N
-    if (getDataLayout(name, data_layouts) == DATA_LAYOUT_NHWC)
+    if (getDataLayout(name, data_layouts) == DNN_LAYOUT_NHWC)
@@ -2292,7 +2282,7 @@ void TFImporter::parsePriorBox(tensorflow::GraphDef& net, const tensorflow::Node
-    data_layouts[name] = DATA_LAYOUT_UNKNOWN;
+    data_layouts[name] = DNN_LAYOUT_UNKNOWN;
@@ -2417,7 +2407,7 @@ void TFImporter::parseMean(tensorflow::GraphDef& net, const tensorflow::NodeDef&
-        if (layout == DATA_LAYOUT_NHWC)
+        if (layout == DNN_LAYOUT_NHWC)
@@ -2539,7 +2529,7 @@ void TFImporter::parseMean(tensorflow::GraphDef& net, const tensorflow::NodeDef&
-            data_layouts[name] = DATA_LAYOUT_PLANAR;
+            data_layouts[name] = DNN_LAYOUT_PLANAR;
@@ -2562,7 +2552,7 @@ void TFImporter::parsePack(tensorflow::GraphDef& net, const tensorflow::NodeDef&
-    data_layouts[name] = DATA_LAYOUT_UNKNOWN;
+    data_layouts[name] = DNN_LAYOUT_UNKNOWN;
@@ -2959,11 +2949,11 @@ static void addConstNodes(tensorflow::GraphDef& net, std::map<String, int>& cons
-// this layer's output has this data layout too. Returns DATA_LAYOUT_UNKNOWN otherwise.
+// this layer's output has this data layout too. Returns DNN_LAYOUT_UNKNOWN otherwise.
-    if (layout != DATA_LAYOUT_UNKNOWN)
+    if (layout != DNN_LAYOUT_UNKNOWN)
@@ -2975,17 +2965,17 @@ DataLayout TFImporter::predictOutputDataLayout(const tensorflow::NodeDef& layer)
-            if (layout != DATA_LAYOUT_UNKNOWN)
+            if (layout != DNN_LAYOUT_UNKNOWN)
-                if (it->second != layout && it->second != DATA_LAYOUT_UNKNOWN)
+                if (it->second != layout && it->second != DNN_LAYOUT_UNKNOWN)
-                    return DATA_LAYOUT_UNKNOWN;
+                    return DNN_LAYOUT_UNKNOWN;
-    if (layout != DATA_LAYOUT_UNKNOWN)
+    if (layout != DNN_LAYOUT_UNKNOWN)
@@ -3061,14 +3051,14 @@ void TFImporter::populateNet()
-            if (layout != DATA_LAYOUT_UNKNOWN)
+            if (layout != DNN_LAYOUT_UNKNOWN)
-                if (it->second == DATA_LAYOUT_UNKNOWN)
+                if (it->second == DNN_LAYOUT_UNKNOWN)
-                    it->second = DATA_LAYOUT_UNKNOWN;
+                    it->second = DNN_LAYOUT_UNKNOWN;
-                    layout = DATA_LAYOUT_UNKNOWN;
+                    layout = DNN_LAYOUT_UNKNOWN;
@@ -3084,12 +3074,12 @@ void TFImporter::populateNet()
-            if (layout != DATA_LAYOUT_UNKNOWN)
+            if (layout != DNN_LAYOUT_UNKNOWN)
-                if (it->second == DATA_LAYOUT_UNKNOWN)
+                if (it->second == DNN_LAYOUT_UNKNOWN)
-                    it->second = DATA_LAYOUT_UNKNOWN;
+                    it->second = DNN_LAYOUT_UNKNOWN;
@ -22,18 +22,6 @@ CV__DNN_INLINE_NS_BEGIN
|
|||||||
|
|
||||||
using namespace opencv_tflite;
|
using namespace opencv_tflite;
|
||||||
|
|
||||||
// This values are used to indicate layer output's data layout where it's possible.
|
|
||||||
// Approach is similar to TensorFlow importer but TFLite models do not have explicit
|
|
||||||
// layout field "data_format". So we consider that all 4D inputs are in NHWC data layout.
|
|
||||||
enum DataLayout
|
|
||||||
{
|
|
||||||
DATA_LAYOUT_NHWC,
|
|
||||||
DATA_LAYOUT_NCHW,
|
|
||||||
DATA_LAYOUT_NDHWC,
|
|
||||||
DATA_LAYOUT_UNKNOWN,
|
|
||||||
DATA_LAYOUT_PLANAR // 2-dimensional outputs (matmul, flatten, reshape to 2d)
|
|
||||||
};
|
|
||||||
|
|
||||||
class TFLiteImporter {
|
class TFLiteImporter {
|
||||||
public:
|
public:
|
||||||
TFLiteImporter(Net& net, const char* modelBuffer, size_t bufSize);
|
TFLiteImporter(Net& net, const char* modelBuffer, size_t bufSize);
|
||||||
@ -139,10 +127,10 @@ DataLayout estimateLayout(const Tensor& t)
|
|||||||
const auto t_shape = t.shape();
|
const auto t_shape = t.shape();
|
||||||
CV_Assert(t_shape);
|
CV_Assert(t_shape);
|
||||||
switch (t_shape->size()) {
|
switch (t_shape->size()) {
|
||||||
case 5: return DATA_LAYOUT_NDHWC;
|
case 5: return DNN_LAYOUT_NDHWC;
|
||||||
case 4: return DATA_LAYOUT_NHWC;
|
case 4: return DNN_LAYOUT_NHWC;
|
||||||
case 2: return DATA_LAYOUT_PLANAR;
|
case 2: return DNN_LAYOUT_PLANAR;
|
||||||
default: return DATA_LAYOUT_UNKNOWN;
|
default: return DNN_LAYOUT_UNKNOWN;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -161,7 +149,7 @@ void TFLiteImporter::populateNet()
     CV_Assert(opCodes);

     CV_Assert(modelTensors);
-    layouts.resize(modelTensors->size(), DATA_LAYOUT_UNKNOWN);
+    layouts.resize(modelTensors->size(), DNN_LAYOUT_UNKNOWN);
     size_t subgraph_inputs_size = subgraph_inputs->size();
     std::vector<std::string> inputsNames(subgraph_inputs_size);
     std::vector<MatShape> inputsShapes(subgraph_inputs_size);
@@ -177,7 +165,7 @@ void TFLiteImporter::populateNet()
         // Keep info about origin inputs names and shapes
         inputsNames[i] = tensor->name()->str();
         std::vector<int> shape(tensor->shape()->begin(), tensor->shape()->end());
-        if (layouts[idx] == DATA_LAYOUT_NHWC) {
+        if (layouts[idx] == DNN_LAYOUT_NHWC) {
             CV_CheckEQ(shape.size(), (size_t)4, "");
             std::swap(shape[2], shape[3]);
             std::swap(shape[1], shape[2]);
@@ -257,14 +245,14 @@ void TFLiteImporter::populateNet()

         // Predict output layout. Some layer-specific parsers may set them explicitly.
         // Otherwise, propagate input layout.
-        if (layouts[op_outputs->Get(0)] == DATA_LAYOUT_UNKNOWN) {
-            DataLayout predictedLayout = DATA_LAYOUT_UNKNOWN;
+        if (layouts[op_outputs->Get(0)] == DNN_LAYOUT_UNKNOWN) {
+            DataLayout predictedLayout = DNN_LAYOUT_UNKNOWN;
             for (auto layout : inpLayouts) {
-                if (layout != DATA_LAYOUT_UNKNOWN) {
-                    if (predictedLayout == DATA_LAYOUT_UNKNOWN)
+                if (layout != DNN_LAYOUT_UNKNOWN) {
+                    if (predictedLayout == DNN_LAYOUT_UNKNOWN)
                         predictedLayout = layout;
                     else if (predictedLayout != layout) {
-                        predictedLayout = DATA_LAYOUT_UNKNOWN;
+                        predictedLayout = DNN_LAYOUT_UNKNOWN;
                         break;
                     }
                 }
@@ -491,11 +479,11 @@ void TFLiteImporter::parseUnpooling(const Operator& op, const std::string& opcod
 void TFLiteImporter::parseReshape(const Operator& op, const std::string& opcode, LayerParams& layerParams) {
     DataLayout inpLayout = layouts[op.inputs()->Get(0)];

-    if (inpLayout == DATA_LAYOUT_NHWC) {
+    if (inpLayout == DNN_LAYOUT_NHWC) {
         // Permute to NCHW
         int permId = addPermuteLayer({0, 2, 3, 1}, layerParams.name + "/permute", layerIds[op.inputs()->Get(0)]); // NCHW -> NHWC
         layerIds[op.inputs()->Get(0)] = std::make_pair(permId, 0);
-        layouts[op.outputs()->Get(0)] = DATA_LAYOUT_NCHW;
+        layouts[op.outputs()->Get(0)] = DNN_LAYOUT_NCHW;
     }

     layerParams.type = "Reshape";
@@ -514,7 +502,7 @@ void TFLiteImporter::parseConcat(const Operator& op, const std::string& opcode,
     int axis = options->axis();

     DataLayout inpLayout = layouts[op.inputs()->Get(0)];
-    if (inpLayout == DATA_LAYOUT_NHWC) {
+    if (inpLayout == DNN_LAYOUT_NHWC) {
         // OpenCV works in NCHW data layout. So change the axis correspondingly.
         axis = normalize_axis(axis, 4);
         static const int remap[] = {0, 2, 3, 1};
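The concat hunk above is easiest to read with a concrete value: a TFLite concatenation over the channel axis (axis 3 in NHWC, or -1) must become axis 1 once the data lives in an NCHW blob. A self-contained sketch of that index translation, with the axis normalization written out inline instead of calling the importer's normalize_axis helper; the function name is illustrative only:

#include <cassert>

// Maps an NHWC axis index (possibly negative, TFLite-style) to the
// corresponding NCHW axis index, mirroring remap[] = {0, 2, 3, 1} above.
int nhwcAxisToNchw(int axis)
{
    if (axis < 0)
        axis += 4;                            // e.g. -1 -> 3 (channels)
    assert(axis >= 0 && axis < 4);
    static const int remap[] = {0, 2, 3, 1};  // N->0, H->2, W->3, C->1
    return remap[axis];
}

// nhwcAxisToNchw(3) == 1 and nhwcAxisToNchw(-1) == 1: a channel-wise concat
// becomes a concat over axis 1 in OpenCV's NCHW layout.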
@@ -63,6 +63,63 @@ TEST(imagesFromBlob, Regression)
     }
 }

+TEST(blobFromImageWithParams_4ch, NHWC_scalar_scale)
+{
+    Mat img(10, 10, CV_8UC4, cv::Scalar(0,1,2,3));
+    std::vector<double> factorVec = {0.1, 0.2, 0.3, 0.4};
+
+    Scalar scalefactor(factorVec[0], factorVec[1], factorVec[2], factorVec[3]);
+
+    Image2BlobParams param;
+    param.scalefactor = scalefactor;
+    param.datalayout = DNN_LAYOUT_NHWC;
+    Mat blob = dnn::blobFromImageWithParams(img, param); // [1, 10, 10, 4]
+
+    float* blobPtr = blob.ptr<float>(0);
+    std::vector<float> targetVec = {(float )factorVec[0] * 0, (float )factorVec[1] * 1, (float )factorVec[2] * 2, (float )factorVec[3] * 3}; // Target Value.
+    for (int hi = 0; hi < 10; hi++)
+    {
+        for (int wi = 0; wi < 10; wi++)
+        {
+            float* hwPtr = blobPtr + hi * 10 * 4 + wi * 4;
+
+            // Check equal
+            EXPECT_NEAR(hwPtr[0], targetVec[0], 1e-5);
+            EXPECT_NEAR(hwPtr[1], targetVec[1], 1e-5);
+            EXPECT_NEAR(hwPtr[2], targetVec[2], 1e-5);
+            EXPECT_NEAR(hwPtr[3], targetVec[3], 1e-5);
+        }
+    }
+}
+
+TEST(blobFromImageWithParams_4ch, letter_box)
+{
+    Mat img(40, 20, CV_8UC4, cv::Scalar(0,1,2,3));
+
+    // Construct target mat.
+    Mat targetCh[4];
+    // The letterbox will add zero at the left and right of output blob.
+    // After the letterbox, every row data would have same value showing as valVec.
+    std::vector<uint8_t> valVec = {0,0,0,0,0, 1,1,1,1,1,1,1,1,1,1, 0,0,0,0,0};
+    Mat rowM(1, 20, CV_8UC1, valVec.data());
+
+    for(int i = 0; i < 4; i++)
+    {
+        targetCh[i] = rowM * i;
+    }
+
+    Mat targetImg;
+    merge(targetCh, 4, targetImg);
+    Size targeSize(20, 20);
+
+    Image2BlobParams param;
+    param.size = targeSize;
+    param.paddingmode = DNN_PMODE_LETTERBOX;
+    Mat blob = dnn::blobFromImageWithParams(img, param);
+    Mat targetBlob = dnn::blobFromImage(targetImg, 1.0, targeSize); // only convert data from uint8 to float32.
+    EXPECT_EQ(0, cvtest::norm(targetBlob, blob, NORM_INF));
+}
+
 TEST(readNet, Regression)
 {
     Net net = readNet(findDataFile("dnn/squeezenet_v1.1.prototxt"),
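Taken together, the two tests above also serve as usage documentation for the new API. Below is a condensed application-side sketch, not part of the patch: the image path and input size are placeholders, only the Image2BlobParams fields exercised by the tests are set, and the remaining fields are assumed to keep the blobFromImage-style defaults.

#include <opencv2/dnn.hpp>
#include <opencv2/imgcodecs.hpp>

using namespace cv;
using namespace cv::dnn;

int main()
{
    Mat img = imread("input.jpg");                 // placeholder path, not part of the patch

    Image2BlobParams param;
    param.scalefactor = Scalar::all(1.0 / 255.0);  // a per-channel Scalar is also accepted, as in the test
    param.size = Size(224, 224);                   // placeholder network input size
    param.datalayout = DNN_LAYOUT_NHWC;            // NHWC instead of the usual NCHW blob
    param.paddingmode = DNN_PMODE_LETTERBOX;       // pad borders instead of distorting the aspect ratio

    Mat blob = blobFromImageWithParams(img, param); // 1 x 224 x 224 x 3 for a 3-channel input
    // net.setInput(blob);                          // feed a cv::dnn::Net as usual
    return 0;
}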