mirror of
https://github.com/opencv/opencv.git
synced 2025-01-13 08:08:10 +08:00
613c12e590
CUDA backend for the DNN module * stub cuda4dnn design * minor fixes for tests and doxygen * add csl public api directory to module headers * add low-level CSL components * add high-level CSL components * integrate csl::Tensor into backbone code * switch to CPU iff unsupported; otherwise, fail on error * add fully connected layer * add softmax layer * add activation layers * support arbitary rank TensorDescriptor * pass input wrappers to `initCUDA()` * add 1d/2d/3d-convolution * add pooling layer * reorganize and refactor code * fixes for gcc, clang and doxygen; remove cxx14/17 code * add blank_layer * add LRN layer * add rounding modes for pooling layer * split tensor.hpp into tensor.hpp and tensor_ops.hpp * add concat layer * add scale layer * add batch normalization layer * split math.cu into activations.cu and math.hpp * add eltwise layer * add flatten layer * add tensor transform api * add asymmetric padding support for convolution layer * add reshape layer * fix rebase issues * add permute layer * add padding support for concat layer * refactor and reorganize code * add normalize layer * optimize bias addition in scale layer * add prior box layer * fix and optimize normalize layer * add asymmetric padding support for pooling layer * add event API * improve pooling performance for some padding scenarios * avoid over-allocation of compute resources to kernels * improve prior box performance * enable layer fusion * add const layer * add resize layer * add slice layer * add padding layer * add deconvolution layer * fix channelwise ReLU initialization * add vector traits * add vectorized versions of relu, clipped_relu, power * add vectorized concat kernels * improve concat_with_offsets performance * vectorize scale and bias kernels * add support for multi-billion element tensors * vectorize prior box kernels * fix address alignment check * improve bias addition performance of conv/deconv/fc layers * restructure code for supporting multiple targets * add 
DNN_TARGET_CUDA_FP64 * add DNN_TARGET_FP16 * improve vectorization * add region layer * improve tensor API, add dynamic ranks 1. use ManagedPtr instead of a Tensor in backend wrapper 2. add new methods to tensor classes - size_range: computes the combined size of for a given axis range - tensor span/view can be constructed from a raw pointer and shape 3. the tensor classes can change their rank at runtime (previously rank was fixed at compile-time) 4. remove device code from tensor classes (as they are unused) 5. enforce strict conditions on tensor class APIs to improve debugging ability * fix parametric relu activation * add squeeze/unsqueeze tensor API * add reorg layer * optimize permute and enable 2d permute * enable 1d and 2d slice * add split layer * add shuffle channel layer * allow tensors of different ranks in reshape primitive * patch SliceOp to allow Crop Layer * allow extra shape inputs in reshape layer * use `std::move_backward` instead of `std::move` for insert in resizable_static_array * improve workspace management * add spatial LRN * add nms (cpu) to region layer * add max pooling with argmax ( and a fix to limits.hpp) * add max unpooling layer * rename DNN_TARGET_CUDA_FP32 to DNN_TARGET_CUDA * update supportBackend to be more rigorous * remove stray include from preventing non-cuda build * include op_cuda.hpp outside condition #if * refactoring, fixes and many optimizations * drop DNN_TARGET_CUDA_FP64 * fix gcc errors * increase max. tensor rank limit to six * add Interp layer * drop custom layers; use BackendNode * vectorize activation kernels * fixes for gcc * remove wrong assertion * fix broken assertion in unpooling primitive * fix build errors in non-CUDA build * completely remove workspace from public API * fix permute layer * enable accuracy and perf. 
tests for DNN_TARGET_CUDA * add asynchronous forward * vectorize eltwise ops * vectorize fill kernel * fixes for gcc * remove CSL headers from public API * remove csl header source group from cmake * update min. cudnn version in cmake * add numerically stable FP32 log1pexp * refactor code * add FP16 specialization to cudnn based tensor addition * vectorize scale1 and bias1 + minor refactoring * fix doxygen build * fix invalid alignment assertion * clear backend wrappers before allocateLayers * ignore memory lock failures * do not allocate internal blobs * integrate NVTX * add numerically stable half precision log1pexp * fix indentation, following coding style, improve docs * remove accidental modification of IE code * Revert "add asynchronous forward" This reverts commit 1154b9da9da07e9b52f8a81bdcea48cf31c56f70. * [cmake] throw error for unsupported CC versions * fix rebase issues * add more docs, refactor code, fix bugs * minor refactoring and fixes * resolve warnings/errors from clang * remove haveCUDA() checks from supportBackend() * remove NVTX integration * changes based on review comments * avoid exception when no CUDA device is present * add color code for CUDA in Net::dump
513 lines
16 KiB
C++
513 lines
16 KiB
C++
// This file is part of OpenCV project.
|
|
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
|
// of this distribution and at http://opencv.org/license.html.
|
|
//
|
|
// Copyright (C) 2017, Intel Corporation, all rights reserved.
|
|
// Third party copyrights are property of their respective owners.
|
|
|
|
#include "test_precomp.hpp"
|
|
#include <opencv2/core/ocl.hpp>
|
|
#include <opencv2/core/opencl/ocl_defs.hpp>
|
|
#include <opencv2/dnn/layer.details.hpp> // CV_DNN_REGISTER_LAYER_CLASS
|
|
|
|
namespace opencv_test { namespace {
|
|
|
|
// Regression test: blobFromImage must keep all four channels of a 4-channel
// image. Source channel c is filled with the constant c, so the c-th plane of
// the resulting blob is expected to have an infinity norm of exactly c.
TEST(blobFromImage_4ch, Regression)
{
    const int kChannels = 4;

    // Build a 10x10 image whose c-th channel holds the value c everywhere.
    Mat planes[kChannels];
    for (int c = 0; c < kChannels; ++c)
    {
        planes[c] = Mat::ones(10, 10, CV_8U) * c;
    }
    Mat image;
    merge(planes, kChannels, image);

    // No resize (empty Size), no mean subtraction, no R/B swap, no crop.
    Mat blob = dnn::blobFromImage(image, 1., Size(), Scalar(), false, false);

    for (int c = 0; c < kChannels; ++c)
    {
        // Header-only view over the float data returned by blob.ptr(0, c).
        const Mat blobPlane(image.rows, image.cols, CV_32F, blob.ptr(0, c));
        ASSERT_DOUBLE_EQ(cvtest::norm(blobPlane, cv::NORM_INF), c);
    }
}
|
|
|
|
// Verifies that the output-argument overload of blobFromImage reuses a blob
// that already has the right shape and type: the data pointer must be the
// same before and after the call.
TEST(blobFromImage, allocated)
{
    int shape[] = {1, 3, 4, 5};

    // Image dimensions are taken from the blob shape: shape[2] x shape[3]
    // with shape[1] float channels. Its contents are irrelevant here.
    Mat image(shape[2], shape[3], CV_32FC(shape[1]));
    Mat preallocated(4, shape, CV_32F);

    void* const dataBefore = preallocated.data;
    dnn::blobFromImage(image, preallocated, 1.0 / 255, Size(), Scalar(), false, false);

    ASSERT_EQ(dataBefore, preallocated.data);
}
|
|
|
|
// Round-trip test: packing a batch of images with blobFromImages and
// unpacking it with imagesFromBlob must give back the original pixels.
TEST(imagesFromBlob, Regression)
{
    const int kImageCount = 8;

    // Random 100x100 3-channel float images with values drawn from [0, 1).
    std::vector<cv::Mat> sources;
    sources.reserve(kImageCount);
    for (int idx = 0; idx < kImageCount; ++idx)
    {
        cv::Mat image = cv::Mat::ones(100, 100, CV_32FC3);
        cv::randu(image, cv::Scalar::all(0), cv::Scalar::all(1));
        sources.push_back(image);
    }

    // Unit scale, no resize, no mean, no R/B swap, no crop.
    cv::Mat blob = cv::dnn::blobFromImages(sources, 1., cv::Size(), cv::Scalar(), false, false);

    std::vector<cv::Mat> restored;
    cv::dnn::imagesFromBlob(blob, restored);

    for (int idx = 0; idx < kImageCount; ++idx)
    {
        // The element-wise "!=" mask must contain no non-zero entries.
        ASSERT_EQ(cv::countNonZero(sources[idx] != restored[idx]), 0);
    }
}
|
|
|
|
// Smoke test for the generic readNet() entry point: for each model/config
// file pair below (told apart here only by file extension) the loader must
// return a non-empty network. The larger model files are looked up with the
// second findDataFile argument set to false.
TEST(readNet, Regression)
{
    // .prototxt + .caffemodel
    {
        Net loaded = readNet(findDataFile("dnn/squeezenet_v1.1.prototxt"),
                             findDataFile("dnn/squeezenet_v1.1.caffemodel", false));
        EXPECT_FALSE(loaded.empty());
    }

    // Same kind of pair, deliberately passed in the opposite order.
    {
        Net loaded = readNet(findDataFile("dnn/opencv_face_detector.caffemodel", false),
                             findDataFile("dnn/opencv_face_detector.prototxt"));
        EXPECT_FALSE(loaded.empty());
    }

    // Single .t7 file, no separate config.
    {
        Net loaded = readNet(findDataFile("dnn/openface_nn4.small2.v1.t7", false));
        EXPECT_FALSE(loaded.empty());
    }

    // .cfg + .weights
    {
        Net loaded = readNet(findDataFile("dnn/tiny-yolo-voc.cfg"),
                             findDataFile("dnn/tiny-yolo-voc.weights", false));
        EXPECT_FALSE(loaded.empty());
    }

    // .pbtxt + .pb
    {
        Net loaded = readNet(findDataFile("dnn/ssd_mobilenet_v1_coco.pbtxt"),
                             findDataFile("dnn/ssd_mobilenet_v1_coco.pb", false));
        EXPECT_FALSE(loaded.empty());
    }
}
|
|
|
|
typedef testing::TestWithParam<tuple<Backend, Target> > dump;
|
|
TEST_P(dump, Regression)
|
|
{
|
|
const int backend = get<0>(GetParam());
|
|
const int target = get<1>(GetParam());
|
|
Net net = readNet(findDataFile("dnn/squeezenet_v1.1.prototxt"),
|
|
findDataFile("dnn/squeezenet_v1.1.caffemodel", false));
|
|
|
|
int size[] = {1, 3, 227, 227};
|
|
Mat input = cv::Mat::ones(4, size, CV_32F);
|
|
net.setInput(input);
|
|
net.setPreferableBackend(backend);
|
|
net.setPreferableTarget(target);
|
|
EXPECT_FALSE(net.dump().empty());
|
|
net.forward();
|
|
EXPECT_FALSE(net.dump().empty());
|
|
}
|
|
|
|
INSTANTIATE_TEST_CASE_P(/**/, dump, dnnBackendsAndTargets());
|
|
|
|
// Minimal custom layer used by the LayerFactory test: its forward pass
// ignores the inputs and fills the first output blob with the constant 1.
class FirstCustomLayer CV_FINAL : public Layer
{
public:
    // Forwards the layer parameters to the base Layer.
    FirstCustomLayer(const LayerParams &params) : Layer(params) {}

    // Factory function with the signature expected by layer registration.
    static Ptr<Layer> create(LayerParams& params)
    {
        return Ptr<Layer>(new FirstCustomLayer(params));
    }

    // Writes 1 into every element of outputs[0]; inputs and internals are unused.
    void forward(InputArrayOfArrays, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays) CV_OVERRIDE
    {
        CV_TRACE_FUNCTION();
        CV_TRACE_ARG_VALUE(name, "name", name.c_str());

        std::vector<Mat> outputs;
        outputs_arr.getMatVector(outputs);
        outputs[0].setTo(1);
    }
};
|
|
|
|
// Minimal custom layer used by the LayerFactory test: its forward pass
// ignores the inputs and fills the first output blob with the constant 2.
class SecondCustomLayer CV_FINAL : public Layer
{
public:
    // Forwards the layer parameters to the base Layer.
    SecondCustomLayer(const LayerParams &params) : Layer(params) {}

    // Factory function with the signature expected by layer registration.
    static Ptr<Layer> create(LayerParams& params)
    {
        return Ptr<Layer>(new SecondCustomLayer(params));
    }

    // Writes 2 into every element of outputs[0]; inputs and internals are unused.
    void forward(InputArrayOfArrays, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays) CV_OVERRIDE
    {
        CV_TRACE_FUNCTION();
        CV_TRACE_ARG_VALUE(name, "name", name.c_str());

        std::vector<Mat> outputs;
        outputs_arr.getMatVector(outputs);
        outputs[0].setTo(2);
    }
};
|
|
|
|
// Registers two implementations under the same type name "CustomType" and
// checks which one a freshly built network picks up:
//   step 0: FirstCustomLayer registered             -> output is 1
//   step 1: SecondCustomLayer registered on top     -> output is 2
//   step 2: one unregisterLayer("CustomType") call  -> output is 1 again
// A final unregisterLayer call runs after the loop.
TEST(LayerFactory, custom_layers)
{
    LayerParams lp;
    lp.name = "name";
    lp.type = "CustomType";

    // Value the single-element output must hold after each step.
    const int kExpected[] = {1, 2, 1};

    Mat inp(1, 1, CV_32FC1);
    for (int step = 0; step < 3; ++step)
    {
        switch (step)
        {
        case 0:
            CV_DNN_REGISTER_LAYER_CLASS(CustomType, FirstCustomLayer);
            break;
        case 1:
            CV_DNN_REGISTER_LAYER_CLASS(CustomType, SecondCustomLayer);
            break;
        default:
            LayerFactory::unregisterLayer("CustomType");
            break;
        }

        // A new network per step, so the layer is created from the current registry state.
        Net net;
        net.addLayerToPrev(lp.name, lp.type, lp);
        net.setInput(inp);
        net.setPreferableBackend(DNN_BACKEND_OPENCV);

        Mat output = net.forward();
        EXPECT_EQ(output.at<float>(0), kExpected[step]);
    }
    LayerFactory::unregisterLayer("CustomType");
}
|
|
|
|
typedef testing::TestWithParam<tuple<float, Vec3f, int, tuple<Backend, Target> > > setInput;
|
|
TEST_P(setInput, normalization)
|
|
{
|
|
const float kScale = get<0>(GetParam());
|
|
const Scalar kMean = get<1>(GetParam());
|
|
const int dtype = get<2>(GetParam());
|
|
const int backend = get<0>(get<3>(GetParam()));
|
|
const int target = get<1>(get<3>(GetParam()));
|
|
const bool kSwapRB = true;
|
|
|
|
if(backend == DNN_BACKEND_CUDA)
|
|
applyTestTag(CV_TEST_TAG_DNN_SKIP_CUDA);
|
|
if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16 && dtype != CV_32F)
|
|
applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16);
|
|
if (backend == DNN_BACKEND_VKCOM && dtype != CV_32F)
|
|
throw SkipTestException(CV_TEST_TAG_DNN_SKIP_VULKAN);
|
|
|
|
Mat inp(5, 5, CV_8UC3);
|
|
randu(inp, 0, 255);
|
|
Mat ref = blobFromImage(inp, kScale, Size(), kMean, kSwapRB, /*crop*/false);
|
|
|
|
LayerParams lp;
|
|
Net net;
|
|
net.addLayerToPrev("testLayer", "Identity", lp);
|
|
net.setPreferableBackend(backend);
|
|
net.setPreferableTarget(target);
|
|
|
|
Mat blob = blobFromImage(inp, 1.0, Size(), Scalar(), kSwapRB, /*crop*/false, dtype);
|
|
ASSERT_EQ(blob.type(), dtype);
|
|
net.setInput(blob, "", kScale, kMean);
|
|
Mat out = net.forward();
|
|
ASSERT_EQ(out.type(), CV_32F);
|
|
normAssert(ref, out, "", 4e-4, 1e-3);
|
|
}
|
|
|
|
INSTANTIATE_TEST_CASE_P(/**/, setInput, Combine(
|
|
Values(1.0f, 1.0 / 127.5),
|
|
Values(Vec3f(), Vec3f(50, 50, 50), Vec3f(10, 50, 140)),
|
|
Values(CV_32F, CV_8U),
|
|
dnnBackendsAndTargets()
|
|
));
|
|
|
|
// Custom layer that overrides only the std::vector<Mat*>-based forward()
// overload (the one the class name refers to as deprecated). It computes
// output = input + 0.5 for the first input/output pair.
class CustomLayerWithDeprecatedForward CV_FINAL : public Layer
{
public:
    // Forwards the layer parameters to the base Layer.
    CustomLayerWithDeprecatedForward(const LayerParams &params) : Layer(params) {}

    // Factory function with the signature expected by layer registration.
    static Ptr<Layer> create(LayerParams& params)
    {
        return Ptr<Layer>(new CustomLayerWithDeprecatedForward(params));
    }

    // Requires CV_32F depth for inputs[0] and outputs[0]; `internals` is unused.
    virtual void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals) CV_OVERRIDE
    {
        CV_Assert_N(inputs[0]->depth() == CV_32F, outputs[0].depth() == CV_32F);
        cv::add(*inputs[0], 0.5f, outputs[0]);
    }
};
|
|
|
|
// Custom layer that overrides both forward() overloads. The
// InputArrayOfArrays overload tries an OpenCL path for OpenCL targets and
// otherwise defers to Layer::forward_fallback; the std::vector<Mat*>-based
// overload does the CPU computation. Both paths compute
// output = input + 0.5 for the first input/output pair.
class CustomLayerWithDeprecatedForwardAndFallback CV_FINAL : public Layer
{
public:
    // Forwards the layer parameters to the base Layer.
    CustomLayerWithDeprecatedForwardAndFallback(const LayerParams &params) : Layer(params) {}

    // Factory function with the signature expected by layer registration.
    static Ptr<Layer> create(LayerParams& params)
    {
        return Ptr<Layer>(new CustomLayerWithDeprecatedForwardAndFallback(params));
    }

    void forward(InputArrayOfArrays inputs, OutputArrayOfArrays outputs, OutputArrayOfArrays internals) CV_OVERRIDE
    {
        CV_TRACE_FUNCTION();
        CV_TRACE_ARG_VALUE(name, "name", name.c_str());

        // NOTE(review): CV_OCL_RUN is defined elsewhere; it is expected to
        // return from this function when forward_ocl() succeeds - confirm.
        CV_OCL_RUN(preferableTarget == DNN_TARGET_OPENCL || preferableTarget == DNN_TARGET_OPENCL_FP16,
                   forward_ocl(inputs, outputs, internals));

        // NOTE(review): presumably ends up in the std::vector<Mat*> overload
        // below - confirm against Layer::forward_fallback.
        Layer::forward_fallback(inputs, outputs, internals);
    }

    // Requires CV_32F depth for inputs[0] and outputs[0]; `internals` is unused.
    virtual void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals) CV_OVERRIDE
    {
        CV_Assert_N(inputs[0]->depth() == CV_32F, outputs[0].depth() == CV_32F);
        cv::add(*inputs[0], 0.5f, outputs[0]);
    }

#ifdef HAVE_OPENCL
    // UMat-based implementation. Returns false without doing any work when
    // the input depth is not CV_32F, and true after performing the addition.
    bool forward_ocl(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
    {
        if (inputs_arr.depth() != CV_32F)
            return false;

        std::vector<UMat> inputs;
        std::vector<UMat> outputs;
        inputs_arr.getUMatVector(inputs);
        outputs_arr.getUMatVector(outputs);
        cv::add(inputs[0], 0.5f, outputs[0]);
        return true;
    }
#endif
};
|
|
|
|
typedef testing::TestWithParam<tuple<Backend, Target> > DeprecatedForward;
|
|
TEST_P(DeprecatedForward, CustomLayer)
|
|
{
|
|
const int backend = get<0>(GetParam());
|
|
const int target = get<1>(GetParam());
|
|
|
|
Mat inp(5, 5, CV_32FC1);
|
|
randu(inp, -1.0f, 1.0f);
|
|
inp = blobFromImage(inp);
|
|
|
|
CV_DNN_REGISTER_LAYER_CLASS(CustomType, CustomLayerWithDeprecatedForward);
|
|
try
|
|
{
|
|
LayerParams lp;
|
|
Net net;
|
|
net.addLayerToPrev("testLayer", "CustomType", lp);
|
|
net.setPreferableBackend(backend);
|
|
net.setPreferableTarget(target);
|
|
net.setInput(inp);
|
|
Mat out = net.forward();
|
|
normAssert(out, inp + 0.5f, "", 2e-4, 7e-4);
|
|
}
|
|
catch (...)
|
|
{
|
|
LayerFactory::unregisterLayer("CustomType");
|
|
throw;
|
|
}
|
|
LayerFactory::unregisterLayer("CustomType");
|
|
}
|
|
|
|
// Runs a one-layer network built from
// CustomLayerWithDeprecatedForwardAndFallback on every (backend, target)
// pair and expects output == input + 0.5.
TEST_P(DeprecatedForward, CustomLayerWithFallback)
{
    const tuple<Backend, Target> param = GetParam();
    const int backendId = get<0>(param);
    const int targetId = get<1>(param);

    // Random 5x5 single-channel float image in [-1, 1), converted to a blob.
    Mat inp(5, 5, CV_32FC1);
    randu(inp, -1.0f, 1.0f);
    inp = blobFromImage(inp);

    CV_DNN_REGISTER_LAYER_CLASS(CustomType, CustomLayerWithDeprecatedForwardAndFallback);
    try
    {
        Net net;
        LayerParams lp;
        net.addLayerToPrev("testLayer", "CustomType", lp);
        net.setPreferableBackend(backendId);
        net.setPreferableTarget(targetId);
        net.setInput(inp);

        Mat out = net.forward();
        normAssert(out, inp + 0.5f, "", 2e-4, 7e-4);
    }
    catch (...)
    {
        // Undo the registration before propagating the exception.
        LayerFactory::unregisterLayer("CustomType");
        throw;
    }
    LayerFactory::unregisterLayer("CustomType");
}

INSTANTIATE_TEST_CASE_P(/**/, DeprecatedForward, dnnBackendsAndTargets());
|
|
|
|
// Builds a network with a single Slice layer (axis 0, slice point 2) that has
// two outputs, runs Net::forward(outBlobs, outNames) for that layer and checks
// that both outputs come back: rows [0, 2) and rows [2, 4) of the 4x5 input.
TEST(Net, forwardAndRetrieve)
{
    std::string prototxt =
        "input: \"data\"\n"
        "layer {\n"
        "  name: \"testLayer\"\n"
        "  type: \"Slice\"\n"
        "  bottom: \"data\"\n"
        "  top: \"firstCopy\"\n"
        "  top: \"secondCopy\"\n"
        "  slice_param {\n"
        "    axis: 0\n"
        "    slice_point: 2\n"
        "  }\n"
        "}";
    Net net = readNetFromCaffe(&prototxt[0], prototxt.size());
    net.setPreferableBackend(DNN_BACKEND_OPENCV);

    Mat inp(4, 5, CV_32F);
    randu(inp, -1, 1);
    net.setInput(inp);

    std::vector<String> outNames;
    outNames.push_back("testLayer");
    std::vector<std::vector<Mat> > outBlobs;

    net.forward(outBlobs, outNames);

    // Fatal assertions: the elements are indexed right below, so continuing
    // after a size mismatch would read out of bounds.
    ASSERT_EQ(outBlobs.size(), (size_t)1);
    ASSERT_EQ(outBlobs[0].size(), (size_t)2);
    normAssert(outBlobs[0][0], inp.rowRange(0, 2), "first part");
    normAssert(outBlobs[0][1], inp.rowRange(2, 4), "second part");
}
|
|
|
|
#ifdef HAVE_INF_ENGINE
|
|
static const std::chrono::milliseconds async_timeout(10000);
|
|
|
|
// This test runs network in synchronous mode for different inputs and then
|
|
// runs the same model asynchronously for the same inputs.
|
|
typedef testing::TestWithParam<tuple<int, Target> > Async;
|
|
TEST_P(Async, set_and_forward_single)
|
|
{
|
|
const int dtype = get<0>(GetParam());
|
|
const int target = get<1>(GetParam());
|
|
|
|
const std::string suffix = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? "_fp16" : "";
|
|
const std::string& model = findDataFile("dnn/layers/layer_convolution" + suffix + ".bin");
|
|
const std::string& proto = findDataFile("dnn/layers/layer_convolution" + suffix + ".xml");
|
|
|
|
Net netSync = readNet(model, proto);
|
|
netSync.setPreferableTarget(target);
|
|
|
|
Net netAsync = readNet(model, proto);
|
|
netAsync.setPreferableTarget(target);
|
|
|
|
// Generate inputs.
|
|
const int numInputs = 10;
|
|
std::vector<Mat> inputs(numInputs);
|
|
int blobSize[] = {2, 6, 75, 113};
|
|
for (int i = 0; i < numInputs; ++i)
|
|
{
|
|
inputs[i].create(4, &blobSize[0], dtype);
|
|
randu(inputs[i], 0, 255);
|
|
}
|
|
|
|
// Run synchronously.
|
|
std::vector<Mat> refs(numInputs);
|
|
for (int i = 0; i < numInputs; ++i)
|
|
{
|
|
netSync.setInput(inputs[i]);
|
|
refs[i] = netSync.forward().clone();
|
|
}
|
|
|
|
// Run asynchronously. To make test more robust, process inputs in the reversed order.
|
|
for (int i = numInputs - 1; i >= 0; --i)
|
|
{
|
|
netAsync.setInput(inputs[i]);
|
|
|
|
AsyncArray out = netAsync.forwardAsync();
|
|
ASSERT_TRUE(out.valid());
|
|
Mat result;
|
|
EXPECT_TRUE(out.get(result, async_timeout));
|
|
normAssert(refs[i], result, format("Index: %d", i).c_str(), 0, 0);
|
|
}
|
|
}
|
|
|
|
// Like set_and_forward_single, but all asynchronous requests are issued
// first and their results are collected only afterwards.
TEST_P(Async, set_and_forward_all)
{
    const tuple<int, Target> param = GetParam();
    const int dtype = get<0>(param);
    const int target = get<1>(param);

    // The "_fp16" variant of the model files is used for these two targets.
    const bool useFp16Files = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD);
    const std::string suffix = useFp16Files ? "_fp16" : "";
    const std::string binPath = findDataFile("dnn/layers/layer_convolution" + suffix + ".bin");
    const std::string xmlPath = findDataFile("dnn/layers/layer_convolution" + suffix + ".xml");

    Net netSync = readNet(binPath, xmlPath);
    netSync.setPreferableTarget(target);

    Net netAsync = readNet(binPath, xmlPath);
    netAsync.setPreferableTarget(target);

    // Random 2x6x75x113 inputs of the requested depth.
    const int kNumInputs = 10;
    int inputShape[] = {2, 6, 75, 113};
    std::vector<Mat> inputs(kNumInputs);
    for (int idx = 0; idx < kNumInputs; ++idx)
    {
        Mat& blob = inputs[idx];
        blob.create(4, &inputShape[0], dtype);
        randu(blob, 0, 255);
    }

    // Synchronous pass: keep a deep copy of every output as the reference.
    std::vector<Mat> refs(kNumInputs);
    for (int idx = 0; idx < kNumInputs; ++idx)
    {
        netSync.setInput(inputs[idx]);
        Mat syncOut = netSync.forward();
        refs[idx] = syncOut.clone();
    }

    // Issue every asynchronous request, in reverse input order.
    std::vector<AsyncArray> pending(kNumInputs);
    for (int idx = kNumInputs; idx-- > 0; )
    {
        netAsync.setInput(inputs[idx]);
        pending[idx] = netAsync.forwardAsync();
    }

    // Collect the results (same reverse order) and compare with zero tolerance.
    for (int idx = kNumInputs; idx-- > 0; )
    {
        ASSERT_TRUE(pending[idx].valid());

        Mat result;
        EXPECT_TRUE(pending[idx].get(result, async_timeout));
        normAssert(refs[idx], result, format("Index: %d", idx).c_str(), 0, 0);
    }
}

INSTANTIATE_TEST_CASE_P(/**/, Async, Combine(
    Values(CV_32F, CV_8U),
    testing::ValuesIn(getAvailableTargets(DNN_BACKEND_INFERENCE_ENGINE))
));
|
|
|
|
typedef testing::TestWithParam<Target> Test_Model_Optimizer;
|
|
TEST_P(Test_Model_Optimizer, forward_two_nets)
|
|
{
|
|
const int target = GetParam();
|
|
|
|
const std::string suffix = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? "_fp16" : "";
|
|
const std::string& model = findDataFile("dnn/layers/layer_convolution" + suffix + ".bin");
|
|
const std::string& proto = findDataFile("dnn/layers/layer_convolution" + suffix + ".xml");
|
|
|
|
Net net0 = readNet(model, proto);
|
|
net0.setPreferableTarget(target);
|
|
|
|
Net net1 = readNet(model, proto);
|
|
net1.setPreferableTarget(target);
|
|
|
|
// Generate inputs.
|
|
int blobSize[] = {2, 6, 75, 113};
|
|
Mat input(4, &blobSize[0], CV_32F);
|
|
randu(input, 0, 255);
|
|
|
|
net0.setInput(input);
|
|
Mat ref0 = net0.forward().clone();
|
|
|
|
net1.setInput(input);
|
|
Mat ref1 = net1.forward();
|
|
|
|
net0.setInput(input);
|
|
Mat ref2 = net0.forward();
|
|
|
|
normAssert(ref0, ref2, 0, 0);
|
|
}
|
|
INSTANTIATE_TEST_CASE_P(/**/, Test_Model_Optimizer,
|
|
testing::ValuesIn(getAvailableTargets(DNN_BACKEND_INFERENCE_ENGINE))
|
|
);
|
|
|
|
#endif // HAVE_INF_ENGINE
|
|
|
|
}} // namespace
|