2018-09-11 02:07:51 +08:00
|
|
|
// This file is part of OpenCV project.
|
|
|
|
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
|
|
|
// of this distribution and at http://opencv.org/license.html.
|
|
|
|
|
2019-03-29 21:42:58 +08:00
|
|
|
// Copyright (C) 2018-2019, Intel Corporation, all rights reserved.
|
2018-09-11 02:07:51 +08:00
|
|
|
// Third party copyrights are property of their respective owners.
|
|
|
|
|
|
|
|
|
|
|
|
#include "test_precomp.hpp"
|
|
|
|
#include "npy_blob.hpp"
|
|
|
|
#include <opencv2/dnn/shape_utils.hpp>
|
|
|
|
namespace opencv_test { namespace {
|
|
|
|
|
|
|
|
template<typename TString>
|
2019-06-20 21:43:28 +08:00
|
|
|
static std::string _tf(TString filename, bool required = true)
|
2018-09-11 02:07:51 +08:00
|
|
|
{
|
2019-06-20 21:43:28 +08:00
|
|
|
return findDataFile(std::string("dnn/onnx/") + filename, required);
|
2018-09-11 02:07:51 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
class Test_ONNX_layers : public DNNTestLayer
|
|
|
|
{
|
|
|
|
public:
|
2019-06-20 21:43:28 +08:00
|
|
|
bool required;
|
|
|
|
|
|
|
|
Test_ONNX_layers() : required(true) { }
|
|
|
|
|
2018-09-11 02:07:51 +08:00
|
|
|
enum Extension
|
|
|
|
{
|
|
|
|
npy,
|
|
|
|
pb
|
|
|
|
};
|
|
|
|
|
2019-03-29 21:42:58 +08:00
|
|
|
void testONNXModels(const String& basename, const Extension ext = npy,
|
2019-04-19 19:54:08 +08:00
|
|
|
const double l1 = 0, const float lInf = 0, const bool useSoftmax = false,
|
|
|
|
bool checkNoFallbacks = true)
|
2018-09-11 02:07:51 +08:00
|
|
|
{
|
2019-06-20 21:43:28 +08:00
|
|
|
String onnxmodel = _tf("models/" + basename + ".onnx", required);
|
2018-09-11 02:07:51 +08:00
|
|
|
Mat inp, ref;
|
|
|
|
if (ext == npy) {
|
|
|
|
inp = blobFromNPY(_tf("data/input_" + basename + ".npy"));
|
|
|
|
ref = blobFromNPY(_tf("data/output_" + basename + ".npy"));
|
|
|
|
}
|
|
|
|
else if (ext == pb) {
|
|
|
|
inp = readTensorFromONNX(_tf("data/input_" + basename + ".pb"));
|
|
|
|
ref = readTensorFromONNX(_tf("data/output_" + basename + ".pb"));
|
|
|
|
}
|
|
|
|
else
|
|
|
|
CV_Error(Error::StsUnsupportedFormat, "Unsupported extension");
|
|
|
|
|
|
|
|
checkBackend(&inp, &ref);
|
|
|
|
Net net = readNetFromONNX(onnxmodel);
|
|
|
|
ASSERT_FALSE(net.empty());
|
|
|
|
|
|
|
|
net.setPreferableBackend(backend);
|
|
|
|
net.setPreferableTarget(target);
|
|
|
|
|
|
|
|
net.setInput(inp);
|
2019-03-29 21:42:58 +08:00
|
|
|
Mat out = net.forward("");
|
|
|
|
|
|
|
|
if (useSoftmax)
|
|
|
|
{
|
|
|
|
LayerParams lp;
|
|
|
|
Net netSoftmax;
|
|
|
|
netSoftmax.addLayerToPrev("softmaxLayer", "SoftMax", lp);
|
|
|
|
netSoftmax.setPreferableBackend(DNN_BACKEND_OPENCV);
|
|
|
|
|
|
|
|
netSoftmax.setInput(out);
|
|
|
|
out = netSoftmax.forward();
|
|
|
|
|
|
|
|
netSoftmax.setInput(ref);
|
|
|
|
ref = netSoftmax.forward();
|
|
|
|
}
|
2018-09-11 02:07:51 +08:00
|
|
|
normAssert(ref, out, "", l1 ? l1 : default_l1, lInf ? lInf : default_lInf);
|
2019-04-19 19:54:08 +08:00
|
|
|
if (checkNoFallbacks)
|
|
|
|
expectNoFallbacksFromIE(net);
|
2018-09-11 02:07:51 +08:00
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2019-07-05 02:15:04 +08:00
|
|
|
// Instance normalization model; tagged for skipping on the CUDA backend.
TEST_P(Test_ONNX_layers, InstanceNorm)
{
    if (backend == DNN_BACKEND_CUDA)
    {
        applyTestTag(CV_TEST_TAG_DNN_SKIP_CUDA); /* MVN is not supported */
    }

    // On MYRIAD the "no fallbacks" check is disabled; every other argument
    // keeps the value testONNXModels() would use by default.
    const bool verifyNoFallbacks = (target != DNN_TARGET_MYRIAD);
    testONNXModels("instancenorm", npy, 0, 0, false, verifyNoFallbacks);
}
|
|
|
|
|
2018-09-11 02:07:51 +08:00
|
|
|
// Max pooling models, run without the "no fallbacks" check.
TEST_P(Test_ONNX_layers, MaxPooling)
{
    static const char* const kModels[] = { "maxpooling", "two_maxpooling" };
    for (const char* model : kModels)
        testONNXModels(model, npy, 0, 0, false, false);
}
|
|
|
|
|
|
|
|
// Single convolution model with default tolerances and .npy data.
TEST_P(Test_ONNX_layers, Convolution)
{
    const String model = "convolution";
    testONNXModels(model);
}
|
|
|
|
|
2019-04-30 22:08:17 +08:00
|
|
|
// 3D convolution models (with and without bias).
TEST_P(Test_ONNX_layers, Convolution3D)
{
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2019010000)
    // Inference Engine releases older than 2019R1 are skipped for this test.
    if (backend == DNN_BACKEND_INFERENCE_ENGINE)
    {
        throw SkipTestException("Test is enabled starts from 2019R1");
    }
#endif
    // Runs only when the target is CPU or the backend is CUDA.
    const bool canRun = (target == DNN_TARGET_CPU) || (backend == DNN_BACKEND_CUDA);
    if (!canRun)
    {
        throw SkipTestException("Only CPU and CUDA is supported");
    }

    static const char* const kModels[] = { "conv3d", "conv3d_bias" };
    for (const char* model : kModels)
        testONNXModels(model);
}
|
2019-03-29 21:42:58 +08:00
|
|
|
|
|
|
|
// Two stacked convolutions.
TEST_P(Test_ONNX_layers, Two_convolution)
{
#if defined(INF_ENGINE_RELEASE)
    // The VPU type is queried only for Inference Engine on MYRIAD.
    if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD)
    {
        if (getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X)
            applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X);
    }
#endif
    // Reference output values are in range [-0.855, 0.611]
    const String model = "two_convolution";
    testONNXModels(model);
}
|
|
|
|
|
2018-11-17 03:50:40 +08:00
|
|
|
// 2D deconvolution models, run without the "no fallbacks" check.
TEST_P(Test_ONNX_layers, Deconvolution)
{
    static const char* const kModels[] = {
        "deconvolution",
        "two_deconvolution",
        "deconvolution_group",
        "deconvolution_output_shape",
        "deconv_adjpad_2d"
    };
    for (const char* model : kModels)
        testONNXModels(model, npy, 0, 0, false, false);
}
|
|
|
|
|
2019-07-12 20:51:44 +08:00
|
|
|
// 3D deconvolution models.
TEST_P(Test_ONNX_layers, Deconvolution3D)
{
#if defined(INF_ENGINE_RELEASE)
    applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_2018R5);
#endif
    // Runs only for Inference Engine on CPU, or for the CUDA backend.
    const bool ieOnCpu = (backend == DNN_BACKEND_INFERENCE_ENGINE) && (target == DNN_TARGET_CPU);
    const bool onCuda = (backend == DNN_BACKEND_CUDA);
    if (!(ieOnCpu || onCuda))
    {
        throw SkipTestException("Only DLIE backend on CPU, and CUDA is supported");
    }

    static const char* const kModels[] = {
        "deconv3d",
        "deconv3d_bias",
        "deconv3d_pad",
        "deconv3d_adjpad"
    };
    for (const char* model : kModels)
        testONNXModels(model);
}
|
|
|
|
|
2018-09-11 02:07:51 +08:00
|
|
|
// Dropout model with default tolerances and .npy data.
TEST_P(Test_ONNX_layers, Dropout)
{
    const String model = "dropout";
    testONNXModels(model);
}
|
|
|
|
|
|
|
|
// Linear model; the OpenCV backend on the OpenCL FP16 target gets a skip tag.
TEST_P(Test_ONNX_layers, Linear)
{
    const bool ocvOnOpenclFp16 =
        (backend == DNN_BACKEND_OPENCV) && (target == DNN_TARGET_OPENCL_FP16);
    if (ocvOnOpenclFp16)
    {
        applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16);
    }
    testONNXModels("linear");
}
|
|
|
|
|
|
|
|
// ReLU model with default tolerances and .npy data.
TEST_P(Test_ONNX_layers, ReLU)
{
    const String model = "ReLU";
    testONNXModels(model);
}
|
|
|
|
|
2019-07-04 13:56:00 +08:00
|
|
|
// Clip model; data is read from .npy files.
TEST_P(Test_ONNX_layers, Clip)
{
    const String model = "clip";
    testONNXModels(model, npy);
}
|
|
|
|
|
2019-07-20 00:18:34 +08:00
|
|
|
// ReduceMean model with default tolerances and .npy data.
TEST_P(Test_ONNX_layers, ReduceMean)
{
    const String model = "reduce_mean";
    testONNXModels(model);
}
|
|
|
|
|
|
|
|
// 3D ReduceMean model.
TEST_P(Test_ONNX_layers, ReduceMean3D)
{
    // Runs only when the target is CPU or the backend is CUDA.
    const bool canRun = (target == DNN_TARGET_CPU) || (backend == DNN_BACKEND_CUDA);
    if (!canRun)
    {
        throw SkipTestException("Only CPU and CUDA is supported");
    }
    testONNXModels("reduce_mean3d");
}
|
|
|
|
|
2018-09-11 02:07:51 +08:00
|
|
|
// Max pooling followed by sigmoid, default tolerances and .npy data.
TEST_P(Test_ONNX_layers, MaxPooling_Sigmoid)
{
    const String model = "maxpooling_sigmoid";
    testONNXModels(model);
}
|
|
|
|
|
|
|
|
// Concatenation model; on the Inference Engine backend the OpenCL, OpenCL FP16
// and MYRIAD targets each get their own skip tag.
TEST_P(Test_ONNX_layers, Concatenation)
{
    if (backend == DNN_BACKEND_INFERENCE_ENGINE)
    {
        switch (target)
        {
        case DNN_TARGET_OPENCL_FP16:
            applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16);
            break;
        case DNN_TARGET_OPENCL:
            applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL);
            break;
        case DNN_TARGET_MYRIAD:
            applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD);
            break;
        default:
            break;
        }
    }
    testONNXModels("concatenation");
}
|
|
|
|
|
2019-06-22 15:13:28 +08:00
|
|
|
// 3D element-wise model.
TEST_P(Test_ONNX_layers, Eltwise3D)
{
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2019010000)
    // Only the Inference Engine backend depends on the IE release. Limiting the
    // skip to it (as Convolution3D does) lets other backends still run the test
    // and keeps the code below reachable.
    if (backend == DNN_BACKEND_INFERENCE_ENGINE)
        throw SkipTestException("Test is enabled starts from 2019R1");
#endif
    // On the Inference Engine backend only the CPU target is run.
    if (backend == DNN_BACKEND_INFERENCE_ENGINE && target != DNN_TARGET_CPU)
        throw SkipTestException("Only CPU on DLIE backend is supported");
    testONNXModels("eltwise3d");
}
|
|
|
|
|
2018-09-11 02:07:51 +08:00
|
|
|
// Runs the shared testONNXModels() helper on the "average_pooling" model.
TEST_P(Test_ONNX_layers, AveragePooling)
{
    const String basename = "average_pooling";
    testONNXModels(basename);
}
|
|
|
|
|
2019-04-30 22:08:17 +08:00
|
|
|
// 3D max pooling ("max_pool3d" model). Runs only for the CPU target or the
// CUDA backend; the fallback check of testONNXModels() is disabled
// (last argument is false).
// NOTE(review): with Inference Engine older than 2019R1 the first throw is
// unconditional, so the test is skipped for every backend.
TEST_P(Test_ONNX_layers, MaxPooling3D)
{
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2019010000)
    throw SkipTestException("Test is enabled starts from 2019R1");
#endif
    const bool isSupported = (target == DNN_TARGET_CPU) || (backend == DNN_BACKEND_CUDA);
    if (!isSupported)
    {
        throw SkipTestException("Only CPU and CUDA is supported");
    }
    testONNXModels("max_pool3d", npy, 0, 0, false, false);
}
|
|
|
|
|
|
|
|
// 3D average pooling ("ave_pool3d" model). Runs only for the CPU target or
// the CUDA backend.
// NOTE(review): with Inference Engine older than 2019R1 the first throw is
// unconditional, so the test is skipped for every backend.
TEST_P(Test_ONNX_layers, AvePooling3D)
{
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2019010000)
    throw SkipTestException("Test is enabled starts from 2019R1");
#endif
    const bool isSupported = (target == DNN_TARGET_CPU) || (backend == DNN_BACKEND_CUDA);
    if (!isSupported)
    {
        throw SkipTestException("Only CPU and CUDA is supported");
    }
    testONNXModels("ave_pool3d");
}
|
|
|
|
|
2019-07-12 01:13:52 +08:00
|
|
|
// "pool_conv_3d" model test. Runs only for the CPU target or the CUDA backend.
// NOTE(review): with Inference Engine older than 2019R1 the first throw is
// unconditional, so the test is skipped for every backend.
TEST_P(Test_ONNX_layers, PoolConv3D)
{
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2019010000)
    throw SkipTestException("Test is enabled starts from 2019R1");
#endif
    const bool isSupported = (target == DNN_TARGET_CPU) || (backend == DNN_BACKEND_CUDA);
    if (!isSupported)
    {
        throw SkipTestException("Only CPU and CUDA is supported");
    }
    testONNXModels("pool_conv_3d");
}
|
|
|
|
|
2018-09-11 02:07:51 +08:00
|
|
|
// Runs the shared testONNXModels() helper on the "batch_norm" model.
TEST_P(Test_ONNX_layers, BatchNormalization)
{
    const String basename = "batch_norm";
    testONNXModels(basename);
}
|
|
|
|
|
2019-04-29 15:29:34 +08:00
|
|
|
// "batch_norm_3d" model test. On the Inference Engine backend the OpenCL FP16,
// OpenCL and Myriad targets each get their dedicated skip tag applied first.
TEST_P(Test_ONNX_layers, BatchNormalization3D)
{
    if (backend == DNN_BACKEND_INFERENCE_ENGINE)
    {
        if (target == DNN_TARGET_OPENCL_FP16)
            applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16);
        else if (target == DNN_TARGET_OPENCL)
            applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL);
        else if (target == DNN_TARGET_MYRIAD)
            applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD);
    }
    testONNXModels("batch_norm_3d");
}
|
|
|
|
|
2018-10-09 03:18:41 +08:00
|
|
|
// "transpose" model test. On the Inference Engine backend the OpenCL FP16,
// OpenCL and Myriad targets each get their dedicated skip tag applied first.
TEST_P(Test_ONNX_layers, Transpose)
{
    if (backend == DNN_BACKEND_INFERENCE_ENGINE)
    {
        switch (target)
        {
        case DNN_TARGET_OPENCL_FP16:
            applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16);
            break;
        case DNN_TARGET_OPENCL:
            applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL);
            break;
        case DNN_TARGET_MYRIAD:
            applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD);
            break;
        default:
            break;
        }
    }
    testONNXModels("transpose");
}
|
|
|
|
|
2018-09-11 02:07:51 +08:00
|
|
|
// "mul" model test. Skip tags are applied for OpenCL FP16 on the OpenCV
// backend and for Myriad on the Inference Engine backend.
TEST_P(Test_ONNX_layers, Multiplication)
{
    if (target == DNN_TARGET_OPENCL_FP16 && backend == DNN_BACKEND_OPENCV)
    {
        applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16);
    }
    if (target == DNN_TARGET_MYRIAD && backend == DNN_BACKEND_INFERENCE_ENGINE)
    {
        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD);
    }
    testONNXModels("mul");
}
|
|
|
|
|
|
|
|
// "constant" model test. For Inference Engine release 2018R5 only, Myriad X
// devices get the MYRIAD_X and 2018R5 skip tags applied first.
TEST_P(Test_ONNX_layers, Constant)
{
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2018050000)
    if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD)
    {
        // The VPU type is queried only once backend and target already match.
        if (getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X)
        {
            applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X, CV_TEST_TAG_DNN_SKIP_IE_2018R5);
        }
    }
#endif
    testONNXModels("constant");
}
|
|
|
|
|
2018-10-31 23:24:05 +08:00
|
|
|
// "padding" model test. The fallback check of testONNXModels() is turned off
// when building against Inference Engine older than 2019R1; otherwise all
// arguments keep their default values.
TEST_P(Test_ONNX_layers, Padding)
{
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2019010000)
    const bool checkNoFallbacks = false;
#else
    const bool checkNoFallbacks = true;
#endif
    testONNXModels("padding", npy, 0, 0, false, checkNoFallbacks);
}
|
|
|
|
|
2019-02-22 00:48:46 +08:00
|
|
|
// Runs the shared testONNXModels() helper on the "resize_nearest" model.
TEST_P(Test_ONNX_layers, Resize)
{
    const String basename = "resize_nearest";
    testONNXModels(basename);
}
|
|
|
|
|
2018-09-11 02:07:51 +08:00
|
|
|
// Loads a model with two named inputs ("0" and "1"), feeds both blobs and
// compares the forward result with the stored reference using the default
// tolerances; finally checks that Inference Engine did not fall back.
TEST_P(Test_ONNX_layers, MultyInputs)
{
    const String modelPath = _tf("models/multy_inputs.onnx");

    Net net = readNetFromONNX(modelPath);
    ASSERT_FALSE(net.empty());

    net.setPreferableBackend(backend);
    net.setPreferableTarget(target);

    Mat firstInput = blobFromNPY(_tf("data/input_multy_inputs_0.npy"));
    Mat secondInput = blobFromNPY(_tf("data/input_multy_inputs_1.npy"));
    Mat expected = blobFromNPY(_tf("data/output_multy_inputs.npy"));
    checkBackend(&firstInput, &expected);

    net.setInput(firstInput, "0");
    net.setInput(secondInput, "1");
    Mat actual = net.forward();

    normAssert(expected, actual, "", default_l1, default_lInf);
    expectNoFallbacksFromIE(net);
}
|
|
|
|
|
2018-12-12 22:36:17 +08:00
|
|
|
// "dynamic_reshape" model test. On the Inference Engine backend the OpenCL
// FP16 and OpenCL targets each get their dedicated skip tag applied first.
TEST_P(Test_ONNX_layers, DynamicReshape)
{
    if (backend == DNN_BACKEND_INFERENCE_ENGINE)
    {
        if (target == DNN_TARGET_OPENCL_FP16)
            applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16);
        else if (target == DNN_TARGET_OPENCL)
            applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL);
    }
    testONNXModels("dynamic_reshape");
}
|
2018-09-11 02:07:51 +08:00
|
|
|
|
2019-02-22 00:48:46 +08:00
|
|
|
// Runs the shared testONNXModels() helper on the "unsqueeze" model.
TEST_P(Test_ONNX_layers, Reshape)
{
    const String basename = "unsqueeze";
    testONNXModels(basename);
}
|
|
|
|
|
2019-06-08 21:52:40 +08:00
|
|
|
// "slice" model test. The fallback check of testONNXModels() is turned off
// when building against Inference Engine older than 2019R1; otherwise all
// arguments keep their default values.
TEST_P(Test_ONNX_layers, Slice)
{
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2019010000)
    const bool checkNoFallbacks = false;
#else
    const bool checkNoFallbacks = true;
#endif
    testONNXModels("slice", npy, 0, 0, false, checkNoFallbacks);
}
|
|
|
|
|
2019-05-20 15:46:09 +08:00
|
|
|
// Checks the "softmax" model with default settings, then the "log_softmax"
// model with the fallback check disabled (last argument is false).
TEST_P(Test_ONNX_layers, Softmax)
{
    const bool checkNoFallbacksForLogSoftmax = false;

    testONNXModels("softmax");
    testONNXModels("log_softmax", npy, 0, 0, false, checkNoFallbacksForLogSoftmax);
}
|
|
|
|
|
2019-07-28 03:10:13 +08:00
|
|
|
// "split_max" model test; the generic Inference Engine skip tag is applied
// when running on that backend.
TEST_P(Test_ONNX_layers, Split_EltwiseMax)
{
    const bool isInferenceEngine = (backend == DNN_BACKEND_INFERENCE_ENGINE);
    if (isInferenceEngine)
    {
        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE);
    }
    testONNXModels("split_max");
}
|
|
|
|
|
2018-09-11 02:07:51 +08:00
|
|
|
// Instantiates every Test_ONNX_layers test for each parameter produced by
// dnnBackendsAndTargets(); the instantiation name prefix is left empty.
INSTANTIATE_TEST_CASE_P(/*nothing*/, Test_ONNX_layers, dnnBackendsAndTargets());
|
|
|
|
|
2019-06-20 21:43:28 +08:00
|
|
|
class Test_ONNX_nets : public Test_ONNX_layers
|
|
|
|
{
|
|
|
|
public:
|
|
|
|
Test_ONNX_nets() { required = false; }
|
|
|
|
};
|
|
|
|
|
2018-09-11 02:07:51 +08:00
|
|
|
// Runs AlexNet on a 227x227 test image and compares the output with the
// stored Caffe reference probabilities using the default tolerances.
// The memory tag is 512MB for the CPU target and 1GB for all other targets.
TEST_P(Test_ONNX_nets, Alexnet)
{
    applyTestTag(target == DNN_TARGET_CPU ? CV_TEST_TAG_MEMORY_512MB : CV_TEST_TAG_MEMORY_1GB);
    // The model file is looked up with required == false.
    const String model = _tf("models/alexnet.onnx", false);

    Net net = readNetFromONNX(model);
    ASSERT_FALSE(net.empty());

    net.setPreferableBackend(backend);
    net.setPreferableTarget(target);

    Mat inp = imread(_tf("../grace_hopper_227.png"));
    Mat ref = blobFromNPY(_tf("../caffe_alexnet_prob.npy"));
    checkBackend(&inp, &ref);

    // The network was already asserted non-empty above, so the check is not
    // repeated after setInput().
    net.setInput(blobFromImage(inp, 1.0f, Size(227, 227), Scalar(), false));
    Mat out = net.forward();

    // Reference first, actual output second - same order as the sibling tests.
    normAssert(ref, out, "", default_l1, default_lInf);
    expectNoFallbacksFromIE(net);
}
|
|
|
|
|
|
|
|
// Runs the shared testONNXModels() helper on "squeezenet" with input and
// reference tensors read from .pb files.
TEST_P(Test_ONNX_nets, Squeezenet)
{
    const String basename = "squeezenet";
    testONNXModels(basename, pb);
}
|
|
|
|
|
|
|
|
// Runs GoogLeNet on a batch of two test images and compares the output with
// the stored reference probabilities using the default tolerances.
// The generic Inference Engine skip tag is applied on that backend.
TEST_P(Test_ONNX_nets, Googlenet)
{
    if (backend == DNN_BACKEND_INFERENCE_ENGINE)
        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE);

    // The model file is looked up with required == false.
    const String model = _tf("models/googlenet.onnx", false);

    Net net = readNetFromONNX(model);
    ASSERT_FALSE(net.empty());

    net.setPreferableBackend(backend);
    net.setPreferableTarget(target);

    std::vector<Mat> images;
    images.push_back( imread(_tf("../googlenet_0.png")) );
    images.push_back( imread(_tf("../googlenet_1.png")) );
    Mat inp = blobFromImages(images, 1.0f, Size(), Scalar(), false);
    Mat ref = blobFromNPY(_tf("../googlenet_prob.npy"));
    checkBackend(&inp, &ref);

    // The network was already asserted non-empty above, so the check is not
    // repeated after setInput().
    net.setInput(inp);
    Mat out = net.forward();

    normAssert(ref, out, "", default_l1, default_lInf);
    expectNoFallbacksFromIE(net);
}
|
|
|
|
|
|
|
|
// "caffenet" model test with .pb tensors. The memory tag is 512MB for the CPU
// target and 1GB otherwise. For Inference Engine release 2019R3 only,
// Myriad X devices get the MYRIAD_X and 2019R3 skip tags applied.
TEST_P(Test_ONNX_nets, CaffeNet)
{
    applyTestTag(target == DNN_TARGET_CPU ? CV_TEST_TAG_MEMORY_512MB : CV_TEST_TAG_MEMORY_1GB);
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2019030000)
    if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD)
    {
        // The VPU type is queried only once backend and target already match.
        if (getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X)
        {
            applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X, CV_TEST_TAG_DNN_SKIP_IE_2019R3);
        }
    }
#endif
    testONNXModels("caffenet", pb);
}
|
|
|
|
|
|
|
|
// "rcnn_ilsvrc13" model test with .pb tensors and an explicit L1 tolerance.
// The memory tag is 512MB for the CPU target and 1GB otherwise. For Inference
// Engine release 2019R3 only, Myriad X devices get the MYRIAD_X and 2019R3
// skip tags applied.
TEST_P(Test_ONNX_nets, RCNN_ILSVRC13)
{
    applyTestTag(target == DNN_TARGET_CPU ? CV_TEST_TAG_MEMORY_512MB : CV_TEST_TAG_MEMORY_1GB);
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2019030000)
    if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD)
    {
        // The VPU type is queried only once backend and target already match.
        if (getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X)
        {
            applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X, CV_TEST_TAG_DNN_SKIP_IE_2019R3);
        }
    }
#endif
    // Reference output values are in range [-4.992, -1.161]
    const double l1 = 0.0045;
    testONNXModels("rcnn_ilsvrc13", pb, l1);
}
|
|
|
|
|
|
|
|
// "vgg16-bn" model test with .pb tensors and softmax enabled. The lInf
// tolerance is relaxed to 0.038 on the Myriad target.
TEST_P(Test_ONNX_nets, VGG16_bn)
{
    applyTestTag(CV_TEST_TAG_MEMORY_6GB);  // > 2.3Gb

    // output range: [-16; 27], after Softmax [0; 0.67]
    double lInf = default_lInf;
    if (target == DNN_TARGET_MYRIAD)
    {
        lInf = 0.038;
    }
    testONNXModels("vgg16-bn", pb, default_l1, lInf, true);
}
|
|
|
|
|
|
|
|
// "zfnet512" model test with .pb tensors, tagged with the 2GB memory tag.
TEST_P(Test_ONNX_nets, ZFNet)
{
    applyTestTag(CV_TEST_TAG_MEMORY_2GB);

    const String basename = "zfnet512";
    testONNXModels(basename, pb);
}
|
|
|
|
|
|
|
|
// "resnet18v1" model test with .pb tensors and softmax enabled. The fallback
// check is performed for every target except Myriad.
TEST_P(Test_ONNX_nets, ResNet18v1)
{
    applyTestTag(CV_TEST_TAG_MEMORY_512MB);

    // output range: [-16; 22], after Softmax [0, 0.51]
    const bool checkNoFallbacks = (target != DNN_TARGET_MYRIAD);
    testONNXModels("resnet18v1", pb, default_l1, default_lInf, true, checkNoFallbacks);
}
|
|
|
|
|
|
|
|
// "resnet50v1" model test with .pb tensors and softmax enabled. The fallback
// check is performed for every target except Myriad.
TEST_P(Test_ONNX_nets, ResNet50v1)
{
    applyTestTag(CV_TEST_TAG_MEMORY_512MB);

    // output range: [-67; 75], after Softmax [0, 0.98]
    const bool checkNoFallbacks = (target != DNN_TARGET_MYRIAD);
    testONNXModels("resnet50v1", pb, default_l1, default_lInf, true, checkNoFallbacks);
}
|
|
|
|
|
|
|
|
// "resnet101_duc_hdc" model test with .pb tensors, tagged as very long.
// Skip tags are applied for the Inference Engine backend (2019R1 and newer),
// for the Myriad target, and the test is always skipped on OpenCL targets.
TEST_P(Test_ONNX_nets, ResNet101_DUC_HDC)
{
    applyTestTag(CV_TEST_TAG_VERYLONG);

#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2019010000)
    if (backend == DNN_BACKEND_INFERENCE_ENGINE)
    {
        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE, CV_TEST_TAG_DNN_SKIP_IE_2019R1, CV_TEST_TAG_DNN_SKIP_IE_2019R1_1);
    }
#endif
#if defined(INF_ENGINE_RELEASE)
    if (target == DNN_TARGET_MYRIAD && backend == DNN_BACKEND_INFERENCE_ENGINE)
    {
        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD);
    }
#endif
    const bool isOpenCLTarget = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_OPENCL);
    if (isOpenCLTarget)
    {
        // On the OpenCV backend the matching OpenCL skip tag is applied
        // before the unconditional skip below.
        if (backend == DNN_BACKEND_OPENCV)
            applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_OPENCL : CV_TEST_TAG_DNN_SKIP_OPENCL_FP16);
        throw SkipTestException("Test is disabled for OpenCL targets");
    }
    testONNXModels("resnet101_duc_hdc", pb);
}
|
|
|
|
|
|
|
|
// "tiny_yolo2" model test with .pb tensors. Skipped when unstable tests are
// disabled; on Inference Engine builds skip tags are applied for OpenCL
// targets and for Myriad X. Tolerances are relaxed on OpenCL FP16 and Myriad.
TEST_P(Test_ONNX_nets, TinyYolov2)
{
    applyTestTag(CV_TEST_TAG_MEMORY_512MB);

    if (cvtest::skipUnstableTests)
    {
        throw SkipTestException("Skip unstable test");
    }
#if defined(INF_ENGINE_RELEASE)
    if (backend == DNN_BACKEND_INFERENCE_ENGINE
        && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16))
    {
        applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16);
    }

    if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD)
    {
        // The VPU type is queried only once backend and target already match.
        if (getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X)
        {
            applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X);
        }
    }
#endif

    // output range: [-11; 8]
    const bool relaxTolerances = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD);
    double l1 = relaxTolerances ? 0.017 : default_l1;
    double lInf = relaxTolerances ? 0.14 : default_lInf;
    testONNXModels("tiny_yolo2", pb, l1, lInf);
}
|
|
|
|
|
|
|
|
// "cnn_mnist" model test with .pb tensors, default tolerances and softmax
// enabled.
TEST_P(Test_ONNX_nets, CNN_MNIST)
{
    // output range: [-1952; 6574], after Softmax [0; 1]
    const bool useSoftmax = true;
    testONNXModels("cnn_mnist", pb, default_l1, default_lInf, useSoftmax);
}
|
|
|
|
|
|
|
|
// "mobilenetv2" model test with .pb tensors, default tolerances and softmax
// enabled.
TEST_P(Test_ONNX_nets, MobileNet_v2)
{
    // output range: [-166; 317], after Softmax [0; 1]
    const bool useSoftmax = true;
    testONNXModels("mobilenetv2", pb, default_l1, default_lInf, useSoftmax);
}
|
|
|
|
|
|
|
|
// "LResNet100E_IR" model test with .pb tensors. Applies a memory tag
// (512MB on CPU, 1GB otherwise) together with the debug-long tag, tags the
// OpenCL / OpenCL FP16 / Myriad targets of the Inference Engine backend, and
// picks backend/target specific tolerances.
TEST_P(Test_ONNX_nets, LResNet100E_IR)
{
    applyTestTag(
        (target == DNN_TARGET_CPU ? CV_TEST_TAG_MEMORY_512MB : CV_TEST_TAG_MEMORY_1GB),
        CV_TEST_TAG_DEBUG_LONG
    );
    if (backend == DNN_BACKEND_INFERENCE_ENGINE)
    {
        switch (target)
        {
        case DNN_TARGET_OPENCL_FP16:
            applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16);
            break;
        case DNN_TARGET_OPENCL:
            applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL);
            break;
        case DNN_TARGET_MYRIAD:
            applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD);
            break;
        default:
            break;
        }
    }

    // output range: [-3; 3]
    double l1 = default_l1;
    double lInf = default_lInf;
    if (target == DNN_TARGET_OPENCL_FP16 && backend == DNN_BACKEND_OPENCV)
    {
        l1 = 0.009;
        lInf = 0.035;
    }
    else if (target == DNN_TARGET_CPU && backend == DNN_BACKEND_INFERENCE_ENGINE)
    {
        l1 = 4.6e-5;
        lInf = 1.9e-4;
    }
    testONNXModels("LResNet100E_IR", pb, l1, lInf);
}
|
|
|
|
|
|
|
|
// Runs the "emotion_ferplus" ONNX model with backend/target-specific tolerances.
TEST_P(Test_ONNX_nets, Emotion_ferplus)
{
#if defined(INF_ENGINE_RELEASE)
    // The VPU type is queried only for Inference Engine + Myriad; Myriad X gets a skip tag.
    if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD)
    {
        if (getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X)
            applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X);
    }
#endif

    double l1 = default_l1;
    double lInf = default_lInf;

    // Output values are in range [-2.011, 2.111]
    if (target == DNN_TARGET_OPENCL_FP16)
    {
        // FP16: the OpenCV backend overrides only l1, the Inference Engine backend overrides both.
        if (backend == DNN_BACKEND_OPENCV)
        {
            l1 = 0.007;
        }
        else if (backend == DNN_BACKEND_INFERENCE_ENGINE)
        {
            l1 = 0.021;
            lInf = 0.034;
        }
    }
    else if (backend == DNN_BACKEND_INFERENCE_ENGINE &&
             (target == DNN_TARGET_CPU || target == DNN_TARGET_OPENCL))
    {
        l1 = 2.4e-4;
        lInf = 6e-4;
    }

    testONNXModels("emotion_ferplus", pb, l1, lInf);
}
|
|
|
|
|
|
|
|
// Runs the "inception_v2" ONNX model and compares its output with the stored .pb reference.
TEST_P(Test_ONNX_nets, Inception_v2)
{
    // Default tolerances; the comparison is requested with Softmax enabled.
    const bool useSoftmax = true;
    testONNXModels("inception_v2", pb, default_l1, default_lInf, useSoftmax);
}
|
|
|
|
|
|
|
|
// Runs the "densenet121" ONNX model and compares its output with the stored .pb reference.
TEST_P(Test_ONNX_nets, DenseNet121)
{
    applyTestTag(CV_TEST_TAG_MEMORY_512MB);

    // Raw output range: [-87; 138]; after Softmax the values are in [0; 1].
    const bool useSoftmax = true;
    // The no-fallback check is requested for every target except Myriad.
    const bool checkNoFallbacks = (target != DNN_TARGET_MYRIAD);
    testONNXModels("densenet121", pb, default_l1, default_lInf, useSoftmax, checkNoFallbacks);
}
|
|
|
|
|
2018-09-18 01:26:17 +08:00
|
|
|
// Runs the "inception_v1" ONNX model with the default arguments of testONNXModels
// (apart from the .pb reference format).
TEST_P(Test_ONNX_nets, Inception_v1)
{
#if defined(INF_ENGINE_RELEASE)
    // Inference Engine + Myriad receives a skip tag.
    if (target == DNN_TARGET_MYRIAD && backend == DNN_BACKEND_INFERENCE_ENGINE)
    {
        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD);
    }
#endif

    testONNXModels("inception_v1", pb);
}
|
2018-09-11 02:07:51 +08:00
|
|
|
|
2018-10-09 03:18:41 +08:00
|
|
|
// Runs the "shufflenet" ONNX model with the default arguments of testONNXModels
// (apart from the .pb reference format).
TEST_P(Test_ONNX_nets, Shufflenet)
{
    // On the Inference Engine backend the OpenCL FP16, OpenCL and Myriad targets
    // each receive their own skip tag.
    const bool isIE = (backend == DNN_BACKEND_INFERENCE_ENGINE);
    if (isIE && target == DNN_TARGET_OPENCL_FP16)
        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16);
    if (isIE && target == DNN_TARGET_OPENCL)
        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL);
    if (isIE && target == DNN_TARGET_MYRIAD)
        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD);

    testONNXModels("shufflenet", pb);
}
|
|
|
|
|
2019-05-30 22:36:00 +08:00
|
|
|
// Runs the resnet-34_kinetics ONNX model on two clips, each built by repeating a single
// image 16 times, and compares both outputs with stored .npy references.
TEST_P(Test_ONNX_nets, Resnet34_kinetics)
{
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2019010000)
    throw SkipTestException("Test is enabled starts from 2019R1");
#endif
    // The test proceeds only for the CPU target or the CUDA backend.
    const bool cpuTarget = (target == DNN_TARGET_CPU);
    const bool cudaBackend = (backend == DNN_BACKEND_CUDA);
    if (!(cpuTarget || cudaBackend))
        throw SkipTestException("Only CPU and CUDA is supported");

    // The model is looked up with required == false; the images use the default lookup.
    const String modelPath = findDataFile("dnn/resnet-34_kinetics.onnx", false);
    const Mat dogFrame = imread(findDataFile("dnn/dog416.png"));
    const Mat streetFrame = imread(findDataFile("dnn/street.png"));

    Mat expected[2];
    expected[0] = blobFromNPY(_tf("data/output_kinetics0.npy"));
    expected[1] = blobFromNPY(_tf("data/output_kinetics1.npy"));

    // Build one blob per clip: 16 copies of the same frame, 112x112, with mean subtraction.
    const int numFrames = 16;
    const Size frameSize(112, 112);
    const Scalar meanValue(114.7748, 107.7354, 99.4750);
    std::vector<Mat> dogClip(numFrames, dogFrame);
    std::vector<Mat> streetClip(numFrames, streetFrame);
    Mat blobs[2];
    blobs[0] = blobFromImages(dogClip, 1.0, frameSize, meanValue, true, true);
    blobs[1] = blobFromImages(streetClip, 1.0, frameSize, meanValue, true, true);

    // Helper network with a single Permute layer that swaps the first two blob axes.
    int permOrder[] = {1, 0, 2, 3};
    LayerParams permuteParams;
    permuteParams.set("order", DictValue::arrayInt<int*>(permOrder, 4));
    Net permuteNet;
    permuteNet.addLayerToPrev("perm", "Permute", permuteParams);

    Mat inputs[2];
    for (int i = 0; i < 2; ++i)
    {
        permuteNet.setInput(blobs[i]);
        inputs[i] = permuteNet.forward().clone();
    }

    // Reinterpret each permuted blob as a 5-dimensional tensor.
    int clipShape[] = {1, 3, 16, 112, 112};
    for (int i = 0; i < 2; ++i)
        inputs[i] = inputs[i].reshape(0, 5, clipShape);

    Net net = readNetFromONNX(modelPath);
    ASSERT_FALSE(net.empty());
    net.setPreferableBackend(backend);
    net.setPreferableTarget(target);

    // output range [-5, 11]
    const float l1 = 0.0013;
    const float lInf = 0.009;

    // Same sequence for both clips: backend check, forward pass, comparison with the reference.
    for (int i = 0; i < 2; ++i)
    {
        checkBackend(&inputs[i], &expected[i]);
        net.setInput(inputs[i]);
        Mat out = net.forward().clone();
        normAssert(expected[i], out, "", l1, lInf);
    }

    expectNoFallbacksFromIE(net);
}
|
|
|
|
|
2018-09-11 02:07:51 +08:00
|
|
|
// Instantiates the Test_ONNX_nets suite (empty instantiation prefix) with the
// parameter set returned by dnnBackendsAndTargets().
INSTANTIATE_TEST_CASE_P(/**/, Test_ONNX_nets, dnnBackendsAndTargets());
|
|
|
|
|
|
|
|
}} // namespace
|