opencv/modules/dnn/test/test_common.hpp
Yashas Samaga B L 613c12e590 Merge pull request #14827 from YashasSamaga:cuda4dnn-csl-low
CUDA backend for the DNN module

* stub cuda4dnn design

* minor fixes for tests and doxygen

* add csl public api directory to module headers

* add low-level CSL components

* add high-level CSL components

* integrate csl::Tensor into backbone code

* switch to CPU iff unsupported; otherwise, fail on error

* add fully connected layer

* add softmax layer

* add activation layers

* support arbitary rank TensorDescriptor

* pass input wrappers to `initCUDA()`

* add 1d/2d/3d-convolution

* add pooling layer

* reorganize and refactor code

* fixes for gcc, clang and doxygen; remove cxx14/17 code

* add blank_layer

* add LRN layer

* add rounding modes for pooling layer

* split tensor.hpp into tensor.hpp and tensor_ops.hpp

* add concat layer

* add scale layer

* add batch normalization layer

* split math.cu into activations.cu and math.hpp

* add eltwise layer

* add flatten layer

* add tensor transform api

* add asymmetric padding support for convolution layer

* add reshape layer

* fix rebase issues

* add permute layer

* add padding support for concat layer

* refactor and reorganize code

* add normalize layer

* optimize bias addition in scale layer

* add prior box layer

* fix and optimize normalize layer

* add asymmetric padding support for pooling layer

* add event API

* improve pooling performance for some padding scenarios

* avoid over-allocation of compute resources to kernels

* improve prior box performance

* enable layer fusion

* add const layer

* add resize layer

* add slice layer

* add padding layer

* add deconvolution layer

* fix channelwise  ReLU initialization

* add vector traits

* add vectorized versions of relu, clipped_relu, power

* add vectorized concat kernels

* improve concat_with_offsets performance

* vectorize scale and bias kernels

* add support for multi-billion element tensors

* vectorize prior box kernels

* fix address alignment check

* improve bias addition performance of conv/deconv/fc layers

* restructure code for supporting multiple targets

* add DNN_TARGET_CUDA_FP64

* add DNN_TARGET_FP16

* improve vectorization

* add region layer

* improve tensor API, add dynamic ranks

1. use ManagedPtr instead of a Tensor in backend wrapper
2. add new methods to tensor classes
  - size_range: computes the combined size of for a given axis range
  - tensor span/view can be constructed from a raw pointer and shape
3. the tensor classes can change their rank at runtime (previously rank was fixed at compile-time)
4. remove device code from tensor classes (as they are unused)
5. enforce strict conditions on tensor class APIs to improve debugging ability

* fix parametric relu activation

* add squeeze/unsqueeze tensor API

* add reorg layer

* optimize permute and enable 2d permute

* enable 1d and 2d slice

* add split layer

* add shuffle channel layer

* allow tensors of different ranks in reshape primitive

* patch SliceOp to allow Crop Layer

* allow extra shape inputs in reshape layer

* use `std::move_backward` instead of `std::move` for insert in resizable_static_array

* improve workspace management

* add spatial LRN

* add nms (cpu) to region layer

* add max pooling with argmax ( and a fix to limits.hpp)

* add max unpooling layer

* rename DNN_TARGET_CUDA_FP32 to DNN_TARGET_CUDA

* update supportBackend to be more rigorous

* remove stray include from preventing non-cuda build

* include op_cuda.hpp outside condition #if

* refactoring, fixes and many optimizations

* drop DNN_TARGET_CUDA_FP64

* fix gcc errors

* increase max. tensor rank limit to six

* add Interp layer

* drop custom layers; use BackendNode

* vectorize activation kernels

* fixes for gcc

* remove wrong assertion

* fix broken assertion in unpooling primitive

* fix build errors in non-CUDA build

* completely remove workspace from public API

* fix permute layer

* enable accuracy and perf. tests for DNN_TARGET_CUDA

* add asynchronous forward

* vectorize eltwise ops

* vectorize fill kernel

* fixes for gcc

* remove CSL headers from public API

* remove csl header source group from cmake

* update min. cudnn version in cmake

* add numerically stable FP32 log1pexp

* refactor code

* add FP16 specialization to cudnn based tensor addition

* vectorize scale1 and bias1 + minor refactoring

* fix doxygen build

* fix invalid alignment assertion

* clear backend wrappers before allocateLayers

* ignore memory lock failures

* do not allocate internal blobs

* integrate NVTX

* add numerically stable half precision log1pexp

* fix indentation, following coding style,  improve docs

* remove accidental modification of IE code

* Revert "add asynchronous forward"

This reverts commit 1154b9da9da07e9b52f8a81bdcea48cf31c56f70.

* [cmake] throw error for unsupported CC versions

* fix rebase issues

* add more docs, refactor code, fix bugs

* minor refactoring and fixes

* resolve warnings/errors from clang

* remove haveCUDA() checks from supportBackend()

* remove NVTX integration

* changes based on review comments

* avoid exception when no CUDA device is present

* add color code for CUDA in Net::dump
2019-10-21 14:28:00 +03:00

189 lines
6.5 KiB
C++

// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#ifndef __OPENCV_TEST_COMMON_HPP__
#define __OPENCV_TEST_COMMON_HPP__
#include "opencv2/dnn/utils/inference_engine.hpp"
#ifdef HAVE_OPENCL
#include "opencv2/core/ocl.hpp"
#endif
#define CV_TEST_TAG_DNN_SKIP_HALIDE "dnn_skip_halide"
#define CV_TEST_TAG_DNN_SKIP_OPENCL "dnn_skip_ocl"
#define CV_TEST_TAG_DNN_SKIP_OPENCL_FP16 "dnn_skip_ocl_fp16"
#define CV_TEST_TAG_DNN_SKIP_IE "dnn_skip_ie"
#define CV_TEST_TAG_DNN_SKIP_IE_2018R5 "dnn_skip_ie_2018r5"
#define CV_TEST_TAG_DNN_SKIP_IE_2019R1 "dnn_skip_ie_2019r1"
#define CV_TEST_TAG_DNN_SKIP_IE_2019R1_1 "dnn_skip_ie_2019r1_1"
#define CV_TEST_TAG_DNN_SKIP_IE_2019R2 "dnn_skip_ie_2019r2"
#define CV_TEST_TAG_DNN_SKIP_IE_2019R3 "dnn_skip_ie_2019r3"
#define CV_TEST_TAG_DNN_SKIP_IE_OPENCL "dnn_skip_ie_ocl"
#define CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16 "dnn_skip_ie_ocl_fp16"
#define CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_2 "dnn_skip_ie_myriad2"
#define CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X "dnn_skip_ie_myriadx"
#define CV_TEST_TAG_DNN_SKIP_IE_MYRIAD CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_2, CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X
#define CV_TEST_TAG_DNN_SKIP_VULKAN "dnn_skip_vulkan"
#define CV_TEST_TAG_DNN_SKIP_CUDA "dnn_skip_cuda"
#define CV_TEST_TAG_DNN_SKIP_CUDA_FP16 "dnn_skip_cuda_fp16"
#define CV_TEST_TAG_DNN_SKIP_CUDA_FP32 "dnn_skip_cuda_fp32"
namespace cv { namespace dnn {
CV__DNN_INLINE_NS_BEGIN
void PrintTo(const cv::dnn::Backend& v, std::ostream* os);
void PrintTo(const cv::dnn::Target& v, std::ostream* os);
using opencv_test::tuple;
using opencv_test::get;
void PrintTo(const tuple<cv::dnn::Backend, cv::dnn::Target> v, std::ostream* os);
CV__DNN_INLINE_NS_END
}} // namespace cv::dnn
namespace opencv_test {
void initDNNTests();
using namespace cv::dnn;
static inline const std::string &getOpenCVExtraDir()
{
return cvtest::TS::ptr()->get_data_path();
}
void normAssert(
cv::InputArray ref, cv::InputArray test, const char *comment = "",
double l1 = 0.00001, double lInf = 0.0001);
std::vector<cv::Rect2d> matToBoxes(const cv::Mat& m);
void normAssertDetections(
const std::vector<int>& refClassIds,
const std::vector<float>& refScores,
const std::vector<cv::Rect2d>& refBoxes,
const std::vector<int>& testClassIds,
const std::vector<float>& testScores,
const std::vector<cv::Rect2d>& testBoxes,
const char *comment = "", double confThreshold = 0.0,
double scores_diff = 1e-5, double boxes_iou_diff = 1e-4);
// For SSD-based object detection networks which produce output of shape 1x1xNx7
// where N is a number of detections and an every detection is represented by
// a vector [batchId, classId, confidence, left, top, right, bottom].
void normAssertDetections(
cv::Mat ref, cv::Mat out, const char *comment = "",
double confThreshold = 0.0, double scores_diff = 1e-5,
double boxes_iou_diff = 1e-4);
void readFileContent(const std::string& filename, CV_OUT std::vector<char>& content);
#ifdef HAVE_INF_ENGINE
bool validateVPUType();
#endif
testing::internal::ParamGenerator< tuple<Backend, Target> > dnnBackendsAndTargets(
bool withInferenceEngine = true,
bool withHalide = false,
bool withCpuOCV = true,
bool withVkCom = true,
bool withCUDA = true
);
class DNNTestLayer : public TestWithParam<tuple<Backend, Target> >
{
public:
dnn::Backend backend;
dnn::Target target;
double default_l1, default_lInf;
DNNTestLayer()
{
backend = (dnn::Backend)(int)get<0>(GetParam());
target = (dnn::Target)(int)get<1>(GetParam());
getDefaultThresholds(backend, target, &default_l1, &default_lInf);
}
static void getDefaultThresholds(int backend, int target, double* l1, double* lInf)
{
if (target == DNN_TARGET_CUDA_FP16 || target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD)
{
*l1 = 4e-3;
*lInf = 2e-2;
}
else
{
*l1 = 1e-5;
*lInf = 1e-4;
}
}
static void checkBackend(int backend, int target, Mat* inp = 0, Mat* ref = 0)
{
if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD)
{
if (inp && ref && inp->dims == 4 && ref->dims == 4 &&
inp->size[0] != 1 && inp->size[0] != ref->size[0])
{
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD);
throw SkipTestException("Inconsistent batch size of input and output blobs for Myriad plugin");
}
}
}
void expectNoFallbacks(Net& net)
{
// Check if all the layers are supported with current backend and target.
// Some layers might be fused so their timings equal to zero.
std::vector<double> timings;
net.getPerfProfile(timings);
std::vector<String> names = net.getLayerNames();
CV_Assert(names.size() == timings.size());
for (int i = 0; i < names.size(); ++i)
{
Ptr<dnn::Layer> l = net.getLayer(net.getLayerId(names[i]));
bool fused = !timings[i];
if ((!l->supportBackend(backend) || l->preferableTarget != target) && !fused)
CV_Error(Error::StsNotImplemented, "Layer [" + l->name + "] of type [" +
l->type + "] is expected to has backend implementation");
}
}
void expectNoFallbacksFromIE(Net& net)
{
if (backend == DNN_BACKEND_INFERENCE_ENGINE)
expectNoFallbacks(net);
}
void expectNoFallbacksFromCUDA(Net& net)
{
if (backend == DNN_BACKEND_CUDA)
expectNoFallbacks(net);
}
protected:
void checkBackend(Mat* inp = 0, Mat* ref = 0)
{
checkBackend(backend, target, inp, ref);
}
};
} // namespace
// src/op_inf_engine.hpp
#define INF_ENGINE_VER_MAJOR_GT(ver) (((INF_ENGINE_RELEASE) / 10000) > ((ver) / 10000))
#define INF_ENGINE_VER_MAJOR_GE(ver) (((INF_ENGINE_RELEASE) / 10000) >= ((ver) / 10000))
#define INF_ENGINE_VER_MAJOR_LT(ver) (((INF_ENGINE_RELEASE) / 10000) < ((ver) / 10000))
#define INF_ENGINE_VER_MAJOR_LE(ver) (((INF_ENGINE_RELEASE) / 10000) <= ((ver) / 10000))
#define INF_ENGINE_VER_MAJOR_EQ(ver) (((INF_ENGINE_RELEASE) / 10000) == ((ver) / 10000))
#endif