Merge branch 4.x

This commit is contained in:
Alexander Smorkalov 2023-08-08 17:31:57 +03:00
commit a6748df587
37 changed files with 575 additions and 210 deletions

View File

@ -74,6 +74,10 @@ if(POLICY CMP0077)
cmake_policy(SET CMP0077 NEW) # CMake 3.13+: option() honors normal variables. cmake_policy(SET CMP0077 NEW) # CMake 3.13+: option() honors normal variables.
endif() endif()
if(POLICY CMP0146)
cmake_policy(SET CMP0146 OLD) # CMake 3.27+: use CMake FindCUDA if available.
endif()
# #
# Configure OpenCV CMake hooks # Configure OpenCV CMake hooks
# #

View File

@ -643,4 +643,69 @@ INSTANTIATE_TEST_CASE_P(/**/, Layer_ScatterND, testing::Values(std::make_tuple(D
INSTANTIATE_TEST_CASE_P(/**/, Layer_LayerNorm, testing::Values(std::make_tuple(DNN_BACKEND_OPENCV, DNN_TARGET_CPU))); INSTANTIATE_TEST_CASE_P(/**/, Layer_LayerNorm, testing::Values(std::make_tuple(DNN_BACKEND_OPENCV, DNN_TARGET_CPU)));
INSTANTIATE_TEST_CASE_P(/**/, Layer_LayerNormExpanded, testing::Values(std::make_tuple(DNN_BACKEND_OPENCV, DNN_TARGET_CPU))); INSTANTIATE_TEST_CASE_P(/**/, Layer_LayerNormExpanded, testing::Values(std::make_tuple(DNN_BACKEND_OPENCV, DNN_TARGET_CPU)));
// Parameters: (input shape, output dimension, is_matmul flag, (backend, target)).
typedef TestBaseWithParam<tuple<Vec4i, int, bool, tuple<Backend, Target> > > Layer_FullyConnected;
// Measures Net::forward() for a single "InnerProduct" layer with random
// CV_32F input and weights, without bias and with transB enabled.
PERF_TEST_P_(Layer_FullyConnected, fc)
{
    const Vec4i shapeParam = get<0>(GetParam());
    const int outDims = get<1>(GetParam());
    const bool isMatMul = get<2>(GetParam());
    const int backendId = get<0>(get<3>(GetParam()));
    const int targetId = get<1>(get<3>(GetParam()));

    // A zero component terminates the shape, so the same Vec4i parameter
    // can describe inputs with fewer than four dimensions.
    std::vector<int> inpShape;
    inpShape.reserve(4);
    for (int d = 0; d < 4 && shapeParam[d] != 0; ++d)
        inpShape.push_back(shapeParam[d]);

    Mat input(inpShape, CV_32F);
    randn(input, 0, 1);
    const int axis = input.dims - 1;

    // MatMul mode: weights keep the input's shape except for the
    // second-to-last dimension, which becomes outDims.
    // Otherwise: plain 2D weights of outDims x (size of the last input axis).
    std::vector<int> weightShape;
    if (!isMatMul) {
        weightShape = {outDims, (int)input.total(axis, input.dims)};
    } else {
        weightShape = inpShape;
        weightShape[weightShape.size() - 2] = outDims;
    }
    Mat weights(weightShape, CV_32F);
    randn(weights, 0, 1);

    LayerParams lp;
    lp.set("axis", axis);
    lp.set("is_matmul", weights.dims > 2);
    lp.set("bias_term", false);
    lp.set("transB", true);
    lp.set("num_output", (int)weights.total(0, weights.dims - 1));
    lp.blobs.push_back(weights);

    Net net;
    net.addLayerToPrev("matmul", "InnerProduct", lp);
    net.setInput(input);
    net.setPreferableBackend(backendId);
    net.setPreferableTarget(targetId);

    // One untimed pass before the measured cycles.
    Mat warmupOutput = net.forward();

    TEST_CYCLE()
    {
        net.forward();
    }
    SANITY_CHECK_NOTHING();
}
// Parameter grid for Layer_FullyConnected: every combination of the input
// shapes, output dimensions, is_matmul flag and backend/target pairs below.
// NOTE(review): the 3-argument Vec4i presumably leaves its 4th component at 0,
// which the test's shape loop treats as "end of shape" — confirm.
INSTANTIATE_TEST_CASE_P(/**/, Layer_FullyConnected, Combine(
Values( // input size
Vec4i(5, 512, 384),
Vec4i(5, 16, 512, 128)
),
Values(256, 512, 1024), // output dimension
testing::Bool(), // is_matmul
dnnBackendsAndTargets()
));
} // namespace } // namespace

View File

@ -248,6 +248,11 @@ void selu(const Stream& stream, Span<T> output, View<T> input, T alpha, T gamma)
generic_op<T, SeluFunctor<T>>(stream, output, input, {alpha, gamma}); generic_op<T, SeluFunctor<T>>(stream, output, input, {alpha, gamma});
} }
// GELU activation launcher: forwards `stream`, `output` and `input` to
// generic_op instantiated with GeluFunctor<T>. Takes no extra parameters.
template <class T>
void gelu(const Stream& stream, Span<T> output, View<T> input) {
generic_op<T, GeluFunctor<T>>(stream, output, input);
}
template <class T> template <class T>
void sign(const Stream& stream, Span<T> output, View<T> input) { void sign(const Stream& stream, Span<T> output, View<T> input) {
generic_op<T, SignFunctor<T>>(stream, output, input); generic_op<T, SignFunctor<T>>(stream, output, input);
@ -324,6 +329,7 @@ template void tan<__half>(const Stream&, Span<__half>, View<__half>);
template void celu<__half>(const Stream&, Span<__half>, View<__half>, __half); template void celu<__half>(const Stream&, Span<__half>, View<__half>, __half);
template void hardsigmoid<__half>(const Stream&, Span<__half>, View<__half>, __half, __half); template void hardsigmoid<__half>(const Stream&, Span<__half>, View<__half>, __half, __half);
template void selu<__half>(const Stream&, Span<__half>, View<__half>, __half, __half); template void selu<__half>(const Stream&, Span<__half>, View<__half>, __half, __half);
template void gelu<__half>(const Stream&, Span<__half>, View<__half>);
template void thresholdedrelu<__half>(const Stream&, Span<__half>, View<__half>, __half); template void thresholdedrelu<__half>(const Stream&, Span<__half>, View<__half>, __half);
template void power<__half>(const Stream&, Span<__half>, View<__half>, __half, __half, __half); template void power<__half>(const Stream&, Span<__half>, View<__half>, __half, __half, __half);
template void exp<__half>(const Stream&, Span<__half>, View<__half>, __half, __half); template void exp<__half>(const Stream&, Span<__half>, View<__half>, __half, __half);
@ -366,6 +372,7 @@ template void tan<float>(const Stream&, Span<float>, View<float>);
template void celu<float>(const Stream&, Span<float>, View<float>, float); template void celu<float>(const Stream&, Span<float>, View<float>, float);
template void hardsigmoid<float>(const Stream&, Span<float>, View<float>, float, float); template void hardsigmoid<float>(const Stream&, Span<float>, View<float>, float, float);
template void selu<float>(const Stream&, Span<float>, View<float>, float, float); template void selu<float>(const Stream&, Span<float>, View<float>, float, float);
template void gelu<float>(const Stream&, Span<float>, View<float>);
template void thresholdedrelu<float>(const Stream&, Span<float>, View<float>, float); template void thresholdedrelu<float>(const Stream&, Span<float>, View<float>, float);
template void power<float>(const Stream&, Span<float>, View<float>, float, float, float); template void power<float>(const Stream&, Span<float>, View<float>, float, float, float);
template void exp<float>(const Stream&, Span<float>, View<float>, float, float); template void exp<float>(const Stream&, Span<float>, View<float>, float, float);

View File

@ -588,6 +588,21 @@ struct SeluFunctor {
T alpha, gamma; T alpha, gamma;
}; };
// Functor for the erf-based GELU activation:
//   gelu(x) = 0.5 * x * (1 + erf(x * M_SQRT1_2))
// where M_SQRT1_2 is the math constant 1/sqrt(2).
template <class T>
struct GeluFunctor {
// Empty parameter pack: the functor has no tunable parameters.
struct Params {
CUDA4DNN_HOST_DEVICE Params() { }
};
CUDA4DNN_DEVICE GeluFunctor() { }
// `params` is accepted but unused, since Params carries no data.
CUDA4DNN_DEVICE GeluFunctor(const Params& params) { }
// Returns GELU of `value`; all constants are cast to T before use so the
// arithmetic is carried out in T.
CUDA4DNN_DEVICE T operator()(T value) {
using csl::device::erf;
return static_cast<T>(0.5f) * value * (static_cast<T>(1.f) + erf(value * static_cast<T>(M_SQRT1_2)));
}
};
template <class T> template <class T>
struct ThresholdedReluFunctor { struct ThresholdedReluFunctor {
struct Params { struct Params {

View File

@ -114,6 +114,9 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels {
template <class T> template <class T>
void selu(const csl::Stream& stream, csl::Span<T> output, csl::View<T> input, T alpha, T gamma); void selu(const csl::Stream& stream, csl::Span<T> output, csl::View<T> input, T alpha, T gamma);
// GELU activation entry point: takes a stream, an output span and an input view.
// NOTE(review): presumably reads `input` and writes the activation to `output`
// with work issued on `stream` — confirm against the definition.
template <class T>
void gelu(const csl::Stream& stream, csl::Span<T> output, csl::View<T> input);
template <class T> template <class T>
void thresholdedrelu(const csl::Stream& stream, csl::Span<T> output, csl::View<T> input, T alpha); void thresholdedrelu(const csl::Stream& stream, csl::Span<T> output, csl::View<T> input, T alpha);

View File

@ -537,6 +537,20 @@ namespace cv { namespace dnn { namespace cuda4dnn {
const T alpha, gamma; const T alpha, gamma;
}; };
// GELU operation built on BaseOp: holds a csl::Stream and forwards each
// calculate() call to kernels::gelu<T>.
template <class T>
class GeluOp final : public BaseOp<GeluOp, T> {
public:
// Takes the stream by value and moves it into the member.
GeluOp(csl::Stream stream_) : stream(std::move(stream_)) { }
// Runs GELU from `input` into `output` on the stored stream.
void calculate(csl::TensorSpan<T> output, csl::TensorView<T> input) const
{
kernels::gelu<T>(stream, output, input);
}
private:
// Stream passed to kernels::gelu on every calculate() call.
csl::Stream stream;
};
template <class T> template <class T>
class ThresholdedReluOp final : public BaseOp<ThresholdedReluOp, T> { class ThresholdedReluOp final : public BaseOp<ThresholdedReluOp, T> {
public: public:

View File

@ -111,7 +111,7 @@ namespace cv { namespace dnn { namespace cuda4dnn {
* or there might be several weights * or there might be several weights
* or we don't have to scale * or we don't have to scale
*/ */
if (weight != 1.0) if (weight != static_cast<T>(1.0f))
{ {
kernels::scale1_with_bias1<T>(stream, output, input, weight, 1.0); kernels::scale1_with_bias1<T>(stream, output, input, weight, 1.0);
} }

View File

@ -121,7 +121,7 @@ namespace cv { namespace dnn { namespace cuda4dnn {
new_coords new_coords
); );
if (nms_iou_threshold > 0) { if (nms_iou_threshold > static_cast<T>(0.0f)) {
auto output_mat = output_wrapper->getMutableHostMat(); auto output_mat = output_wrapper->getMutableHostMat();
CV_Assert(output_mat.type() == CV_32F); CV_Assert(output_mat.type() == CV_32F);
for (int i = 0; i < input.get_axis_size(0); i++) { for (int i = 0; i < input.get_axis_size(0); i++) {

View File

@ -446,66 +446,6 @@ void InfEngineNgraphNet::addOutput(const Ptr<InfEngineNgraphNode>& node)
requestedOutputs.insert({name, node.get()}); requestedOutputs.insert({name, node.get()});
} }
void InfEngineNgraphNet::setNodePtr(std::shared_ptr<ngraph::Node>* ptr) {
all_nodes.emplace((*ptr)->get_friendly_name(), ptr);
}
void InfEngineNgraphNet::release()
{
// FIXIT release should not be conditional, release ALL
for (auto& node : components.back()) {
#if INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2020_4)
if (!(ngraph::op::is_parameter(node) || ngraph::op::is_output(node) || ngraph::op::is_constant(node)) ) {
#else
if (!(node->is_parameter() || node->is_output() || node->is_constant()) ) {
#endif
auto it = all_nodes.find(node->get_friendly_name());
if (it != all_nodes.end()) {
it->second->reset();
all_nodes.erase(it);
}
}
}
}
void InfEngineNgraphNet::dfs(std::shared_ptr<ngraph::Node>& node,
std::vector<std::shared_ptr<ngraph::Node>>& comp,
std::unordered_map<std::string, bool>& used) {
used[node->get_friendly_name()] = true;
comp.push_back(node);
auto inputs = node->get_users();
for (size_t i = 0; i < node->get_input_size(); ++i) {
inputs.push_back(node->input_value(i).get_node()->shared_from_this());
}
for (auto& to : inputs) {
if (!used[to->get_friendly_name()]) {
dfs(to, comp, used);
}
}
}
int InfEngineNgraphNet::getNumComponents()
{
if (!components.empty()) {
return components.size();
}
std::unordered_map<std::string, bool> used;
auto inputs = ngraph_function->get_ordered_ops();
for (auto& node : inputs) {
used.emplace(node->get_friendly_name(), false);
}
for (auto& node : inputs) {
if (!used[node->get_friendly_name()]) {
std::vector<std::shared_ptr<ngraph::Node>> current_comp;
dfs(node, current_comp, used);
components.push_back(current_comp);
}
}
return components.size();
}
void InfEngineNgraphNet::createNet(Target targetId) { void InfEngineNgraphNet::createNet(Target targetId) {
if (!hasNetOwner) if (!hasNetOwner)
{ {
@ -524,47 +464,8 @@ void InfEngineNgraphNet::createNet(Target targetId) {
} }
CV_Assert_N(!inputs_vec.empty(), !outs.empty()); CV_Assert_N(!inputs_vec.empty(), !outs.empty());
ngraph_function = std::make_shared<ngraph::Function>(outs, inputs_vec); ngraph_function = std::make_shared<ngraph::Function>(outs, inputs_vec);
int num_comp = getNumComponents();
CV_LOG_DEBUG(NULL, "DNN/IE: number of subgraphs: " << num_comp);
if (num_comp > 1) {
for (int i = num_comp - 1; i >= 0; --i) {
ngraph::ResultVector outputs;
ngraph::ParameterVector inps;
for (auto& node : components.back()) {
#if INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2020_4)
if (ngraph::op::is_parameter(node)) {
#else
if (node->is_parameter()) {
#endif
CV_LOG_DEBUG(NULL, "DNN/IE: subgraph[" << i << "]: +input[" << inps.size() << "] = '" << node->get_friendly_name() << "'");
auto parameter = std::dynamic_pointer_cast<ngraph::op::Parameter>(node);
inps.push_back(parameter);
}
#if INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2020_4)
else if (ngraph::op::is_output(node)) {
#else
else if (node->is_output()) {
#endif
CV_LOG_DEBUG(NULL, "DNN/IE: subgraph[" << i << "]: +output[" << outputs.size() << "] = '" << node->get_friendly_name() << "'");
auto result = std::dynamic_pointer_cast<ngraph::op::Result>(node);
outputs.push_back(result);
}
}
CV_LOG_DEBUG(NULL, "DNN/IE: subgraph[" << i << ": nodes=" << components.back().size() << " inputs=" << inps.size() << " outputs=" << outputs.size());
isInit = false;
CV_Assert_N(!inps.empty(), !outputs.empty());
ngraph_function = std::make_shared<ngraph::Function>(outputs, inps);
release();
components.pop_back();
init(targetId); init(targetId);
} }
} else {
release();
components.clear();
init(targetId);
}
}
} }
#if INF_ENGINE_VER_MAJOR_LT(INF_ENGINE_RELEASE_2022_1) #if INF_ENGINE_VER_MAJOR_LT(INF_ENGINE_RELEASE_2022_1)

View File

@ -50,22 +50,14 @@ public:
void addBlobs(const std::vector<cv::Ptr<BackendWrapper> >& ptrs); void addBlobs(const std::vector<cv::Ptr<BackendWrapper> >& ptrs);
void createNet(Target targetId); void createNet(Target targetId);
void setNodePtr(std::shared_ptr<ngraph::Node>* ptr);
void reset(); void reset();
//private: //private:
detail::NetImplBase& netImpl_; detail::NetImplBase& netImpl_;
void release();
int getNumComponents();
void dfs(std::shared_ptr<ngraph::Node>& node, std::vector<std::shared_ptr<ngraph::Node>>& comp,
std::unordered_map<std::string, bool>& used);
ngraph::ParameterVector inputs_vec; ngraph::ParameterVector inputs_vec;
std::shared_ptr<ngraph::Function> ngraph_function; std::shared_ptr<ngraph::Function> ngraph_function;
std::vector<std::vector<std::shared_ptr<ngraph::Node>>> components;
std::unordered_map<std::string, std::shared_ptr<ngraph::Node>* > all_nodes;
InferenceEngine::ExecutableNetwork netExec; InferenceEngine::ExecutableNetwork netExec;
#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2022_1) #if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2022_1)

View File

@ -221,7 +221,7 @@ public:
{ {
return backendId == DNN_BACKEND_OPENCV || return backendId == DNN_BACKEND_OPENCV ||
(backendId == DNN_BACKEND_CUDA && !_groupByClasses) || (backendId == DNN_BACKEND_CUDA && !_groupByClasses) ||
(backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && !_locPredTransposed && _bboxesNormalized); backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH;
} }
bool getMemoryShapes(const std::vector<MatShape> &inputs, bool getMemoryShapes(const std::vector<MatShape> &inputs,
@ -1006,9 +1006,30 @@ public:
virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs, const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs, const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
{ {
CV_Assert(nodes.size() == 3); CV_Assert(nodes.size() == 3);
auto& box_logits = nodes[0].dynamicCast<InfEngineNgraphNode>()->node; auto box_logits = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
auto& class_preds = nodes[1].dynamicCast<InfEngineNgraphNode>()->node; auto class_preds = nodes[1].dynamicCast<InfEngineNgraphNode>()->node;
auto& proposals = nodes[2].dynamicCast<InfEngineNgraphNode>()->node; auto proposals = nodes[2].dynamicCast<InfEngineNgraphNode>()->node;
if (_locPredTransposed) {
// Convert box predictions from yxYX to xyXY
box_logits = std::make_shared<ngraph::op::v1::Reshape>(box_logits,
std::make_shared<ngraph::op::Constant>(ngraph::element::i32, ngraph::Shape{3}, std::vector<int32_t>{0, -1, 2}),
true
);
int axis = 2;
box_logits = std::make_shared<ngraph::op::v1::Reverse>(box_logits,
std::make_shared<ngraph::op::Constant>(ngraph::element::i32, ngraph::Shape{1}, &axis),
ngraph::op::v1::Reverse::Mode::INDEX
);
}
auto shape = std::make_shared<ngraph::op::Constant>(ngraph::element::i32, ngraph::Shape{2}, std::vector<int32_t>{0, -1});
box_logits = std::make_shared<ngraph::op::v1::Reshape>(box_logits, shape, true);
class_preds = std::make_shared<ngraph::op::v1::Reshape>(class_preds, shape, true);
proposals = std::make_shared<ngraph::op::v1::Reshape>(proposals,
std::make_shared<ngraph::op::Constant>(ngraph::element::i32, ngraph::Shape{3}, std::vector<int32_t>{0, _varianceEncodedInTarget ? 1 : 2, -1}),
true
);
ngraph::op::DetectionOutputAttrs attrs; ngraph::op::DetectionOutputAttrs attrs;
attrs.num_classes = _numClasses; attrs.num_classes = _numClasses;

View File

@ -821,7 +821,7 @@ struct GeluFunctor : public BaseDefaultFunctor<GeluFunctor>
bool supportBackend(int backendId, int) bool supportBackend(int backendId, int)
{ {
return backendId == DNN_BACKEND_OPENCV; return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_CUDA;
} }
inline float calculate(float x) const inline float calculate(float x) const
@ -829,6 +829,13 @@ struct GeluFunctor : public BaseDefaultFunctor<GeluFunctor>
return 0.5f * x * (1.0f + erf(x * M_SQRT1_2)); return 0.5f * x * (1.0f + erf(x * M_SQRT1_2));
} }
#ifdef HAVE_CUDA
// Creates the CUDA backend node for this functor: make_cuda_node instantiated
// with cuda4dnn::GeluOp, given the requested `target` and `stream`.
Ptr<BackendNode> initCUDA(int target, csl::Stream stream)
{
return make_cuda_node<cuda4dnn::GeluOp>(target, stream);
}
#endif
int64 getFLOPSPerElement() const { return 100; } int64 getFLOPSPerElement() const { return 100; }
}; };

View File

@ -180,15 +180,12 @@ public:
virtual bool supportBackend(int backendId) CV_OVERRIDE virtual bool supportBackend(int backendId) CV_OVERRIDE
{ {
bool tranAorB = transA || transB; bool tranAorB = transA || transB;
#ifdef HAVE_INF_ENGINE
if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
return axis == 1 && !tranAorB;
#endif
return backendId == DNN_BACKEND_OPENCV || return backendId == DNN_BACKEND_OPENCV ||
backendId == DNN_BACKEND_CUDA || backendId == DNN_BACKEND_CUDA ||
(backendId == DNN_BACKEND_HALIDE && haveHalide() && axis == 1 && !tranAorB) || (backendId == DNN_BACKEND_HALIDE && haveHalide() && axis == 1 && !tranAorB) ||
(backendId == DNN_BACKEND_WEBNN && axis == 1 && !tranAorB) || (backendId == DNN_BACKEND_WEBNN && axis == 1 && !tranAorB) ||
backendId == DNN_BACKEND_CANN || backendId == DNN_BACKEND_CANN ||
backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH ||
(backendId == DNN_BACKEND_VKCOM && haveVulkan() && !tranAorB); (backendId == DNN_BACKEND_VKCOM && haveVulkan() && !tranAorB);
} }
@ -630,9 +627,11 @@ public:
if(input_wrapper->getRank() == inp2Dim) if(input_wrapper->getRank() == inp2Dim)
return make_cuda_node<cuda4dnn::MatMulOp>(preferableTarget, std::move(context->stream), std::move(context->cublas_handle), oriMat, biasMat_, transA, transB); return make_cuda_node<cuda4dnn::MatMulOp>(preferableTarget, std::move(context->stream), std::move(context->cublas_handle), oriMat, biasMat_, transA, transB);
else else {
CV_LOG_INFO(NULL, "DNN/CUDA: no implementation for MatMul with rank " << input_wrapper->getRank());
return Ptr<BackendNode>(); return Ptr<BackendNode>();
} }
}
auto flatten_start_axis = normalize_axis(axis, input_wrapper->getRank()); auto flatten_start_axis = normalize_axis(axis, input_wrapper->getRank());
return make_cuda_node<cuda4dnn::InnerProductOp>(preferableTarget, std::move(context->stream), std::move(context->cublas_handle), flatten_start_axis, weightsMat, biasMat_); return make_cuda_node<cuda4dnn::InnerProductOp>(preferableTarget, std::move(context->stream), std::move(context->cublas_handle), flatten_start_axis, weightsMat, biasMat_);
@ -800,17 +799,26 @@ public:
if (nodes.size() == 2) if (nodes.size() == 2)
{ {
auto& inp2 = nodes[1].dynamicCast<InfEngineNgraphNode>()->node; auto& inp2 = nodes[1].dynamicCast<InfEngineNgraphNode>()->node;
matmul = std::make_shared<ngraph::op::MatMul>(ieInpNode, inp2, false, false); matmul = std::make_shared<ngraph::op::MatMul>(ieInpNode, inp2, transA, transB);
} }
else else
{ {
std::vector<int64_t> data = {(int64_t)ieInpNode->get_shape()[0], (int64_t)blobs[0].size[1]}; std::vector<int> shape(1 + normalize_axis(axis, ieInpNode->get_shape().size()), 0);
auto new_shape = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{2}, data.data()); shape[shape.size() - 1] = -1;
auto inp = std::make_shared<ngraph::op::v1::Reshape>(ieInpNode, new_shape, true); auto inp = std::make_shared<ngraph::op::v1::Reshape>(
ieInpNode,
std::make_shared<ngraph::op::Constant>(ngraph::element::i32, ngraph::Shape{shape.size()}, shape.data()),
true
);
std::vector<size_t> weight_shape{(size_t)blobs[0].size[0], (size_t)blobs[0].size[1]}; std::vector<size_t> weight_shape;
if (isMatMul) {
weight_shape = getShape<size_t>(oriMat);
} else {
weight_shape = {(size_t)blobs[0].size[0], (size_t)blobs[0].size[1]};
}
auto ieWeights = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, weight_shape, blobs[0].data); auto ieWeights = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, weight_shape, blobs[0].data);
matmul = std::make_shared<ngraph::op::MatMul>(inp, ieWeights, false, true); matmul = std::make_shared<ngraph::op::MatMul>(inp, ieWeights, transA, transB);
} }
if (bias) { if (bias) {

View File

@ -13,6 +13,7 @@ Implementation of Batch Normalization layer.
#include "layers_common.hpp" #include "layers_common.hpp"
#include "../op_cuda.hpp" #include "../op_cuda.hpp"
#include "../op_halide.hpp" #include "../op_halide.hpp"
#include "../ie_ngraph.hpp"
#include <opencv2/dnn/shape_utils.hpp> #include <opencv2/dnn/shape_utils.hpp>
#include <opencv2/core/utils/logger.hpp> #include <opencv2/core/utils/logger.hpp>
@ -41,6 +42,7 @@ public:
{ {
return backendId == DNN_BACKEND_OPENCV || return backendId == DNN_BACKEND_OPENCV ||
backendId == DNN_BACKEND_CUDA || backendId == DNN_BACKEND_CUDA ||
backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH ||
(backendId == DNN_BACKEND_HALIDE && haveHalide() && !poolPad.width && !poolPad.height); (backendId == DNN_BACKEND_HALIDE && haveHalide() && !poolPad.width && !poolPad.height);
} }
@ -181,6 +183,50 @@ public:
#endif // HAVE_HALIDE #endif // HAVE_HALIDE
return Ptr<BackendNode>(); return Ptr<BackendNode>();
} }
#ifdef HAVE_DNN_NGRAPH
    // Builds the nGraph subgraph for max-unpooling:
    //   1. flatten the pooled features (nodes[0]) and their indices (nodes[1]) to 1-D,
    //   2. scatter the features into a zero-filled flat buffer at those indices,
    //   3. reshape the buffer to the output shape reported by getMemoryShapes().
    // Returns the resulting node wrapped in an InfEngineNgraphNode.
    virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs,
                                        const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
    {
        auto features = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
        auto indices = nodes[1].dynamicCast<InfEngineNgraphNode>()->node;

        // Collect the shape of every input node so the layer can compute its output shape.
        std::vector<MatShape> inpShapes(nodes.size());
        for (size_t idx = 0; idx < nodes.size(); ++idx) {
            const std::vector<size_t> dims = nodes[idx].dynamicCast<InfEngineNgraphNode>()->node->get_shape();
            inpShapes[idx] = std::vector<int>(dims.begin(), dims.end());
        }
        std::vector<MatShape> outShapes, internals;
        getMemoryShapes(inpShapes, 1, outShapes, internals);

        // Flat zero-initialized destination with one element per output value.
        Mat zeros = Mat::zeros(1, total(outShapes[0]), CV_32F);
        auto zeroInp = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape{zeros.total()}, zeros.data);

        // Reshapes a node to 1-D using a {-1} target shape; a fresh shape constant is created per call.
        auto flatten = [](const std::shared_ptr<ngraph::Node>& src) -> std::shared_ptr<ngraph::Node> {
            int flatDim = -1;
            return std::make_shared<ngraph::op::v1::Reshape>(
                src,
                std::make_shared<ngraph::op::Constant>(ngraph::element::i32, ngraph::Shape{1}, &flatDim),
                true
            );
        };
        features = flatten(features);
        indices = flatten(indices);

        // Indices that are not already i32/i64 are converted to i64 before scattering.
        const bool hasIntegerIndices = indices->get_element_type() == ngraph::element::i32 ||
                                       indices->get_element_type() == ngraph::element::i64;
        if (!hasIntegerIndices) {
            indices = std::make_shared<ngraph::op::Convert>(indices, ngraph::element::i64);
        }

        // Scatter along axis 0 of the flat buffer.
        int axis = 0;
        auto axisNode = std::make_shared<ngraph::op::Constant>(ngraph::element::i32, ngraph::Shape{1}, &axis);
        std::shared_ptr<ngraph::Node> unpool =
            std::make_shared<ngraph::op::ScatterElementsUpdate>(zeroInp, indices, features, axisNode);

        // Restore the layer's output shape.
        auto outShapeNode = std::make_shared<ngraph::op::Constant>(
            ngraph::element::i32, ngraph::Shape{outShapes[0].size()}, outShapes[0].data());
        unpool = std::make_shared<ngraph::op::v1::Reshape>(unpool, outShapeNode, true);

        return Ptr<BackendNode>(new InfEngineNgraphNode(unpool));
    }
#endif // HAVE_DNN_NGRAPH
}; };
Ptr<MaxUnpoolLayer> MaxUnpoolLayer::create(const LayerParams& params) Ptr<MaxUnpoolLayer> MaxUnpoolLayer::create(const LayerParams& params)

View File

@ -209,7 +209,7 @@ public:
#ifdef HAVE_INF_ENGINE #ifdef HAVE_INF_ENGINE
if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
{ {
return !computeMaxIdx && type != STOCHASTIC && kernel_size.size() > 1 && (kernel_size.size() != 3 || !isArmComputePlugin()); return type != STOCHASTIC && kernel_size.size() > 1 && (kernel_size.size() != 3 || !isArmComputePlugin());
} }
#endif #endif
if (backendId == DNN_BACKEND_OPENCV) if (backendId == DNN_BACKEND_OPENCV)
@ -613,9 +613,17 @@ public:
return Ptr<BackendNode>(new InfEngineNgraphNode(reduce_sum)); return Ptr<BackendNode>(new InfEngineNgraphNode(reduce_sum));
} }
else if (type == MAX) { else if (type == MAX) {
auto max_pool = std::make_shared<ngraph::op::v1::MaxPool>(ieInpNode, ngraph::Strides(strides), std::shared_ptr<ngraph::Node> max_pool;
if (computeMaxIdx) {
std::vector<size_t> dilations(kernel_size.size(), 1);
max_pool = std::make_shared<ngraph::op::v8::MaxPool>(ieInpNode, ngraph::Strides(strides), ngraph::Strides(dilations),
ngraph::Shape(pads_begin), ngraph::Shape(pads_end), ngraph::Shape(kernel_size), ngraph::Shape(pads_begin), ngraph::Shape(pads_end), ngraph::Shape(kernel_size),
rounding_type, pad_type); rounding_type, pad_type);
} else {
max_pool = std::make_shared<ngraph::op::v1::MaxPool>(ieInpNode, ngraph::Strides(strides),
ngraph::Shape(pads_begin), ngraph::Shape(pads_end), ngraph::Shape(kernel_size),
rounding_type, pad_type);
}
return Ptr<BackendNode>(new InfEngineNgraphNode(max_pool)); return Ptr<BackendNode>(new InfEngineNgraphNode(max_pool));
} }
else if (type == ROI) { else if (type == ROI) {

View File

@ -425,7 +425,7 @@ public:
dtype* p_dst = dst.ptr<dtype>(); dtype* p_dst = dst.ptr<dtype>();
size_t main_index = start / last_unreduced_dim; size_t main_index = start / last_unreduced_dim;
size_t loop = start / last_unreduced_dim; size_t loop = start % last_unreduced_dim;
size_t origin = unprojected_steps[main_index] + loop * last_unreduced_step; size_t origin = unprojected_steps[main_index] + loop * last_unreduced_step;
for (int i = start; i < end; ++i) { for (int i = start; i < end; ++i) {
Op accumulator(n_reduce, p_src[origin + projected_steps[0]]); Op accumulator(n_reduce, p_src[origin + projected_steps[0]]);

View File

@ -410,7 +410,10 @@ public:
} }
attrs.shape_calculation_mode = ngraph::op::v4::Interpolate::ShapeCalcMode::SIZES; attrs.shape_calculation_mode = ngraph::op::v4::Interpolate::ShapeCalcMode::SIZES;
if (alignCorners) { CV_Assert(!halfPixelCenters || !alignCorners);
if (halfPixelCenters) {
attrs.coordinate_transformation_mode = ngraph::op::v4::Interpolate::CoordinateTransformMode::HALF_PIXEL;
} else if (alignCorners) {
attrs.coordinate_transformation_mode = ngraph::op::v4::Interpolate::CoordinateTransformMode::ALIGN_CORNERS; attrs.coordinate_transformation_mode = ngraph::op::v4::Interpolate::CoordinateTransformMode::ALIGN_CORNERS;
} }
@ -427,7 +430,10 @@ public:
} }
attrs.shape_calculation_mode = ngraph::op::v4::Interpolate::ShapeCalcMode::sizes; attrs.shape_calculation_mode = ngraph::op::v4::Interpolate::ShapeCalcMode::sizes;
if (alignCorners) { CV_Assert(!halfPixelCenters || !alignCorners);
if (halfPixelCenters) {
attrs.coordinate_transformation_mode = ngraph::op::v4::Interpolate::CoordinateTransformMode::half_pixel;
} else if (alignCorners) {
attrs.coordinate_transformation_mode = ngraph::op::v4::Interpolate::CoordinateTransformMode::align_corners; attrs.coordinate_transformation_mode = ngraph::op::v4::Interpolate::CoordinateTransformMode::align_corners;
} }

View File

@ -476,13 +476,14 @@ void NetImplOpenVINO::initBackend(const std::vector<LayerPin>& blobsToKeep_)
{ {
int lid = ld.inputBlobsId[i].lid; int lid = ld.inputBlobsId[i].lid;
int oid = ld.inputBlobsId[i].oid; int oid = ld.inputBlobsId[i].oid;
if (oid == 0 || lid == 0)
continue;
auto ieInpNode = inputNodes[i].dynamicCast<InfEngineNgraphNode>(); auto ieInpNode = inputNodes[i].dynamicCast<InfEngineNgraphNode>();
const auto& ngraph_input_node = ieInpNode->node; const auto& ngraph_input_node = ieInpNode->node;
CV_LOG_DEBUG(NULL, "DNN/IE: bind output port " << lid << ":" << oid << " (" << ngraph_input_node->get_friendly_name() << ":" << ngraph_input_node->get_type_info().name << ")"); CV_LOG_DEBUG(NULL, "DNN/IE: bind output port " << lid << ":" << oid << " (" << ngraph_input_node->get_friendly_name() << ":" << ngraph_input_node->get_type_info().name << ")");
if ((oid == 0 && ngraph_input_node->get_output_size() == 1) || lid == 0)
continue;
// Handle parameters from other subnets. Output port is not used in this case // Handle parameters from other subnets. Output port is not used in this case
#if INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2020_4) #if INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2020_4)
if ((ngraph::op::is_parameter(ngraph_input_node) || ngraph::op::is_constant(ngraph_input_node)) && if ((ngraph::op::is_parameter(ngraph_input_node) || ngraph::op::is_constant(ngraph_input_node)) &&
@ -549,7 +550,6 @@ void NetImplOpenVINO::initBackend(const std::vector<LayerPin>& blobsToKeep_)
break; break;
} }
} }
ieNode->net->setNodePtr(&ieNode->node);
net->addBlobs(ld.inputBlobsWrappers); net->addBlobs(ld.inputBlobsWrappers);
net->addBlobs(ld.outputBlobsWrappers); net->addBlobs(ld.outputBlobsWrappers);

View File

@ -1385,7 +1385,12 @@ void ONNXImporter::parseSplit(LayerParams& layerParams, const opencv_onnx::NodeP
CV_Assert(constBlobs.find(node_proto.input(1)) != constBlobs.end()); CV_Assert(constBlobs.find(node_proto.input(1)) != constBlobs.end());
Mat splitsBlob = getBlob(node_proto, 1); Mat splitsBlob = getBlob(node_proto, 1);
int splitSize = splitsBlob.total(); int splitSize = splitsBlob.total();
if (splitSize == 1)
{
layerParams.set("num_split", 1);
}
else
{
std::vector<int> slicePoints(splitSize - 1, splitsBlob.at<int>(0)); std::vector<int> slicePoints(splitSize - 1, splitsBlob.at<int>(0));
for (int i = 1; i < splitSize - 1; ++i) for (int i = 1; i < splitSize - 1; ++i)
{ {
@ -1393,6 +1398,7 @@ void ONNXImporter::parseSplit(LayerParams& layerParams, const opencv_onnx::NodeP
} }
layerParams.set("slice_point", DictValue::arrayInt(&slicePoints[0], slicePoints.size())); layerParams.set("slice_point", DictValue::arrayInt(&slicePoints[0], slicePoints.size()));
} }
}
else else
{ {
layerParams.set("num_split", node_proto.output_size()); layerParams.set("num_split", node_proto.output_size());
@ -1965,9 +1971,11 @@ void ONNXImporter::parseGemm(LayerParams& layerParams, const opencv_onnx::NodePr
} }
int transB = layerParams.get<int>("transB", 0); int transB = layerParams.get<int>("transB", 0);
int secondInpDims;
if (constBlobs.find(node_proto.input(1)) != constBlobs.end()) if (constBlobs.find(node_proto.input(1)) != constBlobs.end())
{ {
Mat weights = getBlob(node_proto, 1); Mat weights = getBlob(node_proto, 1);
secondInpDims = weights.dims;
if (transA == 0) // optimized barnch, for now, we can only optimize the Gemm when transA = 0. if (transA == 0) // optimized barnch, for now, we can only optimize the Gemm when transA = 0.
{ {
@ -1993,7 +2001,10 @@ void ONNXImporter::parseGemm(LayerParams& layerParams, const opencv_onnx::NodePr
} }
} }
else else
{
layerParams.set("transB", transB == 1); layerParams.set("transB", transB == 1);
secondInpDims = outShapes[node_proto.input(1)].size();
}
if (node_proto.input_size() == 3) if (node_proto.input_size() == 3)
{ {
@ -2002,7 +2013,7 @@ void ONNXImporter::parseGemm(LayerParams& layerParams, const opencv_onnx::NodePr
} }
layerParams.set("bias_term", node_proto.input_size() == 3); layerParams.set("bias_term", node_proto.input_size() == 3);
layerParams.set("is_matmul", true); layerParams.set("is_matmul", secondInpDims > 2);
addLayer(layerParams, node_proto); addLayer(layerParams, node_proto);
} }
@ -2045,7 +2056,7 @@ void ONNXImporter::parseMatMul(LayerParams& layerParams, const opencv_onnx::Node
layerParams.blobs.push_back(transBlob); layerParams.blobs.push_back(transBlob);
int numOutput = layerParams.blobs[0].total(0, secondInpDims - 1); int numOutput = layerParams.blobs[0].total(0, secondInpDims - 1);
layerParams.set("num_output", numOutput); layerParams.set("num_output", numOutput);
layerParams.set("is_matmul", true); layerParams.set("is_matmul", secondInpDims > 2);
} else } else
secondInpDims = outShapes[node_proto.input(1)].size(); secondInpDims = outShapes[node_proto.input(1)].size();

View File

@ -731,21 +731,23 @@ TEST_P(Test_Caffe_nets, FasterRCNN_vgg16)
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
#endif #endif
double scoreDiff = 0.0; double scoreDiff = 0.0, iouDiff = 0.0;
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) #if defined(INF_ENGINE_RELEASE)
// Check 'backward_compatible_check || in_out_elements_equal' failed at core/src/op/reshape.cpp:427:
// While validating node 'v1::Reshape bbox_pred_reshape (bbox_pred[0]:f32{1,84}, Constant_265242[0]:i64{4}) -> (f32{?,?,?,?})' with friendly_name 'bbox_pred_reshape':
// Requested output shape {1,6300,4,1} is incompatible with input shape {1, 84}
if (target == DNN_TARGET_MYRIAD) if (target == DNN_TARGET_MYRIAD)
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
if (target == DNN_TARGET_OPENCL_FP16) if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) {
scoreDiff = 0.02; iouDiff = 0.02;
if (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16) {
scoreDiff = 0.04;
iouDiff = 0.06;
}
}
#endif #endif
static Mat ref = (Mat_<float>(3, 7) << 0, 2, 0.949398, 99.2454, 210.141, 601.205, 462.849, static Mat ref = (Mat_<float>(3, 7) << 0, 2, 0.949398, 99.2454, 210.141, 601.205, 462.849,
0, 7, 0.997022, 481.841, 92.3218, 722.685, 175.953, 0, 7, 0.997022, 481.841, 92.3218, 722.685, 175.953,
0, 12, 0.993028, 133.221, 189.377, 350.994, 563.166); 0, 12, 0.993028, 133.221, 189.377, 350.994, 563.166);
testFaster("faster_rcnn_vgg16.prototxt", "VGG16_faster_rcnn_final.caffemodel", ref, scoreDiff); testFaster("faster_rcnn_vgg16.prototxt", "VGG16_faster_rcnn_final.caffemodel", ref, scoreDiff, iouDiff);
} }
TEST_P(Test_Caffe_nets, FasterRCNN_zf) TEST_P(Test_Caffe_nets, FasterRCNN_zf)
@ -766,9 +768,6 @@ TEST_P(Test_Caffe_nets, FasterRCNN_zf)
); );
#endif #endif
if ((backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 ||
backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && target == DNN_TARGET_OPENCL_FP16)
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16);
if ((backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || if ((backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 ||
backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && target == DNN_TARGET_MYRIAD) backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && target == DNN_TARGET_MYRIAD)
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD); applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD);
@ -779,7 +778,14 @@ TEST_P(Test_Caffe_nets, FasterRCNN_zf)
static Mat ref = (Mat_<float>(3, 7) << 0, 2, 0.90121, 120.407, 115.83, 570.586, 528.395, static Mat ref = (Mat_<float>(3, 7) << 0, 2, 0.90121, 120.407, 115.83, 570.586, 528.395,
0, 7, 0.988779, 469.849, 75.1756, 718.64, 186.762, 0, 7, 0.988779, 469.849, 75.1756, 718.64, 186.762,
0, 12, 0.967198, 138.588, 206.843, 329.766, 553.176); 0, 12, 0.967198, 138.588, 206.843, 329.766, 553.176);
testFaster("faster_rcnn_zf.prototxt", "ZF_faster_rcnn_final.caffemodel", ref);
double scoreDiff = 0.0, iouDiff = 0.0;
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) {
scoreDiff = 0.02;
iouDiff = 0.13;
}
testFaster("faster_rcnn_zf.prototxt", "ZF_faster_rcnn_final.caffemodel", ref, scoreDiff, iouDiff);
} }
TEST_P(Test_Caffe_nets, RFCN) TEST_P(Test_Caffe_nets, RFCN)
@ -802,8 +808,8 @@ TEST_P(Test_Caffe_nets, RFCN)
iouDiff = 0.12; iouDiff = 0.12;
} }
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) #if defined(INF_ENGINE_RELEASE)
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16) if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
{ {
scoreDiff = 0.1f; scoreDiff = 0.1f;
iouDiff = 0.2f; iouDiff = 0.2f;

View File

@ -102,11 +102,14 @@ TEST(Test_Darknet, read_yolo_voc_stream)
class Test_Darknet_layers : public DNNTestLayer class Test_Darknet_layers : public DNNTestLayer
{ {
public: public:
void testDarknetLayer(const std::string& name, bool hasWeights = false, bool testBatchProcessing = true) void testDarknetLayer(const std::string& name, bool hasWeights = false, bool testBatchProcessing = true,
double l1 = 0.0, double lInf = 0.0)
{ {
SCOPED_TRACE(name); SCOPED_TRACE(name);
Mat inp = blobFromNPY(findDataFile("dnn/darknet/" + name + "_in.npy")); Mat inp = blobFromNPY(findDataFile("dnn/darknet/" + name + "_in.npy"));
Mat ref = blobFromNPY(findDataFile("dnn/darknet/" + name + "_out.npy")); Mat ref = blobFromNPY(findDataFile("dnn/darknet/" + name + "_out.npy"));
l1 = l1 ? l1 : default_l1;
lInf = lInf ? lInf : default_lInf;
std::string cfg = findDataFile("dnn/darknet/" + name + ".cfg"); std::string cfg = findDataFile("dnn/darknet/" + name + ".cfg");
std::string model = ""; std::string model = "";
@ -120,7 +123,7 @@ public:
net.setPreferableTarget(target); net.setPreferableTarget(target);
net.setInput(inp); net.setInput(inp);
Mat out = net.forward(); Mat out = net.forward();
normAssert(out, ref, "", default_l1, default_lInf); normAssert(out, ref, "", l1, lInf);
if (inp.size[0] == 1 && testBatchProcessing) // test handling of batch size if (inp.size[0] == 1 && testBatchProcessing) // test handling of batch size
{ {
@ -166,8 +169,8 @@ public:
}*/ }*/
ASSERT_EQ(out2.dims, ref2.dims) << ref.dims; ASSERT_EQ(out2.dims, ref2.dims) << ref.dims;
normAssert(out2(ranges0), ref2, "", default_l1, default_lInf); normAssert(out2(ranges0), ref2, "", l1, lInf);
normAssert(out2(ranges1), ref2, "", default_l1, default_lInf); normAssert(out2(ranges1), ref2, "", l1, lInf);
} }
} }
}; };
@ -1046,7 +1049,7 @@ TEST_P(Test_Darknet_layers, region)
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
#endif #endif
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2022010000)
// accuracy on CPU, OpenCL // accuracy on CPU, OpenCL
// Expected: (normL1) <= (l1), actual: 0.000358148 vs 1e-05 // Expected: (normL1) <= (l1), actual: 0.000358148 vs 1e-05
// |ref| = 1.207319974899292 // |ref| = 1.207319974899292
@ -1116,7 +1119,12 @@ TEST_P(Test_Darknet_layers, connected)
applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16); applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16);
if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_CPU_FP16) if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_CPU_FP16)
applyTestTag(CV_TEST_TAG_DNN_SKIP_CPU_FP16); applyTestTag(CV_TEST_TAG_DNN_SKIP_CPU_FP16);
testDarknetLayer("connected", true); double l1 = 0.0;
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL)
{
l1 = 3e-5;
}
testDarknetLayer("connected", true, true, l1);
} }
TEST_P(Test_Darknet_layers, relu) TEST_P(Test_Darknet_layers, relu)

View File

@ -361,22 +361,9 @@ TEST_P(MaxPooling, Accuracy)
Backend backendId = get<0>(get<5>(GetParam())); Backend backendId = get<0>(get<5>(GetParam()));
Target targetId = get<1>(get<5>(GetParam())); Target targetId = get<1>(get<5>(GetParam()));
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LE(2018050000) // https://github.com/openvinotoolkit/openvino/issues/18731
if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && targetId == DNN_TARGET_MYRIAD if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && stride != Size(1, 1))
&& inSize == Size(7, 6) && kernel == Size(3, 2) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
&& (stride == Size(1, 1) || stride == Size(2, 2))
&& (pad == Size(0, 1) || pad == Size(1, 1))
)
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
#endif
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2018050000)
if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && targetId == DNN_TARGET_MYRIAD
&& (kernel == Size(2, 2) || kernel == Size(3, 2))
&& stride == Size(1, 1) && (pad == Size(0, 0) || pad == Size(0, 1))
)
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
#endif
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2019010000) #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2019010000)
if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && targetId == DNN_TARGET_MYRIAD if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && targetId == DNN_TARGET_MYRIAD
@ -467,6 +454,11 @@ TEST_P(FullyConnected, Accuracy)
{ {
l1 = 0.01; l1 = 0.01;
} }
if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && targetId == DNN_TARGET_OPENCL)
{
l1 = 5e-3;
lInf = 7e-3;
}
#endif #endif
if (targetId == DNN_TARGET_CUDA_FP16) if (targetId == DNN_TARGET_CUDA_FP16)
l1 = 0.015; l1 = 0.015;

View File

@ -465,8 +465,8 @@ TEST_P(DNNTestHighLevelAPI, predict)
const std::string modelPath = getOpenVINOModel(modelName, isFP16); const std::string modelPath = getOpenVINOModel(modelName, isFP16);
ASSERT_FALSE(modelPath.empty()) << modelName; ASSERT_FALSE(modelPath.empty()) << modelName;
std::string xmlPath = findDataFile(modelPath + ".xml"); std::string xmlPath = findDataFile(modelPath + ".xml", false);
std::string binPath = findDataFile(modelPath + ".bin"); std::string binPath = findDataFile(modelPath + ".bin", false);
Model model(xmlPath, binPath); Model model(xmlPath, binPath);
Mat frame = imread(findDataFile("dnn/googlenet_1.png")); Mat frame = imread(findDataFile("dnn/googlenet_1.png"));

View File

@ -215,7 +215,13 @@ TEST_P(Test_Caffe_layers, InnerProduct)
if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_CPU_FP16) if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_CPU_FP16)
applyTestTag(CV_TEST_TAG_DNN_SKIP_CPU_FP16); applyTestTag(CV_TEST_TAG_DNN_SKIP_CPU_FP16);
testLayerUsingCaffeModels("layer_inner_product", true); double l1 = 0.0, lInf = 0.0;
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16))
{
l1 = 5e-3;
lInf = 2e-2;
}
testLayerUsingCaffeModels("layer_inner_product", true, true, l1, lInf);
} }
TEST_P(Test_Caffe_layers, Pooling_max) TEST_P(Test_Caffe_layers, Pooling_max)

View File

@ -447,14 +447,17 @@ TEST_P(Test_Model, DetectionOutput)
{ {
if (backend == DNN_BACKEND_OPENCV) if (backend == DNN_BACKEND_OPENCV)
scoreDiff = 4e-3; scoreDiff = 4e-3;
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2022010000)
else if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
scoreDiff = 4e-2;
#endif
else else
scoreDiff = 2e-2; scoreDiff = 2e-2;
iouDiff = 1.8e-1; iouDiff = 1.8e-1;
} }
#if defined(INF_ENGINE_RELEASE)
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
{
scoreDiff = 0.05;
iouDiff = 0.08;
}
#endif
testDetectModel(weights_file, config_file, img_path, refClassIds, refConfidences, refBoxes, testDetectModel(weights_file, config_file, img_path, refClassIds, refConfidences, refBoxes,
scoreDiff, iouDiff, confThreshold, nmsThreshold, size, mean); scoreDiff, iouDiff, confThreshold, nmsThreshold, size, mean);

View File

@ -579,9 +579,7 @@ CASE(test_dropout_default_mask_ratio)
CASE(test_dropout_default_old) CASE(test_dropout_default_old)
// no filter // no filter
CASE(test_dropout_default_ratio) CASE(test_dropout_default_ratio)
#if SKIP_SET_1 // no filter
SKIP;
#endif
CASE(test_dropout_random_old) CASE(test_dropout_random_old)
// no filter // no filter
CASE(test_dynamicquantizelinear) CASE(test_dynamicquantizelinear)

View File

@ -52,7 +52,7 @@ public:
} }
void testONNXModels(const String& basename, const Extension ext = npy, void testONNXModels(const String& basename, const Extension ext = npy,
const double l1 = 0, const float lInf = 0, const bool useSoftmax = false, double l1 = 0, double lInf = 0, const bool useSoftmax = false,
bool checkNoFallbacks = true, int numInps = 1) bool checkNoFallbacks = true, int numInps = 1)
{ {
String onnxmodel = _tf("models/" + basename + ".onnx", required); String onnxmodel = _tf("models/" + basename + ".onnx", required);
@ -102,7 +102,12 @@ public:
netSoftmax.setInput(ref); netSoftmax.setInput(ref);
ref = netSoftmax.forward(); ref = netSoftmax.forward();
} }
normAssert(ref, out, "", l1 ? l1 : default_l1, lInf ? lInf : default_lInf); if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL)
{
l1 = std::max(l1, 1.4e-3);
lInf = std::max(lInf, 8e-3);
}
normAssert(ref, out, basename.c_str(), l1 ? l1 : default_l1, lInf ? lInf : default_lInf);
if (checkNoFallbacks) if (checkNoFallbacks)
expectNoFallbacksFromIE(net); expectNoFallbacksFromIE(net);
} }

View File

@ -1816,6 +1816,11 @@ TEST_P(Test_TensorFlow_nets, Mask_RCNN)
double scoreDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD || target == DNN_TARGET_CPU_FP16) ? 0.2 : 2e-5; double scoreDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD || target == DNN_TARGET_CPU_FP16) ? 0.2 : 2e-5;
double iouDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD || target == DNN_TARGET_CPU_FP16) ? 0.018 : default_lInf; double iouDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD || target == DNN_TARGET_CPU_FP16) ? 0.018 : default_lInf;
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
{
scoreDiff = std::max(scoreDiff, 0.06);
iouDiff = std::max(iouDiff, 0.01);
}
normAssertDetections(refDetections, outDetections, "", /*threshold for zero confidence*/1e-5, scoreDiff, iouDiff); normAssertDetections(refDetections, outDetections, "", /*threshold for zero confidence*/1e-5, scoreDiff, iouDiff);
// Output size of masks is NxCxHxW where // Output size of masks is NxCxHxW where

View File

@ -20,6 +20,14 @@ namespace opencv_test { namespace {
using namespace cv; using namespace cv;
using namespace cv::dnn; using namespace cv::dnn;
// Fixture for the TFLite importer tests, built on top of DNNTestLayer.
// In testModel(Net&, ...) a tolerance of 0 is replaced by the fixture's
// default_l1 / default_lInf; the other testModel overloads forward their
// l1 / lInf arguments to it unchanged.
class Test_TFLite : public DNNTestLayer {
public:
    // Checks an already loaded network on the given input blob.
    void testModel(Net& net, const std::string& modelName, const Mat& input,
                   double l1 = 0.0, double lInf = 0.0);

    // Reads "dnn/tflite/<modelName>.tflite" and checks it on the given input blob.
    void testModel(const std::string& modelName, const Mat& input,
                   double l1 = 0.0, double lInf = 0.0);

    // Builds the input blob from "cv/shared/lena.png" resized to inpSize
    // (scaled by 1/255), then delegates to the overload above.
    void testModel(const std::string& modelName, const Size& inpSize,
                   double l1 = 0.0, double lInf = 0.0);

    // Loads "dnn/tflite/<modelName>_inp.npy" together with
    // "dnn/tflite/<modelName>.tflite" for a single-layer check.
    // NOTE(review): l1 / lInf are presumably forwarded to testModel — confirm.
    void testLayer(const std::string& modelName,
                   double l1 = 0.0, double lInf = 0.0);
};
void testInputShapes(const Net& net, const std::vector<Mat>& inps) { void testInputShapes(const Net& net, const std::vector<Mat>& inps) {
std::vector<MatShape> inLayerShapes; std::vector<MatShape> inLayerShapes;
std::vector<MatShape> outLayerShapes; std::vector<MatShape> outLayerShapes;
@ -31,8 +39,14 @@ void testInputShapes(const Net& net, const std::vector<Mat>& inps) {
} }
} }
void testModel(Net& net, const std::string& modelName, const Mat& input, double l1 = 1e-5, double lInf = 1e-4) void Test_TFLite::testModel(Net& net, const std::string& modelName, const Mat& input, double l1, double lInf)
{ {
l1 = l1 ? l1 : default_l1;
lInf = lInf ? lInf : default_lInf;
net.setPreferableBackend(backend);
net.setPreferableTarget(target);
testInputShapes(net, {input}); testInputShapes(net, {input});
net.setInput(input); net.setInput(input);
@ -48,20 +62,20 @@ void testModel(Net& net, const std::string& modelName, const Mat& input, double
} }
} }
void testModel(const std::string& modelName, const Mat& input, double l1 = 1e-5, double lInf = 1e-4) void Test_TFLite::testModel(const std::string& modelName, const Mat& input, double l1, double lInf)
{ {
Net net = readNet(findDataFile("dnn/tflite/" + modelName + ".tflite", false)); Net net = readNet(findDataFile("dnn/tflite/" + modelName + ".tflite", false));
testModel(net, modelName, input, l1, lInf); testModel(net, modelName, input, l1, lInf);
} }
void testModel(const std::string& modelName, const Size& inpSize, double l1 = 1e-5, double lInf = 1e-4) void Test_TFLite::testModel(const std::string& modelName, const Size& inpSize, double l1, double lInf)
{ {
Mat input = imread(findDataFile("cv/shared/lena.png")); Mat input = imread(findDataFile("cv/shared/lena.png"));
input = blobFromImage(input, 1.0 / 255, inpSize, 0, true); input = blobFromImage(input, 1.0 / 255, inpSize, 0, true);
testModel(modelName, input, l1, lInf); testModel(modelName, input, l1, lInf);
} }
void testLayer(const std::string& modelName, double l1 = 1e-5, double lInf = 1e-4) void Test_TFLite::testLayer(const std::string& modelName, double l1, double lInf)
{ {
Mat inp = blobFromNPY(findDataFile("dnn/tflite/" + modelName + "_inp.npy")); Mat inp = blobFromNPY(findDataFile("dnn/tflite/" + modelName + "_inp.npy"));
Net net = readNet(findDataFile("dnn/tflite/" + modelName + ".tflite")); Net net = readNet(findDataFile("dnn/tflite/" + modelName + ".tflite"));
@ -69,29 +83,66 @@ void testLayer(const std::string& modelName, double l1 = 1e-5, double lInf = 1e-
} }
// https://google.github.io/mediapipe/solutions/face_mesh // https://google.github.io/mediapipe/solutions/face_mesh
TEST(Test_TFLite, face_landmark) TEST_P(Test_TFLite, face_landmark)
{ {
testModel("face_landmark", Size(192, 192), 2e-5, 2e-4); if (backend == DNN_BACKEND_CUDA && target == DNN_TARGET_CUDA_FP16)
applyTestTag(CV_TEST_TAG_DNN_SKIP_CUDA_FP16);
double l1 = 2e-5, lInf = 2e-4;
if (target == DNN_TARGET_CPU_FP16 || target == DNN_TARGET_CUDA_FP16 || target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD ||
(backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL))
{
l1 = 0.15;
lInf = 0.82;
}
testModel("face_landmark", Size(192, 192), l1, lInf);
} }
// https://google.github.io/mediapipe/solutions/face_detection // https://google.github.io/mediapipe/solutions/face_detection
TEST(Test_TFLite, face_detection_short_range) TEST_P(Test_TFLite, face_detection_short_range)
{ {
testModel("face_detection_short_range", Size(128, 128)); double l1 = 0, lInf = 2e-4;
if (target == DNN_TARGET_CPU_FP16 || target == DNN_TARGET_CUDA_FP16 || target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD ||
(backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL))
{
l1 = 0.04;
lInf = 0.8;
}
testModel("face_detection_short_range", Size(128, 128), l1, lInf);
} }
// https://google.github.io/mediapipe/solutions/selfie_segmentation // https://google.github.io/mediapipe/solutions/selfie_segmentation
TEST(Test_TFLite, selfie_segmentation) TEST_P(Test_TFLite, selfie_segmentation)
{ {
testModel("selfie_segmentation", Size(256, 256)); double l1 = 0, lInf = 0;
if (target == DNN_TARGET_CPU_FP16 || target == DNN_TARGET_CUDA_FP16 || target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD ||
(backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL))
{
l1 = 0.01;
lInf = 0.48;
}
testModel("selfie_segmentation", Size(256, 256), l1, lInf);
} }
TEST(Test_TFLite, max_unpooling) TEST_P(Test_TFLite, max_unpooling)
{ {
if (backend == DNN_BACKEND_CUDA)
applyTestTag(CV_TEST_TAG_DNN_SKIP_CUDA);
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target != DNN_TARGET_CPU) {
if (target == DNN_TARGET_OPENCL_FP16) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
if (target == DNN_TARGET_OPENCL) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
}
if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16)
applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16);
// Since Max Unpooling is a numerically unstable operation and small differences between frameworks // Since Max Unpooling is a numerically unstable operation and small differences between frameworks
// might lead to positional differences of maximal elements in the tensor, this test checks // might lead to positional differences of maximal elements in the tensor, this test checks
// the behavior of the Max Unpooling layer only. // the behavior of the Max Unpooling layer only.
Net net = readNet(findDataFile("dnn/tflite/hair_segmentation.tflite", false)); Net net = readNet(findDataFile("dnn/tflite/hair_segmentation.tflite", false));
net.setPreferableBackend(backend);
net.setPreferableTarget(target);
Mat input = imread(findDataFile("cv/shared/lena.png")); Mat input = imread(findDataFile("cv/shared/lena.png"));
cvtColor(input, input, COLOR_BGR2RGBA); cvtColor(input, input, COLOR_BGR2RGBA);
@ -101,7 +152,15 @@ TEST(Test_TFLite, max_unpooling)
net.setInput(input); net.setInput(input);
std::vector<std::vector<Mat> > outs; std::vector<std::vector<Mat> > outs;
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) {
// TODO: seems like a bug with retrieving intermediate tensors
net.forward(outs, {"conv2d_transpose_4", "p_re_lu_1", "max_pooling_with_argmax2d", "conv2d_86", "max_unpooling2d_2"});
outs.erase(outs.begin());
}
else {
net.forward(outs, {"p_re_lu_1", "max_pooling_with_argmax2d", "conv2d_86", "max_unpooling2d_2"}); net.forward(outs, {"p_re_lu_1", "max_pooling_with_argmax2d", "conv2d_86", "max_unpooling2d_2"});
}
ASSERT_EQ(outs.size(), 4); ASSERT_EQ(outs.size(), 4);
ASSERT_EQ(outs[0].size(), 1); ASSERT_EQ(outs[0].size(), 1);
ASSERT_EQ(outs[1].size(), 2); ASSERT_EQ(outs[1].size(), 2);
@ -117,6 +176,8 @@ TEST(Test_TFLite, max_unpooling)
ASSERT_EQ(poolOut.size, poolIds.size); ASSERT_EQ(poolOut.size, poolIds.size);
ASSERT_EQ(poolOut.size, unpoolInp.size); ASSERT_EQ(poolOut.size, unpoolInp.size);
ASSERT_EQ(countNonZero(poolInp), poolInp.total());
for (int c = 0; c < 32; ++c) { for (int c = 0; c < 32; ++c) {
float *poolInpData = poolInp.ptr<float>(0, c); float *poolInpData = poolInp.ptr<float>(0, c);
float *poolOutData = poolOut.ptr<float>(0, c); float *poolOutData = poolOut.ptr<float>(0, c);
@ -135,15 +196,19 @@ TEST(Test_TFLite, max_unpooling)
} }
} }
EXPECT_EQ(poolInpData[maxIdx], poolOutData[y * 64 + x]) << errMsg; EXPECT_EQ(poolInpData[maxIdx], poolOutData[y * 64 + x]) << errMsg;
if (backend != DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) {
EXPECT_EQ(poolIdsData[y * 64 + x], (float)maxIdx) << errMsg; EXPECT_EQ(poolIdsData[y * 64 + x], (float)maxIdx) << errMsg;
}
EXPECT_EQ(unpoolOutData[maxIdx], unpoolInpData[y * 64 + x]) << errMsg; EXPECT_EQ(unpoolOutData[maxIdx], unpoolInpData[y * 64 + x]) << errMsg;
} }
} }
} }
} }
TEST(Test_TFLite, EfficientDet_int8) { TEST_P(Test_TFLite, EfficientDet_int8) {
Net net = readNet(findDataFile("dnn/tflite/coco_efficientdet_lite0_v1_1.0_quant_2021_09_06.tflite", false)); Net net = readNet(findDataFile("dnn/tflite/coco_efficientdet_lite0_v1_1.0_quant_2021_09_06.tflite", false));
net.setPreferableBackend(backend);
net.setPreferableTarget(target);
Mat img = imread(findDataFile("dnn/dog416.png")); Mat img = imread(findDataFile("dnn/dog416.png"));
Mat blob = blobFromImage(img, 1.0, Size(320, 320)); Mat blob = blobFromImage(img, 1.0, Size(320, 320));
@ -158,10 +223,18 @@ TEST(Test_TFLite, EfficientDet_int8) {
normAssertDetections(ref, out, "", 0.5, 0.05, 0.1); normAssertDetections(ref, out, "", 0.5, 0.05, 0.1);
} }
TEST(Test_TFLite, replicate_by_pack) { TEST_P(Test_TFLite, replicate_by_pack) {
testLayer("replicate_by_pack"); double l1 = 0, lInf = 0;
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL)
{
l1 = 4e-4;
lInf = 2e-3;
}
testLayer("replicate_by_pack", l1, lInf);
} }
INSTANTIATE_TEST_CASE_P(/**/, Test_TFLite, dnnBackendsAndTargets());
}} // namespace }} // namespace
#endif // OPENCV_TEST_DNN_TFLITE #endif // OPENCV_TEST_DNN_TFLITE

View File

@ -39,6 +39,12 @@ public:
GAPI_WRAP GAPI_WRAP
PyParams& cfgAddExecutionProvider(ep::DirectML ep); PyParams& cfgAddExecutionProvider(ep::DirectML ep);
// Overload accepting CUDA Execution Provider options; `ep` is taken by value.
// NOTE(review): GAPI_WRAP presumably exposes the method to the Python bindings — confirm.
GAPI_WRAP
PyParams& cfgAddExecutionProvider(ep::CUDA ep);
// Overload accepting TensorRT Execution Provider options; `ep` is taken by value.
GAPI_WRAP
PyParams& cfgAddExecutionProvider(ep::TensorRT ep);
GAPI_WRAP GAPI_WRAP
PyParams& cfgDisableMemPattern(); PyParams& cfgDisableMemPattern();

View File

@ -32,6 +32,56 @@ namespace onnx {
*/ */
namespace ep { namespace ep {
/**
 * @brief This structure holds the inference options
 * for the ONNX Runtime CUDA Execution Provider.
 * Please follow https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html#cuda-execution-provider
 */
struct GAPI_EXPORTS_W_SIMPLE CUDA {
    // NB: Used from python.
    /// @private -- Exclude this constructor from OpenCV documentation
    GAPI_WRAP
    CUDA() = default;

    /** @brief Class constructor.

    Constructs CUDA parameters for the given device.

    @param dev_id Target device id to use.
    */
    GAPI_WRAP
    explicit CUDA(const int dev_id)
        : device_id(dev_id) {
    }

    // Explicit default so that a default-initialized object (`CUDA ep;`)
    // never carries an indeterminate device id.
    int device_id = 0;
};
/**
 * @brief This structure holds the inference options
 * for the ONNX Runtime TensorRT Execution Provider.
 * Please follow https://onnxruntime.ai/docs/execution-providers/TensorRT-ExecutionProvider.html#tensorrt-execution-provider
 */
struct GAPI_EXPORTS_W_SIMPLE TensorRT {
    // NB: Used from python.
    /// @private -- Exclude this constructor from OpenCV documentation
    GAPI_WRAP
    TensorRT() = default;

    /** @brief Class constructor.

    Constructs TensorRT parameters for the given device.

    @param dev_id Target device id to use.
    */
    GAPI_WRAP
    explicit TensorRT(const int dev_id)
        : device_id(dev_id) {
    }

    // Explicit default so that a default-initialized object (`TensorRT ep;`)
    // never carries an indeterminate device id.
    int device_id = 0;
};
/** /**
* @brief This structure provides functions * @brief This structure provides functions
* that fill inference options for ONNX OpenVINO Execution Provider. * that fill inference options for ONNX OpenVINO Execution Provider.
@ -143,7 +193,11 @@ public:
DeviceDesc ddesc; DeviceDesc ddesc;
}; };
using EP = cv::util::variant<cv::util::monostate, OpenVINO, DirectML>; using EP = cv::util::variant< cv::util::monostate
, OpenVINO
, DirectML
, CUDA
, TensorRT>;
} // namespace ep } // namespace ep
@ -431,6 +485,34 @@ public:
return *this; return *this;
} }
/** @brief Adds execution provider for runtime.
The function is used to add ONNX Runtime CUDA Execution Provider options.
The options object is accepted as an rvalue and moved to the back of
desc.execution_providers.
@param ep CUDA Execution Provider options.
@see cv::gapi::onnx::ep::CUDA.
@return reference to the modified object.
*/
Params<Net>& cfgAddExecutionProvider(ep::CUDA&& ep) {
desc.execution_providers.emplace_back(std::move(ep));
return *this;
}
/** @brief Adds execution provider for runtime.
The function is used to add ONNX Runtime TensorRT Execution Provider options.
The options object is accepted as an rvalue and moved to the back of
desc.execution_providers.
@param ep TensorRT Execution Provider options.
@see cv::gapi::onnx::ep::TensorRT.
@return reference to the modified object.
*/
Params<Net>& cfgAddExecutionProvider(ep::TensorRT&& ep) {
desc.execution_providers.emplace_back(std::move(ep));
return *this;
}
/** @brief Disables the memory pattern optimization. /** @brief Disables the memory pattern optimization.
@return the reference on modified object. @return the reference on modified object.
@ -491,6 +573,16 @@ public:
desc.execution_providers.emplace_back(std::move(ep)); desc.execution_providers.emplace_back(std::move(ep));
} }
/** Moves the CUDA Execution Provider options to the back of desc.execution_providers.
@see onnx::Params::cfgAddExecutionProvider. */
void cfgAddExecutionProvider(ep::CUDA&& ep) {
desc.execution_providers.emplace_back(std::move(ep));
}
/** Moves the TensorRT Execution Provider options to the back of desc.execution_providers.
@see onnx::Params::cfgAddExecutionProvider. */
void cfgAddExecutionProvider(ep::TensorRT&& ep) {
desc.execution_providers.emplace_back(std::move(ep));
}
/** @see onnx::Params::cfgDisableMemPattern. */ /** @see onnx::Params::cfgDisableMemPattern. */
void cfgDisableMemPattern() { void cfgDisableMemPattern() {
desc.disable_mem_pattern = true; desc.disable_mem_pattern = true;

View File

@ -31,6 +31,8 @@ using map_string_and_vector_float = std::map<std::string, std::vector<float>>;
using map_int_and_double = std::map<int, double>; using map_int_and_double = std::map<int, double>;
using ep_OpenVINO = cv::gapi::onnx::ep::OpenVINO; using ep_OpenVINO = cv::gapi::onnx::ep::OpenVINO;
using ep_DirectML = cv::gapi::onnx::ep::DirectML; using ep_DirectML = cv::gapi::onnx::ep::DirectML;
using ep_CUDA = cv::gapi::onnx::ep::CUDA;
using ep_TensorRT = cv::gapi::onnx::ep::TensorRT;
// NB: Python wrapper generate T_U for T<U> // NB: Python wrapper generate T_U for T<U>
// This behavior is only observed for inputs // This behavior is only observed for inputs

View File

@ -33,6 +33,18 @@ cv::gapi::onnx::PyParams::cfgAddExecutionProvider(cv::gapi::onnx::ep::DirectML e
return *this; return *this;
} }
// Forwards the by-value CUDA Execution Provider options to the wrapped
// parameters object (m_priv) and returns *this.
cv::gapi::onnx::PyParams&
cv::gapi::onnx::PyParams::cfgAddExecutionProvider(cv::gapi::onnx::ep::CUDA ep) {
// `ep` is a local copy, so it can be handed over as an rvalue.
m_priv->cfgAddExecutionProvider(std::move(ep));
return *this;
}
// Forwards the by-value TensorRT Execution Provider options to the wrapped
// parameters object (m_priv) and returns *this.
cv::gapi::onnx::PyParams&
cv::gapi::onnx::PyParams::cfgAddExecutionProvider(cv::gapi::onnx::ep::TensorRT ep) {
// `ep` is a local copy, so it can be handed over as an rvalue.
m_priv->cfgAddExecutionProvider(std::move(ep));
return *this;
}
cv::gapi::onnx::PyParams& cv::gapi::onnx::PyParams&
cv::gapi::onnx::PyParams::cfgDisableMemPattern() { cv::gapi::onnx::PyParams::cfgDisableMemPattern() {
m_priv->cfgDisableMemPattern(); m_priv->cfgDisableMemPattern();

View File

@ -145,9 +145,39 @@ public:
void run(); void run();
}; };
// Registers the ONNX Runtime CUDA Execution Provider on the given session
// options, using the device id taken from `cuda_ep`.
// Any std::exception raised while appending the provider is re-reported
// through cv::util::throw_error as a std::runtime_error with a backend prefix.
static void addCUDAExecutionProvider(Ort::SessionOptions *session_options,
                                     const cv::gapi::onnx::ep::CUDA &cuda_ep) {
    OrtCUDAProviderOptions cuda_options{};
    cuda_options.device_id = cuda_ep.device_id;

    try {
        session_options->AppendExecutionProvider_CUDA(cuda_options);
    } catch (const std::exception &err) {
        const std::string message =
            std::string("ONNX Backend: Failed to enable CUDA Execution Provider: ") + err.what();
        cv::util::throw_error(std::runtime_error(message));
    }
}
// Registers the ONNX Runtime TensorRT Execution Provider on the given session
// options, using the device id taken from `trt_ep`.
// Any std::exception raised while appending the provider is re-reported
// through cv::util::throw_error as a std::runtime_error with a backend prefix.
static void addTensorRTExecutionProvider(Ort::SessionOptions *session_options,
                                         const cv::gapi::onnx::ep::TensorRT &trt_ep) {
    OrtTensorRTProviderOptions trt_options{};
    trt_options.device_id = trt_ep.device_id;

    try {
        session_options->AppendExecutionProvider_TensorRT(trt_options);
    } catch (const std::exception &err) {
        const std::string message =
            std::string("ONNX Backend: Failed to enable TensorRT Execution Provider: ") + err.what();
        cv::util::throw_error(std::runtime_error(message));
    }
}
static void addOpenVINOExecutionProvider(Ort::SessionOptions *session_options, static void addOpenVINOExecutionProvider(Ort::SessionOptions *session_options,
const cv::gapi::onnx::ep::OpenVINO &ov_ep) { const cv::gapi::onnx::ep::OpenVINO &ov_ep) {
OrtOpenVINOProviderOptions options; OrtOpenVINOProviderOptions options{};
options.device_type = ov_ep.device_type.c_str(); options.device_type = ov_ep.device_type.c_str();
options.cache_dir = ov_ep.cache_dir.c_str(); options.cache_dir = ov_ep.cache_dir.c_str();
options.num_of_threads = ov_ep.num_of_threads; options.num_of_threads = ov_ep.num_of_threads;
@ -181,6 +211,18 @@ static void addExecutionProvider(Ort::SessionOptions *session_options,
addDMLExecutionProvider(session_options, dml_ep); addDMLExecutionProvider(session_options, dml_ep);
break; break;
} }
case ep::EP::index_of<ep::CUDA>(): {
GAPI_LOG_INFO(NULL, "CUDA Execution Provider is added.");
const auto &cuda_ep = cv::util::get<ep::CUDA>(execution_provider);
addCUDAExecutionProvider(session_options, cuda_ep);
break;
}
case ep::EP::index_of<ep::TensorRT>(): {
GAPI_LOG_INFO(NULL, "TensorRT Execution Provider is added.");
const auto &trt_ep = cv::util::get<ep::TensorRT>(execution_provider);
addTensorRTExecutionProvider(session_options, trt_ep);
break;
}
default: default:
GAPI_LOG_INFO(NULL, "CPU Execution Provider is added."); GAPI_LOG_INFO(NULL, "CPU Execution Provider is added.");
break; break;

View File

@ -260,6 +260,10 @@ typedef uint32_t __u32;
#define V4L2_CID_IRIS_ABSOLUTE (V4L2_CID_CAMERA_CLASS_BASE+17) #define V4L2_CID_IRIS_ABSOLUTE (V4L2_CID_CAMERA_CLASS_BASE+17)
#endif #endif
// Fallback used only when the included headers do not define v4l2_fourcc_be:
// the regular v4l2_fourcc code with bit 31 set.
// NOTE(review): bit 31 is presumably the big-endian marker (per the _be suffix) — confirm against linux/videodev2.h.
#ifndef v4l2_fourcc_be
#define v4l2_fourcc_be(a, b, c, d) (v4l2_fourcc(a, b, c, d) | (1U << 31))
#endif
#ifndef V4L2_PIX_FMT_Y10 #ifndef V4L2_PIX_FMT_Y10
#define V4L2_PIX_FMT_Y10 v4l2_fourcc('Y', '1', '0', ' ') #define V4L2_PIX_FMT_Y10 v4l2_fourcc('Y', '1', '0', ' ')
#endif #endif

View File

@ -65,7 +65,7 @@ inline std::string fourccToStringSafe(int fourcc)
{ {
std::string res = fourccToString(fourcc); std::string res = fourccToString(fourcc);
// TODO: return hex values for invalid characters // TODO: return hex values for invalid characters
std::transform(res.begin(), res.end(), res.begin(), [](uint8_t c) { return (c >= '0' && c <= 'z') ? c : (c == ' ' ? '_' : 'x'); }); std::transform(res.begin(), res.end(), res.begin(), [](char c) -> char { return (c >= '0' && c <= 'z') ? c : (c == ' ' ? '_' : 'x'); });
return res; return res;
} }

View File

@ -22,6 +22,9 @@
#include <linux/videodev2.h> #include <linux/videodev2.h>
// workarounds for older versions // workarounds for older versions
// Fallback used only when <linux/videodev2.h> does not define v4l2_fourcc_be:
// the regular v4l2_fourcc code with bit 31 set.
// NOTE(review): bit 31 is presumably the big-endian marker (per the _be suffix) — confirm against the kernel header.
#ifndef v4l2_fourcc_be
#define v4l2_fourcc_be(a, b, c, d) (v4l2_fourcc(a, b, c, d) | (1U << 31))
#endif
#ifndef V4L2_PIX_FMT_Y10 #ifndef V4L2_PIX_FMT_Y10
#define V4L2_PIX_FMT_Y10 v4l2_fourcc('Y', '1', '0', ' ') #define V4L2_PIX_FMT_Y10 v4l2_fourcc('Y', '1', '0', ' ')
#endif #endif