mirror of
https://github.com/opencv/opencv.git
synced 2024-12-12 07:09:12 +08:00
Merge branch 4.x
This commit is contained in:
commit
a6748df587
@ -74,6 +74,10 @@ if(POLICY CMP0077)
|
||||
cmake_policy(SET CMP0077 NEW) # CMake 3.13+: option() honors normal variables.
|
||||
endif()
|
||||
|
||||
if(POLICY CMP0146)
|
||||
cmake_policy(SET CMP0146 OLD) # CMake 3.27+: use CMake FindCUDA if available.
|
||||
endif()
|
||||
|
||||
#
|
||||
# Configure OpenCV CMake hooks
|
||||
#
|
||||
|
@ -643,4 +643,69 @@ INSTANTIATE_TEST_CASE_P(/**/, Layer_ScatterND, testing::Values(std::make_tuple(D
|
||||
INSTANTIATE_TEST_CASE_P(/**/, Layer_LayerNorm, testing::Values(std::make_tuple(DNN_BACKEND_OPENCV, DNN_TARGET_CPU)));
|
||||
INSTANTIATE_TEST_CASE_P(/**/, Layer_LayerNormExpanded, testing::Values(std::make_tuple(DNN_BACKEND_OPENCV, DNN_TARGET_CPU)));
|
||||
|
||||
|
||||
typedef TestBaseWithParam<tuple<Vec4i, int, bool, tuple<Backend, Target> > > Layer_FullyConnected;
|
||||
// Benchmarks the forward pass of a network made of a single "InnerProduct" layer.
// Test parameters: <input shape, output dimension, is_matmul flag, (backend, target)>.
PERF_TEST_P_(Layer_FullyConnected, fc)
{
    // Unpack the test parameters once.
    const auto params = GetParam();
    const Vec4i shapeParam = get<0>(params);
    const int outDims = get<1>(params);
    const bool isMatMul = get<2>(params);
    const int backendId = get<0>(get<3>(params));
    const int targetId = get<1>(get<3>(params));

    // The first zero entry of the shape parameter terminates the input shape.
    std::vector<int> inpShape;
    inpShape.reserve(4);
    for (int i = 0; i < 4 && shapeParam[i] != 0; ++i)
        inpShape.push_back(shapeParam[i]);

    Mat input(inpShape, CV_32F);
    randn(input, 0, 1);

    const int axis = input.dims - 1;

    // MatMul mode: same shape as the input with the second-to-last dimension
    // replaced by outDims. Otherwise: a 2D (outDims x size-of-last-axis) matrix.
    std::vector<int> weightShape;
    if (!isMatMul) {
        weightShape = {outDims, (int)input.total(axis, input.dims)};
    } else {
        weightShape = inpShape;
        weightShape[weightShape.size() - 2] = outDims;
    }
    Mat weights(weightShape, CV_32F);
    randn(weights, 0, 1);

    LayerParams lp;
    lp.set("axis", axis);
    lp.set("is_matmul", weights.dims > 2);
    lp.set("bias_term", false);
    lp.set("transB", true);
    lp.set("num_output", (int)weights.total(0, weights.dims - 1));
    lp.blobs.resize(1, weights);

    Net net;
    net.addLayerToPrev("matmul", "InnerProduct", lp);

    net.setInput(input);
    net.setPreferableBackend(backendId);
    net.setPreferableTarget(targetId);

    // warmup: run once outside the measured cycle
    Mat warmupOutput = net.forward();

    TEST_CYCLE()
    {
        net.forward();
    }
    SANITY_CHECK_NOTHING();
}
|
||||
INSTANTIATE_TEST_CASE_P(/**/, Layer_FullyConnected, Combine(
|
||||
Values( // input size
|
||||
Vec4i(5, 512, 384),
|
||||
Vec4i(5, 16, 512, 128)
|
||||
),
|
||||
Values(256, 512, 1024), // output dimension
|
||||
testing::Bool(), // is_matmul
|
||||
dnnBackendsAndTargets()
|
||||
));
|
||||
|
||||
} // namespace
|
||||
|
@ -248,6 +248,11 @@ void selu(const Stream& stream, Span<T> output, View<T> input, T alpha, T gamma)
|
||||
generic_op<T, SeluFunctor<T>>(stream, output, input, {alpha, gamma});
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void gelu(const Stream& stream, Span<T> output, View<T> input) {
|
||||
generic_op<T, GeluFunctor<T>>(stream, output, input);
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void sign(const Stream& stream, Span<T> output, View<T> input) {
|
||||
generic_op<T, SignFunctor<T>>(stream, output, input);
|
||||
@ -324,6 +329,7 @@ template void tan<__half>(const Stream&, Span<__half>, View<__half>);
|
||||
template void celu<__half>(const Stream&, Span<__half>, View<__half>, __half);
|
||||
template void hardsigmoid<__half>(const Stream&, Span<__half>, View<__half>, __half, __half);
|
||||
template void selu<__half>(const Stream&, Span<__half>, View<__half>, __half, __half);
|
||||
template void gelu<__half>(const Stream&, Span<__half>, View<__half>);
|
||||
template void thresholdedrelu<__half>(const Stream&, Span<__half>, View<__half>, __half);
|
||||
template void power<__half>(const Stream&, Span<__half>, View<__half>, __half, __half, __half);
|
||||
template void exp<__half>(const Stream&, Span<__half>, View<__half>, __half, __half);
|
||||
@ -366,6 +372,7 @@ template void tan<float>(const Stream&, Span<float>, View<float>);
|
||||
template void celu<float>(const Stream&, Span<float>, View<float>, float);
|
||||
template void hardsigmoid<float>(const Stream&, Span<float>, View<float>, float, float);
|
||||
template void selu<float>(const Stream&, Span<float>, View<float>, float, float);
|
||||
template void gelu<float>(const Stream&, Span<float>, View<float>);
|
||||
template void thresholdedrelu<float>(const Stream&, Span<float>, View<float>, float);
|
||||
template void power<float>(const Stream&, Span<float>, View<float>, float, float, float);
|
||||
template void exp<float>(const Stream&, Span<float>, View<float>, float, float);
|
||||
|
@ -588,6 +588,21 @@ struct SeluFunctor {
|
||||
T alpha, gamma;
|
||||
};
|
||||
|
||||
template <class T>
|
||||
struct GeluFunctor {
|
||||
struct Params {
|
||||
CUDA4DNN_HOST_DEVICE Params() { }
|
||||
};
|
||||
|
||||
CUDA4DNN_DEVICE GeluFunctor() { }
|
||||
CUDA4DNN_DEVICE GeluFunctor(const Params& params) { }
|
||||
|
||||
CUDA4DNN_DEVICE T operator()(T value) {
|
||||
using csl::device::erf;
|
||||
return static_cast<T>(0.5f) * value * (static_cast<T>(1.f) + erf(value * static_cast<T>(M_SQRT1_2)));
|
||||
}
|
||||
};
|
||||
|
||||
template <class T>
|
||||
struct ThresholdedReluFunctor {
|
||||
struct Params {
|
||||
|
@ -114,6 +114,9 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels {
|
||||
template <class T>
|
||||
void selu(const csl::Stream& stream, csl::Span<T> output, csl::View<T> input, T alpha, T gamma);
|
||||
|
||||
template <class T>
|
||||
void gelu(const csl::Stream& stream, csl::Span<T> output, csl::View<T> input);
|
||||
|
||||
template <class T>
|
||||
void thresholdedrelu(const csl::Stream& stream, csl::Span<T> output, csl::View<T> input, T alpha);
|
||||
|
||||
|
@ -537,6 +537,20 @@ namespace cv { namespace dnn { namespace cuda4dnn {
|
||||
const T alpha, gamma;
|
||||
};
|
||||
|
||||
template <class T>
|
||||
class GeluOp final : public BaseOp<GeluOp, T> {
|
||||
public:
|
||||
GeluOp(csl::Stream stream_) : stream(std::move(stream_)) { }
|
||||
|
||||
void calculate(csl::TensorSpan<T> output, csl::TensorView<T> input) const
|
||||
{
|
||||
kernels::gelu<T>(stream, output, input);
|
||||
}
|
||||
|
||||
private:
|
||||
csl::Stream stream;
|
||||
};
|
||||
|
||||
template <class T>
|
||||
class ThresholdedReluOp final : public BaseOp<ThresholdedReluOp, T> {
|
||||
public:
|
||||
|
@ -111,7 +111,7 @@ namespace cv { namespace dnn { namespace cuda4dnn {
|
||||
* or there might be several weights
|
||||
* or we don't have to scale
|
||||
*/
|
||||
if (weight != 1.0)
|
||||
if (weight != static_cast<T>(1.0f))
|
||||
{
|
||||
kernels::scale1_with_bias1<T>(stream, output, input, weight, 1.0);
|
||||
}
|
||||
|
@ -121,7 +121,7 @@ namespace cv { namespace dnn { namespace cuda4dnn {
|
||||
new_coords
|
||||
);
|
||||
|
||||
if (nms_iou_threshold > 0) {
|
||||
if (nms_iou_threshold > static_cast<T>(0.0f)) {
|
||||
auto output_mat = output_wrapper->getMutableHostMat();
|
||||
CV_Assert(output_mat.type() == CV_32F);
|
||||
for (int i = 0; i < input.get_axis_size(0); i++) {
|
||||
|
@ -446,66 +446,6 @@ void InfEngineNgraphNet::addOutput(const Ptr<InfEngineNgraphNode>& node)
|
||||
requestedOutputs.insert({name, node.get()});
|
||||
}
|
||||
|
||||
// Registers `ptr` in all_nodes under the friendly name of the node it points to.
// emplace() leaves an existing entry with the same name untouched.
void InfEngineNgraphNet::setNodePtr(std::shared_ptr<ngraph::Node>* ptr) {
    const std::string& friendlyName = (*ptr)->get_friendly_name();
    all_nodes.emplace(friendlyName, ptr);
}
|
||||
|
||||
void InfEngineNgraphNet::release()
|
||||
{
|
||||
// FIXIT release should not be conditional, release ALL
|
||||
for (auto& node : components.back()) {
|
||||
#if INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2020_4)
|
||||
if (!(ngraph::op::is_parameter(node) || ngraph::op::is_output(node) || ngraph::op::is_constant(node)) ) {
|
||||
#else
|
||||
if (!(node->is_parameter() || node->is_output() || node->is_constant()) ) {
|
||||
#endif
|
||||
auto it = all_nodes.find(node->get_friendly_name());
|
||||
if (it != all_nodes.end()) {
|
||||
it->second->reset();
|
||||
all_nodes.erase(it);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Recursive depth-first traversal: marks `node` as used, appends it to `comp`,
// then visits every not-yet-used node among its users and its input producers.
void InfEngineNgraphNet::dfs(std::shared_ptr<ngraph::Node>& node,
                             std::vector<std::shared_ptr<ngraph::Node>>& comp,
                             std::unordered_map<std::string, bool>& used) {
    used[node->get_friendly_name()] = true;
    comp.push_back(node);

    // Neighbours = users of this node followed by the producers of its inputs.
    auto neighbours = node->get_users();
    for (size_t idx = 0; idx < node->get_input_size(); ++idx) {
        auto producer = node->input_value(idx).get_node()->shared_from_this();
        neighbours.push_back(producer);
    }

    for (auto& next : neighbours) {
        if (used[next->get_friendly_name()])
            continue;
        dfs(next, comp, used);
    }
}
|
||||
|
||||
int InfEngineNgraphNet::getNumComponents()
|
||||
{
|
||||
if (!components.empty()) {
|
||||
return components.size();
|
||||
}
|
||||
std::unordered_map<std::string, bool> used;
|
||||
auto inputs = ngraph_function->get_ordered_ops();
|
||||
for (auto& node : inputs) {
|
||||
used.emplace(node->get_friendly_name(), false);
|
||||
}
|
||||
|
||||
for (auto& node : inputs) {
|
||||
if (!used[node->get_friendly_name()]) {
|
||||
std::vector<std::shared_ptr<ngraph::Node>> current_comp;
|
||||
dfs(node, current_comp, used);
|
||||
components.push_back(current_comp);
|
||||
}
|
||||
}
|
||||
return components.size();
|
||||
}
|
||||
|
||||
void InfEngineNgraphNet::createNet(Target targetId) {
|
||||
if (!hasNetOwner)
|
||||
{
|
||||
@ -524,47 +464,8 @@ void InfEngineNgraphNet::createNet(Target targetId) {
|
||||
}
|
||||
CV_Assert_N(!inputs_vec.empty(), !outs.empty());
|
||||
ngraph_function = std::make_shared<ngraph::Function>(outs, inputs_vec);
|
||||
|
||||
int num_comp = getNumComponents();
|
||||
CV_LOG_DEBUG(NULL, "DNN/IE: number of subgraphs: " << num_comp);
|
||||
if (num_comp > 1) {
|
||||
for (int i = num_comp - 1; i >= 0; --i) {
|
||||
ngraph::ResultVector outputs;
|
||||
ngraph::ParameterVector inps;
|
||||
for (auto& node : components.back()) {
|
||||
#if INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2020_4)
|
||||
if (ngraph::op::is_parameter(node)) {
|
||||
#else
|
||||
if (node->is_parameter()) {
|
||||
#endif
|
||||
CV_LOG_DEBUG(NULL, "DNN/IE: subgraph[" << i << "]: +input[" << inps.size() << "] = '" << node->get_friendly_name() << "'");
|
||||
auto parameter = std::dynamic_pointer_cast<ngraph::op::Parameter>(node);
|
||||
inps.push_back(parameter);
|
||||
}
|
||||
#if INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2020_4)
|
||||
else if (ngraph::op::is_output(node)) {
|
||||
#else
|
||||
else if (node->is_output()) {
|
||||
#endif
|
||||
CV_LOG_DEBUG(NULL, "DNN/IE: subgraph[" << i << "]: +output[" << outputs.size() << "] = '" << node->get_friendly_name() << "'");
|
||||
auto result = std::dynamic_pointer_cast<ngraph::op::Result>(node);
|
||||
outputs.push_back(result);
|
||||
}
|
||||
}
|
||||
CV_LOG_DEBUG(NULL, "DNN/IE: subgraph[" << i << ": nodes=" << components.back().size() << " inputs=" << inps.size() << " outputs=" << outputs.size());
|
||||
isInit = false;
|
||||
CV_Assert_N(!inps.empty(), !outputs.empty());
|
||||
ngraph_function = std::make_shared<ngraph::Function>(outputs, inps);
|
||||
release();
|
||||
components.pop_back();
|
||||
init(targetId);
|
||||
}
|
||||
} else {
|
||||
release();
|
||||
components.clear();
|
||||
init(targetId);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#if INF_ENGINE_VER_MAJOR_LT(INF_ENGINE_RELEASE_2022_1)
|
||||
|
@ -50,22 +50,14 @@ public:
|
||||
void addBlobs(const std::vector<cv::Ptr<BackendWrapper> >& ptrs);
|
||||
|
||||
void createNet(Target targetId);
|
||||
void setNodePtr(std::shared_ptr<ngraph::Node>* ptr);
|
||||
|
||||
void reset();
|
||||
|
||||
//private:
|
||||
detail::NetImplBase& netImpl_;
|
||||
|
||||
void release();
|
||||
int getNumComponents();
|
||||
void dfs(std::shared_ptr<ngraph::Node>& node, std::vector<std::shared_ptr<ngraph::Node>>& comp,
|
||||
std::unordered_map<std::string, bool>& used);
|
||||
|
||||
ngraph::ParameterVector inputs_vec;
|
||||
std::shared_ptr<ngraph::Function> ngraph_function;
|
||||
std::vector<std::vector<std::shared_ptr<ngraph::Node>>> components;
|
||||
std::unordered_map<std::string, std::shared_ptr<ngraph::Node>* > all_nodes;
|
||||
|
||||
InferenceEngine::ExecutableNetwork netExec;
|
||||
#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2022_1)
|
||||
|
@ -221,7 +221,7 @@ public:
|
||||
{
|
||||
return backendId == DNN_BACKEND_OPENCV ||
|
||||
(backendId == DNN_BACKEND_CUDA && !_groupByClasses) ||
|
||||
(backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && !_locPredTransposed && _bboxesNormalized);
|
||||
backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH;
|
||||
}
|
||||
|
||||
bool getMemoryShapes(const std::vector<MatShape> &inputs,
|
||||
@ -1006,9 +1006,30 @@ public:
|
||||
virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs, const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
|
||||
{
|
||||
CV_Assert(nodes.size() == 3);
|
||||
auto& box_logits = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
|
||||
auto& class_preds = nodes[1].dynamicCast<InfEngineNgraphNode>()->node;
|
||||
auto& proposals = nodes[2].dynamicCast<InfEngineNgraphNode>()->node;
|
||||
auto box_logits = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
|
||||
auto class_preds = nodes[1].dynamicCast<InfEngineNgraphNode>()->node;
|
||||
auto proposals = nodes[2].dynamicCast<InfEngineNgraphNode>()->node;
|
||||
|
||||
if (_locPredTransposed) {
|
||||
// Convert box predictions from yxYX to xyXY
|
||||
box_logits = std::make_shared<ngraph::op::v1::Reshape>(box_logits,
|
||||
std::make_shared<ngraph::op::Constant>(ngraph::element::i32, ngraph::Shape{3}, std::vector<int32_t>{0, -1, 2}),
|
||||
true
|
||||
);
|
||||
int axis = 2;
|
||||
box_logits = std::make_shared<ngraph::op::v1::Reverse>(box_logits,
|
||||
std::make_shared<ngraph::op::Constant>(ngraph::element::i32, ngraph::Shape{1}, &axis),
|
||||
ngraph::op::v1::Reverse::Mode::INDEX
|
||||
);
|
||||
}
|
||||
|
||||
auto shape = std::make_shared<ngraph::op::Constant>(ngraph::element::i32, ngraph::Shape{2}, std::vector<int32_t>{0, -1});
|
||||
box_logits = std::make_shared<ngraph::op::v1::Reshape>(box_logits, shape, true);
|
||||
class_preds = std::make_shared<ngraph::op::v1::Reshape>(class_preds, shape, true);
|
||||
proposals = std::make_shared<ngraph::op::v1::Reshape>(proposals,
|
||||
std::make_shared<ngraph::op::Constant>(ngraph::element::i32, ngraph::Shape{3}, std::vector<int32_t>{0, _varianceEncodedInTarget ? 1 : 2, -1}),
|
||||
true
|
||||
);
|
||||
|
||||
ngraph::op::DetectionOutputAttrs attrs;
|
||||
attrs.num_classes = _numClasses;
|
||||
|
@ -821,7 +821,7 @@ struct GeluFunctor : public BaseDefaultFunctor<GeluFunctor>
|
||||
|
||||
bool supportBackend(int backendId, int)
|
||||
{
|
||||
return backendId == DNN_BACKEND_OPENCV;
|
||||
return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_CUDA;
|
||||
}
|
||||
|
||||
inline float calculate(float x) const
|
||||
@ -829,6 +829,13 @@ struct GeluFunctor : public BaseDefaultFunctor<GeluFunctor>
|
||||
return 0.5f * x * (1.0f + erf(x * M_SQRT1_2));
|
||||
}
|
||||
|
||||
#ifdef HAVE_CUDA
|
||||
Ptr<BackendNode> initCUDA(int target, csl::Stream stream)
|
||||
{
|
||||
return make_cuda_node<cuda4dnn::GeluOp>(target, stream);
|
||||
}
|
||||
#endif
|
||||
|
||||
int64 getFLOPSPerElement() const { return 100; }
|
||||
};
|
||||
|
||||
|
@ -180,15 +180,12 @@ public:
|
||||
virtual bool supportBackend(int backendId) CV_OVERRIDE
|
||||
{
|
||||
bool tranAorB = transA || transB;
|
||||
#ifdef HAVE_INF_ENGINE
|
||||
if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
|
||||
return axis == 1 && !tranAorB;
|
||||
#endif
|
||||
return backendId == DNN_BACKEND_OPENCV ||
|
||||
backendId == DNN_BACKEND_CUDA ||
|
||||
(backendId == DNN_BACKEND_HALIDE && haveHalide() && axis == 1 && !tranAorB) ||
|
||||
(backendId == DNN_BACKEND_WEBNN && axis == 1 && !tranAorB) ||
|
||||
backendId == DNN_BACKEND_CANN ||
|
||||
backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH ||
|
||||
(backendId == DNN_BACKEND_VKCOM && haveVulkan() && !tranAorB);
|
||||
}
|
||||
|
||||
@ -630,9 +627,11 @@ public:
|
||||
|
||||
if(input_wrapper->getRank() == inp2Dim)
|
||||
return make_cuda_node<cuda4dnn::MatMulOp>(preferableTarget, std::move(context->stream), std::move(context->cublas_handle), oriMat, biasMat_, transA, transB);
|
||||
else
|
||||
else {
|
||||
CV_LOG_INFO(NULL, "DNN/CUDA: no implementation for MatMul with rank " << input_wrapper->getRank());
|
||||
return Ptr<BackendNode>();
|
||||
}
|
||||
}
|
||||
|
||||
auto flatten_start_axis = normalize_axis(axis, input_wrapper->getRank());
|
||||
return make_cuda_node<cuda4dnn::InnerProductOp>(preferableTarget, std::move(context->stream), std::move(context->cublas_handle), flatten_start_axis, weightsMat, biasMat_);
|
||||
@ -800,17 +799,26 @@ public:
|
||||
if (nodes.size() == 2)
|
||||
{
|
||||
auto& inp2 = nodes[1].dynamicCast<InfEngineNgraphNode>()->node;
|
||||
matmul = std::make_shared<ngraph::op::MatMul>(ieInpNode, inp2, false, false);
|
||||
matmul = std::make_shared<ngraph::op::MatMul>(ieInpNode, inp2, transA, transB);
|
||||
}
|
||||
else
|
||||
{
|
||||
std::vector<int64_t> data = {(int64_t)ieInpNode->get_shape()[0], (int64_t)blobs[0].size[1]};
|
||||
auto new_shape = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{2}, data.data());
|
||||
auto inp = std::make_shared<ngraph::op::v1::Reshape>(ieInpNode, new_shape, true);
|
||||
std::vector<int> shape(1 + normalize_axis(axis, ieInpNode->get_shape().size()), 0);
|
||||
shape[shape.size() - 1] = -1;
|
||||
auto inp = std::make_shared<ngraph::op::v1::Reshape>(
|
||||
ieInpNode,
|
||||
std::make_shared<ngraph::op::Constant>(ngraph::element::i32, ngraph::Shape{shape.size()}, shape.data()),
|
||||
true
|
||||
);
|
||||
|
||||
std::vector<size_t> weight_shape{(size_t)blobs[0].size[0], (size_t)blobs[0].size[1]};
|
||||
std::vector<size_t> weight_shape;
|
||||
if (isMatMul) {
|
||||
weight_shape = getShape<size_t>(oriMat);
|
||||
} else {
|
||||
weight_shape = {(size_t)blobs[0].size[0], (size_t)blobs[0].size[1]};
|
||||
}
|
||||
auto ieWeights = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, weight_shape, blobs[0].data);
|
||||
matmul = std::make_shared<ngraph::op::MatMul>(inp, ieWeights, false, true);
|
||||
matmul = std::make_shared<ngraph::op::MatMul>(inp, ieWeights, transA, transB);
|
||||
}
|
||||
|
||||
if (bias) {
|
||||
|
@ -13,6 +13,7 @@ Implementation of Batch Normalization layer.
|
||||
#include "layers_common.hpp"
|
||||
#include "../op_cuda.hpp"
|
||||
#include "../op_halide.hpp"
|
||||
#include "../ie_ngraph.hpp"
|
||||
#include <opencv2/dnn/shape_utils.hpp>
|
||||
#include <opencv2/core/utils/logger.hpp>
|
||||
|
||||
@ -41,6 +42,7 @@ public:
|
||||
{
|
||||
return backendId == DNN_BACKEND_OPENCV ||
|
||||
backendId == DNN_BACKEND_CUDA ||
|
||||
backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH ||
|
||||
(backendId == DNN_BACKEND_HALIDE && haveHalide() && !poolPad.width && !poolPad.height);
|
||||
}
|
||||
|
||||
@ -181,6 +183,50 @@ public:
|
||||
#endif // HAVE_HALIDE
|
||||
return Ptr<BackendNode>();
|
||||
}
|
||||
|
||||
#ifdef HAVE_DNN_NGRAPH
|
||||
// Builds the nGraph subgraph for this layer.
//
// nodes[0] supplies the feature values and nodes[1] the indices. Both are
// flattened to 1D, the features are scattered into a zero-filled 1D constant
// at the given indices (ScatterElementsUpdate along axis 0), and the result is
// reshaped to the output shape reported by getMemoryShapes().
//
// `inputs` is not used. Returns the backend node wrapping the final Reshape.
// Raises via CV_Assert when fewer than two input nodes are supplied.
virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs,
                                    const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
{
    // nodes[0] and nodes[1] are dereferenced unconditionally below.
    CV_Assert(nodes.size() >= 2);
    auto features = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
    auto indices = nodes[1].dynamicCast<InfEngineNgraphNode>()->node;

    // Derive the output shape from the shapes of the incoming nGraph nodes.
    std::vector<MatShape> inpShapes(nodes.size());
    std::vector<MatShape> outShapes, internals;
    for (size_t i = 0; i < nodes.size(); ++i) {
        std::vector<size_t> shape = nodes[i].dynamicCast<InfEngineNgraphNode>()->node->get_shape();
        inpShapes[i] = std::vector<int>(shape.begin(), shape.end());
    }
    getMemoryShapes(inpShapes, 1, outShapes, internals);

    // Flat zero-filled destination with as many elements as the output.
    Mat zeros = Mat::zeros(1, total(outShapes[0]), CV_32F);
    auto zeroInp = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape{zeros.total()}, zeros.data);

    // Reshape both inputs with target shape {-1}, i.e. flatten to 1D.
    int newShape = -1;
    features = std::make_shared<ngraph::op::v1::Reshape>(
        features,
        std::make_shared<ngraph::op::Constant>(ngraph::element::i32, ngraph::Shape{1}, &newShape),
        true
    );
    indices = std::make_shared<ngraph::op::v1::Reshape>(
        indices,
        std::make_shared<ngraph::op::Constant>(ngraph::element::i32, ngraph::Shape{1}, &newShape),
        true
    );
    // Indices that are neither i32 nor i64 are converted to i64.
    if (indices->get_element_type() != ngraph::element::i32 && indices->get_element_type() != ngraph::element::i64) {
        indices = std::make_shared<ngraph::op::Convert>(indices, ngraph::element::i64);
    }

    int axis = 0;
    std::shared_ptr<ngraph::Node> unpool = std::make_shared<ngraph::op::ScatterElementsUpdate>(zeroInp, indices, features,
        std::make_shared<ngraph::op::Constant>(ngraph::element::i32, ngraph::Shape{1}, &axis));

    // Restore the N-dimensional output shape.
    auto shape = std::make_shared<ngraph::op::Constant>(ngraph::element::i32, ngraph::Shape{outShapes[0].size()}, outShapes[0].data());
    unpool = std::make_shared<ngraph::op::v1::Reshape>(unpool, shape, true);

    return Ptr<BackendNode>(new InfEngineNgraphNode(unpool));
}
|
||||
#endif // HAVE_DNN_NGRAPH
|
||||
};
|
||||
|
||||
Ptr<MaxUnpoolLayer> MaxUnpoolLayer::create(const LayerParams& params)
|
||||
|
@ -209,7 +209,7 @@ public:
|
||||
#ifdef HAVE_INF_ENGINE
|
||||
if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
|
||||
{
|
||||
return !computeMaxIdx && type != STOCHASTIC && kernel_size.size() > 1 && (kernel_size.size() != 3 || !isArmComputePlugin());
|
||||
return type != STOCHASTIC && kernel_size.size() > 1 && (kernel_size.size() != 3 || !isArmComputePlugin());
|
||||
}
|
||||
#endif
|
||||
if (backendId == DNN_BACKEND_OPENCV)
|
||||
@ -613,9 +613,17 @@ public:
|
||||
return Ptr<BackendNode>(new InfEngineNgraphNode(reduce_sum));
|
||||
}
|
||||
else if (type == MAX) {
|
||||
auto max_pool = std::make_shared<ngraph::op::v1::MaxPool>(ieInpNode, ngraph::Strides(strides),
|
||||
std::shared_ptr<ngraph::Node> max_pool;
|
||||
if (computeMaxIdx) {
|
||||
std::vector<size_t> dilations(kernel_size.size(), 1);
|
||||
max_pool = std::make_shared<ngraph::op::v8::MaxPool>(ieInpNode, ngraph::Strides(strides), ngraph::Strides(dilations),
|
||||
ngraph::Shape(pads_begin), ngraph::Shape(pads_end), ngraph::Shape(kernel_size),
|
||||
rounding_type, pad_type);
|
||||
} else {
|
||||
max_pool = std::make_shared<ngraph::op::v1::MaxPool>(ieInpNode, ngraph::Strides(strides),
|
||||
ngraph::Shape(pads_begin), ngraph::Shape(pads_end), ngraph::Shape(kernel_size),
|
||||
rounding_type, pad_type);
|
||||
}
|
||||
return Ptr<BackendNode>(new InfEngineNgraphNode(max_pool));
|
||||
}
|
||||
else if (type == ROI) {
|
||||
|
@ -425,7 +425,7 @@ public:
|
||||
dtype* p_dst = dst.ptr<dtype>();
|
||||
|
||||
size_t main_index = start / last_unreduced_dim;
|
||||
size_t loop = start / last_unreduced_dim;
|
||||
size_t loop = start % last_unreduced_dim;
|
||||
size_t origin = unprojected_steps[main_index] + loop * last_unreduced_step;
|
||||
for (int i = start; i < end; ++i) {
|
||||
Op accumulator(n_reduce, p_src[origin + projected_steps[0]]);
|
||||
|
@ -410,7 +410,10 @@ public:
|
||||
}
|
||||
attrs.shape_calculation_mode = ngraph::op::v4::Interpolate::ShapeCalcMode::SIZES;
|
||||
|
||||
if (alignCorners) {
|
||||
CV_Assert(!halfPixelCenters || !alignCorners);
|
||||
if (halfPixelCenters) {
|
||||
attrs.coordinate_transformation_mode = ngraph::op::v4::Interpolate::CoordinateTransformMode::HALF_PIXEL;
|
||||
} else if (alignCorners) {
|
||||
attrs.coordinate_transformation_mode = ngraph::op::v4::Interpolate::CoordinateTransformMode::ALIGN_CORNERS;
|
||||
}
|
||||
|
||||
@ -427,7 +430,10 @@ public:
|
||||
}
|
||||
attrs.shape_calculation_mode = ngraph::op::v4::Interpolate::ShapeCalcMode::sizes;
|
||||
|
||||
if (alignCorners) {
|
||||
CV_Assert(!halfPixelCenters || !alignCorners);
|
||||
if (halfPixelCenters) {
|
||||
attrs.coordinate_transformation_mode = ngraph::op::v4::Interpolate::CoordinateTransformMode::half_pixel;
|
||||
} else if (alignCorners) {
|
||||
attrs.coordinate_transformation_mode = ngraph::op::v4::Interpolate::CoordinateTransformMode::align_corners;
|
||||
}
|
||||
|
||||
|
@ -476,13 +476,14 @@ void NetImplOpenVINO::initBackend(const std::vector<LayerPin>& blobsToKeep_)
|
||||
{
|
||||
int lid = ld.inputBlobsId[i].lid;
|
||||
int oid = ld.inputBlobsId[i].oid;
|
||||
if (oid == 0 || lid == 0)
|
||||
continue;
|
||||
|
||||
auto ieInpNode = inputNodes[i].dynamicCast<InfEngineNgraphNode>();
|
||||
const auto& ngraph_input_node = ieInpNode->node;
|
||||
CV_LOG_DEBUG(NULL, "DNN/IE: bind output port " << lid << ":" << oid << " (" << ngraph_input_node->get_friendly_name() << ":" << ngraph_input_node->get_type_info().name << ")");
|
||||
|
||||
if ((oid == 0 && ngraph_input_node->get_output_size() == 1) || lid == 0)
|
||||
continue;
|
||||
|
||||
// Handle parameters from other subnets. Output port is not used in this case
|
||||
#if INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2020_4)
|
||||
if ((ngraph::op::is_parameter(ngraph_input_node) || ngraph::op::is_constant(ngraph_input_node)) &&
|
||||
@ -549,7 +550,6 @@ void NetImplOpenVINO::initBackend(const std::vector<LayerPin>& blobsToKeep_)
|
||||
break;
|
||||
}
|
||||
}
|
||||
ieNode->net->setNodePtr(&ieNode->node);
|
||||
|
||||
net->addBlobs(ld.inputBlobsWrappers);
|
||||
net->addBlobs(ld.outputBlobsWrappers);
|
||||
|
@ -1385,7 +1385,12 @@ void ONNXImporter::parseSplit(LayerParams& layerParams, const opencv_onnx::NodeP
|
||||
CV_Assert(constBlobs.find(node_proto.input(1)) != constBlobs.end());
|
||||
Mat splitsBlob = getBlob(node_proto, 1);
|
||||
int splitSize = splitsBlob.total();
|
||||
|
||||
if (splitSize == 1)
|
||||
{
|
||||
layerParams.set("num_split", 1);
|
||||
}
|
||||
else
|
||||
{
|
||||
std::vector<int> slicePoints(splitSize - 1, splitsBlob.at<int>(0));
|
||||
for (int i = 1; i < splitSize - 1; ++i)
|
||||
{
|
||||
@ -1393,6 +1398,7 @@ void ONNXImporter::parseSplit(LayerParams& layerParams, const opencv_onnx::NodeP
|
||||
}
|
||||
layerParams.set("slice_point", DictValue::arrayInt(&slicePoints[0], slicePoints.size()));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
layerParams.set("num_split", node_proto.output_size());
|
||||
@ -1965,9 +1971,11 @@ void ONNXImporter::parseGemm(LayerParams& layerParams, const opencv_onnx::NodePr
|
||||
}
|
||||
|
||||
int transB = layerParams.get<int>("transB", 0);
|
||||
int secondInpDims;
|
||||
if (constBlobs.find(node_proto.input(1)) != constBlobs.end())
|
||||
{
|
||||
Mat weights = getBlob(node_proto, 1);
|
||||
secondInpDims = weights.dims;
|
||||
|
||||
if (transA == 0) // optimized branch, for now, we can only optimize the Gemm when transA = 0.
|
||||
{
|
||||
@ -1993,7 +2001,10 @@ void ONNXImporter::parseGemm(LayerParams& layerParams, const opencv_onnx::NodePr
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
layerParams.set("transB", transB == 1);
|
||||
secondInpDims = outShapes[node_proto.input(1)].size();
|
||||
}
|
||||
|
||||
if (node_proto.input_size() == 3)
|
||||
{
|
||||
@ -2002,7 +2013,7 @@ void ONNXImporter::parseGemm(LayerParams& layerParams, const opencv_onnx::NodePr
|
||||
}
|
||||
|
||||
layerParams.set("bias_term", node_proto.input_size() == 3);
|
||||
layerParams.set("is_matmul", true);
|
||||
layerParams.set("is_matmul", secondInpDims > 2);
|
||||
addLayer(layerParams, node_proto);
|
||||
}
|
||||
|
||||
@ -2045,7 +2056,7 @@ void ONNXImporter::parseMatMul(LayerParams& layerParams, const opencv_onnx::Node
|
||||
layerParams.blobs.push_back(transBlob);
|
||||
int numOutput = layerParams.blobs[0].total(0, secondInpDims - 1);
|
||||
layerParams.set("num_output", numOutput);
|
||||
layerParams.set("is_matmul", true);
|
||||
layerParams.set("is_matmul", secondInpDims > 2);
|
||||
} else
|
||||
secondInpDims = outShapes[node_proto.input(1)].size();
|
||||
|
||||
|
@ -731,21 +731,23 @@ TEST_P(Test_Caffe_nets, FasterRCNN_vgg16)
|
||||
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
|
||||
#endif
|
||||
|
||||
double scoreDiff = 0.0;
|
||||
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000)
|
||||
// Check 'backward_compatible_check || in_out_elements_equal' failed at core/src/op/reshape.cpp:427:
|
||||
// While validating node 'v1::Reshape bbox_pred_reshape (bbox_pred[0]:f32{1,84}, Constant_265242[0]:i64{4}) -> (f32{?,?,?,?})' with friendly_name 'bbox_pred_reshape':
|
||||
// Requested output shape {1,6300,4,1} is incompatible with input shape {1, 84}
|
||||
double scoreDiff = 0.0, iouDiff = 0.0;
|
||||
#if defined(INF_ENGINE_RELEASE)
|
||||
if (target == DNN_TARGET_MYRIAD)
|
||||
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
|
||||
if (target == DNN_TARGET_OPENCL_FP16)
|
||||
scoreDiff = 0.02;
|
||||
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) {
|
||||
iouDiff = 0.02;
|
||||
if (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16) {
|
||||
scoreDiff = 0.04;
|
||||
iouDiff = 0.06;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
static Mat ref = (Mat_<float>(3, 7) << 0, 2, 0.949398, 99.2454, 210.141, 601.205, 462.849,
|
||||
0, 7, 0.997022, 481.841, 92.3218, 722.685, 175.953,
|
||||
0, 12, 0.993028, 133.221, 189.377, 350.994, 563.166);
|
||||
testFaster("faster_rcnn_vgg16.prototxt", "VGG16_faster_rcnn_final.caffemodel", ref, scoreDiff);
|
||||
testFaster("faster_rcnn_vgg16.prototxt", "VGG16_faster_rcnn_final.caffemodel", ref, scoreDiff, iouDiff);
|
||||
}
|
||||
|
||||
TEST_P(Test_Caffe_nets, FasterRCNN_zf)
|
||||
@ -766,9 +768,6 @@ TEST_P(Test_Caffe_nets, FasterRCNN_zf)
|
||||
);
|
||||
#endif
|
||||
|
||||
if ((backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 ||
|
||||
backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && target == DNN_TARGET_OPENCL_FP16)
|
||||
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16);
|
||||
if ((backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 ||
|
||||
backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && target == DNN_TARGET_MYRIAD)
|
||||
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD);
|
||||
@ -779,7 +778,14 @@ TEST_P(Test_Caffe_nets, FasterRCNN_zf)
|
||||
static Mat ref = (Mat_<float>(3, 7) << 0, 2, 0.90121, 120.407, 115.83, 570.586, 528.395,
|
||||
0, 7, 0.988779, 469.849, 75.1756, 718.64, 186.762,
|
||||
0, 12, 0.967198, 138.588, 206.843, 329.766, 553.176);
|
||||
testFaster("faster_rcnn_zf.prototxt", "ZF_faster_rcnn_final.caffemodel", ref);
|
||||
|
||||
double scoreDiff = 0.0, iouDiff = 0.0;
|
||||
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) {
|
||||
scoreDiff = 0.02;
|
||||
iouDiff = 0.13;
|
||||
}
|
||||
|
||||
testFaster("faster_rcnn_zf.prototxt", "ZF_faster_rcnn_final.caffemodel", ref, scoreDiff, iouDiff);
|
||||
}
|
||||
|
||||
TEST_P(Test_Caffe_nets, RFCN)
|
||||
@ -802,8 +808,8 @@ TEST_P(Test_Caffe_nets, RFCN)
|
||||
iouDiff = 0.12;
|
||||
}
|
||||
|
||||
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000)
|
||||
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16)
|
||||
#if defined(INF_ENGINE_RELEASE)
|
||||
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
|
||||
{
|
||||
scoreDiff = 0.1f;
|
||||
iouDiff = 0.2f;
|
||||
|
@ -102,11 +102,14 @@ TEST(Test_Darknet, read_yolo_voc_stream)
|
||||
class Test_Darknet_layers : public DNNTestLayer
|
||||
{
|
||||
public:
|
||||
void testDarknetLayer(const std::string& name, bool hasWeights = false, bool testBatchProcessing = true)
|
||||
void testDarknetLayer(const std::string& name, bool hasWeights = false, bool testBatchProcessing = true,
|
||||
double l1 = 0.0, double lInf = 0.0)
|
||||
{
|
||||
SCOPED_TRACE(name);
|
||||
Mat inp = blobFromNPY(findDataFile("dnn/darknet/" + name + "_in.npy"));
|
||||
Mat ref = blobFromNPY(findDataFile("dnn/darknet/" + name + "_out.npy"));
|
||||
l1 = l1 ? l1 : default_l1;
|
||||
lInf = lInf ? lInf : default_lInf;
|
||||
|
||||
std::string cfg = findDataFile("dnn/darknet/" + name + ".cfg");
|
||||
std::string model = "";
|
||||
@ -120,7 +123,7 @@ public:
|
||||
net.setPreferableTarget(target);
|
||||
net.setInput(inp);
|
||||
Mat out = net.forward();
|
||||
normAssert(out, ref, "", default_l1, default_lInf);
|
||||
normAssert(out, ref, "", l1, lInf);
|
||||
|
||||
if (inp.size[0] == 1 && testBatchProcessing) // test handling of batch size
|
||||
{
|
||||
@ -166,8 +169,8 @@ public:
|
||||
}*/
|
||||
ASSERT_EQ(out2.dims, ref2.dims) << ref.dims;
|
||||
|
||||
normAssert(out2(ranges0), ref2, "", default_l1, default_lInf);
|
||||
normAssert(out2(ranges1), ref2, "", default_l1, default_lInf);
|
||||
normAssert(out2(ranges0), ref2, "", l1, lInf);
|
||||
normAssert(out2(ranges1), ref2, "", l1, lInf);
|
||||
}
|
||||
}
|
||||
};
|
||||
@ -1046,7 +1049,7 @@ TEST_P(Test_Darknet_layers, region)
|
||||
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
|
||||
#endif
|
||||
|
||||
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000)
|
||||
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2022010000)
|
||||
// accuracy on CPU, OpenCL
|
||||
// Expected: (normL1) <= (l1), actual: 0.000358148 vs 1e-05
|
||||
// |ref| = 1.207319974899292
|
||||
@ -1116,7 +1119,12 @@ TEST_P(Test_Darknet_layers, connected)
|
||||
applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16);
|
||||
if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_CPU_FP16)
|
||||
applyTestTag(CV_TEST_TAG_DNN_SKIP_CPU_FP16);
|
||||
testDarknetLayer("connected", true);
|
||||
double l1 = 0.0;
|
||||
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL)
|
||||
{
|
||||
l1 = 3e-5;
|
||||
}
|
||||
testDarknetLayer("connected", true, true, l1);
|
||||
}
|
||||
|
||||
TEST_P(Test_Darknet_layers, relu)
|
||||
|
@ -361,22 +361,9 @@ TEST_P(MaxPooling, Accuracy)
|
||||
Backend backendId = get<0>(get<5>(GetParam()));
|
||||
Target targetId = get<1>(get<5>(GetParam()));
|
||||
|
||||
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LE(2018050000)
|
||||
if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && targetId == DNN_TARGET_MYRIAD
|
||||
&& inSize == Size(7, 6) && kernel == Size(3, 2)
|
||||
&& (stride == Size(1, 1) || stride == Size(2, 2))
|
||||
&& (pad == Size(0, 1) || pad == Size(1, 1))
|
||||
)
|
||||
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
|
||||
#endif
|
||||
|
||||
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2018050000)
|
||||
if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && targetId == DNN_TARGET_MYRIAD
|
||||
&& (kernel == Size(2, 2) || kernel == Size(3, 2))
|
||||
&& stride == Size(1, 1) && (pad == Size(0, 0) || pad == Size(0, 1))
|
||||
)
|
||||
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
|
||||
#endif
|
||||
// https://github.com/openvinotoolkit/openvino/issues/18731
|
||||
if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && stride != Size(1, 1))
|
||||
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
|
||||
|
||||
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2019010000)
|
||||
if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && targetId == DNN_TARGET_MYRIAD
|
||||
@ -467,6 +454,11 @@ TEST_P(FullyConnected, Accuracy)
|
||||
{
|
||||
l1 = 0.01;
|
||||
}
|
||||
if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && targetId == DNN_TARGET_OPENCL)
|
||||
{
|
||||
l1 = 5e-3;
|
||||
lInf = 7e-3;
|
||||
}
|
||||
#endif
|
||||
if (targetId == DNN_TARGET_CUDA_FP16)
|
||||
l1 = 0.015;
|
||||
|
@ -465,8 +465,8 @@ TEST_P(DNNTestHighLevelAPI, predict)
|
||||
const std::string modelPath = getOpenVINOModel(modelName, isFP16);
|
||||
ASSERT_FALSE(modelPath.empty()) << modelName;
|
||||
|
||||
std::string xmlPath = findDataFile(modelPath + ".xml");
|
||||
std::string binPath = findDataFile(modelPath + ".bin");
|
||||
std::string xmlPath = findDataFile(modelPath + ".xml", false);
|
||||
std::string binPath = findDataFile(modelPath + ".bin", false);
|
||||
|
||||
Model model(xmlPath, binPath);
|
||||
Mat frame = imread(findDataFile("dnn/googlenet_1.png"));
|
||||
|
@ -215,7 +215,13 @@ TEST_P(Test_Caffe_layers, InnerProduct)
|
||||
if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_CPU_FP16)
|
||||
applyTestTag(CV_TEST_TAG_DNN_SKIP_CPU_FP16);
|
||||
|
||||
testLayerUsingCaffeModels("layer_inner_product", true);
|
||||
double l1 = 0.0, lInf = 0.0;
|
||||
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16))
|
||||
{
|
||||
l1 = 5e-3;
|
||||
lInf = 2e-2;
|
||||
}
|
||||
testLayerUsingCaffeModels("layer_inner_product", true, true, l1, lInf);
|
||||
}
|
||||
|
||||
TEST_P(Test_Caffe_layers, Pooling_max)
|
||||
|
@ -447,14 +447,17 @@ TEST_P(Test_Model, DetectionOutput)
|
||||
{
|
||||
if (backend == DNN_BACKEND_OPENCV)
|
||||
scoreDiff = 4e-3;
|
||||
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2022010000)
|
||||
else if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
|
||||
scoreDiff = 4e-2;
|
||||
#endif
|
||||
else
|
||||
scoreDiff = 2e-2;
|
||||
iouDiff = 1.8e-1;
|
||||
}
|
||||
#if defined(INF_ENGINE_RELEASE)
|
||||
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
|
||||
{
|
||||
scoreDiff = 0.05;
|
||||
iouDiff = 0.08;
|
||||
}
|
||||
#endif
|
||||
|
||||
testDetectModel(weights_file, config_file, img_path, refClassIds, refConfidences, refBoxes,
|
||||
scoreDiff, iouDiff, confThreshold, nmsThreshold, size, mean);
|
||||
|
@ -579,9 +579,7 @@ CASE(test_dropout_default_mask_ratio)
|
||||
CASE(test_dropout_default_old)
|
||||
// no filter
|
||||
CASE(test_dropout_default_ratio)
|
||||
#if SKIP_SET_1
|
||||
SKIP;
|
||||
#endif
|
||||
// no filter
|
||||
CASE(test_dropout_random_old)
|
||||
// no filter
|
||||
CASE(test_dynamicquantizelinear)
|
||||
|
@ -52,7 +52,7 @@ public:
|
||||
}
|
||||
|
||||
void testONNXModels(const String& basename, const Extension ext = npy,
|
||||
const double l1 = 0, const float lInf = 0, const bool useSoftmax = false,
|
||||
double l1 = 0, double lInf = 0, const bool useSoftmax = false,
|
||||
bool checkNoFallbacks = true, int numInps = 1)
|
||||
{
|
||||
String onnxmodel = _tf("models/" + basename + ".onnx", required);
|
||||
@ -102,7 +102,12 @@ public:
|
||||
netSoftmax.setInput(ref);
|
||||
ref = netSoftmax.forward();
|
||||
}
|
||||
normAssert(ref, out, "", l1 ? l1 : default_l1, lInf ? lInf : default_lInf);
|
||||
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL)
|
||||
{
|
||||
l1 = std::max(l1, 1.4e-3);
|
||||
lInf = std::max(lInf, 8e-3);
|
||||
}
|
||||
normAssert(ref, out, basename.c_str(), l1 ? l1 : default_l1, lInf ? lInf : default_lInf);
|
||||
if (checkNoFallbacks)
|
||||
expectNoFallbacksFromIE(net);
|
||||
}
|
||||
|
@ -1816,6 +1816,11 @@ TEST_P(Test_TensorFlow_nets, Mask_RCNN)
|
||||
|
||||
double scoreDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD || target == DNN_TARGET_CPU_FP16) ? 0.2 : 2e-5;
|
||||
double iouDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD || target == DNN_TARGET_CPU_FP16) ? 0.018 : default_lInf;
|
||||
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
|
||||
{
|
||||
scoreDiff = std::max(scoreDiff, 0.06);
|
||||
iouDiff = std::max(iouDiff, 0.01);
|
||||
}
|
||||
normAssertDetections(refDetections, outDetections, "", /*threshold for zero confidence*/1e-5, scoreDiff, iouDiff);
|
||||
|
||||
// Output size of masks is NxCxHxW where
|
||||
|
@ -20,6 +20,14 @@ namespace opencv_test { namespace {
|
||||
using namespace cv;
|
||||
using namespace cv::dnn;
|
||||
|
||||
class Test_TFLite : public DNNTestLayer {
|
||||
public:
|
||||
void testModel(Net& net, const std::string& modelName, const Mat& input, double l1 = 0, double lInf = 0);
|
||||
void testModel(const std::string& modelName, const Mat& input, double l1 = 0, double lInf = 0);
|
||||
void testModel(const std::string& modelName, const Size& inpSize, double l1 = 0, double lInf = 0);
|
||||
void testLayer(const std::string& modelName, double l1 = 0, double lInf = 0);
|
||||
};
|
||||
|
||||
void testInputShapes(const Net& net, const std::vector<Mat>& inps) {
|
||||
std::vector<MatShape> inLayerShapes;
|
||||
std::vector<MatShape> outLayerShapes;
|
||||
@ -31,8 +39,14 @@ void testInputShapes(const Net& net, const std::vector<Mat>& inps) {
|
||||
}
|
||||
}
|
||||
|
||||
void testModel(Net& net, const std::string& modelName, const Mat& input, double l1 = 1e-5, double lInf = 1e-4)
|
||||
void Test_TFLite::testModel(Net& net, const std::string& modelName, const Mat& input, double l1, double lInf)
|
||||
{
|
||||
l1 = l1 ? l1 : default_l1;
|
||||
lInf = lInf ? lInf : default_lInf;
|
||||
|
||||
net.setPreferableBackend(backend);
|
||||
net.setPreferableTarget(target);
|
||||
|
||||
testInputShapes(net, {input});
|
||||
net.setInput(input);
|
||||
|
||||
@ -48,20 +62,20 @@ void testModel(Net& net, const std::string& modelName, const Mat& input, double
|
||||
}
|
||||
}
|
||||
|
||||
void testModel(const std::string& modelName, const Mat& input, double l1 = 1e-5, double lInf = 1e-4)
|
||||
void Test_TFLite::testModel(const std::string& modelName, const Mat& input, double l1, double lInf)
|
||||
{
|
||||
Net net = readNet(findDataFile("dnn/tflite/" + modelName + ".tflite", false));
|
||||
testModel(net, modelName, input, l1, lInf);
|
||||
}
|
||||
|
||||
void testModel(const std::string& modelName, const Size& inpSize, double l1 = 1e-5, double lInf = 1e-4)
|
||||
void Test_TFLite::testModel(const std::string& modelName, const Size& inpSize, double l1, double lInf)
|
||||
{
|
||||
Mat input = imread(findDataFile("cv/shared/lena.png"));
|
||||
input = blobFromImage(input, 1.0 / 255, inpSize, 0, true);
|
||||
testModel(modelName, input, l1, lInf);
|
||||
}
|
||||
|
||||
void testLayer(const std::string& modelName, double l1 = 1e-5, double lInf = 1e-4)
|
||||
void Test_TFLite::testLayer(const std::string& modelName, double l1, double lInf)
|
||||
{
|
||||
Mat inp = blobFromNPY(findDataFile("dnn/tflite/" + modelName + "_inp.npy"));
|
||||
Net net = readNet(findDataFile("dnn/tflite/" + modelName + ".tflite"));
|
||||
@ -69,29 +83,66 @@ void testLayer(const std::string& modelName, double l1 = 1e-5, double lInf = 1e-
|
||||
}
|
||||
|
||||
// https://google.github.io/mediapipe/solutions/face_mesh
|
||||
TEST(Test_TFLite, face_landmark)
|
||||
TEST_P(Test_TFLite, face_landmark)
|
||||
{
|
||||
testModel("face_landmark", Size(192, 192), 2e-5, 2e-4);
|
||||
if (backend == DNN_BACKEND_CUDA && target == DNN_TARGET_CUDA_FP16)
|
||||
applyTestTag(CV_TEST_TAG_DNN_SKIP_CUDA_FP16);
|
||||
double l1 = 2e-5, lInf = 2e-4;
|
||||
if (target == DNN_TARGET_CPU_FP16 || target == DNN_TARGET_CUDA_FP16 || target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD ||
|
||||
(backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL))
|
||||
{
|
||||
l1 = 0.15;
|
||||
lInf = 0.82;
|
||||
}
|
||||
testModel("face_landmark", Size(192, 192), l1, lInf);
|
||||
}
|
||||
|
||||
// https://google.github.io/mediapipe/solutions/face_detection
|
||||
TEST(Test_TFLite, face_detection_short_range)
|
||||
TEST_P(Test_TFLite, face_detection_short_range)
|
||||
{
|
||||
testModel("face_detection_short_range", Size(128, 128));
|
||||
double l1 = 0, lInf = 2e-4;
|
||||
if (target == DNN_TARGET_CPU_FP16 || target == DNN_TARGET_CUDA_FP16 || target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD ||
|
||||
(backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL))
|
||||
{
|
||||
l1 = 0.04;
|
||||
lInf = 0.8;
|
||||
}
|
||||
testModel("face_detection_short_range", Size(128, 128), l1, lInf);
|
||||
}
|
||||
|
||||
// https://google.github.io/mediapipe/solutions/selfie_segmentation
|
||||
TEST(Test_TFLite, selfie_segmentation)
|
||||
TEST_P(Test_TFLite, selfie_segmentation)
|
||||
{
|
||||
testModel("selfie_segmentation", Size(256, 256));
|
||||
double l1 = 0, lInf = 0;
|
||||
if (target == DNN_TARGET_CPU_FP16 || target == DNN_TARGET_CUDA_FP16 || target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD ||
|
||||
(backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL))
|
||||
{
|
||||
l1 = 0.01;
|
||||
lInf = 0.48;
|
||||
}
|
||||
testModel("selfie_segmentation", Size(256, 256), l1, lInf);
|
||||
}
|
||||
|
||||
TEST(Test_TFLite, max_unpooling)
|
||||
TEST_P(Test_TFLite, max_unpooling)
|
||||
{
|
||||
if (backend == DNN_BACKEND_CUDA)
|
||||
applyTestTag(CV_TEST_TAG_DNN_SKIP_CUDA);
|
||||
|
||||
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target != DNN_TARGET_CPU) {
|
||||
if (target == DNN_TARGET_OPENCL_FP16) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
|
||||
if (target == DNN_TARGET_OPENCL) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
|
||||
if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
|
||||
}
|
||||
|
||||
if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16)
|
||||
applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16);
|
||||
|
||||
// Since Max Unpooling is a numerically unstable operation and small differences between frameworks
|
||||
// might lead to positional difference of maximal elements in the tensor, this test checks
|
||||
// behavior of Max Unpooling layer only.
|
||||
Net net = readNet(findDataFile("dnn/tflite/hair_segmentation.tflite", false));
|
||||
net.setPreferableBackend(backend);
|
||||
net.setPreferableTarget(target);
|
||||
|
||||
Mat input = imread(findDataFile("cv/shared/lena.png"));
|
||||
cvtColor(input, input, COLOR_BGR2RGBA);
|
||||
@ -101,7 +152,15 @@ TEST(Test_TFLite, max_unpooling)
|
||||
net.setInput(input);
|
||||
|
||||
std::vector<std::vector<Mat> > outs;
|
||||
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) {
|
||||
// TODO: seems like a bug with retrieving intermediate tensors
|
||||
net.forward(outs, {"conv2d_transpose_4", "p_re_lu_1", "max_pooling_with_argmax2d", "conv2d_86", "max_unpooling2d_2"});
|
||||
outs.erase(outs.begin());
|
||||
}
|
||||
else {
|
||||
net.forward(outs, {"p_re_lu_1", "max_pooling_with_argmax2d", "conv2d_86", "max_unpooling2d_2"});
|
||||
}
|
||||
|
||||
ASSERT_EQ(outs.size(), 4);
|
||||
ASSERT_EQ(outs[0].size(), 1);
|
||||
ASSERT_EQ(outs[1].size(), 2);
|
||||
@ -117,6 +176,8 @@ TEST(Test_TFLite, max_unpooling)
|
||||
ASSERT_EQ(poolOut.size, poolIds.size);
|
||||
ASSERT_EQ(poolOut.size, unpoolInp.size);
|
||||
|
||||
ASSERT_EQ(countNonZero(poolInp), poolInp.total());
|
||||
|
||||
for (int c = 0; c < 32; ++c) {
|
||||
float *poolInpData = poolInp.ptr<float>(0, c);
|
||||
float *poolOutData = poolOut.ptr<float>(0, c);
|
||||
@ -135,15 +196,19 @@ TEST(Test_TFLite, max_unpooling)
|
||||
}
|
||||
}
|
||||
EXPECT_EQ(poolInpData[maxIdx], poolOutData[y * 64 + x]) << errMsg;
|
||||
if (backend != DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) {
|
||||
EXPECT_EQ(poolIdsData[y * 64 + x], (float)maxIdx) << errMsg;
|
||||
}
|
||||
EXPECT_EQ(unpoolOutData[maxIdx], unpoolInpData[y * 64 + x]) << errMsg;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TEST(Test_TFLite, EfficientDet_int8) {
|
||||
TEST_P(Test_TFLite, EfficientDet_int8) {
|
||||
Net net = readNet(findDataFile("dnn/tflite/coco_efficientdet_lite0_v1_1.0_quant_2021_09_06.tflite", false));
|
||||
net.setPreferableBackend(backend);
|
||||
net.setPreferableTarget(target);
|
||||
|
||||
Mat img = imread(findDataFile("dnn/dog416.png"));
|
||||
Mat blob = blobFromImage(img, 1.0, Size(320, 320));
|
||||
@ -158,10 +223,18 @@ TEST(Test_TFLite, EfficientDet_int8) {
|
||||
normAssertDetections(ref, out, "", 0.5, 0.05, 0.1);
|
||||
}
|
||||
|
||||
TEST(Test_TFLite, replicate_by_pack) {
|
||||
testLayer("replicate_by_pack");
|
||||
TEST_P(Test_TFLite, replicate_by_pack) {
|
||||
double l1 = 0, lInf = 0;
|
||||
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL)
|
||||
{
|
||||
l1 = 4e-4;
|
||||
lInf = 2e-3;
|
||||
}
|
||||
testLayer("replicate_by_pack", l1, lInf);
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(/**/, Test_TFLite, dnnBackendsAndTargets());
|
||||
|
||||
}} // namespace
|
||||
|
||||
#endif // OPENCV_TEST_DNN_TFLITE
|
||||
|
@ -39,6 +39,12 @@ public:
|
||||
GAPI_WRAP
|
||||
PyParams& cfgAddExecutionProvider(ep::DirectML ep);
|
||||
|
||||
GAPI_WRAP
|
||||
PyParams& cfgAddExecutionProvider(ep::CUDA ep);
|
||||
|
||||
GAPI_WRAP
|
||||
PyParams& cfgAddExecutionProvider(ep::TensorRT ep);
|
||||
|
||||
GAPI_WRAP
|
||||
PyParams& cfgDisableMemPattern();
|
||||
|
||||
|
@ -32,6 +32,56 @@ namespace onnx {
|
||||
*/
|
||||
namespace ep {
|
||||
|
||||
/**
|
||||
* @brief This structure provides functions
|
||||
* that fill inference options for CUDA Execution Provider.
|
||||
* Please follow https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html#cuda-execution-provider
|
||||
*/
|
||||
struct GAPI_EXPORTS_W_SIMPLE CUDA {
|
||||
// NB: Used from python.
|
||||
/// @private -- Exclude this constructor from OpenCV documentation
|
||||
GAPI_WRAP
|
||||
CUDA() = default;
|
||||
|
||||
/** @brief Class constructor.
|
||||
|
||||
Constructs CUDA parameters based on the target device id.
|
||||
|
||||
@param dev_id Target device id to use.
|
||||
*/
|
||||
GAPI_WRAP
|
||||
explicit CUDA(const int dev_id)
|
||||
: device_id(dev_id) {
|
||||
}
|
||||
|
||||
int device_id;
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief This structure provides functions
|
||||
* that fill inference options for TensorRT Execution Provider.
|
||||
* Please follow https://onnxruntime.ai/docs/execution-providers/TensorRT-ExecutionProvider.html#tensorrt-execution-provider
|
||||
*/
|
||||
struct GAPI_EXPORTS_W_SIMPLE TensorRT {
|
||||
// NB: Used from python.
|
||||
/// @private -- Exclude this constructor from OpenCV documentation
|
||||
GAPI_WRAP
|
||||
TensorRT() = default;
|
||||
|
||||
/** @brief Class constructor.
|
||||
|
||||
Constructs TensorRT parameters based on the target device id.
|
||||
|
||||
@param dev_id Target device id to use.
|
||||
*/
|
||||
GAPI_WRAP
|
||||
explicit TensorRT(const int dev_id)
|
||||
: device_id(dev_id) {
|
||||
}
|
||||
|
||||
int device_id;
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief This structure provides functions
|
||||
* that fill inference options for ONNX OpenVINO Execution Provider.
|
||||
@ -143,7 +193,11 @@ public:
|
||||
DeviceDesc ddesc;
|
||||
};
|
||||
|
||||
using EP = cv::util::variant<cv::util::monostate, OpenVINO, DirectML>;
|
||||
using EP = cv::util::variant< cv::util::monostate
|
||||
, OpenVINO
|
||||
, DirectML
|
||||
, CUDA
|
||||
, TensorRT>;
|
||||
|
||||
} // namespace ep
|
||||
|
||||
@ -431,6 +485,34 @@ public:
|
||||
return *this;
|
||||
}
|
||||
|
||||
/** @brief Adds execution provider for runtime.
|
||||
|
||||
The function is used to add ONNX Runtime CUDA Execution Provider options.
|
||||
|
||||
@param ep CUDA Execution Provider options.
|
||||
@see cv::gapi::onnx::ep::CUDA.
|
||||
|
||||
@return the reference on modified object.
|
||||
*/
|
||||
Params<Net>& cfgAddExecutionProvider(ep::CUDA&& ep) {
|
||||
desc.execution_providers.emplace_back(std::move(ep));
|
||||
return *this;
|
||||
}
|
||||
|
||||
/** @brief Adds execution provider for runtime.
|
||||
|
||||
The function is used to add ONNX Runtime TensorRT Execution Provider options.
|
||||
|
||||
@param ep TensorRT Execution Provider options.
|
||||
@see cv::gapi::onnx::ep::TensorRT.
|
||||
|
||||
@return the reference on modified object.
|
||||
*/
|
||||
Params<Net>& cfgAddExecutionProvider(ep::TensorRT&& ep) {
|
||||
desc.execution_providers.emplace_back(std::move(ep));
|
||||
return *this;
|
||||
}
|
||||
|
||||
/** @brief Disables the memory pattern optimization.
|
||||
|
||||
@return the reference on modified object.
|
||||
@ -491,6 +573,16 @@ public:
|
||||
desc.execution_providers.emplace_back(std::move(ep));
|
||||
}
|
||||
|
||||
/** @see onnx::Params::cfgAddExecutionProvider. */
|
||||
void cfgAddExecutionProvider(ep::CUDA&& ep) {
|
||||
desc.execution_providers.emplace_back(std::move(ep));
|
||||
}
|
||||
|
||||
/** @see onnx::Params::cfgAddExecutionProvider. */
|
||||
void cfgAddExecutionProvider(ep::TensorRT&& ep) {
|
||||
desc.execution_providers.emplace_back(std::move(ep));
|
||||
}
|
||||
|
||||
/** @see onnx::Params::cfgDisableMemPattern. */
|
||||
void cfgDisableMemPattern() {
|
||||
desc.disable_mem_pattern = true;
|
||||
|
@ -31,6 +31,8 @@ using map_string_and_vector_float = std::map<std::string, std::vector<float>>;
|
||||
using map_int_and_double = std::map<int, double>;
|
||||
using ep_OpenVINO = cv::gapi::onnx::ep::OpenVINO;
|
||||
using ep_DirectML = cv::gapi::onnx::ep::DirectML;
|
||||
using ep_CUDA = cv::gapi::onnx::ep::CUDA;
|
||||
using ep_TensorRT = cv::gapi::onnx::ep::TensorRT;
|
||||
|
||||
// NB: Python wrapper generate T_U for T<U>
|
||||
// This behavior is only observed for inputs
|
||||
|
@ -33,6 +33,18 @@ cv::gapi::onnx::PyParams::cfgAddExecutionProvider(cv::gapi::onnx::ep::DirectML e
|
||||
return *this;
|
||||
}
|
||||
|
||||
cv::gapi::onnx::PyParams&
|
||||
cv::gapi::onnx::PyParams::cfgAddExecutionProvider(cv::gapi::onnx::ep::CUDA ep) {
|
||||
m_priv->cfgAddExecutionProvider(std::move(ep));
|
||||
return *this;
|
||||
}
|
||||
|
||||
cv::gapi::onnx::PyParams&
|
||||
cv::gapi::onnx::PyParams::cfgAddExecutionProvider(cv::gapi::onnx::ep::TensorRT ep) {
|
||||
m_priv->cfgAddExecutionProvider(std::move(ep));
|
||||
return *this;
|
||||
}
|
||||
|
||||
cv::gapi::onnx::PyParams&
|
||||
cv::gapi::onnx::PyParams::cfgDisableMemPattern() {
|
||||
m_priv->cfgDisableMemPattern();
|
||||
|
@ -145,9 +145,39 @@ public:
|
||||
void run();
|
||||
};
|
||||
|
||||
static void addCUDAExecutionProvider(Ort::SessionOptions *session_options,
|
||||
const cv::gapi::onnx::ep::CUDA &cuda_ep) {
|
||||
OrtCUDAProviderOptions options{};
|
||||
options.device_id = cuda_ep.device_id;
|
||||
|
||||
try {
|
||||
session_options->AppendExecutionProvider_CUDA(options);
|
||||
} catch (const std::exception &e) {
|
||||
std::stringstream ss;
|
||||
ss << "ONNX Backend: Failed to enable CUDA"
|
||||
<< " Execution Provider: " << e.what();
|
||||
cv::util::throw_error(std::runtime_error(ss.str()));
|
||||
}
|
||||
}
|
||||
|
||||
static void addTensorRTExecutionProvider(Ort::SessionOptions *session_options,
|
||||
const cv::gapi::onnx::ep::TensorRT &trt_ep) {
|
||||
OrtTensorRTProviderOptions options{};
|
||||
options.device_id = trt_ep.device_id;
|
||||
|
||||
try {
|
||||
session_options->AppendExecutionProvider_TensorRT(options);
|
||||
} catch (const std::exception &e) {
|
||||
std::stringstream ss;
|
||||
ss << "ONNX Backend: Failed to enable TensorRT"
|
||||
<< " Execution Provider: " << e.what();
|
||||
cv::util::throw_error(std::runtime_error(ss.str()));
|
||||
}
|
||||
}
|
||||
|
||||
static void addOpenVINOExecutionProvider(Ort::SessionOptions *session_options,
|
||||
const cv::gapi::onnx::ep::OpenVINO &ov_ep) {
|
||||
OrtOpenVINOProviderOptions options;
|
||||
OrtOpenVINOProviderOptions options{};
|
||||
options.device_type = ov_ep.device_type.c_str();
|
||||
options.cache_dir = ov_ep.cache_dir.c_str();
|
||||
options.num_of_threads = ov_ep.num_of_threads;
|
||||
@ -181,6 +211,18 @@ static void addExecutionProvider(Ort::SessionOptions *session_options,
|
||||
addDMLExecutionProvider(session_options, dml_ep);
|
||||
break;
|
||||
}
|
||||
case ep::EP::index_of<ep::CUDA>(): {
|
||||
GAPI_LOG_INFO(NULL, "CUDA Execution Provider is added.");
|
||||
const auto &cuda_ep = cv::util::get<ep::CUDA>(execution_provider);
|
||||
addCUDAExecutionProvider(session_options, cuda_ep);
|
||||
break;
|
||||
}
|
||||
case ep::EP::index_of<ep::TensorRT>(): {
|
||||
GAPI_LOG_INFO(NULL, "TensorRT Execution Provider is added.");
|
||||
const auto &trt_ep = cv::util::get<ep::TensorRT>(execution_provider);
|
||||
addTensorRTExecutionProvider(session_options, trt_ep);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
GAPI_LOG_INFO(NULL, "CPU Execution Provider is added.");
|
||||
break;
|
||||
|
@ -260,6 +260,10 @@ typedef uint32_t __u32;
|
||||
#define V4L2_CID_IRIS_ABSOLUTE (V4L2_CID_CAMERA_CLASS_BASE+17)
|
||||
#endif
|
||||
|
||||
#ifndef v4l2_fourcc_be
|
||||
#define v4l2_fourcc_be(a, b, c, d) (v4l2_fourcc(a, b, c, d) | (1U << 31))
|
||||
#endif
|
||||
|
||||
#ifndef V4L2_PIX_FMT_Y10
|
||||
#define V4L2_PIX_FMT_Y10 v4l2_fourcc('Y', '1', '0', ' ')
|
||||
#endif
|
||||
|
@ -65,7 +65,7 @@ inline std::string fourccToStringSafe(int fourcc)
|
||||
{
|
||||
std::string res = fourccToString(fourcc);
|
||||
// TODO: return hex values for invalid characters
|
||||
std::transform(res.begin(), res.end(), res.begin(), [](uint8_t c) { return (c >= '0' && c <= 'z') ? c : (c == ' ' ? '_' : 'x'); });
|
||||
std::transform(res.begin(), res.end(), res.begin(), [](char c) -> char { return (c >= '0' && c <= 'z') ? c : (c == ' ' ? '_' : 'x'); });
|
||||
return res;
|
||||
}
|
||||
|
||||
|
@ -22,6 +22,9 @@
|
||||
#include <linux/videodev2.h>
|
||||
|
||||
// workarounds for older versions
|
||||
#ifndef v4l2_fourcc_be
|
||||
#define v4l2_fourcc_be(a, b, c, d) (v4l2_fourcc(a, b, c, d) | (1U << 31))
|
||||
#endif
|
||||
#ifndef V4L2_PIX_FMT_Y10
|
||||
#define V4L2_PIX_FMT_Y10 v4l2_fourcc('Y', '1', '0', ' ')
|
||||
#endif
|
||||
|
Loading…
Reference in New Issue
Block a user