opencv/modules/dnn/src/op_inf_engine.hpp
Dmitry Kurtaev c7ec0d599a
Merge pull request #23987 from dkurt:openvino_int8_backend
OpenVINO backend for INT8 models #23987

### Pull Request Readiness Checklist

TODO:
- [x] DetectionOutput layer (https://github.com/opencv/opencv/pull/24069)
- [x] Less FP32 fallbacks (i.e. Sigmoid, eltwise sum)
- [x] Accuracy, performance tests (https://github.com/opencv/opencv/pull/24039)
- [x] Single layer tests (convolution)
- [x] ~~Fixes for OpenVINO 2022.1 (https://pullrequest.opencv.org/buildbot/builders/precommit_custom_linux/builds/100334)~~


Performace results for object detection model `coco_efficientdet_lite0_v1_1.0_quant_2021_09_06.tflite`:
| backend | performance (median time) |
|---|---|
| OpenCV | 77.42ms |
| OpenVINO 2023.0 | 10.90ms |

CPU: `11th Gen Intel(R) Core(TM) i5-1135G7 @ 2.40GHz`

Serialized model per-layer stats (note that Convolution should use `*_I8` primitives if they are quantized correctly): https://gist.github.com/dkurt/7772bbf1907035441bb5454f19f0feef

---

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [x] I agree to contribute to the project under Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [x] The PR is proposed to the proper branch
- [x] There is a reference to the original bug report and related work
- [x] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
      Patch to opencv_extra has the same branch name.
- [x] The feature is well documented and sample code can be built with the project CMake
2023-09-28 16:24:43 +03:00

165 lines
4.7 KiB
C++

// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
//
// Copyright (C) 2018-2019, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
#ifndef __OPENCV_DNN_OP_INF_ENGINE_HPP__
#define __OPENCV_DNN_OP_INF_ENGINE_HPP__
#include "opencv2/core/cvdef.h"
#include "opencv2/core/cvstd.hpp"
#include "opencv2/dnn.hpp"
#include "opencv2/core/async.hpp"
#include "opencv2/core/detail/async_promise.hpp"
#include "opencv2/dnn/utils/inference_engine.hpp"
#ifdef HAVE_INF_ENGINE
#define INF_ENGINE_RELEASE_2020_2 2020020000
#define INF_ENGINE_RELEASE_2020_3 2020030000
#define INF_ENGINE_RELEASE_2020_4 2020040000
#define INF_ENGINE_RELEASE_2021_1 2021010000
#define INF_ENGINE_RELEASE_2021_2 2021020000
#define INF_ENGINE_RELEASE_2021_3 2021030000
#define INF_ENGINE_RELEASE_2021_4 2021040000
#define INF_ENGINE_RELEASE_2022_1 2022010000
#define INF_ENGINE_RELEASE_2023_0 2023000000
#ifndef INF_ENGINE_RELEASE
#warning("IE version have not been provided via command-line. Using 2021.4 by default")
#define INF_ENGINE_RELEASE INF_ENGINE_RELEASE_2021_4
#endif
#define INF_ENGINE_VER_MAJOR_GT(ver) (((INF_ENGINE_RELEASE) / 10000) > ((ver) / 10000))
#define INF_ENGINE_VER_MAJOR_GE(ver) (((INF_ENGINE_RELEASE) / 10000) >= ((ver) / 10000))
#define INF_ENGINE_VER_MAJOR_LT(ver) (((INF_ENGINE_RELEASE) / 10000) < ((ver) / 10000))
#define INF_ENGINE_VER_MAJOR_LE(ver) (((INF_ENGINE_RELEASE) / 10000) <= ((ver) / 10000))
#define INF_ENGINE_VER_MAJOR_EQ(ver) (((INF_ENGINE_RELEASE) / 10000) == ((ver) / 10000))
#if defined(__GNUC__) && __GNUC__ >= 5
//#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wsuggest-override"
#endif
#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2022_1)
#include <openvino/openvino.hpp>
#include <openvino/pass/serialize.hpp>
#include <openvino/pass/convert_fp32_to_fp16.hpp>
#else
#include <inference_engine.hpp>
#endif
#if defined(__GNUC__) && __GNUC__ >= 5
//#pragma GCC diagnostic pop
#endif
#endif // HAVE_INF_ENGINE
#define CV_ERROR_DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 do { CV_Error(Error::StsNotImplemented, "This OpenCV version is built without Inference Engine NN Builder API support (legacy API is not supported anymore)"); } while (0)
namespace cv { namespace dnn {
CV__DNN_INLINE_NS_BEGIN
namespace openvino {
// TODO: use std::string as parameter
bool checkTarget(Target target);
} // namespace openvino
CV__DNN_INLINE_NS_END
#ifdef HAVE_INF_ENGINE
Backend& getInferenceEngineBackendTypeParam();
#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2022_1)
Mat infEngineBlobToMat(const ov::Tensor& blob);
void infEngineBlobsToMats(const ov::TensorVector& blobs,
std::vector<Mat>& mats);
#else
Mat infEngineBlobToMat(const InferenceEngine::Blob::Ptr& blob);
void infEngineBlobsToMats(const std::vector<InferenceEngine::Blob::Ptr>& blobs,
std::vector<Mat>& mats);
#endif // OpenVINO >= 2022.1
CV__DNN_INLINE_NS_BEGIN
void switchToOpenVINOBackend(Net& net);
bool isMyriadX();
bool isArmComputePlugin();
CV__DNN_INLINE_NS_END
// A series of wrappers for classes from OpenVINO API 2.0.
// Need just for less conditional compilation inserts.
#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2022_1)
namespace InferenceEngine {
class CNNNetwork {
public:
CNNNetwork();
CNNNetwork(std::shared_ptr<ov::Model> model);
std::shared_ptr<ov::Model> getFunction() const;
void serialize(const std::string& xmlPath, const std::string& binPath);
void reshape(const std::map<std::string, std::vector<size_t> >& shapes);
private:
std::shared_ptr<ov::Model> model = nullptr;
};
typedef ov::InferRequest InferRequest;
class ExecutableNetwork : public ov::CompiledModel {
public:
ExecutableNetwork();
ExecutableNetwork(const ov::CompiledModel& copy);
ov::InferRequest CreateInferRequest();
};
class Core : public ov::Core {
public:
std::vector<std::string> GetAvailableDevices();
void UnregisterPlugin(const std::string& id);
CNNNetwork ReadNetwork(const std::string& xmlPath, const std::string& binPath);
ExecutableNetwork LoadNetwork(CNNNetwork net, const std::string& device,
const std::map<std::string, std::string>& config);
};
}
#endif // OpenVINO >= 2022.1
InferenceEngine::Core& getCore(const std::string& id);
template<typename T = size_t>
static inline std::vector<T> getShape(const Mat& mat)
{
std::vector<T> result(mat.dims);
for (int i = 0; i < mat.dims; i++)
result[i] = (T)mat.size[i];
return result;
}
#endif // HAVE_INF_ENGINE
}} // namespace dnn, namespace cv
#endif // __OPENCV_DNN_OP_INF_ENGINE_HPP__