Merge pull request #24584 from TolyaTalamanov:at/implement-inference-only-mode-for-ov-backend

G-API: Implement inference only mode for OV backend #24584

### Changes overview

Introduced the `cv::gapi::wip::ov::benchmark_mode{}` compile argument which, if passed, forces the `OpenVINO` backend to run inference only, without populating input tensors or copying back output tensors.

This mode is relevant only for measuring the performance of pure inference, without data transfers. A similar approach is used on the OpenVINO side in `benchmark_app`: https://github.com/openvinotoolkit/openvino/blob/master/samples/cpp/benchmark_app/benchmark_app.hpp#L134-L139
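
For illustration, a minimal usage sketch (the network definition, model files and device below are placeholders, not part of this PR):

```cpp
#include <opencv2/gapi.hpp>
#include <opencv2/gapi/infer.hpp>
#include <opencv2/gapi/infer/ov.hpp>

G_API_NET(MyNet, <cv::GMat(cv::GMat)>, "benchmark-sample-net");

int main() {
    cv::GMat in;
    cv::GMat out = cv::gapi::infer<MyNet>(in);
    cv::GComputation comp(cv::GIn(in), cv::GOut(out));

    // Placeholder IR files and device; substitute your own.
    auto net = cv::gapi::ov::Params<MyNet>{"model.xml", "model.bin", "CPU"};

    cv::Mat in_mat(224, 224, CV_8UC3), out_mat;
    comp.apply(cv::gin(in_mat), cv::gout(out_mat),
               cv::compile_args(cv::gapi::networks(net),
                                cv::gapi::wip::ov::benchmark_mode{}));
    // With benchmark_mode{} passed, in_mat is not uploaded to the device and
    // out_mat is not filled: only the inference itself runs and can be timed.
    return 0;
}
```

Note that only the plain `infer` kernel honors this mode: as the changes below show, `InferROI`, `InferList` and `InferList2` throw a `std::logic_error` when it is enabled.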



### Pull Request Readiness Checklist
See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [x] I agree to contribute to the project under Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [x] The PR is proposed to the proper branch
- [ ] There is a reference to the original bug report and related work
- [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
      Patch to opencv_extra has the same branch name.
- [x] The feature is well documented and sample code can be built with the project CMake

modules/gapi/include/opencv2/gapi/infer/ov.hpp

@@ -679,7 +679,32 @@ protected:
};
} // namespace ov
+namespace wip { namespace ov {
+/**
+ * @brief Ask G-API OpenVINO backend to run only inference of model provided.
+ *
+ * G-API OpenVINO backend will perform only the inference of the model provided
+ * without populating input and copying back output data.
+ * This mode is used to evaluate the pure inference performance of the model without
+ * taking into account the i/o data transfer.
+ */
+struct benchmark_mode { };
+/** @} */
+} // namespace ov
+} // namespace wip
} // namespace gapi
namespace detail
{
+template<> struct CompileArgTag<cv::gapi::wip::ov::benchmark_mode>
+{
+    static const char* tag() { return "gapi.wip.ov.benchmark_mode"; }
+};
}
} // namespace cv
#endif // OPENCV_GAPI_INFER_OV_HPP
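
The `CompileArgTag` specialization above is what allows the backend to look the argument up from `GCompileArgs` later. A sketch of the retrieval pattern (it mirrors the `GOVExecutable` constructor change below):

```cpp
// getCompileArg returns a cv::util::optional<T>; for a tag-only struct like
// benchmark_mode, the mere presence of the argument switches the mode on.
const bool inference_only =
    cv::gapi::getCompileArg<cv::gapi::wip::ov::benchmark_mode>(compileArgs)
        .has_value();
```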

modules/gapi/src/backends/ov/govbackend.cpp

@@ -252,7 +252,8 @@ public:
const std::vector<cv::gimpl::RcDesc> & outs,
cv::GRunArg::Meta && meta,
std::vector<cv::gimpl::GIslandExecutable::InObj> && input_objs,
-std::vector<cv::gimpl::GIslandExecutable::OutObj> && output_objs);
+std::vector<cv::gimpl::GIslandExecutable::OutObj> && output_objs,
+const cv::gimpl::ov::Options & options);
const cv::GArgs& inArgs() const;
@@ -281,6 +282,9 @@ public:
std::exception_ptr eptr;
const cv::GRunArg::Meta& getMeta() { return m_meta; };
+const cv::gimpl::ov::Options& getOptions() const { return m_options; };
private:
cv::detail::VectorRef& outVecRef(std::size_t idx);
@@ -301,6 +305,8 @@ private:
// Input parameters passed to an inference operation.
cv::GArgs m_args;
cv::GShapes m_in_shapes;
+cv::gimpl::ov::Options m_options;
};
OVCallContext::OVCallContext(const OVUnit & unit,
@@ -309,9 +315,11 @@ OVCallContext::OVCallContext(const OVUnit &
const std::vector<cv::gimpl::RcDesc> & outs,
cv::GRunArg::Meta && meta,
std::vector<cv::gimpl::GIslandExecutable::InObj> && input_objs,
-std::vector<cv::gimpl::GIslandExecutable::OutObj> && output_objs)
+std::vector<cv::gimpl::GIslandExecutable::OutObj> && output_objs,
+const cv::gimpl::ov::Options & options)
: uu(unit), out(output), m_meta(std::move(meta)),
-m_input_objs(std::move(input_objs)), m_output_objs(std::move(output_objs))
+m_input_objs(std::move(input_objs)), m_output_objs(std::move(output_objs)),
+m_options(options)
{
for (auto& it : m_input_objs) cv::gimpl::magazine::bindInArg (m_res, it.first, it.second);
for (auto& it : m_output_objs) cv::gimpl::magazine::bindOutArg(m_res, it.first, it.second);
@@ -577,9 +585,10 @@ static void PostOutputs(::ov::InferRequest &infer_request,
ctx->eptr = std::move(eptr);
for (auto i : ade::util::iota(ctx->uu.params.num_out)) {
-// NB: Copy data back only if execution finished sucessfuly.
-// Otherwise just post outputs to keep streaming executor contract.
-if (!ctx->eptr) {
+// NB: Copy data back only if execution finished successfully
+// and inference only mode is disabled.
+// Otherwise just post outputs to maintain streaming executor contract.
+if (!ctx->eptr && !ctx->getOptions().inference_only) {
const auto& out_name = ctx->uu.params.output_names[i];
copyFromOV(infer_request.get_tensor(out_name),
ctx->outMatR(i));
@@ -990,6 +999,11 @@ struct Infer: public cv::detail::KernelTag {
reqPool.getIdleRequest()->execute(
IInferExecutor::Task {
[ctx](::ov::InferRequest &infer_request) {
+// NB: No need to populate model inputs with data
+// if it's inference only mode.
+if (ctx->getOptions().inference_only) {
+    return;
+}
for (auto i : ade::util::iota(ctx->uu.params.num_in)) {
const auto& input_name = ctx->uu.params.input_names[i];
auto input_tensor = infer_request.get_tensor(input_name);
@@ -1069,6 +1083,10 @@ struct InferROI: public cv::detail::KernelTag {
static void run(std::shared_ptr<OVCallContext> ctx,
cv::gimpl::ov::RequestPool &reqPool) {
using namespace std::placeholders;
+if (ctx->getOptions().inference_only) {
+    cv::util::throw_error(
+        std::logic_error("OV Backend: Inference only mode is not supported for InferROI!"));
+}
reqPool.getIdleRequest()->execute(
IInferExecutor::Task {
[ctx](::ov::InferRequest &infer_request) {
@@ -1141,6 +1159,10 @@ struct InferList: public cv::detail::KernelTag {
static void run(std::shared_ptr<OVCallContext> ctx,
cv::gimpl::ov::RequestPool &reqPool) {
+if (ctx->getOptions().inference_only) {
+    cv::util::throw_error(
+        std::logic_error("OV Backend: Inference only mode is not supported for InferList!"));
+}
const auto& in_roi_vec = ctx->inArg<cv::detail::VectorRef>(0u).rref<cv::Rect>();
// NB: In case there is no input data need to post output anyway
if (in_roi_vec.empty()) {
@@ -1257,6 +1279,10 @@ struct InferList2: public cv::detail::KernelTag {
static void run(std::shared_ptr<OVCallContext> ctx,
cv::gimpl::ov::RequestPool &reqPool) {
+if (ctx->getOptions().inference_only) {
+    cv::util::throw_error(
+        std::logic_error("OV Backend: Inference only mode is not supported for InferList2!"));
+}
GAPI_Assert(ctx->inArgs().size() > 1u
&& "This operation must have at least two arguments");
// NB: This blob will be used to make roi from its, so
@@ -1348,9 +1374,9 @@ class GOVBackendImpl final: public cv::gapi::GBackend::Priv {
}
virtual EPtr compile(const ade::Graph &graph,
-const cv::GCompileArgs &,
+const cv::GCompileArgs &compileArgs,
const std::vector<ade::NodeHandle> &nodes) const override {
-return EPtr{new cv::gimpl::ov::GOVExecutable(graph, nodes)};
+return EPtr{new cv::gimpl::ov::GOVExecutable(graph, compileArgs, nodes)};
}
virtual cv::GKernelPackage auxiliaryKernels() const override {
@@ -1391,9 +1417,12 @@ createInferRequests(::ov::CompiledModel &compiled_model,
// GOVExecutable implementation //////////////////////////////////////////////
cv::gimpl::ov::GOVExecutable::GOVExecutable(const ade::Graph &g,
+const cv::GCompileArgs &compileArgs,
const std::vector<ade::NodeHandle> &nodes)
: m_g(g), m_gm(m_g) {
+m_options.inference_only =
+    cv::gapi::getCompileArg<cv::gapi::wip::ov::benchmark_mode>(compileArgs).has_value();
// FIXME: Currently this backend is capable to run a single inference node only.
// Need to extend our island fusion with merge/not-to-merge decision making parametrization
GConstGOVModel ovm(g);
@@ -1471,7 +1500,7 @@ void cv::gimpl::ov::GOVExecutable::run(cv::gimpl::GIslandExecutable::IInput &in
const auto &op = m_gm.metadata(this_nh).get<Op>();
auto ctx = std::make_shared<OVCallContext>(uu, out, op.args, op.outs,
-std::move(stub_meta), std::move(input_objs), std::move(output_objs));
+std::move(stub_meta), std::move(input_objs), std::move(output_objs), m_options);
const auto &kk = giem.metadata(this_nh).get<OVCallable>();

modules/gapi/src/backends/ov/govbackend.hpp

@@ -26,6 +26,12 @@ struct OVCompiled {
class RequestPool;
+struct Options {
+    // Only performs inference of the model
+    // without i/o data transfer if enabled.
+    bool inference_only = false;
+};
class GOVExecutable final: public GIslandExecutable
{
const ade::Graph &m_g;
@@ -42,8 +48,12 @@ class GOVExecutable final: public GIslandExecutable
// To manage multiple async requests
std::unique_ptr<RequestPool> m_reqPool;
+// To manage additional execution options
+Options m_options;
public:
GOVExecutable(const ade::Graph &graph,
+const cv::GCompileArgs &compileArgs,
const std::vector<ade::NodeHandle> &nodes);
virtual inline bool canReshape() const override { return false; }
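
For completeness, a hedged sketch of what the `InferROI` guard shown earlier means in user code (reusing the hypothetical `MyNet`, `net` and `in_mat` from the first sketch); `InferList` and `InferList2` behave the same way:

```cpp
cv::GMat in;
cv::GOpaque<cv::Rect> roi;
cv::GMat out = cv::gapi::infer<MyNet>(roi, in);   // InferROI form
cv::GComputation comp(cv::GIn(in, roi), cv::GOut(out));

try {
    cv::Mat out_mat;
    comp.apply(cv::gin(in_mat, cv::Rect{0, 0, 64, 64}), cv::gout(out_mat),
               cv::compile_args(cv::gapi::networks(net),
                                cv::gapi::wip::ov::benchmark_mode{}));
} catch (const std::logic_error &e) {
    // Expected: "OV Backend: Inference only mode is not supported for InferROI!"
}
```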