From ebea65777f30e0090807d410b8ab878517795a32 Mon Sep 17 00:00:00 2001 From: Daria Mityagina Date: Wed, 24 Apr 2024 16:25:00 +0300 Subject: [PATCH] Merge pull request #24938 from DariaMityagina:icv/dm/add-media-frame-support-to-govbackend G-API OV backend requires cv::MediaFrame #24938 ### Pull Request Readiness Checklist **Background_subtraction demo G-API issue. Update:** Porting to API20 resulted in an error (both for CPU and NPU): ``` [ERROR] OpenCV(4.9.0-dev) /home/runner/work/open_model_zoo/open_model_zoo/cache/opencv/modules/gapi/src/backends/ov/govbackend.cpp:813: error: (-215:Assertion failed) cv::util::holds_alternative<cv::GMatDesc>(input_meta) in function 'cfgPreProcessing' ``` Adding cv::MediaFrame support to govbackend resulted in the following (tested with CPU): image ### TODO - [ ] **As part of the review process [this comment](https://github.com/opencv/opencv/pull/24938#discussion_r1487694043) was addressed, which makes it impossible to run the demo. I will bring those changes back in a separate PR [support `PartialShape`]** See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [x] I agree to contribute to the project under Apache 2 License. - [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [x] The PR is proposed to the proper branch - [x] There is a reference to the original bug report and related work - [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. 
- [ ] The feature is well documented and sample code can be built with the project CMake --- modules/gapi/src/backends/ov/govbackend.cpp | 166 ++++++++++++++---- .../gapi/test/infer/gapi_infer_ov_tests.cpp | 166 ++++++++++++++++++ 2 files changed, 301 insertions(+), 31 deletions(-) diff --git a/modules/gapi/src/backends/ov/govbackend.cpp b/modules/gapi/src/backends/ov/govbackend.cpp index abbe5f9f5b..6b26144099 100644 --- a/modules/gapi/src/backends/ov/govbackend.cpp +++ b/modules/gapi/src/backends/ov/govbackend.cpp @@ -129,7 +129,7 @@ static int toCV(const ov::element::Type &type) { static void copyFromOV(const ov::Tensor &tensor, cv::Mat &mat) { const auto total = mat.total() * mat.channels(); if (toCV(tensor.get_element_type()) != mat.depth() || - tensor.get_size() != total ) { + tensor.get_size() != total) { std::stringstream ss; ss << "Failed to copy data from ov::Tensor to cv::Mat." << " Data type or number of elements mismatch." @@ -151,6 +151,30 @@ static void copyFromOV(const ov::Tensor &tensor, cv::Mat &mat) { } } +cv::Mat wrapOV(const cv::MediaFrame::View& view, + const cv::GFrameDesc& desc) { + cv::Mat out; + switch (desc.fmt) { + case cv::MediaFormat::BGR: { + out = cv::Mat(desc.size, CV_8UC3, view.ptr[0], view.stride[0]); + return out; + } + case cv::MediaFormat::NV12: { + auto y_plane = cv::Mat(desc.size, CV_8UC1, view.ptr[0], view.stride[0]); + auto uv_plane = cv::Mat(desc.size / 2, CV_8UC2, view.ptr[1], view.stride[1]); + cvtColorTwoPlane(y_plane, uv_plane, out, cv::COLOR_YUV2BGR_NV12); + return out; + } + case cv::MediaFormat::GRAY: { + out = cv::Mat(desc.size, CV_8UC1, view.ptr[0], view.stride[0]); + return out; + } + default: + GAPI_Error("OV Backend: Unsupported media format"); + } + return out; +} + static void copyToOV(const cv::Mat &mat, ov::Tensor &tensor) { // TODO: Ideally there should be check that mat and tensor // dimensions are compatible. 
@@ -177,6 +201,12 @@ static void copyToOV(const cv::Mat &mat, ov::Tensor &tensor) { } } +static void copyToOV(const cv::MediaFrame &frame, ov::Tensor &tensor) { + const auto view = cv::MediaFrame::View(frame.access(cv::MediaFrame::Access::R)); + auto matFromFrame = wrapOV(view, frame.desc()); + copyToOV(matFromFrame, tensor); +} + std::vector cv::gapi::ov::util::to_ocv(const ::ov::Shape &shape) { return toCV(shape); } @@ -269,8 +299,9 @@ public: } // Syntax sugar - cv::GShape inShape(std::size_t input) const; - const cv::Mat& inMat (std::size_t input) const; + cv::GShape inShape (std::size_t input) const; + const cv::Mat& inMat (std::size_t input) const; + const cv::MediaFrame& inFrame (std::size_t input) const; cv::GRunArgP output (std::size_t idx); cv::Mat& outMatR(std::size_t idx); @@ -355,6 +386,10 @@ const cv::Mat& OVCallContext::inMat(std::size_t input) const { return inArg(input); } +const cv::MediaFrame& OVCallContext::inFrame(std::size_t input) const { + return inArg(input); +} + cv::Mat& OVCallContext::outMatR(std::size_t idx) { return *cv::util::get(m_results.at(idx)); } @@ -394,6 +429,8 @@ cv::GArg OVCallContext::packArg(const cv::GArg &arg) { // (and constructed by either bindIn/Out or resetInternal) case cv::GShape::GOPAQUE: return cv::GArg(m_res.slot().at(ref.id)); + case cv::GShape::GFRAME: return cv::GArg(m_res.slot()[ref.id]); + default: cv::util::throw_error(std::logic_error("Unsupported GShape type")); break; @@ -655,6 +692,19 @@ void PostOutputsList::operator()(::ov::InferRequest &infer_request, } } +static void copyToOV(std::shared_ptr ctx, uint32_t input_idx, ov::Tensor &tensor) { + switch (ctx->inShape(input_idx)) { + case cv::GShape::GMAT: + copyToOV(ctx->inMat(input_idx), tensor); + break; + case cv::GShape::GFRAME: + copyToOV(ctx->inFrame(input_idx), tensor); + break; + default: + GAPI_Assert("Unsupported input shape for OV backend"); + } +} + namespace cv { namespace gimpl { namespace ov { @@ -730,6 +780,37 @@ static cv::Mat 
preprocess(const cv::Mat &in_mat, return out; } +// NB: This function is used to preprocess input image +// for InferROI, InferList, InferList2 kernels. +cv::Mat preprocess(MediaFrame::View& view, + const cv::GFrameDesc& desc, + const cv::Rect& roi, + const ::ov::Shape &model_shape) { + return preprocess(wrapOV(view, desc), roi, model_shape); +} + +static void preprocess_and_copy(std::shared_ptr ctx, + uint32_t input_idx, + const cv::Rect &roi, + const ::ov::Shape &model_shape, + ::ov::Tensor& tensor) { + switch (ctx->inShape(input_idx)) { + case cv::GShape::GMAT: { + auto roi_mat = preprocess(ctx->inMat(input_idx), roi, model_shape); + copyToOV(roi_mat, tensor); + break; + } + case cv::GShape::GFRAME: { + auto currentFrame = ctx->inFrame(input_idx); + auto view = cv::MediaFrame::View(currentFrame.access(cv::MediaFrame::Access::R)); + auto roi_mat = preprocess(view, currentFrame.desc(), roi, model_shape); + copyToOV(roi_mat, tensor); + } + default: + GAPI_Assert("Unsupported input shape for OV backend"); + } +} + static bool isImage(const cv::GMatDesc &desc, const ::ov::Shape &model_shape) { return (model_shape.size() == 4u) && @@ -739,6 +820,16 @@ static bool isImage(const cv::GMatDesc &desc, (desc.depth == CV_8U); } +static bool isImage(const cv::GMetaArg &meta, + const ::ov::Shape &shape) { + if (cv::util::holds_alternative(meta)) { + return true; + } + GAPI_Assert(cv::util::holds_alternative(meta)); + auto matdesc = cv::util::get(meta); + return isImage(matdesc, shape); +} + class PrePostProcWrapper { public: PrePostProcWrapper(std::shared_ptr<::ov::Model> &model, @@ -821,9 +912,8 @@ public: void cfgPreProcessing(const std::string &input_name, const cv::GMetaArg &input_meta, const bool disable_img_resize = false) { - GAPI_Assert(cv::util::holds_alternative(input_meta)); - const auto &matdesc = cv::util::get(input_meta); - + GAPI_Assert(cv::util::holds_alternative(input_meta) || + cv::util::holds_alternative(input_meta)); const auto explicit_in_tensor_layout = 
lookUp(m_input_tensor_layout, input_name); const auto explicit_in_model_layout = lookUp(m_input_model_layout, input_name); const auto explicit_resize = lookUp(m_interpolation, input_name); @@ -838,24 +928,35 @@ public: const auto &input_shape = m_model->input(input_name).get_shape(); auto &input_info = m_ppp.input(input_name); - m_ppp.input(input_name).tensor().set_element_type(toOV(matdesc.depth)); - if (isImage(matdesc, input_shape)) { + auto isMat = cv::util::holds_alternative(input_meta); + auto prec = isMat ? cv::util::get(input_meta).depth : CV_8U; + m_ppp.input(input_name).tensor().set_element_type(toOV(prec)); + + const auto &matdesc = isMat ? cv::util::get(input_meta) : cv::GMatDesc(); + const auto &framedesc = !isMat ? cv::util::get(input_meta) : cv::GFrameDesc(); + if (isImage(input_meta, input_shape)) { // NB: Image case - all necessary preprocessng is configured automatically. GAPI_LOG_DEBUG(NULL, "OV Backend: Input: \"" << input_name << "\" is image."); - if (explicit_in_tensor_layout && - *explicit_in_tensor_layout != "NHWC") { + if (explicit_in_tensor_layout && *explicit_in_tensor_layout != "NHWC") { + std::stringstream desc_str; + if (isMat) { + desc_str << matdesc; + } else { + desc_str << framedesc; + } std::stringstream ss; ss << "OV Backend: Provided tensor layout " << *explicit_in_tensor_layout - << " is not compatible with input data " << matdesc << " for layer \"" - << input_name << "\". Expecting NHWC"; + << " is not compatible with input data " << desc_str.str() << " for layer \"" + << input_name << "\". Expecting NHWC"; util::throw_error(std::logic_error(ss.str())); } else { input_info.tensor().set_layout(::ov::Layout("NHWC")); } if (!disable_img_resize) { - input_info.tensor().set_spatial_static_shape(matdesc.size.height, - matdesc.size.width); + const auto size = isMat ? 
cv::util::get(input_meta).size : cv::util::get(input_meta).size; + input_info.tensor().set_spatial_static_shape(size.height, + size.width); // NB: Even though resize is automatically configured // user have an opportunity to specify the interpolation algorithm. auto interp = explicit_resize @@ -877,8 +978,8 @@ public: if (!explicit_in_tensor_layout && model_layout.empty()) { std::stringstream ss; ss << "Resize for input layer: " << input_name - << "can't be configured." - << " Failed to extract H and W positions from layout."; + << "can't be configured." + << " Failed to extract H and W positions from layout."; util::throw_error(std::logic_error(ss.str())); } else { const auto layout = explicit_in_tensor_layout @@ -982,7 +1083,6 @@ struct Infer: public cv::detail::KernelTag { ade::util::toRange(in_metas))) { const auto &input_name = std::get<0>(it); const auto &mm = std::get<1>(it); - ppp.cfgLayouts(input_name); ppp.cfgPreProcessing(input_name, mm); ppp.cfgScaleMean(input_name, mm); @@ -1025,7 +1125,7 @@ struct Infer: public cv::detail::KernelTag { auto input_tensor = infer_request.get_tensor(input_name); // TODO: In some cases wrapping existing data pointer // might be faster than copy. Make it a strategy. - copyToOV(ctx->inMat(i), input_tensor); + copyToOV(ctx, i, input_tensor); } }, std::bind(PostOutputs, _1, _2, ctx) @@ -1054,13 +1154,13 @@ struct InferROI: public cv::detail::KernelTag { const auto &input_name = uu.params.input_names.at(0); const auto &mm = in_metas.at(1u); - GAPI_Assert(cv::util::holds_alternative(mm)); - const auto &matdesc = cv::util::get(mm); - + GAPI_Assert(cv::util::holds_alternative(mm) || + cv::util::holds_alternative(mm)); const bool is_model = cv::util::holds_alternative(uu.params.kind); const auto &input_shape = is_model ? 
uu.model->input(input_name).get_shape() : uu.compiled_model.input(input_name).get_shape(); - if (!isImage(matdesc, input_shape)) { + + if (!isImage(mm, input_shape)) { util::throw_error(std::runtime_error( "OV Backend: InferROI supports only image as the 1th argument")); } @@ -1111,8 +1211,7 @@ struct InferROI: public cv::detail::KernelTag { auto input_tensor = infer_request.get_tensor(input_name); const auto &shape = input_tensor.get_shape(); const auto &roi = ctx->inArg(0).rref(); - const auto roi_mat = preprocess(ctx->inMat(1), roi, shape); - copyToOV(roi_mat, input_tensor); + preprocess_and_copy(ctx, 1, roi, shape, input_tensor); }, std::bind(PostOutputs, _1, _2, ctx) } @@ -1147,11 +1246,11 @@ struct InferList: public cv::detail::KernelTag { size_t idx = 1u; for (auto &&input_name : uu.params.input_names) { const auto &mm = in_metas[idx++]; - GAPI_Assert(cv::util::holds_alternative(mm)); - const auto &matdesc = cv::util::get(mm); + GAPI_Assert(cv::util::holds_alternative(mm) || + cv::util::holds_alternative(mm)); const auto &input_shape = uu.model->input(input_name).get_shape(); - if (!isImage(matdesc, input_shape)) { + if (!isImage(mm, input_shape)) { util::throw_error(std::runtime_error( "OV Backend: Only image is supported" " as the " + std::to_string(idx) + "th argument for InferList")); @@ -1208,8 +1307,7 @@ struct InferList: public cv::detail::KernelTag { const auto &input_name = ctx->uu.params.input_names[0]; auto input_tensor = infer_request.get_tensor(input_name); const auto &shape = input_tensor.get_shape(); - const auto roi_mat = preprocess(ctx->inMat(1), rc, shape); - copyToOV(roi_mat, input_tensor); + preprocess_and_copy(ctx, 1, rc, shape, input_tensor); }, std::bind(callback, std::placeholders::_1, std::placeholders::_2, pos) } @@ -1247,12 +1345,18 @@ struct InferList2: public cv::detail::KernelTag { const auto &input_name_0 = uu.params.input_names.front(); const auto &mm_0 = in_metas[0u]; - const auto &matdesc = cv::util::get(mm_0); + + if 
(!(cv::util::holds_alternative(mm_0) || + cv::util::holds_alternative(mm_0))) { + util::throw_error(std::runtime_error( + "OV Backend: Unsupported input meta" + " for 0th argument in OV backend")); + } const bool is_model = cv::util::holds_alternative(uu.params.kind); const auto &input_shape = is_model ? uu.model->input(input_name_0).get_shape() : uu.compiled_model.input(input_name_0).get_shape(); - if (!isImage(matdesc, input_shape)) { + if (!isImage(mm_0, input_shape)) { util::throw_error(std::runtime_error( "OV Backend: InferList2 supports only image as the 0th argument")); } diff --git a/modules/gapi/test/infer/gapi_infer_ov_tests.cpp b/modules/gapi/test/infer/gapi_infer_ov_tests.cpp index 8a15d5e741..49652db387 100644 --- a/modules/gapi/test/infer/gapi_infer_ov_tests.cpp +++ b/modules/gapi/test/infer/gapi_infer_ov_tests.cpp @@ -319,8 +319,174 @@ struct TestAgeGenderListOV : public BaseAgeGenderOV { } }; +class TestMediaBGR final: public cv::MediaFrame::IAdapter { + cv::Mat m_mat; + using Cb = cv::MediaFrame::View::Callback; + Cb m_cb; + +public: + explicit TestMediaBGR(cv::Mat m, Cb cb = [](){}) + : m_mat(m), m_cb(cb) { + } + cv::GFrameDesc meta() const override { + return cv::GFrameDesc{cv::MediaFormat::BGR, cv::Size(m_mat.cols, m_mat.rows)}; + } + cv::MediaFrame::View access(cv::MediaFrame::Access) override { + cv::MediaFrame::View::Ptrs pp = { m_mat.ptr(), nullptr, nullptr, nullptr }; + cv::MediaFrame::View::Strides ss = { m_mat.step, 0u, 0u, 0u }; + return cv::MediaFrame::View(std::move(pp), std::move(ss), Cb{m_cb}); + } +}; + +struct MediaFrameTestAgeGenderOV: public ::testing::Test { + MediaFrameTestAgeGenderOV() { + initDLDTDataPath(); + xml_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml", false); + bin_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin", false); + device = "CPU"; + blob_path = "age-gender-recognition-retail-0013.blob"; + + cv::Size sz{62, 62}; + m_in_mat = cv::Mat(sz, CV_8UC3); + 
cv::resize(m_in_mat, m_in_mat, sz); + + m_in_y = cv::Mat{sz, CV_8UC1}; + cv::randu(m_in_y, 0, 255); + m_in_uv = cv::Mat{sz / 2, CV_8UC2}; + cv::randu(m_in_uv, 0, 255); + } + + cv::Mat m_in_y; + cv::Mat m_in_uv; + + cv::Mat m_in_mat; + + cv::Mat m_out_ov_age; + cv::Mat m_out_ov_gender; + + cv::Mat m_out_gapi_age; + cv::Mat m_out_gapi_gender; + + std::string xml_path; + std::string bin_path; + std::string blob_path; + std::string device; + std::string image_path; + + using AGInfo = std::tuple; + G_API_NET(AgeGender, , "typed-age-gender"); + + void validate() { + normAssert(m_out_ov_age, m_out_gapi_age, "0: Test age output"); + normAssert(m_out_ov_gender, m_out_gapi_gender, "0: Test gender output"); + } +}; // MediaFrameTestAgeGenderOV + } // anonymous namespace +TEST_F(MediaFrameTestAgeGenderOV, InferMediaInputBGR) +{ + // OpenVINO + AGNetOVComp ref(xml_path, bin_path, device); + ref.cfgPrePostProcessing([](ov::preprocess::PrePostProcessor &ppp) { + ppp.input().tensor().set_element_type(ov::element::u8); + ppp.input().tensor().set_layout("NHWC"); + }); + ref.compile()(m_in_mat, m_out_ov_age, m_out_ov_gender); + + // G-API + cv::GFrame in; + cv::GMat age, gender; + std::tie(age, gender) = cv::gapi::infer(in); + cv::GComputation comp{cv::GIn(in), cv::GOut(age, gender)}; + + auto frame = MediaFrame::Create(m_in_mat); + auto pp = cv::gapi::ov::Params { + xml_path, bin_path, device + }.cfgOutputLayers({ "age_conv3", "prob" }); + + comp.apply(cv::gin(frame), + cv::gout(m_out_gapi_age, m_out_gapi_gender), + cv::compile_args(cv::gapi::networks(pp))); + + validate(); +} + +TEST_F(MediaFrameTestAgeGenderOV, InferROIGenericMediaInputBGR) { + // OpenVINO + cv::Rect roi(cv::Rect(cv::Point{20, 25}, cv::Size{16, 16})); + auto frame = MediaFrame::Create(m_in_mat); + static constexpr const char* tag = "age-gender-generic"; + + // OpenVINO + AGNetOVComp ref(xml_path, bin_path, device); + ref.cfgPrePostProcessing([](ov::preprocess::PrePostProcessor &ppp) { + 
ppp.input().tensor().set_element_type(ov::element::u8); + ppp.input().tensor().set_layout("NHWC"); + }); + ref.compile()(m_in_mat, roi, m_out_ov_age, m_out_ov_gender); + + // G-API + cv::GFrame in; + cv::GOpaque rr; + GInferInputs inputs; + inputs["data"] = in; + auto outputs = cv::gapi::infer(tag, rr, inputs); + auto age = outputs.at("age_conv3"); + auto gender = outputs.at("prob"); + cv::GComputation comp{cv::GIn(in, rr), cv::GOut(age, gender)}; + + auto pp = AGNetROIGenComp::params(xml_path, bin_path, device); + + comp.apply(cv::gin(frame, roi), cv::gout(m_out_gapi_age, m_out_gapi_gender), + cv::compile_args(cv::gapi::networks(pp))); + + validate(); +} + +class TestMediaNV12 final: public cv::MediaFrame::IAdapter { + cv::Mat m_y; + cv::Mat m_uv; + +public: + TestMediaNV12(cv::Mat y, cv::Mat uv) : m_y(y), m_uv(uv) { + } + cv::GFrameDesc meta() const override { + return cv::GFrameDesc{cv::MediaFormat::NV12, cv::Size(m_y.cols, m_y.rows)}; + } + cv::MediaFrame::View access(cv::MediaFrame::Access) override { + cv::MediaFrame::View::Ptrs pp = { + m_y.ptr(), m_uv.ptr(), nullptr, nullptr + }; + cv::MediaFrame::View::Strides ss = { + m_y.step, m_uv.step, 0u, 0u + }; + return cv::MediaFrame::View(std::move(pp), std::move(ss)); + } +}; + +TEST_F(MediaFrameTestAgeGenderOV, TestMediaNV12AgeGenderOV) +{ + cv::GFrame in; + cv::GOpaque rr; + GInferInputs inputs; + inputs["data"] = in; + static constexpr const char* tag = "age-gender-generic"; + auto outputs = cv::gapi::infer(tag, rr, inputs); + auto age = outputs.at("age_conv3"); + auto gender = outputs.at("prob"); + cv::GComputation comp{cv::GIn(in, rr), cv::GOut(age, gender)}; + + auto frame = MediaFrame::Create(m_in_y, m_in_uv); + auto pp = AGNetROIGenComp::params(xml_path, bin_path, device); + + cv::Rect roi(cv::Rect(cv::Point{20, 25}, cv::Size{16, 16})); + + EXPECT_NO_THROW(comp.apply(cv::gin(frame, roi), + cv::gout(m_out_gapi_age, m_out_gapi_gender), + cv::compile_args(cv::gapi::networks(pp)))); +} + // TODO: Make all of 
tests below parmetrized to avoid code duplication TEST_F(TestAgeGenderOV, Infer_Tensor) { const auto in_mat = getRandomTensor({1, 3, 62, 62}, CV_32F);