diff --git a/modules/gapi/include/opencv2/gapi/infer/ie.hpp b/modules/gapi/include/opencv2/gapi/infer/ie.hpp index 470d50ac98..b403479ca2 100644 --- a/modules/gapi/include/opencv2/gapi/infer/ie.hpp +++ b/modules/gapi/include/opencv2/gapi/infer/ie.hpp @@ -2,7 +2,7 @@ // It is subject to the license terms in the LICENSE file found in the top-level directory // of this distribution and at http://opencv.org/license.html. // -// Copyright (C) 2019-2021 Intel Corporation +// Copyright (C) 2019-2023 Intel Corporation #ifndef OPENCV_GAPI_INFER_IE_HPP #define OPENCV_GAPI_INFER_IE_HPP @@ -55,6 +55,21 @@ using IEConfig = std::map; enum InferMode {Sync, Async}; namespace detail { + +template +using AttrMap = std::map; +// NB: This type is used to hold in/out layers +// attributes such as precision, layout, shape etc. +// +// User can provide attributes either: +// 1. cv::util::monostate - No value specified explicitly. +// 2. Attr - value specified explicitly that should be broadcasted to all layers. +// 3. AttrMap[str->T] - map specifies value for particular layer. +template +using LayerVariantAttr = cv::util::variant< cv::util::monostate + , AttrMap + , Attr>; + struct ParamDesc { std::string model_path; std::string weights_path; @@ -103,7 +118,11 @@ struct ParamDesc { using PrecisionVariantT = cv::util::variant; + PrecisionVariantT output_precision; + LayerVariantAttr input_layout; + LayerVariantAttr output_layout; + LayerVariantAttr interpolation; }; } // namespace detail @@ -150,6 +169,9 @@ public: , {} , {} , InferMode::Async + , {} + , {} + , {} , {} } { }; @@ -176,6 +198,9 @@ public: , {} , {} , InferMode::Async + , {} + , {} + , {} , {} } { }; @@ -412,6 +437,80 @@ public: return *this; } + /** @brief Specifies the input layout for model. + + The function is used to set an input layout for model. + + @param layout Layout in string representation ("NCHW", "NHWC", etc) + will be applied to all input layers. + @return reference to this parameter structure. + */ + Params& cfgInputLayout(std::string layout) { + desc.input_layout = std::move(layout); + return *this; + } + + /** @overload + + @param layout_map Map of pairs: name of corresponding input layer + and its layout in string representation ("NCHW", "NHWC", etc) + @return reference to this parameter structure. + */ + Params& + cfgInputLayout(detail::AttrMap layout_map) { + desc.input_layout = std::move(layout_map); + return *this; + } + + /** @brief Specifies the output layout for model. + + The function is used to set an output layout for model. + + @param layout Layout in string representation ("NCHW", "NHWC", etc) + will be applied to all output layers. + @return reference to this parameter structure. + */ + Params& cfgOutputLayout(std::string layout) { + desc.output_layout = std::move(layout); + return *this; + } + + /** @overload + + @param layout_map Map of pairs: name of corresponding output layer + and its layout in string representation ("NCHW", "NHWC", etc) + @return reference to this parameter structure. + */ + Params& + cfgOutputLayout(detail::AttrMap layout_map) { + desc.output_layout = std::move(layout_map); + return *this; + } + + /** @brief Specifies resize interpolation algorithm. + * + The function is used to configure resize preprocessing for input layer. + + @param interpolation Resize interpolation algorithm. + Supported algorithms: #INTER_LINEAR, #INTER_AREA. + @return reference to this parameter structure. + */ + Params& cfgResize(int interpolation) { + desc.interpolation = interpolation; + return *this; + } + + /** @overload + + @param interpolation Map of pairs: name of corresponding input layer + and its resize algorithm. + @return reference to this parameter structure. + */ + Params& cfgResize(detail::AttrMap interpolation) { + desc.interpolation = std::move(interpolation); + return *this; + } + // BEGIN(G-API's network parametrization API) GBackend backend() const { return cv::gapi::ie::backend(); } std::string tag() const { return Net::tag(); } @@ -446,7 +545,7 @@ public: const std::string &device) : desc{ model, weights, device, {}, {}, {}, 0u, 0u, detail::ParamDesc::Kind::Load, true, {}, {}, {}, 1u, - {}, {}, {}, {}, InferMode::Async, {} }, + {}, {}, {}, {}, InferMode::Async, {}, {}, {}, {} }, m_tag(tag) { }; @@ -464,7 +563,7 @@ public: const std::string &device) : desc{ model, {}, device, {}, {}, {}, 0u, 0u, detail::ParamDesc::Kind::Import, true, {}, {}, {}, 1u, - {}, {}, {}, {}, InferMode::Async, {} }, + {}, {}, {}, {}, InferMode::Async, {}, {}, {}, {} }, m_tag(tag) { }; @@ -556,6 +655,44 @@ public: return *this; } + /** @see ie::Params::cfgInputLayout */ + Params& cfgInputLayout(std::string layout) { + desc.input_layout = std::move(layout); + return *this; + } + + /** @overload */ + Params& + cfgInputLayout(detail::AttrMap layout_map) { + desc.input_layout = std::move(layout_map); + return *this; + } + + /** @see ie::Params::cfgOutputLayout */ + Params& cfgOutputLayout(std::string layout) { + desc.output_layout = std::move(layout); + return *this; + } + + /** @overload */ + Params& + cfgOutputLayout(detail::AttrMap layout_map) { + desc.output_layout = std::move(layout_map); + return *this; + } + + /** @see ie::Params::cfgResize */ + Params& cfgResize(int interpolation) { + desc.interpolation = interpolation; + return *this; + } + + /** @overload */ + Params& cfgResize(detail::AttrMap interpolation) { + desc.interpolation = std::move(interpolation); + return *this; + } + // BEGIN(G-API's network parametrization API) GBackend backend() const { return cv::gapi::ie::backend(); } std::string tag() const { return m_tag; } diff --git a/modules/gapi/src/backends/ie/giebackend.cpp b/modules/gapi/src/backends/ie/giebackend.cpp index 00ab3c5c40..a257c21252 100644 --- a/modules/gapi/src/backends/ie/giebackend.cpp +++ b/modules/gapi/src/backends/ie/giebackend.cpp @@ -2,7 +2,7 @@ // It is subject to the license terms in the LICENSE file found in the top-level directory // of this distribution and at http://opencv.org/license.html. // -// Copyright (C) 2018-2022 Intel Corporation +// Copyright (C) 2018-2023 Intel Corporation #include "precomp.hpp" @@ -71,6 +71,28 @@ namespace IE = InferenceEngine; namespace { +IE::Layout toIE(const std::string &layout) { + const std::unordered_map layouts = { + {"NCDHW", IE::Layout::NCDHW}, + {"NDHWC", IE::Layout::NDHWC}, + {"NHWC" , IE::Layout::NHWC }, + {"NCHW" , IE::Layout::NCHW }, + {"CHW" , IE::Layout::CHW }, + {"HWC" , IE::Layout::HWC }, + {"HW" , IE::Layout::HW }, + {"NC" , IE::Layout::NC }, + {"CN" , IE::Layout::CN }, + {"C" , IE::Layout::C }, + }; + + const auto it = layouts.find(layout); + if (it == layouts.end()) { + cv::util::throw_error( + std::logic_error("IE Backend: Unsupported layout: " + layout)); + } + return it->second; +}; + inline IE::ROI toIE(const cv::Rect &rc) { return IE::ROI { 0u @@ -130,52 +152,90 @@ inline int toCV(IE::Precision prec) { return -1; } -// NB: In short: Tensor - ND or 2D + precision != U8. -cv::gapi::ie::TraitAs clarifyTrait(const cv::GMatDesc &mat_desc, - const IE::TensorDesc &tensor_desc) { - // NB: This check does not include 2D matrices like {32, 16}, - // which also falls under the category of tensors. - // The reason for this is that RGB images are also considered 2D - // as the channel component is not taken into account in this case. - if (mat_desc.isND() || mat_desc.planar) { - return cv::gapi::ie::TraitAs::TENSOR; +inline IE::ResizeAlgorithm toIEInterp(int interpolation) { + switch (interpolation) { + case cv::INTER_LINEAR: return IE::RESIZE_BILINEAR; + case cv::INTER_AREA: return IE::RESIZE_AREA; + default: GAPI_Error("IE Backend: Unsupported resize algorithm"); } - // NB: If user provided 2D data in U8 precision - // and network expects NHWC/NCHW layout - data is image (most likely). - const auto layout = tensor_desc.getLayout(); - if (layout == IE::Layout::NCHW || layout == IE::Layout::NHWC) { - if (mat_desc.depth == CV_8U) { - return cv::gapi::ie::TraitAs::IMAGE; + // Unreachable code + GAPI_Assert(false); +} + +template +using AttrMap = cv::gapi::ie::detail::AttrMap; + +template +using LayerVariantAttr = cv::gapi::ie::detail::LayerVariantAttr; + +template AttrMap +broadcastLayerAttr(const LayerVariantAttr &layer_attr, + const std::vector &layer_names) { + AttrMap map; + if (cv::util::holds_alternative>(layer_attr)) { + map = cv::util::get>(layer_attr); + // NB: Validate map: + std::unordered_set existing_layers = + {layer_names.begin(), layer_names.end()}; + + for (const auto &p : map) { + const auto it = existing_layers.find(p.first); + if (it == existing_layers.end()) { + cv::util::throw_error( + std::logic_error("IE Backend: Failed to" + " find layer with name: " + p.first)); + } + } + } else if (cv::util::holds_alternative(layer_attr)) { + // NB: Broadcast value to all layers. + auto elem = cv::util::get(layer_attr); + for (auto &&layer_name : layer_names) { + map.emplace(layer_name, elem); } - // NB: 2D data with precision != U8 provided for 4D layout. - std::stringstream ss; - ss << "IE Backend: An inconsistency has been detected between" - " the provided data: " << mat_desc << - " and the network layout: " << layout; - cv::util::throw_error(std::logic_error(ss.str())); } - // NB: Otherwise trait is TENSOR. - // If there is an inconsistency between the data dimensions - // and the network layout, the "setBlob" will fail. + return map; +} + +// TODO: Move it to some common place +template +cv::optional lookUp(const std::map &map, const K& key) { + const auto it = map.find(key); + if (it == map.end()) { + return {}; + } + return cv::util::make_optional(std::move(it->second)); +} + +static bool isImage(const cv::GMatDesc &desc, + const IE::SizeVector &model_dims) { + return (model_dims.size() == 4u) && + (!desc.isND()) /* dims == 2 */ && + (desc.chan == 1 || desc.chan == 3) && + (desc.size.height != 1 && desc.size.width != 1) && + (desc.depth == CV_8U); +} + +cv::gapi::ie::TraitAs clarifyTrait(const cv::GMatDesc &mat_desc, + const IE::SizeVector &model_dims) { + if (isImage(mat_desc, model_dims)) { + return cv::gapi::ie::TraitAs::IMAGE; + } return cv::gapi::ie::TraitAs::TENSOR; } cv::gapi::ie::TraitAs clarifyTrait(const cv::GMetaArg &meta, - const IE::TensorDesc &tensor_desc) { + const IE::SizeVector &model_dims) { // NB: All media formats: BGR, NV12, Gray // are traited as image. if (cv::util::holds_alternative(meta)) { return cv::gapi::ie::TraitAs::IMAGE; } GAPI_Assert(cv::util::holds_alternative(meta)); - return clarifyTrait(cv::util::get(meta), tensor_desc); + return clarifyTrait(cv::util::get(meta), model_dims); } inline IE::TensorDesc toIE(const cv::Mat &mat, cv::gapi::ie::TraitAs hint) { const auto &sz = mat.size; - // NB: For some reason RGB image is 2D image - // (since channel component is not counted here). - // Note: regular 2D vectors also fall into this category if (sz.dims() == 2 && hint == cv::gapi::ie::TraitAs::IMAGE) { // NB: This logic is mainly taken from IE samples @@ -196,8 +256,72 @@ inline IE::TensorDesc toIE(const cv::Mat &mat, cv::gapi::ie::TraitAs hint) { return IE::TensorDesc(toIE(mat.depth()), toIE(sz), toIELayout(sz.dims())); } -inline IE::Blob::Ptr wrapIE(const cv::Mat &mat, cv::gapi::ie::TraitAs hint) { - const auto tDesc = toIE(mat, hint); +// NB: Inference dimmensions always follow NCDHW order +// even though the real layout is different. +// E.g if user provided Mat({1, 240, 320, 3}, CV_8U) + NHWC layout +// need to create Blob(U8, {1, 3, 240, 320}, NHWC). +inline IE::SizeVector toIEDims(const IE::SizeVector &dims, + const IE::Layout layout) { + switch (layout) { + case IE::Layout::NDHWC: // NCDHW + return {dims[0], dims[4], dims[1], dims[2], dims[3]}; + case IE::Layout::NHWC: // NCHW + return {dims[0], dims[3], dims[1], dims[2]}; + case IE::Layout::HWC: // CHW + return {dims[2], dims[0], dims[1]}; + default: return dims; + } + GAPI_Assert(false); +} + +// NB: Inference dimmensions always follow NCDHW order +// even though the real layout is different. +// E.g if U8 blob has {1, 3, 240, 320} dims and NHWC layout +// need to create cv::Mat({1, 240, 320, 3}, CV_8U); +inline std::vector toCVDims(const std::vector &dims, + const IE::Layout layout) { + switch (layout) { + case IE::Layout::NDHWC: // NCDHW + return {dims[0], dims[2], dims[3], dims[4], dims[1]}; + case IE::Layout::NHWC: // NCHW + return {dims[0], dims[2], dims[3], dims[1]}; + case IE::Layout::HWC: // CHW + return {dims[1], dims[2], dims[0]}; + default: return dims; + } + GAPI_Assert(false); +} + +inline IE::TensorDesc toIE(const cv::Mat &mat, + const cv::gapi::ie::TraitAs hint, + const IE::Layout layout) { + const auto &sz = mat.size; + if (sz.dims() == 2 && hint == cv::gapi::ie::TraitAs::IMAGE) + { + // NB: This logic is mainly taken from IE samples + const size_t channels = mat.channels(); + const size_t height = mat.size().height; + const size_t width = mat.size().width; + + const size_t strideH = mat.step1(); + IE::BlockingDesc bdesc({1, height, width, channels} /* blocking dims */, + {0, 2, 3, 1} /* order for NHWC */, + 0 /* offset */, + {0, 0, 0, 0} /* offsets for dims */, + {strideH * height, strideH, channels, 1} /* strides for dims */); + + return IE::TensorDesc(toIE(mat.depth()), + IE::SizeVector{1, channels, height, width}, bdesc); + } + return IE::TensorDesc(toIE(mat.depth()), + toIEDims(toIE(sz), layout), + layout); +} + +inline IE::Blob::Ptr wrapIE(const cv::Mat &mat, + cv::gapi::ie::TraitAs hint, + const IE::Layout layout = IE::Layout::ANY) { + const auto tDesc = toIE(mat, hint, layout); switch (mat.depth()) { // NB: Seems there's no way to create an untyped (T-less) Blob::Ptr // in IE given only precision via TensorDesc. So we have to do this: @@ -344,6 +468,7 @@ struct IEUnit { }; InputFramesDesc net_input_params; + std::unordered_map inputs_type; explicit IEUnit(const cv::gapi::ie::detail::ParamDesc &pp) : params(pp) { @@ -522,6 +647,8 @@ public: cv::GRunArgP output (std::size_t idx); cv::Mat& outMatR(std::size_t idx); + cv::gapi::ie::TraitAs getInputType(const std::string &layer_name) const; + const IEUnit &uu; cv::gimpl::GIslandExecutable::IOutput &out; @@ -565,6 +692,9 @@ private: // keep alive preprocessed frames std::mutex keep_alive_frames_mutex; std::unordered_map keep_alive_pp_frames; + + // NB: Hint to wrap input data properly into IE::Blob (see: wrapIE) + std::unordered_map input_type; }; IECallContext::IECallContext(const IEUnit & unit, @@ -599,6 +729,16 @@ IECallContext::IECallContext(const IEUnit & } } +cv::gapi::ie::TraitAs +IECallContext::getInputType(const std::string &layer_name) const { + const auto it = uu.inputs_type.find(layer_name); + if (it == uu.inputs_type.end()) { + cv::util::throw_error(std::logic_error( + "Failed to find input type for layer: \"" + layer_name + "\"")); + } + return it->second; +} + const cv::GArgs& IECallContext::inArgs() const { return m_args; } @@ -773,7 +913,8 @@ cv::MediaFrame preprocess_frame_impl(cv::MediaFrame &&in_frame, const std::strin inline IE::Blob::Ptr extractBlob(IECallContext& ctx, std::size_t i, - cv::gapi::ie::TraitAs hint, + const cv::gapi::ie::TraitAs hint, + const IE::Layout &layout, const std::string& layer_name, const cv::util::optional &opt_roi, cv::MediaFrame* out_keep_alive_frame = nullptr, @@ -821,7 +962,7 @@ inline IE::Blob::Ptr extractBlob(IECallContext& ctx, return wrapIE(*(ctx.views.back()), frame.desc()); } case cv::GShape::GMAT: { - return wrapIE(ctx.inMat(i), hint); + return wrapIE(ctx.inMat(i), hint, layout); } default: GAPI_Assert("Unsupported input shape for IE backend"); @@ -829,7 +970,6 @@ inline IE::Blob::Ptr extractBlob(IECallContext& ctx, GAPI_Error("InternalError"); } - static void setBlob(InferenceEngine::InferRequest& req, const std::string& layer_name, const IE::Blob::Ptr& blob, @@ -1203,79 +1343,110 @@ static void configureInputReshapeByImage(const IE::InputInfo::Ptr& ii, input_reshape_table.emplace(layer_name, input_dims); } -// NB: This function is used in order to configure -// preprocessing for "Load" case networks. -static void configureInputInfo(const IE::InputInfo::Ptr& ii, const cv::GMetaArg mm) { +static void cfgInputPrecision(const IE::InputInfo::Ptr& ii, const cv::GMetaArg mm) { switch (mm.index()) { - case cv::GMetaArg::index_of(): - { + case cv::GMetaArg::index_of(): { const auto &desc = util::get(mm); ii->setPrecision(toIE(desc.depth)); - // NB: Configure resize only for images. - const auto trait = clarifyTrait(desc, ii->getTensorDesc()); - if (trait == cv::gapi::ie::TraitAs::IMAGE) { - ii->getPreProcess().setResizeAlgorithm(IE::RESIZE_BILINEAR); - } break; } case cv::GMetaArg::index_of(): - { - const auto &meta = util::get(mm); - switch (meta.fmt) { - case cv::MediaFormat::NV12: - ii->getPreProcess().setColorFormat(IE::ColorFormat::NV12); - break; - case cv::MediaFormat::BGR: - // NB: Do nothing - break; - case cv::MediaFormat::GRAY: - // NB: Do nothing - break; - default: - GAPI_Error("Unsupported media format for IE backend"); - } ii->setPrecision(toIE(CV_8U)); - // NB: Always configure resize because media formats are images. - ii->getPreProcess().setResizeAlgorithm(IE::RESIZE_BILINEAR); break; - } default: util::throw_error(std::runtime_error("Unsupported input meta for IE backend")); } } -// NB: This function is used in order to configure -// preprocessing for "Import" case networks. -static IE::PreProcessInfo configurePreProcInfo(const IE::InputInfo::CPtr& ii, - const cv::GMetaArg& mm) { - IE::PreProcessInfo info; - switch (mm.index()) { - // NB: Note that it doesn't specify precision. - case cv::GMetaArg::index_of(): - { - // NB: Configure resize only for images. - const auto &desc = cv::util::get(mm); - const auto trait = clarifyTrait(desc, ii->getTensorDesc()); - if (trait == cv::gapi::ie::TraitAs::IMAGE) { - info.setResizeAlgorithm(IE::RESIZE_BILINEAR); - } - break; - } - // NB: Note that it doesn't specify precision. - case cv::GMetaArg::index_of(): - { - const auto &desc = cv::util::get(mm); - if (desc.fmt == cv::MediaFormat::NV12) { - info.setColorFormat(IE::ColorFormat::NV12); - } - // NB: Always configure resize because media formats are images. - info.setResizeAlgorithm(IE::RESIZE_BILINEAR); - break; - } - default: - util::throw_error(std::runtime_error("Unsupported input meta for IE backend")); +static void cfgImagePreprocessing(const IE::InputInfo::Ptr &ii, + const cv::GMetaArg &mm, + const IE::ResizeAlgorithm interp) { + if (!cv::util::holds_alternative(mm) && + !cv::util::holds_alternative(mm)) { + util::throw_error(std::runtime_error("Unsupported input meta for IE backend")); } + ii->getPreProcess().setResizeAlgorithm(interp); + if (cv::util::holds_alternative(mm)) { + const auto &meta = util::get(mm); + if (meta.fmt == cv::MediaFormat::NV12) { + ii->getPreProcess().setColorFormat(IE::ColorFormat::NV12); + } + } +} + +// NB: This function is used in order to configure +// preprocessing for "Load" case networks. +static void cfgInputPreprocessing(const cv::gapi::ie::TraitAs trait, + const IE::InputInfo::Ptr &ii, + const cv::GMetaArg &mm, + const std::string &layer_name, + const AttrMap &layout_map, + const AttrMap &interp_map) { + cfgInputPrecision(ii, mm); + const auto explicit_input_layout = lookUp(layout_map, layer_name); + const auto explicit_resize = lookUp(interp_map, layer_name); + if (trait == cv::gapi::ie::TraitAs::IMAGE) { + // NB: Image case - preprocessing is configured automatically. + GAPI_LOG_DEBUG(NULL, "IE Backend: Input: \"" << + layer_name << " " << mm << "\" is image."); + // NB: BlockingDesc is used instead (see wrapIE) + if (explicit_input_layout) { + util::throw_error(std::logic_error("Input data provided for layer: \"" + + layer_name + "\" is recognized as \"image\". Explicitly" + + " specified layout is prohibited.")); + } + const auto interp = explicit_resize ? toIEInterp(*explicit_resize) + : IE::RESIZE_BILINEAR; + cfgImagePreprocessing(ii, mm, interp); + } else { + // NB: Tensor case - preprocessing is configured only if user asked. + GAPI_LOG_DEBUG(NULL, "IE Backend: Input: \"" << + layer_name << "\" " << mm << " is tensor."); + if (explicit_input_layout) { + GAPI_LOG_DEBUG(NULL, "IE Backend: Set input layout \"" << + *explicit_input_layout << "\" for layer \"" << layer_name << "\""); + ii->setLayout(toIE(*explicit_input_layout)); + } + if (explicit_resize) { + GAPI_LOG_DEBUG(NULL, "IE Backend: Set resize for layer \"" << layer_name << "\""); + ii->getPreProcess().setResizeAlgorithm(toIEInterp(*explicit_resize)); + } + } +} + +static IE::PreProcessInfo createImagePreProcInfo(const cv::GMetaArg &mm, + const IE::ResizeAlgorithm interp) { + if (!cv::util::holds_alternative(mm) && + !cv::util::holds_alternative(mm)) { + util::throw_error(std::runtime_error("Unsupported input meta for IE backend")); + } + IE::PreProcessInfo info; + info.setResizeAlgorithm(interp); + if (cv::util::holds_alternative(mm)) { + const auto &meta = util::get(mm); + if (meta.fmt == cv::MediaFormat::NV12) { + info.setColorFormat(IE::ColorFormat::NV12); + } + } + return info; +} + +// NB: This function is used in order to create +// preprocessing for "Import" case networks. +static IE::PreProcessInfo createPreProcInfo(const cv::gapi::ie::TraitAs trait, + const cv::GMetaArg& mm, + const cv::optional explicit_resize) { + if (trait == cv::gapi::ie::TraitAs::IMAGE) { + const auto interp = explicit_resize ? toIEInterp(*explicit_resize) + : IE::RESIZE_BILINEAR; + return createImagePreProcInfo(mm, interp); + } + // NB: In case "tensor" only resize can't be spefied for "import" models. + IE::PreProcessInfo info; + if (explicit_resize) { + info.setResizeAlgorithm(toIEInterp(*explicit_resize)); + } return info; } @@ -1301,6 +1472,13 @@ static void configureOutputPrecision(const IE::OutputsDataMap &outputs ); } +static void configureOutputLayout(const IE::OutputsDataMap &outputs_info, + const AttrMap &output_layout) { + for (const auto it : output_layout) { + outputs_info.at(it.first)->setLayout(toIE(it.second)); + } +} + // NB: This is a callback used by async infer // to post outputs blobs (cv::GMat's). static void PostOutputs(InferenceEngine::InferRequest &request, @@ -1420,6 +1598,10 @@ struct Infer: public cv::detail::KernelTag { GAPI_Assert(uu.params.input_names.size() == in_metas.size() && "Known input layers count doesn't match input meta count"); + const auto input_layout = broadcastLayerAttr(uu.params.input_layout, + uu.params.input_names); + const auto interpolation = broadcastLayerAttr(uu.params.interpolation, + uu.params.input_names); // NB: Configuring input/output precision and network reshape must be done // only in the loadNetwork case. using namespace cv::gapi::ie::detail; @@ -1429,21 +1611,24 @@ struct Infer: public cv::detail::KernelTag { ade::util::toRange(in_metas))) { const auto &input_name = std::get<0>(it); auto ii = inputs.at(input_name); - const auto & mm = std::get<1>(it); + const auto &mm = std::get<1>(it); - configureInputInfo(ii, mm); if (uu.params.layer_names_to_reshape.find(input_name) != uu.params.layer_names_to_reshape.end()) { configureInputReshapeByImage(ii, mm, input_reshape_table); } - + const auto trait = clarifyTrait(mm, ii->getTensorDesc().getDims()); + // FIXME: This is the only place where information about input type + // can be stored for the futher execution. + const_cast(uu).inputs_type.emplace(input_name, trait); + cfgInputPreprocessing(trait, ii, mm, input_name, + input_layout, interpolation); // NB: configure input param for further preproc if (uu.net_input_params.is_applicable(mm)) { const_cast(uu.net_input_params) .set_param(input_name, ii->getTensorDesc()); } } - for (auto &&p : uu.params.const_inputs) { const auto ii = inputs.at(p.first); ii->setPrecision(toIE(p.second.first.depth())); @@ -1455,6 +1640,10 @@ struct Infer: public cv::detail::KernelTag { if (!input_reshape_table.empty()) { const_cast(&uu.net)->reshape(input_reshape_table); } + + const auto output_layout = broadcastLayerAttr(uu.params.output_layout, + uu.params.output_names); + configureOutputLayout(uu.net.getOutputsInfo(), output_layout); configureOutputPrecision(uu.net.getOutputsInfo(), uu.params.output_precision); } else { GAPI_Assert(uu.params.kind == ParamDesc::Kind::Import); @@ -1466,7 +1655,13 @@ struct Infer: public cv::detail::KernelTag { const auto &input_name = std::get<0>(it); auto ii = inputs.at(input_name); const auto & mm = std::get<1>(it); - non_const_prepm->emplace(input_name, configurePreProcInfo(ii, mm)); + const auto trait = clarifyTrait(mm, ii->getTensorDesc().getDims()); + // FIXME: This is the only place where information about input type + // can be stored for the futher execution. + const_cast(uu).inputs_type.emplace(input_name, trait); + const auto explicit_resize = lookUp(interpolation, input_name); + non_const_prepm->emplace( + input_name, createPreProcInfo(trait, mm, explicit_resize)); // NB: configure input param for further preproc if (uu.net_input_params.is_applicable(mm)) { @@ -1488,7 +1683,7 @@ struct Infer: public cv::detail::KernelTag { : uu.this_network.GetOutputsInfo().at(out_name)->getTensorDesc(); cv::GMatDesc outm(toCV(desc.getPrecision()), - toCV(desc.getDims())); + toCVDims(toCV(desc.getDims()), desc.getLayout())); result.emplace_back(outm); } return result; @@ -1504,15 +1699,10 @@ struct Infer: public cv::detail::KernelTag { // - assumes all inputs/outputs are always Mats for (auto i : ade::util::iota(ctx->uu.params.num_in)) { const auto& layer_name = ctx->uu.params.input_names[i]; - auto layout = - ctx->uu.this_network.GetInputsInfo(). - at(layer_name)->getTensorDesc().getLayout(); - auto hint = - (layout == IE::Layout::NCHW || layout == IE::Layout::NHWC) - ? cv::gapi::ie::TraitAs::IMAGE : cv::gapi::ie::TraitAs::TENSOR; - + const auto hint = ctx->getInputType(layer_name); + const auto layout = req.GetBlob(layer_name)->getTensorDesc().getLayout(); IE::Blob::Ptr this_blob = extractBlob(*ctx, i, hint, - layer_name, + layout, layer_name, cv::util::optional{}); setBlob(req, layer_name, this_blob, *ctx); } @@ -1552,7 +1742,7 @@ struct InferROI: public cv::detail::KernelTag { if (cv::util::holds_alternative(mm) || cv::util::holds_alternative(mm)) { - const auto trait = clarifyTrait(mm, tensor_desc); + const auto trait = clarifyTrait(mm, tensor_desc.getDims()); if (trait != cv::gapi::ie::TraitAs::IMAGE) { util::throw_error(std::runtime_error( "IE Backend: Only image is supported" @@ -1566,16 +1756,22 @@ struct InferROI: public cv::detail::KernelTag { // NB: Configuring input precision and network reshape must be done // only in the loadNetwork case. + const auto input_layout = broadcastLayerAttr(uu.params.input_layout, + uu.params.input_names); + const auto interpolation = broadcastLayerAttr(uu.params.interpolation, + uu.params.input_names); + const auto trait = cv::gapi::ie::TraitAs::IMAGE; if (uu.params.kind == cv::gapi::ie::detail::ParamDesc::Kind::Load) { // 0th is ROI, 1st is input image auto inputs = uu.net.getInputsInfo(); auto ii = inputs.at(input_name); - configureInputInfo(ii, mm); if (uu.params.layer_names_to_reshape.find(input_name) != uu.params.layer_names_to_reshape.end()) { configureInputReshapeByImage(ii, mm, input_reshape_table); } + cfgInputPreprocessing(trait, ii, mm, input_name, + input_layout, interpolation); // FIXME: This isn't the best place to call reshape function. // Сorrect solution would be to do this in compile() method of network, @@ -1594,6 +1790,9 @@ struct InferROI: public cv::detail::KernelTag { inputs.at(p.first)->setPrecision(toIE(p.second.first.depth())); } + const auto output_layout = broadcastLayerAttr(uu.params.output_layout, + uu.params.output_names); + configureOutputLayout(uu.net.getOutputsInfo(), output_layout); configureOutputPrecision(uu.net.getOutputsInfo(), uu.params.output_precision); } else { GAPI_Assert(uu.params.kind == cv::gapi::ie::detail::ParamDesc::Kind::Import); @@ -1601,8 +1800,9 @@ struct InferROI: public cv::detail::KernelTag { // FIXME: This isn't the best place to collect PreProcMap. auto* non_const_prepm = const_cast(&uu.preproc_map); auto ii = inputs.at(input_name); - - non_const_prepm->emplace(input_name, configurePreProcInfo(ii, mm)); + const auto explicit_resize = lookUp(interpolation, input_name); + non_const_prepm->emplace( + input_name, createPreProcInfo(trait, mm, explicit_resize)); // NB: configure intput param for further preproc if (uu.net_input_params.is_applicable(mm)) { @@ -1623,7 +1823,7 @@ struct InferROI: public cv::detail::KernelTag { : uu.this_network.GetOutputsInfo().at(out_name)->getTensorDesc(); cv::GMatDesc outm(toCV(desc.getPrecision()), - toCV(desc.getDims())); + toCVDims(toCV(desc.getDims()), desc.getLayout())); result.emplace_back(outm); } return result; @@ -1646,6 +1846,7 @@ struct InferROI: public cv::detail::KernelTag { bool preprocessed = false; IE::Blob::Ptr this_blob = extractBlob(*ctx, 1, cv::gapi::ie::TraitAs::IMAGE, + IE::Layout::ANY, *(ctx->uu.params.input_names.begin()), cv::util::make_optional(this_roi), slot_ptr, &preprocessed); @@ -1691,6 +1892,10 @@ struct InferList: public cv::detail::KernelTag { // NB: Configuring input precision and network reshape must be done // only in the loadNetwork case. + const auto input_layout = broadcastLayerAttr(uu.params.input_layout, + uu.params.input_names); + const auto interpolation = broadcastLayerAttr(uu.params.interpolation, + uu.params.input_names); if (uu.params.kind == cv::gapi::ie::detail::ParamDesc::Kind::Load) { std::size_t idx = 1u; auto inputs = uu.net.getInputsInfo(); @@ -1699,20 +1904,19 @@ struct InferList: public cv::detail::KernelTag { const auto & mm = in_metas[idx++]; // NB: InferList expects the input starts with index 1 wil be the images. - const auto input_trait = clarifyTrait(mm, ii->getTensorDesc()); + const auto input_trait = clarifyTrait(mm, ii->getTensorDesc().getDims()); if (input_trait != cv::gapi::ie::TraitAs::IMAGE) { util::throw_error(std::runtime_error( "IE Backend: Only image is supported" " as the " + std::to_string(idx) + "th argument for InferList")); } - configureInputInfo(ii, mm); if (uu.params.layer_names_to_reshape.find(input_name) != uu.params.layer_names_to_reshape.end()) { configureInputReshapeByImage(ii, mm, input_reshape_table); } - - + cfgInputPreprocessing(input_trait, ii, mm, + input_name, input_layout, interpolation); } // FIXME: This isn't the best place to call reshape function. @@ -1727,6 +1931,9 @@ struct InferList: public cv::detail::KernelTag { ii->setPrecision(toIE(p.second.first.depth())); } + const auto output_layout = broadcastLayerAttr(uu.params.output_layout, + uu.params.output_names); + configureOutputLayout(uu.net.getOutputsInfo(), output_layout); configureOutputPrecision(uu.net.getOutputsInfo(), uu.params.output_precision); } else { GAPI_Assert(uu.params.kind == cv::gapi::ie::detail::ParamDesc::Kind::Import); @@ -1738,14 +1945,16 @@ struct InferList: public cv::detail::KernelTag { const auto & mm = in_metas[idx++]; // NB: InferList expects the input starts with index 1 wil be the images. - const auto input_trait = clarifyTrait(mm, ii->getTensorDesc()); + const auto input_trait = clarifyTrait(mm, ii->getTensorDesc().getDims()); if (input_trait != cv::gapi::ie::TraitAs::IMAGE) { util::throw_error(std::runtime_error( "IE Backend: Only image is supported" " as the " + std::to_string(idx) + "th argument for InferList")); } - non_const_prepm->emplace(input_name, configurePreProcInfo(ii, mm)); + const auto explicit_resize = lookUp(interpolation, input_name); + non_const_prepm->emplace( + input_name, createPreProcInfo(input_trait, mm, explicit_resize)); } } @@ -1773,6 +1982,7 @@ struct InferList: public cv::detail::KernelTag { // NB: This blob will be used to make roi from its, so // it should be treated as image IE::Blob::Ptr this_blob = extractBlob(*ctx, 1, cv::gapi::ie::TraitAs::IMAGE, + IE::Layout::ANY, ctx->uu.params.input_names[0u], cv::util::optional{}); @@ -1783,7 +1993,7 @@ struct InferList: public cv::detail::KernelTag { ctx->uu.params.kind == cv::gapi::ie::detail::ParamDesc::Kind::Load ? ctx->uu.net.getOutputsInfo().at(out_name)->getTensorDesc() : ctx->uu.this_network.GetOutputsInfo().at(out_name)->getTensorDesc(); - cached_dims[i] = toCV(desc.getDims()); + cached_dims[i] = toCVDims(toCV(desc.getDims()), desc.getLayout()); // FIXME: Isn't this should be done automatically // by some resetInternalData(), etc? (Probably at the GExecutor level) auto& out_vec = ctx->outVecR(i); @@ -1849,7 +2059,7 @@ struct InferList2: public cv::detail::KernelTag { if (cv::util::holds_alternative(mm_0) || cv::util::holds_alternative(mm_0)) { - const auto trait = clarifyTrait(mm_0, tensor_desc_0); + const auto trait = clarifyTrait(mm_0, tensor_desc_0.getDims()); if (trait != cv::gapi::ie::TraitAs::IMAGE) { util::throw_error(std::runtime_error( "IE Backend: Only images is" @@ -1862,23 +2072,29 @@ struct InferList2: public cv::detail::KernelTag { } std::size_t idx = 1u; + const auto input_layout = broadcastLayerAttr(uu.params.input_layout, + uu.params.input_names); + const auto interpolation = broadcastLayerAttr(uu.params.interpolation, + uu.params.input_names); for (auto &&input_name : uu.params.input_names) { const auto &mm = in_metas[idx]; GAPI_Assert(util::holds_alternative(mm) && "Non-array inputs are not supported"); if (op.k.inKinds[idx] == cv::detail::OpaqueKind::CV_RECT) { + const auto input_trait = cv::gapi::ie::TraitAs::IMAGE; // NB: Configuring input precision and network reshape must be done // only in the loadNetwork case. if (uu.params.kind == cv::gapi::ie::detail::ParamDesc::Kind::Load) { auto inputs = uu.net.getInputsInfo(); // This is a cv::Rect -- configure the IE preprocessing auto ii = inputs.at(input_name); - configureInputInfo(ii, mm_0); if (uu.params.layer_names_to_reshape.find(input_name) != uu.params.layer_names_to_reshape.end()) { configureInputReshapeByImage(ii, mm_0, input_reshape_table); } + cfgInputPreprocessing(input_trait, ii, mm_0, + input_name, input_layout, interpolation); for (auto &&p : uu.params.const_inputs) { inputs.at(p.first)->setPrecision(toIE(p.second.first.depth())); @@ -1890,19 +2106,32 @@ struct InferList2: public cv::detail::KernelTag { if (!input_reshape_table.empty()) { const_cast(&uu.net)->reshape(input_reshape_table); } + const auto output_layout = broadcastLayerAttr(uu.params.output_layout, + uu.params.output_names); + configureOutputLayout(uu.net.getOutputsInfo(), output_layout); configureOutputPrecision(uu.net.getOutputsInfo(), uu.params.output_precision); } else { GAPI_Assert(uu.params.kind == cv::gapi::ie::detail::ParamDesc::Kind::Import); auto inputs = uu.this_network.GetInputsInfo(); auto* non_const_prepm = const_cast(&uu.preproc_map); auto ii = inputs.at(input_name); - non_const_prepm->emplace(input_name, configurePreProcInfo(ii, mm_0)); + const auto explicit_resize = lookUp(interpolation, input_name); + non_const_prepm->emplace( + input_name, createPreProcInfo(input_trait, mm_0, explicit_resize)); } } else { // This is a cv::GMat (equals to: cv::Mat) // Just validate that it is really the type // (other types are prohibited here) GAPI_Assert(op.k.inKinds[idx] == cv::detail::OpaqueKind::CV_MAT); + // NB: Well, it's even impossible to specify the precision since + // there is not such info in GArray + const auto explicit_resize = lookUp(interpolation, input_name); + const auto explicit_layout = lookUp(input_layout , input_name); + if (explicit_resize || explicit_layout) { + util::throw_error(std::logic_error( + "InferList2 doesn't support preprocessing for \"tensor\"'s arguments!")); + } } idx++; // NB: Never forget to increment the counter } @@ -1922,6 +2151,7 @@ struct InferList2: public cv::detail::KernelTag { // NB: This blob will be used to make roi from its, so // it should be treated as image IE::Blob::Ptr blob_0 = extractBlob(*ctx, 0, cv::gapi::ie::TraitAs::IMAGE, + IE::Layout::ANY, ctx->uu.params.input_names[0u], cv::util::optional{}); const auto list_size = ctx->inArg(1u).size(); @@ -1941,7 +2171,7 @@ struct InferList2: public cv::detail::KernelTag { ctx->uu.params.kind == cv::gapi::ie::detail::ParamDesc::Kind::Load ? ctx->uu.net.getOutputsInfo().at(out_name)->getTensorDesc() : ctx->uu.this_network.GetOutputsInfo().at(out_name)->getTensorDesc(); - cached_dims[i] = toCV(desc.getDims()); + cached_dims[i] = toCVDims(toCV(desc.getDims()), desc.getLayout()); // FIXME: Isn't this should be done automatically // by some resetInternalData(), etc? (Probably at the GExecutor level) auto& out_vec = ctx->outVecR(i); @@ -1964,8 +2194,10 @@ struct InferList2: public cv::detail::KernelTag { } else if (this_vec.getKind() == cv::detail::OpaqueKind::CV_MAT) { const auto &vec = this_vec.rref(); const auto &mat = vec[list_idx]; - setBlob(req, ctx->uu.params.input_names[in_idx], - wrapIE(mat, cv::gapi::ie::TraitAs::TENSOR), + const auto layer_name = ctx->uu.params.input_names[in_idx]; + const auto layout = req.GetBlob(layer_name)->getTensorDesc().getLayout(); + setBlob(req, layer_name, + wrapIE(mat, cv::gapi::ie::TraitAs::TENSOR, layout), *ctx); } else { GAPI_Assert(false && diff --git a/modules/gapi/test/infer/gapi_infer_ie_test.cpp b/modules/gapi/test/infer/gapi_infer_ie_test.cpp index 1fcf9da044..4056dd323f 100644 --- a/modules/gapi/test/infer/gapi_infer_ie_test.cpp +++ b/modules/gapi/test/infer/gapi_infer_ie_test.cpp @@ -2,7 +2,7 @@ // It is subject to the license terms in the LICENSE file found in the top-level directory // of this distribution and at http://opencv.org/license.html. // -// Copyright (C) 2019-2021 Intel Corporation +// Copyright (C) 2019-2023 Intel Corporation #include "../test_precomp.hpp" @@ -3057,6 +3057,73 @@ TEST_F(AgeGenderInferTest, ChangeSpecificOutputPrecison) { validate(); } +TEST_F(AgeGenderInferTest, ThrowIfSetLayoutForImage) { + auto pp = cv::gapi::ie::Params { + m_params.model_path, m_params.weights_path, m_params.device_id + }.cfgOutputLayers({ "age_conv3", "prob" }) + .cfgOutputPrecision({{"prob", CV_8U}}) + .cfgInputLayout("NHWC"); + + EXPECT_ANY_THROW(buildGraph().apply(cv::gin(m_in_mat), cv::gout(m_gapi_age, m_gapi_gender), + cv::compile_args(cv::gapi::networks(pp)))); +} + +TEST(TestAgeGenderIE, InferTensorWithPreproc) { + initDLDTDataPath(); + + cv::gapi::ie::detail::ParamDesc params; + params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml"); + params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin"); + params.device_id = "CPU"; + + // Load IE network, initialize input data using that. + cv::Mat in_mat({1, 240, 320, 3}, CV_8U); + cv::randu(in_mat, 0, 255); + cv::Mat gapi_age, gapi_gender; + + IE::Blob::Ptr ie_age, ie_gender; + { + auto plugin = cv::gimpl::ie::wrap::getPlugin(params); + auto net = cv::gimpl::ie::wrap::readNetwork(params); + auto ii = net.getInputsInfo().at("data"); + + ii->setPrecision(IE::Precision::U8); + ii->getPreProcess().setResizeAlgorithm(IE::RESIZE_BILINEAR); + ii->setLayout(IE::Layout::NHWC); + + auto this_network = cv::gimpl::ie::wrap::loadNetwork(plugin, net, params); + auto infer_request = this_network.CreateInferRequest(); + IE::TensorDesc desc{IE::Precision::U8, {1, 3, 240, 320}, IE::Layout::NHWC}; + auto blob = IE::make_shared_blob(desc, const_cast(in_mat.ptr())); + infer_request.SetBlob("data", blob); + infer_request.Infer(); + ie_age = infer_request.GetBlob("age_conv3"); + ie_gender = infer_request.GetBlob("prob"); + } + + // Configure & run G-API + using AGInfo = std::tuple; + G_API_NET(AgeGender, , "test-age-gender"); + + cv::GMat in; + cv::GMat age, gender; + std::tie(age, gender) = cv::gapi::infer(in); + cv::GComputation comp(cv::GIn(in), cv::GOut(age, gender)); + + auto pp = cv::gapi::ie::Params { + params.model_path, params.weights_path, params.device_id + }.cfgOutputLayers({ "age_conv3", "prob" }) + .cfgResize(cv::INTER_LINEAR) + .cfgInputLayout("NHWC"); + + comp.apply(cv::gin(in_mat), cv::gout(gapi_age, gapi_gender), + cv::compile_args(cv::gapi::networks(pp))); + + // Validate with IE itself (avoid DNN module dependency here) + normAssert(cv::gapi::ie::util::to_ocv(ie_age), gapi_age, "Test age output" ); + normAssert(cv::gapi::ie::util::to_ocv(ie_gender), gapi_gender, "Test gender output"); +} + } // namespace opencv_test #endif // HAVE_INF_ENGINE