#include #include #include #include #include #include #include #include #include #include #include #include #include // CommandLineParser #include #ifdef HAVE_INF_ENGINE #include // ParamMap #ifdef HAVE_DIRECTX #ifdef HAVE_D3D11 #pragma comment(lib,"d3d11.lib") // get rid of generate macro max/min/etc from DX side #define D3D11_NO_HELPERS #define NOMINMAX #include #include #pragma comment(lib, "dxgi") #undef NOMINMAX #undef D3D11_NO_HELPERS #endif // HAVE_D3D11 #endif // HAVE_DIRECTX #endif // HAVE_INF_ENGINE #ifdef __linux__ #if defined(HAVE_VA) || defined(HAVE_VA_INTEL) #include "va/va.h" #include "va/va_drm.h" #include #include #endif // defined(HAVE_VA) || defined(HAVE_VA_INTEL) #endif // __linux__ const std::string about = "This is an OpenCV-based version of oneVPLSource decoder example"; const std::string keys = "{ h help | | Print this help message }" "{ input | | Path to the input demultiplexed video file }" "{ output | | Path to the output RAW video file. Use .avi extension }" "{ facem | face-detection-adas-0001.xml | Path to OpenVINO IE face detection model (.xml) }" "{ faced | GPU | Target device for face detection model (e.g. AUTO, GPU, VPU, ...) }" "{ cfg_params | | Semicolon separated list of oneVPL mfxVariants which is used for configuring source (see `MFXSetConfigFilterProperty` by https://spec.oneapi.io/versions/latest/elements/oneVPL/source/index.html) }" "{ streaming_queue_capacity | 1 | Streaming executor queue capacity. Calculated automatically if 0 }" "{ frames_pool_size | 0 | OneVPL source applies this parameter as preallocated frames pool size}" "{ vpp_frames_pool_size | 0 | OneVPL source applies this parameter as preallocated frames pool size for VPP preprocessing results}" "{ roi | -1,-1,-1,-1 | Region of interest (ROI) to use for inference. Identified automatically when not set }" "{ source_device | CPU | choose device for decoding }" "{ preproc_device | | choose device for preprocessing }"; namespace { bool is_gpu(const std::string &device_name) { return device_name.find("GPU") != std::string::npos; } std::string get_weights_path(const std::string &model_path) { const auto EXT_LEN = 4u; const auto sz = model_path.size(); GAPI_Assert(sz > EXT_LEN); auto ext = model_path.substr(sz - EXT_LEN); std::transform(ext.begin(), ext.end(), ext.begin(), [](unsigned char c){ return static_cast(std::tolower(c)); }); GAPI_Assert(ext == ".xml"); return model_path.substr(0u, sz - EXT_LEN) + ".bin"; } // TODO: It duplicates infer_single_roi sample cv::util::optional parse_roi(const std::string &rc) { cv::Rect rv; char delim[3]; std::stringstream is(rc); is >> rv.x >> delim[0] >> rv.y >> delim[1] >> rv.width >> delim[2] >> rv.height; if (is.bad()) { return cv::util::optional(); // empty value } const auto is_delim = [](char c) { return c == ','; }; if (!std::all_of(std::begin(delim), std::end(delim), is_delim)) { return cv::util::optional(); // empty value } if (rv.x < 0 || rv.y < 0 || rv.width <= 0 || rv.height <= 0) { return cv::util::optional(); // empty value } return cv::util::make_optional(std::move(rv)); } #ifdef HAVE_DIRECTX #ifdef HAVE_D3D11 // Since ATL headers might not be available on specific MSVS Build Tools // we use simple `CComPtr` implementation like as `ComPtrGuard` // which is not supposed to be the full functional replacement of `CComPtr` // and it uses as RAII to make sure utilization is correct template void release(COMNonManageableType *ptr) { if (ptr) { ptr->Release(); } } template using ComPtrGuard = std::unique_ptr)>; template ComPtrGuard createCOMPtrGuard(COMNonManageableType *ptr = nullptr) { return ComPtrGuard {ptr, &release}; } using AccelParamsType = std::tuple, ComPtrGuard>; AccelParamsType create_device_with_ctx(IDXGIAdapter* adapter) { UINT flags = 0; D3D_FEATURE_LEVEL feature_levels[] = { D3D_FEATURE_LEVEL_11_1, D3D_FEATURE_LEVEL_11_0, }; D3D_FEATURE_LEVEL featureLevel; ID3D11Device* ret_device_ptr = nullptr; ID3D11DeviceContext* ret_ctx_ptr = nullptr; HRESULT err = D3D11CreateDevice(adapter, D3D_DRIVER_TYPE_UNKNOWN, nullptr, flags, feature_levels, ARRAYSIZE(feature_levels), D3D11_SDK_VERSION, &ret_device_ptr, &featureLevel, &ret_ctx_ptr); if (FAILED(err)) { throw std::runtime_error("Cannot create D3D11CreateDevice, error: " + std::to_string(HRESULT_CODE(err))); } return std::make_tuple(createCOMPtrGuard(ret_device_ptr), createCOMPtrGuard(ret_ctx_ptr)); } #endif // HAVE_D3D11 #endif // HAVE_DIRECTX } // anonymous namespace namespace custom { G_API_NET(FaceDetector, , "face-detector"); using GDetections = cv::GArray; using GRect = cv::GOpaque; using GSize = cv::GOpaque; using GPrims = cv::GArray; G_API_OP(ParseSSD, , "sample.custom.parse-ssd") { static cv::GArrayDesc outMeta(const cv::GMatDesc &, const cv::GOpaqueDesc &, const cv::GOpaqueDesc &) { return cv::empty_array_desc(); } }; // TODO: It duplicates infer_single_roi sample G_API_OP(LocateROI, , "sample.custom.locate-roi") { static cv::GOpaqueDesc outMeta(const cv::GOpaqueDesc &) { return cv::empty_gopaque_desc(); } }; G_API_OP(BBoxes, , "sample.custom.b-boxes") { static cv::GArrayDesc outMeta(const cv::GArrayDesc &, const cv::GOpaqueDesc &) { return cv::empty_array_desc(); } }; GAPI_OCV_KERNEL(OCVLocateROI, LocateROI) { // This is the place where we can run extra analytics // on the input image frame and select the ROI (region // of interest) where we want to detect our objects (or // run any other inference). // // Currently it doesn't do anything intelligent, // but only crops the input image to square (this is // the most convenient aspect ratio for detectors to use) static void run(const cv::Size& in_size, cv::Rect &out_rect) { // Identify the central point & square size (- some padding) const auto center = cv::Point{in_size.width/2, in_size.height/2}; auto sqside = std::min(in_size.width, in_size.height); // Now build the central square ROI out_rect = cv::Rect{ center.x - sqside/2 , center.y - sqside/2 , sqside , sqside }; } }; GAPI_OCV_KERNEL(OCVBBoxes, BBoxes) { // This kernel converts the rectangles into G-API's // rendering primitives static void run(const std::vector &in_face_rcs, const cv::Rect &in_roi, std::vector &out_prims) { out_prims.clear(); const auto cvt = [](const cv::Rect &rc, const cv::Scalar &clr) { return cv::gapi::wip::draw::Rect(rc, clr, 2); }; out_prims.emplace_back(cvt(in_roi, CV_RGB(0,255,255))); // cyan for (auto &&rc : in_face_rcs) { out_prims.emplace_back(cvt(rc, CV_RGB(0,255,0))); // green } } }; GAPI_OCV_KERNEL(OCVParseSSD, ParseSSD) { static void run(const cv::Mat &in_ssd_result, const cv::Rect &in_roi, const cv::Size &in_parent_size, std::vector &out_objects) { const auto &in_ssd_dims = in_ssd_result.size; GAPI_Assert(in_ssd_dims.dims() == 4u); const int MAX_PROPOSALS = in_ssd_dims[2]; const int OBJECT_SIZE = in_ssd_dims[3]; GAPI_Assert(OBJECT_SIZE == 7); // fixed SSD object size const cv::Size up_roi = in_roi.size(); const cv::Rect surface({0,0}, in_parent_size); out_objects.clear(); const float *data = in_ssd_result.ptr(); for (int i = 0; i < MAX_PROPOSALS; i++) { const float image_id = data[i * OBJECT_SIZE + 0]; const float label = data[i * OBJECT_SIZE + 1]; const float confidence = data[i * OBJECT_SIZE + 2]; const float rc_left = data[i * OBJECT_SIZE + 3]; const float rc_top = data[i * OBJECT_SIZE + 4]; const float rc_right = data[i * OBJECT_SIZE + 5]; const float rc_bottom = data[i * OBJECT_SIZE + 6]; (void) label; // unused if (image_id < 0.f) { break; // marks end-of-detections } if (confidence < 0.5f) { continue; // skip objects with low confidence } // map relative coordinates to the original image scale // taking the ROI into account cv::Rect rc; rc.x = static_cast(rc_left * up_roi.width); rc.y = static_cast(rc_top * up_roi.height); rc.width = static_cast(rc_right * up_roi.width) - rc.x; rc.height = static_cast(rc_bottom * up_roi.height) - rc.y; rc.x += in_roi.x; rc.y += in_roi.y; out_objects.emplace_back(rc & surface); } } }; } // namespace custom namespace cfg { typename cv::gapi::wip::onevpl::CfgParam create_from_string(const std::string &line); struct flow { flow(bool preproc, bool rctx) : vpl_preproc_enable(preproc), ie_remote_ctx_enable(rctx) { } bool vpl_preproc_enable = false; bool ie_remote_ctx_enable = false; }; using support_matrix = std::map >>>; support_matrix resolved_conf{{ {"GPU", {{ {"", {{ "CPU", std::make_shared(false, false)}, { "GPU", {/* unsupported: * ie GPU preproc isn't available */}} }}, {"CPU", {{ "CPU", {/* unsupported: preproc mix */}}, { "GPU", {/* unsupported: preproc mix */}} }}, {"GPU", {{ "CPU", std::make_shared(true, false)}, { "GPU", std::make_shared(true, true)}}} }} }, {"CPU", {{ {"", {{ "CPU", std::make_shared(false, false)}, { "GPU", std::make_shared(false, false)} }}, {"CPU", {{ "CPU", std::make_shared(true, false)}, { "GPU", std::make_shared(true, false)} }}, {"GPU", {{ "CPU", {/* unsupported: preproc mix */}}, { "GPU", {/* unsupported: preproc mix */}}}} }} } }}; static void print_available_cfg(std::ostream &out, const std::string &source_device, const std::string &preproc_device, const std::string &ie_device_id) { const std::string source_device_cfg_name("--source_device="); const std::string preproc_device_cfg_name("--preproc_device="); const std::string ie_cfg_name("--faced="); out << "unsupported acceleration param combinations:\n" << source_device_cfg_name << source_device << " " << preproc_device_cfg_name << preproc_device << " " << ie_cfg_name << ie_device_id << "\n\nSupported matrix:\n\n" << std::endl; for (const auto &s_d : cfg::resolved_conf) { std::string prefix = source_device_cfg_name + s_d.first; for (const auto &p_d : s_d.second) { std::string mid_prefix = prefix + +"\t" + preproc_device_cfg_name + (p_d.first.empty() ? "" : p_d.first); for (const auto &i_d : p_d.second) { if (i_d.second) { std::cerr << mid_prefix << "\t" << ie_cfg_name <("input"); const auto output = cmd.get("output"); const auto opt_roi = parse_roi(cmd.get("roi")); const auto face_model_path = cmd.get("facem"); const auto streaming_queue_capacity = cmd.get("streaming_queue_capacity"); const auto source_decode_queue_capacity = cmd.get("frames_pool_size"); const auto source_vpp_queue_capacity = cmd.get("vpp_frames_pool_size"); const auto device_id = cmd.get("faced"); const auto source_device = cmd.get("source_device"); const auto preproc_device = cmd.get("preproc_device"); // validate support matrix std::shared_ptr flow_settings = cfg::resolved_conf[source_device][preproc_device][device_id]; if (!flow_settings) { cfg::print_available_cfg(std::cerr, source_device, preproc_device, device_id); return -1; } // check output file extension if (!output.empty()) { auto ext = output.find_last_of("."); if (ext == std::string::npos || (output.substr(ext + 1) != "avi")) { std::cerr << "Output file should have *.avi extension for output video" << std::endl; return -1; } } // get oneVPL cfg params from cmd std::stringstream params_list(cmd.get("cfg_params")); std::vector source_cfgs; try { std::string line; while (std::getline(params_list, line, ';')) { source_cfgs.push_back(cfg::create_from_string(line)); } } catch (const std::exception& ex) { std::cerr << "Invalid cfg parameter: " << ex.what() << std::endl; return -1; } // apply VPL source optimization params if (source_decode_queue_capacity != 0) { source_cfgs.push_back(cv::gapi::wip::onevpl::CfgParam::create_frames_pool_size(source_decode_queue_capacity)); } if (source_vpp_queue_capacity != 0) { source_cfgs.push_back(cv::gapi::wip::onevpl::CfgParam::create_vpp_frames_pool_size(source_vpp_queue_capacity)); } auto face_net = cv::gapi::ie::Params { face_model_path, // path to topology IR get_weights_path(face_model_path), // path to weights device_id }; // It is allowed (and highly recommended) to reuse predefined device_ptr & context_ptr objects // received from user application. Current sample demonstrate how to deal with this situation. // // But if you do not need this fine-grained acceleration devices configuration then // just use default constructors for onevpl::GSource, IE and preprocessing module. // But please pay attention that default pipeline construction in this case will be // very inefficient and carries out multiple CPU-GPU memory copies // // If you want to reach max performance and seize copy-free approach for specific // device & context selection then follow the steps below. // The situation is complicated a little bit in comparison with default configuration, thus // let's focusing this: // // - all component-participants (Source, Preprocessing, Inference) // must share the same device & context instances // // - you must wrapping your available device & context instancs into thin // `cv::gapi::wip::Device` & `cv::gapi::wip::Context`. // !!! Please pay attention that both objects are weak wrapper so you must ensure // that device & context would be alived before full pipeline created !!! // // - you should pass such wrappers as constructor arguments for each component in pipeline: // a) use extended constructor for `onevpl::GSource` for activating predefined device & context // b) use `cfgContextParams` method of `cv::gapi::ie::Params` to enable `PreprocesingEngine` // for predefined device & context // c) use `InferenceEngine::ParamMap` to activate remote ctx in Inference Engine for given // device & context // // //// P.S. the current sample supports heterogenous pipeline construction also. //// It is possible to make up mixed device approach. //// Please feel free to explore different configurations! cv::util::optional gpu_accel_device; cv::util::optional gpu_accel_ctx; cv::gapi::wip::onevpl::Device cpu_accel_device = cv::gapi::wip::onevpl::create_host_device(); cv::gapi::wip::onevpl::Context cpu_accel_ctx = cv::gapi::wip::onevpl::create_host_context(); // create GPU device if requested if (is_gpu(device_id) || is_gpu(source_device) || is_gpu(preproc_device)) { #ifdef HAVE_DIRECTX #ifdef HAVE_D3D11 // create DX11 device & context owning handles. // wip::Device & wip::Context provide non-owning semantic of resources and act // as weak references API wrappers in order to carry type-erased resources type // into appropriate modules: onevpl::GSource, PreprocEngine and InferenceEngine // Until modules are not created owner handles must stay alive auto dx11_dev = createCOMPtrGuard(); auto dx11_ctx = createCOMPtrGuard(); auto adapter_factory = createCOMPtrGuard(); { IDXGIFactory* out_factory = nullptr; HRESULT err = CreateDXGIFactory(__uuidof(IDXGIFactory), reinterpret_cast(&out_factory)); if (FAILED(err)) { std::cerr << "Cannot create CreateDXGIFactory, error: " << HRESULT_CODE(err) << std::endl; return -1; } adapter_factory = createCOMPtrGuard(out_factory); } auto intel_adapter = createCOMPtrGuard(); UINT adapter_index = 0; const unsigned int refIntelVendorID = 0x8086; IDXGIAdapter* out_adapter = nullptr; while (adapter_factory->EnumAdapters(adapter_index, &out_adapter) != DXGI_ERROR_NOT_FOUND) { DXGI_ADAPTER_DESC desc{}; out_adapter->GetDesc(&desc); if (desc.VendorId == refIntelVendorID) { intel_adapter = createCOMPtrGuard(out_adapter); break; } ++adapter_index; } if (!intel_adapter) { std::cerr << "No Intel GPU adapter on aboard. Exit" << std::endl; return -1; } std::tie(dx11_dev, dx11_ctx) = create_device_with_ctx(intel_adapter.get()); gpu_accel_device = cv::util::make_optional( cv::gapi::wip::onevpl::create_dx11_device( reinterpret_cast(dx11_dev.get()), "GPU")); gpu_accel_ctx = cv::util::make_optional( cv::gapi::wip::onevpl::create_dx11_context( reinterpret_cast(dx11_ctx.get()))); #endif // HAVE_D3D11 #endif // HAVE_DIRECTX #ifdef __linux__ #if defined(HAVE_VA) || defined(HAVE_VA_INTEL) static const char *predefined_vaapi_devices_list[] {"/dev/dri/renderD128", "/dev/dri/renderD129", "/dev/dri/card0", "/dev/dri/card1", nullptr}; std::stringstream ss; int device_fd = -1; VADisplay va_handle = nullptr; for (const char **device_path = predefined_vaapi_devices_list; *device_path != nullptr; device_path++) { device_fd = open(*device_path, O_RDWR); if (device_fd < 0) { std::string info("Cannot open GPU file: \""); info = info + *device_path + "\", error: " + strerror(errno); ss << info << std::endl; continue; } va_handle = vaGetDisplayDRM(device_fd); if (!va_handle) { close(device_fd); std::string info("VAAPI device vaGetDisplayDRM failed, error: "); info += strerror(errno); ss << info << std::endl; continue; } int major_version = 0, minor_version = 0; VAStatus status {}; status = vaInitialize(va_handle, &major_version, &minor_version); if (VA_STATUS_SUCCESS != status) { close(device_fd); va_handle = nullptr; std::string info("Cannot initialize VAAPI device, error: "); info += vaErrorStr(status); ss << info << std::endl; continue; } std::cout << "VAAPI created for device: " << *device_path << ", version: " << major_version << "." << minor_version << std::endl; break; } // check device creation if (!va_handle) { std::cerr << "Cannot create VAAPI device. Log:\n" << ss.str() << std::endl; return -1; } gpu_accel_device = cv::util::make_optional( cv::gapi::wip::onevpl::create_vaapi_device(reinterpret_cast(va_handle), "GPU", device_fd)); gpu_accel_ctx = cv::util::make_optional( cv::gapi::wip::onevpl::create_vaapi_context(nullptr)); #endif // defined(HAVE_VA) || defined(HAVE_VA_INTEL) #endif // #ifdef __linux__ } #ifdef HAVE_INF_ENGINE // activate remote ctx in Inference Engine for GPU device // when other pipeline component use the GPU device too if (flow_settings->ie_remote_ctx_enable) { InferenceEngine::ParamMap ctx_config({{"CONTEXT_TYPE", "VA_SHARED"}, {"VA_DEVICE", gpu_accel_device.value().get_ptr()} }); face_net.cfgContextParams(ctx_config); std::cout << "enforce InferenceEngine remote context on device: " << device_id << std::endl; // NB: consider NV12 surface because it's one of native GPU image format face_net.pluginConfig({{"GPU_NV12_TWO_INPUTS", "YES" }}); std::cout << "enforce InferenceEngine NV12 blob" << std::endl; } #endif // HAVE_INF_ENGINE // turn on VPP PreprocesingEngine if available & requested if (flow_settings->vpl_preproc_enable) { if (is_gpu(preproc_device)) { // activate VPP PreprocesingEngine on GPU face_net.cfgPreprocessingParams(gpu_accel_device.value(), gpu_accel_ctx.value()); } else { // activate VPP PreprocesingEngine on CPU face_net.cfgPreprocessingParams(cpu_accel_device, cpu_accel_ctx); } std::cout << "enforce VPP preprocessing on device: " << preproc_device << std::endl; } else { std::cout << "use InferenceEngine default preprocessing" << std::endl; } auto kernels = cv::gapi::kernels < custom::OCVLocateROI , custom::OCVParseSSD , custom::OCVBBoxes>(); auto networks = cv::gapi::networks(face_net); auto face_detection_args = cv::compile_args(networks, kernels); if (streaming_queue_capacity != 0) { face_detection_args += cv::compile_args(cv::gapi::streaming::queue_capacity{ streaming_queue_capacity }); } // Create source cv::gapi::wip::IStreamSource::Ptr cap; try { if (is_gpu(source_device)) { std::cout << "enforce VPL Source deconding on device: " << source_device << std::endl; // use special 'Device' constructor for `onevpl::GSource` cap = cv::gapi::wip::make_onevpl_src(file_path, source_cfgs, gpu_accel_device.value(), gpu_accel_ctx.value()); } else { cap = cv::gapi::wip::make_onevpl_src(file_path, source_cfgs); } std::cout << "oneVPL source description: " << cap->descr_of() << std::endl; } catch (const std::exception& ex) { std::cerr << "Cannot create source: " << ex.what() << std::endl; return -1; } cv::GMetaArg descr = cap->descr_of(); auto frame_descr = cv::util::get(descr); cv::GOpaque in_roi; auto inputs = cv::gin(cap); // Now build the graph cv::GFrame in; auto size = cv::gapi::streaming::size(in); auto graph_inputs = cv::GIn(in); if (!opt_roi.has_value()) { // Automatically detect ROI to infer. Make it output parameter std::cout << "ROI is not set or invalid. Locating it automatically" << std::endl; in_roi = custom::LocateROI::on(size); } else { // Use the value provided by user std::cout << "Will run inference for static region " << opt_roi.value() << " only" << std::endl; graph_inputs += cv::GIn(in_roi); inputs += cv::gin(opt_roi.value()); } auto blob = cv::gapi::infer(in_roi, in); cv::GArray rcs = custom::ParseSSD::on(blob, in_roi, size); auto out_frame = cv::gapi::wip::draw::renderFrame(in, custom::BBoxes::on(rcs, in_roi)); auto out = cv::gapi::streaming::BGR(out_frame); cv::GStreamingCompiled pipeline = cv::GComputation(std::move(graph_inputs), cv::GOut(out)) // and move here .compileStreaming(std::move(face_detection_args)); // The execution part pipeline.setSource(std::move(inputs)); pipeline.start(); size_t frames = 0u; cv::TickMeter tm; cv::VideoWriter writer; if (!output.empty() && !writer.isOpened()) { const auto sz = cv::Size{frame_descr.size.width, frame_descr.size.height}; writer.open(output, cv::VideoWriter::fourcc('M','J','P','G'), 25.0, sz); GAPI_Assert(writer.isOpened()); } cv::Mat outMat; tm.start(); while (pipeline.pull(cv::gout(outMat))) { cv::imshow("Out", outMat); cv::waitKey(1); if (!output.empty()) { writer << outMat; } ++frames; } tm.stop(); std::cout << "Processed " << frames << " frames" << " (" << frames / tm.getTimeSec() << " FPS)" << std::endl; return 0; } namespace cfg { typename cv::gapi::wip::onevpl::CfgParam create_from_string(const std::string &line) { using namespace cv::gapi::wip; if (line.empty()) { throw std::runtime_error("Cannot parse CfgParam from emply line"); } std::string::size_type name_endline_pos = line.find(':'); if (name_endline_pos == std::string::npos) { throw std::runtime_error("Cannot parse CfgParam from: " + line + "\nExpected separator \":\""); } std::string name = line.substr(0, name_endline_pos); std::string value = line.substr(name_endline_pos + 1); return cv::gapi::wip::onevpl::CfgParam::create(name, value, /* vpp params strongly optional */ name.find("vpp.") == std::string::npos); } }