diff --git a/modules/gapi/include/opencv2/gapi/streaming/cap.hpp b/modules/gapi/include/opencv2/gapi/streaming/cap.hpp index adf1133c3f..9c2185c1ab 100644 --- a/modules/gapi/include/opencv2/gapi/streaming/cap.hpp +++ b/modules/gapi/include/opencv2/gapi/streaming/cap.hpp @@ -22,6 +22,7 @@ * because of this file. */ #include +#include <map> #include #include @@ -47,8 +48,16 @@ namespace wip { class GCaptureSource: public IStreamSource { public: - explicit GCaptureSource(int id) : cap(id) { prep(); } - explicit GCaptureSource(const std::string &path) : cap(path) { prep(); } + explicit GCaptureSource(int id, const std::map<int, double> &properties = {}) + : cap(id) { prep(properties); } + + explicit GCaptureSource(const std::string &path, + const std::map<int, double> &properties = {}) + : cap(path) { prep(properties); } + + void set(int propid, double value) { + cap.set(propid, value); + } // TODO: Add more constructor overloads to make it // fully compatible with VideoCapture's interface. @@ -59,8 +68,12 @@ protected: bool first_pulled = false; int64_t counter = 0; - void prep() + void prep(const std::map<int, double> &properties) { + for (const auto &it : properties) { + cap.set(it.first, it.second); + } + // Prepare first frame to report its meta to engine // when needed GAPI_Assert(first.empty()); @@ -114,15 +127,19 @@ }; // NB: Overload for using from python -GAPI_EXPORTS_W cv::Ptr<IStreamSource> inline make_capture_src(const std::string& path) +GAPI_EXPORTS_W cv::Ptr<IStreamSource> +inline make_capture_src(const std::string& path, + const std::map<int, double>& properties = {}) { - return make_src<GCaptureSource>(path); + return make_src<GCaptureSource>(path, properties); } // NB: Overload for using from python -GAPI_EXPORTS_W cv::Ptr<IStreamSource> inline make_capture_src(const int id) +GAPI_EXPORTS_W cv::Ptr<IStreamSource> +inline make_capture_src(const int id, + const std::map<int, double>& properties = {}) { - return make_src<GCaptureSource>(id); + return make_src<GCaptureSource>(id, properties); } } // namespace wip diff --git a/modules/gapi/misc/python/pyopencv_gapi.hpp b/modules/gapi/misc/python/pyopencv_gapi.hpp index
736d8cc86e..70698ffd48 100644 --- a/modules/gapi/misc/python/pyopencv_gapi.hpp +++ b/modules/gapi/misc/python/pyopencv_gapi.hpp @@ -28,6 +28,7 @@ using map_string_and_string = std::map; using map_string_and_string = std::map; using map_string_and_vector_size_t = std::map>; using map_string_and_vector_float = std::map>; +using map_int_and_double = std::map; // NB: Python wrapper generate T_U for T // This behavior is only observed for inputs diff --git a/modules/gapi/samples/semantic_segmentation.cpp b/modules/gapi/samples/semantic_segmentation.cpp index fd3ec27750..db358816d1 100644 --- a/modules/gapi/samples/semantic_segmentation.cpp +++ b/modules/gapi/samples/semantic_segmentation.cpp @@ -5,34 +5,41 @@ #include #include +#include +#include + +#include + const std::string keys = "{ h help | | Print this help message }" + "{ desync | false | Desynchronize inference }" "{ input | | Path to the input video file }" "{ output | | Path to the output video file }" "{ ssm | semantic-segmentation-adas-0001.xml | Path to OpenVINO IE semantic segmentation model (.xml) }"; // 20 colors for 20 classes of semantic-segmentation-adas-0001 -const std::vector colors = { - { 128, 64, 128 }, - { 232, 35, 244 }, - { 70, 70, 70 }, - { 156, 102, 102 }, - { 153, 153, 190 }, - { 153, 153, 153 }, - { 30, 170, 250 }, - { 0, 220, 220 }, - { 35, 142, 107 }, - { 152, 251, 152 }, - { 180, 130, 70 }, - { 60, 20, 220 }, - { 0, 0, 255 }, - { 142, 0, 0 }, - { 70, 0, 0 }, - { 100, 60, 0 }, - { 90, 0, 0 }, - { 230, 0, 0 }, - { 32, 11, 119 }, - { 0, 74, 111 }, +static std::vector colors = { + { 0, 0, 0 }, + { 0, 0, 128 }, + { 0, 128, 0 }, + { 0, 128, 128 }, + { 128, 0, 0 }, + { 128, 0, 128 }, + { 128, 128, 0 }, + { 128, 128, 128 }, + { 0, 0, 64 }, + { 0, 0, 192 }, + { 0, 128, 64 }, + { 0, 128, 192 }, + { 128, 0, 64 }, + { 128, 0, 192 }, + { 128, 128, 64 }, + { 128, 128, 192 }, + { 0, 64, 0 }, + { 0, 64, 128 }, + { 0, 192, 0 }, + { 0, 192, 128 }, + { 128, 64, 0 } }; namespace { @@ -43,12 +50,23 @@ 
std::string get_weights_path(const std::string &model_path) { auto ext = model_path.substr(sz - EXT_LEN); std::transform(ext.begin(), ext.end(), ext.begin(), [](unsigned char c){ - return static_cast<char>(std::tolower(c)); - }); + return static_cast<char>(std::tolower(c)); + }); CV_Assert(ext == ".xml"); return model_path.substr(0u, sz - EXT_LEN) + ".bin"; } +bool isNumber(const std::string &str) { + return !str.empty() && std::all_of(str.begin(), str.end(), + [](unsigned char ch) { return std::isdigit(ch); }); +} + +std::string toStr(double value) { + std::stringstream ss; + ss << std::fixed << std::setprecision(1) << value; + return ss.str(); +} + void classesToColors(const cv::Mat &out_blob, cv::Mat &mask_img) { const int H = out_blob.size[0]; @@ -97,6 +115,25 @@ void probsToClasses(const cv::Mat& probs, cv::Mat& classes) { } // anonymous namespace +namespace vis { + +static void putText(cv::Mat& mat, const cv::Point &position, const std::string &message) { + auto fontFace = cv::FONT_HERSHEY_COMPLEX; + int thickness = 2; + cv::Scalar color = {200, 10, 10}; + double fontScale = 0.65; + + cv::putText(mat, message, position, fontFace, + fontScale, cv::Scalar(255, 255, 255), thickness + 1); + cv::putText(mat, message, position, fontFace, fontScale, color, thickness); +} + +static void drawResults(cv::Mat &img, const cv::Mat &color_mask) { + img = img / 2 + color_mask / 2; +} + +} // namespace vis + namespace custom { G_API_OP(PostProcessing, <cv::GMat(cv::GMat, cv::GMat)>, "sample.custom.post_processing") { static cv::GMatDesc outMeta(const cv::GMatDesc &in, const cv::GMatDesc &) { @@ -106,19 +143,34 @@ G_API_OP(PostProcessing, <cv::GMat(cv::GMat, cv::GMat)>, "sample.custom.post_pro GAPI_OCV_KERNEL(OCVPostProcessing, PostProcessing) { static void run(const cv::Mat &in, const cv::Mat &out_blob, cv::Mat &out) { + int C = -1, H = -1, W = -1; + if (out_blob.size.dims() == 4u) { + C = 1; H = 2, W = 3; + } else if (out_blob.size.dims() == 3u) { + C = 0; H = 1, W = 2; + } else { + throw std::logic_error( + "Number of dimensions for model 
output must be 3 or 4!"); + } cv::Mat classes; // NB: If output has more than single plane, it contains probabilities // otherwise class id. - if (out_blob.size[1] > 1) { + if (out_blob.size[C] > 1) { probsToClasses(out_blob, classes); } else { - out_blob.convertTo(classes, CV_8UC1); - classes = classes.reshape(1, out_blob.size[2]); + if (out_blob.depth() != CV_32S) { + throw std::logic_error( + "Single channel output must have integer precision!"); + } + cv::Mat view(out_blob.size[H], // rows + out_blob.size[W], // cols + CV_32SC1, + out_blob.data); + view.convertTo(classes, CV_8UC1); } - cv::Mat mask_img; classesToColors(classes, mask_img); - cv::resize(mask_img, out, in.size()); + cv::resize(mask_img, out, in.size(), 0, 0, cv::INTER_NEAREST); } }; } // namespace custom @@ -134,6 +186,7 @@ int main(int argc, char *argv[]) { const std::string input = cmd.get<std::string>("input"); const std::string output = cmd.get<std::string>("output"); const auto model_path = cmd.get<std::string>("ssm"); + const bool desync = cmd.get<bool>("desync"); const auto weights_path = get_weights_path(model_path); const auto device = "CPU"; G_API_NET(SemSegmNet, <cv::GMat(cv::GMat)>, "semantic-segmentation"); @@ -145,40 +198,87 @@ int main(int argc, char *argv[]) { // Now build the graph cv::GMat in; - cv::GMat out_blob = cv::gapi::infer<SemSegmNet>(in); - cv::GMat post_proc_out = custom::PostProcessing::on(in, out_blob); - cv::GMat blending_in = in * 0.3f; - cv::GMat blending_out = post_proc_out * 0.7f; - cv::GMat out = blending_in + blending_out; + cv::GMat bgr = cv::gapi::copy(in); + cv::GMat frame = desync ? 
cv::gapi::streaming::desync(bgr) : bgr; + cv::GMat out_blob = cv::gapi::infer<SemSegmNet>(frame); + cv::GMat out = custom::PostProcessing::on(frame, out_blob); - cv::GStreamingCompiled pipeline = cv::GComputation(cv::GIn(in), cv::GOut(out)) - .compileStreaming(cv::compile_args(kernels, networks)); - auto inputs = cv::gin(cv::gapi::wip::make_src<cv::gapi::wip::GCaptureSource>(input)); + cv::GStreamingCompiled pipeline = cv::GComputation(cv::GIn(in), cv::GOut(bgr, out)) + .compileStreaming(cv::compile_args(kernels, networks, + cv::gapi::streaming::queue_capacity{1})); + + std::shared_ptr<cv::gapi::wip::GCaptureSource> source; + if (isNumber(input)) { + source = std::make_shared<cv::gapi::wip::GCaptureSource>( + std::stoi(input), + std::map<int, double> { + {cv::CAP_PROP_FRAME_WIDTH, 1280}, + {cv::CAP_PROP_FRAME_HEIGHT, 720}, + {cv::CAP_PROP_BUFFERSIZE, 1}, + {cv::CAP_PROP_AUTOFOCUS, true} + } + ); + } else { + source = std::make_shared<cv::gapi::wip::GCaptureSource>(input); + } + auto inputs = cv::gin( + static_cast<cv::gapi::wip::IStreamSource::Ptr>(source)); // The execution part pipeline.setSource(std::move(inputs)); - cv::VideoWriter writer; cv::TickMeter tm; - cv::Mat outMat; + cv::VideoWriter writer; + + cv::util::optional<cv::Mat> color_mask; + cv::util::optional<cv::Mat> image; + cv::Mat last_image; + cv::Mat last_color_mask; + + pipeline.start(); + tm.start(); std::size_t frames = 0u; - tm.start(); - pipeline.start(); - while (pipeline.pull(cv::gout(outMat))) { - ++frames; - cv::imshow("Out", outMat); - cv::waitKey(1); - if (!output.empty()) { - if (!writer.isOpened()) { - const auto sz = cv::Size{outMat.cols, outMat.rows}; - writer.open(output, cv::VideoWriter::fourcc('M','J','P','G'), 25.0, sz); - CV_Assert(writer.isOpened()); + std::size_t masks = 0u; + while (pipeline.pull(cv::gout(image, color_mask))) { + if (image.has_value()) { + ++frames; + last_image = std::move(*image); + } + + if (color_mask.has_value()) { + ++masks; + last_color_mask = std::move(*color_mask); + } + + if (!last_image.empty() && !last_color_mask.empty()) { + tm.stop(); + + std::string stream_fps = "Stream FPS: " + toStr(frames / tm.getTimeSec()); + std::string inference_fps = 
"Inference FPS: " + toStr(masks / tm.getTimeSec()); + + cv::Mat tmp = last_image.clone(); + + vis::drawResults(tmp, last_color_mask); + vis::putText(tmp, {10, 22}, stream_fps); + vis::putText(tmp, {10, 22 + 30}, inference_fps); + + cv::imshow("Out", tmp); + cv::waitKey(1); + if (!output.empty()) { + if (!writer.isOpened()) { + const auto sz = cv::Size{tmp.cols, tmp.rows}; + writer.open(output, cv::VideoWriter::fourcc('M','J','P','G'), 25.0, sz); + CV_Assert(writer.isOpened()); + } + writer << tmp; } - writer << outMat; + + tm.start(); } } tm.stop(); - std::cout << "Processed " << frames << " frames" << " (" << frames / tm.getTimeSec() << " FPS)" << std::endl; + std::cout << "Processed " << frames << " frames" << " (" + << frames / tm.getTimeSec()<< " FPS)" << std::endl; return 0; } diff --git a/modules/python/src2/cv2_convert.hpp b/modules/python/src2/cv2_convert.hpp index 73ef10968b..43ef7b2302 100644 --- a/modules/python/src2/cv2_convert.hpp +++ b/modules/python/src2/cv2_convert.hpp @@ -268,6 +268,11 @@ PyObject* pyopencv_from(const std::vector& value) template bool pyopencv_to(PyObject *obj, std::map &map, const ArgInfo& info) { + if (!obj || obj == Py_None) + { + return true; + } + PyObject* py_key = nullptr; PyObject* py_value = nullptr; Py_ssize_t pos = 0; diff --git a/modules/python/src2/typing_stubs_generation/predefined_types.py b/modules/python/src2/typing_stubs_generation/predefined_types.py index 39a641c021..2ebe08d162 100644 --- a/modules/python/src2/typing_stubs_generation/predefined_types.py +++ b/modules/python/src2/typing_stubs_generation/predefined_types.py @@ -191,13 +191,15 @@ _PREDEFINED_TYPES = ( PrimitiveTypeNode.str_()) ), export_name="SearchParams"), AliasTypeNode.dict_("map_string_and_string", PrimitiveTypeNode.str_("map_string_and_string::key"), - PrimitiveTypeNode.str_("map_string_and_string::key::value")), + PrimitiveTypeNode.str_("map_string_and_string::value")), AliasTypeNode.dict_("map_string_and_int", 
PrimitiveTypeNode.str_("map_string_and_int::key"), - PrimitiveTypeNode.int_("map_string_and_int::key::value")), + PrimitiveTypeNode.int_("map_string_and_int::value")), AliasTypeNode.dict_("map_string_and_vector_size_t", PrimitiveTypeNode.str_("map_string_and_vector_size_t::key"), - SequenceTypeNode("map_string_and_vector_size_t::key::value", PrimitiveTypeNode.int_("size_t"))), + SequenceTypeNode("map_string_and_vector_size_t::value", PrimitiveTypeNode.int_("size_t"))), AliasTypeNode.dict_("map_string_and_vector_float", PrimitiveTypeNode.str_("map_string_and_vector_float::key"), - SequenceTypeNode("map_string_and_vector_float::key::value", PrimitiveTypeNode.float_())), + SequenceTypeNode("map_string_and_vector_float::value", PrimitiveTypeNode.float_())), + AliasTypeNode.dict_("map_int_and_double", PrimitiveTypeNode.int_("map_int_and_double::key"), + PrimitiveTypeNode.float_("map_int_and_double::value")), ) PREDEFINED_TYPES = dict(zip((t.ctype_name for t in _PREDEFINED_TYPES), _PREDEFINED_TYPES))