#include #include #include #include #include #include #include #include #include const std::string keys = "{ h help | | Print this help message }" "{ desync | false | Desynchronize inference }" "{ input | | Path to the input video file }" "{ output | | Path to the output video file }" "{ ssm | semantic-segmentation-adas-0001.xml | Path to OpenVINO IE semantic segmentation model (.xml) }"; // 20 colors for 20 classes of semantic-segmentation-adas-0001 static std::vector colors = { { 0, 0, 0 }, { 0, 0, 128 }, { 0, 128, 0 }, { 0, 128, 128 }, { 128, 0, 0 }, { 128, 0, 128 }, { 128, 128, 0 }, { 128, 128, 128 }, { 0, 0, 64 }, { 0, 0, 192 }, { 0, 128, 64 }, { 0, 128, 192 }, { 128, 0, 64 }, { 128, 0, 192 }, { 128, 128, 64 }, { 128, 128, 192 }, { 0, 64, 0 }, { 0, 64, 128 }, { 0, 192, 0 }, { 0, 192, 128 }, { 128, 64, 0 } }; namespace { std::string get_weights_path(const std::string &model_path) { const auto EXT_LEN = 4u; const auto sz = model_path.size(); CV_Assert(sz > EXT_LEN); auto ext = model_path.substr(sz - EXT_LEN); std::transform(ext.begin(), ext.end(), ext.begin(), [](unsigned char c){ return static_cast(std::tolower(c)); }); CV_Assert(ext == ".xml"); return model_path.substr(0u, sz - EXT_LEN) + ".bin"; } bool isNumber(const std::string &str) { return !str.empty() && std::all_of(str.begin(), str.end(), [](unsigned char ch) { return std::isdigit(ch); }); } std::string toStr(double value) { std::stringstream ss; ss << std::fixed << std::setprecision(1) << value; return ss.str(); } void classesToColors(const cv::Mat &out_blob, cv::Mat &mask_img) { const int H = out_blob.size[0]; const int W = out_blob.size[1]; mask_img.create(H, W, CV_8UC3); GAPI_Assert(out_blob.type() == CV_8UC1); const uint8_t* const classes = out_blob.ptr(); for (int rowId = 0; rowId < H; ++rowId) { for (int colId = 0; colId < W; ++colId) { uint8_t class_id = classes[rowId * W + colId]; mask_img.at(rowId, colId) = class_id < colors.size() ? colors[class_id] : cv::Vec3b{0, 0, 0}; // NB: sample supports 20 classes } } } void probsToClasses(const cv::Mat& probs, cv::Mat& classes) { const int C = probs.size[1]; const int H = probs.size[2]; const int W = probs.size[3]; classes.create(H, W, CV_8UC1); GAPI_Assert(probs.depth() == CV_32F); float* out_p = reinterpret_cast(probs.data); uint8_t* classes_p = reinterpret_cast(classes.data); for (int h = 0; h < H; ++h) { for (int w = 0; w < W; ++w) { double max = 0; int class_id = 0; for (int c = 0; c < C; ++c) { int idx = c * H * W + h * W + w; if (out_p[idx] > max) { max = out_p[idx]; class_id = c; } } classes_p[h * W + w] = static_cast(class_id); } } } } // anonymous namespace namespace vis { static void putText(cv::Mat& mat, const cv::Point &position, const std::string &message) { auto fontFace = cv::FONT_HERSHEY_COMPLEX; int thickness = 2; cv::Scalar color = {200, 10, 10}; double fontScale = 0.65; cv::putText(mat, message, position, fontFace, fontScale, cv::Scalar(255, 255, 255), thickness + 1); cv::putText(mat, message, position, fontFace, fontScale, color, thickness); } static void drawResults(cv::Mat &img, const cv::Mat &color_mask) { img = img / 2 + color_mask / 2; } } // namespace vis namespace custom { G_API_OP(PostProcessing, , "sample.custom.post_processing") { static cv::GMatDesc outMeta(const cv::GMatDesc &in, const cv::GMatDesc &) { return in; } }; GAPI_OCV_KERNEL(OCVPostProcessing, PostProcessing) { static void run(const cv::Mat &in, const cv::Mat &out_blob, cv::Mat &out) { int C = -1, H = -1, W = -1; if (out_blob.size.dims() == 4u) { C = 1; H = 2, W = 3; } else if (out_blob.size.dims() == 3u) { C = 0; H = 1, W = 2; } else { throw std::logic_error( "Number of dimmensions for model output must be 3 or 4!"); } cv::Mat classes; // NB: If output has more than single plane, it contains probabilities // otherwise class id. if (out_blob.size[C] > 1) { probsToClasses(out_blob, classes); } else { if (out_blob.depth() != CV_32S) { throw std::logic_error( "Single channel output must have integer precision!"); } cv::Mat view(out_blob.size[H], // cols out_blob.size[W], // rows CV_32SC1, out_blob.data); view.convertTo(classes, CV_8UC1); } cv::Mat mask_img; classesToColors(classes, mask_img); cv::resize(mask_img, out, in.size(), 0, 0, cv::INTER_NEAREST); } }; } // namespace custom int main(int argc, char *argv[]) { cv::CommandLineParser cmd(argc, argv, keys); if (cmd.has("help")) { cmd.printMessage(); return 0; } // Prepare parameters first const std::string input = cmd.get("input"); const std::string output = cmd.get("output"); const auto model_path = cmd.get("ssm"); const bool desync = cmd.get("desync"); const auto weights_path = get_weights_path(model_path); const auto device = "CPU"; G_API_NET(SemSegmNet, , "semantic-segmentation"); const auto net = cv::gapi::ie::Params { model_path, weights_path, device }; const auto kernels = cv::gapi::kernels(); const auto networks = cv::gapi::networks(net); // Now build the graph cv::GMat in; cv::GMat bgr = cv::gapi::copy(in); cv::GMat frame = desync ? cv::gapi::streaming::desync(bgr) : bgr; cv::GMat out_blob = cv::gapi::infer(frame); cv::GMat out = custom::PostProcessing::on(frame, out_blob); cv::GStreamingCompiled pipeline = cv::GComputation(cv::GIn(in), cv::GOut(bgr, out)) .compileStreaming(cv::compile_args(kernels, networks, cv::gapi::streaming::queue_capacity{1})); std::shared_ptr source; if (isNumber(input)) { source = std::make_shared( std::stoi(input), std::map { {cv::CAP_PROP_FRAME_WIDTH, 1280}, {cv::CAP_PROP_FRAME_HEIGHT, 720}, {cv::CAP_PROP_BUFFERSIZE, 1}, {cv::CAP_PROP_AUTOFOCUS, true} } ); } else { source = std::make_shared(input); } auto inputs = cv::gin( static_cast(source)); // The execution part pipeline.setSource(std::move(inputs)); cv::TickMeter tm; cv::VideoWriter writer; cv::util::optional color_mask; cv::util::optional image; cv::Mat last_image; cv::Mat last_color_mask; pipeline.start(); tm.start(); std::size_t frames = 0u; std::size_t masks = 0u; while (pipeline.pull(cv::gout(image, color_mask))) { if (image.has_value()) { ++frames; last_image = std::move(*image); } if (color_mask.has_value()) { ++masks; last_color_mask = std::move(*color_mask); } if (!last_image.empty() && !last_color_mask.empty()) { tm.stop(); std::string stream_fps = "Stream FPS: " + toStr(frames / tm.getTimeSec()); std::string inference_fps = "Inference FPS: " + toStr(masks / tm.getTimeSec()); cv::Mat tmp = last_image.clone(); vis::drawResults(tmp, last_color_mask); vis::putText(tmp, {10, 22}, stream_fps); vis::putText(tmp, {10, 22 + 30}, inference_fps); cv::imshow("Out", tmp); cv::waitKey(1); if (!output.empty()) { if (!writer.isOpened()) { const auto sz = cv::Size{tmp.cols, tmp.rows}; writer.open(output, cv::VideoWriter::fourcc('M','J','P','G'), 25.0, sz); CV_Assert(writer.isOpened()); } writer << tmp; } tm.start(); } } tm.stop(); std::cout << "Processed " << frames << " frames" << " (" << frames / tm.getTimeSec()<< " FPS)" << std::endl; return 0; }