diff --git a/samples/dnn/segmentation.cpp b/samples/dnn/segmentation.cpp index d017c15c68..1c9e976017 100644 --- a/samples/dnn/segmentation.cpp +++ b/samples/dnn/segmentation.cpp @@ -12,81 +12,176 @@ using namespace cv; using namespace std; using namespace dnn; +const string about = + "Use this script to run semantic segmentation deep learning networks using OpenCV.\n\n" + "First, download the required models using `download_models.py` (if not already done). Set the environment variable OPENCV_DOWNLOAD_CACHE_DIR to specify where models should be downloaded. Also, point OPENCV_SAMPLES_DATA_PATH to opencv/samples/data.\n" + "To run:\n" + "\t ./example_dnn_segmentation modelName (e.g. u2netp) --input=$OPENCV_SAMPLES_DATA_PATH/butterfly.jpg (or omit this argument to use the device camera)\n" + "The model path can also be specified using the --model argument."; + const string param_keys = - "{ help h | | Print help message. }" - "{ @alias | | An alias name of model to extract preprocessing parameters from models.yml file. }" - "{ zoo | models.yml | An optional path to file with preprocessing parameters }" - "{ device | 0 | camera device number. }" - "{ input i | | Path to input image or video file. Skip this argument to capture frames from a camera. }" - "{ classes | | Optional path to a text file with names of classes. }" - "{ colors | | Optional path to a text file with colors for an every class. " + "{ help h | | Print help message. }" + "{ @alias | | An alias name of model to extract preprocessing parameters from models.yml file. }" + "{ zoo | ../dnn/models.yml | An optional path to file with preprocessing parameters }" + "{ device | 0 | camera device number. }" + "{ input i | | Path to input image or video file. Skip this argument to capture frames from a camera. }" + "{ colors | | Optional path to a text file with colors for every class. " "Every color is represented with three values from 0 to 255 in BGR channels order. 
}"; const string backend_keys = format( - "{ backend | 0 | Choose one of computation backends: " - "%d: automatically (by default), " - "%d: Intel's Deep Learning Inference Engine (https://software.intel.com/openvino-toolkit), " - "%d: OpenCV implementation, " - "%d: VKCOM, " - "%d: CUDA }", - DNN_BACKEND_DEFAULT, DNN_BACKEND_INFERENCE_ENGINE, DNN_BACKEND_OPENCV, DNN_BACKEND_VKCOM, DNN_BACKEND_CUDA); + "{ backend | default | Choose one of computation backends: " + "default: automatically (by default), " + "openvino: Intel's Deep Learning Inference Engine (https://software.intel.com/openvino-toolkit), " + "opencv: OpenCV implementation, " + "vkcom: VKCOM, " + "cuda: CUDA, " + "webnn: WebNN }"); const string target_keys = format( - "{ target | 0 | Choose one of target computation devices: " - "%d: CPU target (by default), " - "%d: OpenCL, " - "%d: OpenCL fp16 (half-float precision), " - "%d: VPU, " - "%d: Vulkan, " - "%d: CUDA, " - "%d: CUDA fp16 (half-float preprocess) }", - DNN_TARGET_CPU, DNN_TARGET_OPENCL, DNN_TARGET_OPENCL_FP16, DNN_TARGET_MYRIAD, DNN_TARGET_VULKAN, DNN_TARGET_CUDA, DNN_TARGET_CUDA_FP16); + "{ target | cpu | Choose one of target computation devices: " + "cpu: CPU target (by default), " + "opencl: OpenCL, " + "opencl_fp16: OpenCL fp16 (half-float precision), " + "vpu: VPU, " + "vulkan: Vulkan, " + "cuda: CUDA, " + "cuda_fp16: CUDA fp16 (half-float preprocess) }"); string keys = param_keys + backend_keys + target_keys; -vector classes; +vector labels; vector colors; -void showLegend(); -void colorizeSegmentation(const Mat &score, Mat &segm); +static void colorizeSegmentation(const Mat &score, Mat &segm) +{ + const int rows = score.size[2]; + const int cols = score.size[3]; + const int chns = score.size[1]; + + if (colors.empty()) + { + // Generate colors. 
+ colors.push_back(Vec3b()); + for (int i = 1; i < chns; ++i) + { + Vec3b color; + for (int j = 0; j < 3; ++j) + color[j] = (colors[i - 1][j] + rand() % 256) / 2; + colors.push_back(color); + } + } + else if (chns != (int)colors.size()) + { + CV_Error(Error::StsError, format("Number of output labels does not match " + "number of colors (%d != %zu)", + chns, colors.size())); + } + + Mat maxCl = Mat::zeros(rows, cols, CV_8UC1); + Mat maxVal(rows, cols, CV_32FC1, score.data); + for (int ch = 1; ch < chns; ch++) + { + for (int row = 0; row < rows; row++) + { + const float *ptrScore = score.ptr<float>(0, ch, row); + uint8_t *ptrMaxCl = maxCl.ptr<uint8_t>(row); + float *ptrMaxVal = maxVal.ptr<float>(row); + for (int col = 0; col < cols; col++) + { + if (ptrScore[col] > ptrMaxVal[col]) + { + ptrMaxVal[col] = ptrScore[col]; + ptrMaxCl[col] = (uchar)ch; + } + } + } + } + segm.create(rows, cols, CV_8UC3); + for (int row = 0; row < rows; row++) + { + const uchar *ptrMaxCl = maxCl.ptr<uchar>(row); + Vec3b *ptrSegm = segm.ptr<Vec3b>(row); + for (int col = 0; col < cols; col++) + { + ptrSegm[col] = colors[ptrMaxCl[col]]; + } + } +} + +static void showLegend(FontFace fontFace) +{ + static const int kBlockHeight = 30; + static Mat legend; + if (legend.empty()) + { + const int numClasses = (int)labels.size(); + if ((int)colors.size() != numClasses) + { + CV_Error(Error::StsError, format("Number of colors does not match " + "number of labels (%zu != %zu)", + colors.size(), labels.size())); + } + legend.create(kBlockHeight * numClasses, 200, CV_8UC3); + for (int i = 0; i < numClasses; i++) + { + Mat block = legend.rowRange(i * kBlockHeight, (i + 1) * kBlockHeight); + block.setTo(colors[i]); + Rect r = getTextSize(Size(), labels[i], Point(), fontFace, 15, 400); + r.height += 15; // padding + r.width += 10; // padding + rectangle(block, r, Scalar::all(255), FILLED); + putText(block, labels[i], Point(10, kBlockHeight/2), Scalar(0,0,0), fontFace, 15, 400); + } + namedWindow("Legend", WINDOW_AUTOSIZE); + imshow("Legend", legend); + } +} int main(int argc, char **argv) { CommandLineParser parser(argc, argv, keys); const string modelName = parser.get<String>("@alias"); - const string zooFile = parser.get<String>("zoo"); + const string zooFile = findFile(parser.get<String>("zoo")); keys += genPreprocArguments(modelName, zooFile); parser = CommandLineParser(argc, argv, keys); - parser.about("Use this script to run semantic segmentation deep learning networks using OpenCV."); - if (argc == 1 || parser.has("help")) + parser.about(about); + if (!parser.has("@alias") || parser.has("help")) { parser.printMessage(); return 0; } + string sha1 = parser.get<String>("sha1"); float scale = parser.get<float>("scale"); Scalar mean = parser.get<Scalar>("mean"); bool swapRB = parser.get<bool>("rgb"); int inpWidth = parser.get<int>("width"); int inpHeight = parser.get<int>("height"); - String model = findFile(parser.get<String>("model")); - int backendId = parser.get<int>("backend"); - int targetId = parser.get<int>("target"); + String model = findModel(parser.get<String>("model"), sha1); + const string backend = parser.get<String>("backend"); + const string target = parser.get<String>("target"); + int stdSize = 20; + int stdWeight = 400; + int stdImgSize = 512; + int imgWidth = -1; // Initialization + int fontSize = 50; + int fontWeight = 500; + FontFace fontFace("sans"); - // Open file with classes names. - if (parser.has("classes")) + // Open file with label names.
+ if (parser.has("labels")) { - string file = findFile(parser.get<String>("classes")); + string file = findFile(parser.get<String>("labels")); ifstream ifs(file.c_str()); if (!ifs.is_open()) CV_Error(Error::StsError, "File " + file + " not found"); string line; while (getline(ifs, line)) { - classes.push_back(line); + labels.push_back(line); } } // Open file with colors. @@ -116,13 +211,17 @@ int main(int argc, char **argv) CV_Assert(!model.empty()); //! [Read and initialize network] - Net net = readNetFromONNX(model); - net.setPreferableBackend(backendId); - net.setPreferableTarget(targetId); + EngineType engine = ENGINE_AUTO; + if (backend != "default" || target != "cpu"){ + engine = ENGINE_CLASSIC; + } + Net net = readNetFromONNX(model, engine); + net.setPreferableBackend(getBackendID(backend)); + net.setPreferableTarget(getTargetID(target)); //! [Read and initialize network] // Create a window static const string kWinName = "Deep learning semantic segmentation in OpenCV"; - namedWindow(kWinName, WINDOW_NORMAL); + namedWindow(kWinName, WINDOW_AUTOSIZE); //! [Open a video file or an image file or a camera stream] VideoCapture cap; @@ -141,6 +240,11 @@ int main(int argc, char **argv) waitKey(); break; } + if (imgWidth == -1){ + imgWidth = max(frame.rows, frame.cols); + fontSize = min(fontSize, (stdSize*imgWidth)/stdImgSize); + fontWeight = min(fontWeight, (stdWeight*imgWidth)/stdImgSize); + } imshow("Original Image", frame); //! [Create a 4D blob from a frame] blobFromImage(frame, blob, scale, Size(inpWidth, inpHeight), mean, swapRB, false); @@ -185,92 +289,15 @@ int main(int argc, char **argv) double freq = getTickFrequency() / 1000; double t = net.getPerfProfile(layersTimes) / freq; string label = format("Inference time: %.2f ms", t); - putText(frame, label, Point(0, 15), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 255, 0)); + Rect r = getTextSize(Size(), label, Point(), fontFace, fontSize, fontWeight); + r.height += fontSize; // padding + r.width += 10; // padding + rectangle(frame, r, Scalar::all(255), FILLED); + putText(frame, label, Point(10, fontSize), Scalar(0,0,0), fontFace, fontSize, fontWeight); imshow(kWinName, frame); - if (!classes.empty()) - showLegend(); + if (!labels.empty()) + showLegend(fontFace); } return 0; } - -void colorizeSegmentation(const Mat &score, Mat &segm) -{ - const int rows = score.size[2]; - const int cols = score.size[3]; - const int chns = score.size[1]; - - if (colors.empty()) - { - // Generate colors.
- colors.push_back(Vec3b()); - for (int i = 1; i < chns; ++i) - { - Vec3b color; - for (int j = 0; j < 3; ++j) - color[j] = (colors[i - 1][j] + rand() % 256) / 2; - colors.push_back(color); - } - } - else if (chns != (int)colors.size()) - { - CV_Error(Error::StsError, format("Number of output classes does not match " - "number of colors (%d != %zu)", - chns, colors.size())); - } - - Mat maxCl = Mat::zeros(rows, cols, CV_8UC1); - Mat maxVal(rows, cols, CV_32FC1, score.data); - for (int ch = 1; ch < chns; ch++) - { - for (int row = 0; row < rows; row++) - { - const float *ptrScore = score.ptr<float>(0, ch, row); - uint8_t *ptrMaxCl = maxCl.ptr<uint8_t>(row); - float *ptrMaxVal = maxVal.ptr<float>(row); - for (int col = 0; col < cols; col++) - { - if (ptrScore[col] > ptrMaxVal[col]) - { - ptrMaxVal[col] = ptrScore[col]; - ptrMaxCl[col] = (uchar)ch; - } - } - } - } - segm.create(rows, cols, CV_8UC3); - for (int row = 0; row < rows; row++) - { - const uchar *ptrMaxCl = maxCl.ptr<uchar>(row); - Vec3b *ptrSegm = segm.ptr<Vec3b>(row); - for (int col = 0; col < cols; col++) - { - ptrSegm[col] = colors[ptrMaxCl[col]]; - } - } -} - -void showLegend() -{ - static const int kBlockHeight = 30; - static Mat legend; - if (legend.empty()) - { - const int numClasses = (int)classes.size(); - if ((int)colors.size() != numClasses) - { - CV_Error(Error::StsError, format("Number of output classes does not match " - "number of labels (%zu != %zu)", - colors.size(), classes.size())); - } - legend.create(kBlockHeight * numClasses, 200, CV_8UC3); - for (int i = 0; i < numClasses; i++) - { - Mat block = legend.rowRange(i * kBlockHeight, (i + 1) * kBlockHeight); - block.setTo(colors[i]); - putText(block, classes[i], Point(0, kBlockHeight / 2), FONT_HERSHEY_SIMPLEX, 0.5, Vec3b(255, 255, 255)); - } - namedWindow("Legend", WINDOW_NORMAL); - imshow("Legend", legend); - } -} diff --git a/samples/dnn/segmentation.py b/samples/dnn/segmentation.py index bca11bc1d8..39b5edc8b5 100644 --- a/samples/dnn/segmentation.py +++ b/samples/dnn/segmentation.py @@ -1,140 +1,176 @@ import cv2 as cv import argparse import numpy as np -import sys from common import * -backends = (cv.dnn.DNN_BACKEND_DEFAULT, cv.dnn.DNN_BACKEND_INFERENCE_ENGINE, cv.dnn.DNN_BACKEND_OPENCV, - cv.dnn.DNN_BACKEND_VKCOM, cv.dnn.DNN_BACKEND_CUDA) -targets = (cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_OPENCL, cv.dnn.DNN_TARGET_OPENCL_FP16, cv.dnn.DNN_TARGET_MYRIAD, cv.dnn.DNN_TARGET_HDDL, - cv.dnn.DNN_TARGET_VULKAN, cv.dnn.DNN_TARGET_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16) +def help(): + print( + ''' + First, download the required models using `download_models.py` (if not already done). Set the environment variable OPENCV_DOWNLOAD_CACHE_DIR to specify where models should be downloaded. Also, point OPENCV_SAMPLES_DATA_PATH to opencv/samples/data.\n -parser = argparse.ArgumentParser(add_help=False) -parser.add_argument('--zoo', default=os.path.join(os.path.dirname(os.path.abspath(__file__)), 'models.yml'), - help='An optional path to file with preprocessing parameters.') -parser.add_argument('--input', help='Path to input image or video file. Skip this argument to capture frames from a camera.') -parser.add_argument('--colors', help='Optional path to a text file with colors for an every class. ' - 'An every color is represented with three values from 0 to 255 in BGR channels order.') -parser.add_argument('--backend', choices=backends, default=cv.dnn.DNN_BACKEND_DEFAULT, type=int, + To run: + python segmentation.py model_name (e.g. 
u2netp) --input=path/to/your/input/image/or/video (omit the --input flag to use the device camera) + + The model path can also be specified using the --model argument + ''' + ) + +def get_args_parser(func_args): + backends = ("default", "openvino", "opencv", "vkcom", "cuda") + targets = ("cpu", "opencl", "opencl_fp16", "ncs2_vpu", "hddl_vpu", "vulkan", "cuda", "cuda_fp16") + + parser = argparse.ArgumentParser(add_help=False) + parser.add_argument('--zoo', default=os.path.join(os.path.dirname(os.path.abspath(__file__)), 'models.yml'), + help='An optional path to file with preprocessing parameters.') + parser.add_argument('--input', help='Path to input image or video file. Skip this argument to capture frames from a camera.') + parser.add_argument('--colors', help='Optional path to a text file with colors for every class. ' + 'Every color is represented with three values from 0 to 255 in BGR channels order.') + parser.add_argument('--backend', default="default", type=str, choices=backends, help="Choose one of computation backends: " - "%d: automatically (by default), " - "%d: Intel's Deep Learning Inference Engine (https://software.intel.com/openvino-toolkit), " - "%d: OpenCV implementation, " - "%d: VKCOM, " - "%d: CUDA"% backends) -parser.add_argument('--target', choices=targets, default=cv.dnn.DNN_TARGET_CPU, type=int, - help='Choose one of target computation devices: ' - '%d: CPU target (by default), ' - '%d: OpenCL, ' - '%d: OpenCL fp16 (half-float precision), ' - '%d: NCS2 VPU, ' - '%d: HDDL VPU, ' - '%d: Vulkan, ' - '%d: CUDA, ' - '%d: CUDA fp16 (half-float preprocess)'% targets) -args, _ = parser.parse_known_args() -add_preproc_args(args.zoo, parser, 'segmentation') -parser = argparse.ArgumentParser(parents=[parser], - description='Use this script to run semantic segmentation deep learning networks using OpenCV.', - formatter_class=argparse.ArgumentDefaultsHelpFormatter) -args = parser.parse_args() + "default: automatically (by default), " + "openvino: Intel's Deep Learning Inference Engine (https://software.intel.com/openvino-toolkit), " + "opencv: OpenCV implementation, " + "vkcom: VKCOM, " + "cuda: CUDA, " + "webnn: WebNN") + parser.add_argument('--target', default="cpu", type=str, choices=targets, + help="Choose one of target computation devices: " + "cpu: CPU target (by default), " + "opencl: OpenCL, " + "opencl_fp16: OpenCL fp16 (half-float precision), " + "ncs2_vpu: NCS2 VPU, " + "hddl_vpu: HDDL VPU, " + "vulkan: Vulkan, " + "cuda: CUDA, " + "cuda_fp16: CUDA fp16 (half-float precision)") -args.model = findFile(args.model) -args.classes = findFile(args.classes) + args, _ = parser.parse_known_args() + add_preproc_args(args.zoo, parser, 'segmentation') + parser = argparse.ArgumentParser(parents=[parser], + description='Use this script to run semantic segmentation deep learning networks using OpenCV.', + formatter_class=argparse.ArgumentDefaultsHelpFormatter) + return parser.parse_args(func_args) -np.random.seed(324) - -# Load names of classes -classes = None -if args.classes: - with open(args.classes, 'rt') as f: - classes = f.read().rstrip('\n').split('\n') - -# Load colors -colors = None -if args.colors: - with open(args.colors, 'rt') as f: - colors = [np.array(color.split(' '), np.uint8) for color in f.read().rstrip('\n').split('\n')] - -legend = None -def showLegend(classes): - global legend - if not classes is None and legend is None: +def showLegend(labels, colors, legend): + if labels is not None and legend is None: blockHeight = 30 - assert(len(classes) == 
len(colors)) + assert(len(labels) == len(colors)) legend = np.zeros((blockHeight * len(colors), 200, 3), np.uint8) - for i in range(len(classes)): + for i in range(len(labels)): block = legend[i * blockHeight:(i + 1) * blockHeight] block[:,:] = colors[i] - cv.putText(block, classes[i], (0, blockHeight//2), cv.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255)) + cv.putText(block, labels[i], (0, blockHeight//2), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0)) - cv.namedWindow('Legend', cv.WINDOW_NORMAL) + cv.namedWindow('Legend', cv.WINDOW_AUTOSIZE) cv.imshow('Legend', legend) - classes = None + labels = None -# Load a network -net = cv.dnn.readNetFromONNX(args.model) -net.setPreferableBackend(args.backend) -net.setPreferableTarget(args.target) +def main(func_args=None): + args = get_args_parser(func_args) + if args.alias is None or hasattr(args, 'help'): + help() + exit(1) -winName = 'Deep learning semantic segmentation in OpenCV' -cv.namedWindow(winName, cv.WINDOW_NORMAL) + args.model = findModel(args.model, args.sha1) + if args.labels is not None: + args.labels = findFile(args.labels) -cap = cv.VideoCapture(cv.samples.findFile(args.input) if args.input else 0) -legend = None -while cv.waitKey(1) < 0: - hasFrame, frame = cap.read() - if not hasFrame: - cv.waitKey() - break + np.random.seed(324) - cv.imshow("Original Image", frame) - frameHeight = frame.shape[0] - frameWidth = frame.shape[1] - # Create a 4D blob from a frame. - inpWidth = args.width if args.width else frameWidth - inpHeight = args.height if args.height else frameHeight + stdSize = 0.8 + stdWeight = 2 + stdImgSize = 512 + imgWidth = -1 # Initialization + fontSize = 1.5 + fontThickness = 1 - blob = cv.dnn.blobFromImage(frame, args.scale, (inpWidth, inpHeight), args.mean, args.rgb, crop=False) - net.setInput(blob) + # Load names of labels + labels = None + if args.labels: + with open(args.labels, 'rt') as f: + labels = f.read().rstrip('\n').split('\n') - if args.alias == 'u2netp': - output = net.forward(net.getUnconnectedOutLayersNames()) - pred = output[0][0, 0, :, :] - mask = (pred * 255).astype(np.uint8) - mask = cv.resize(mask, (frame.shape[1], frame.shape[0]), interpolation=cv.INTER_AREA) - # Create overlays for foreground and background - foreground_overlay = np.zeros_like(frame, dtype=np.uint8) - # Set foreground (object) to red and background to blue - foreground_overlay[:, :, 2] = mask # Red foreground - # Blend the overlays with the original frame - frame = cv.addWeighted(frame, 0.25, foreground_overlay, 0.75, 0) - else: - score = net.forward() + # Load colors + colors = None + if args.colors: + with open(args.colors, 'rt') as f: + colors = [np.array(color.split(' '), np.uint8) for color in f.read().rstrip('\n').split('\n')] - numClasses = score.shape[1] - height = score.shape[2] - width = score.shape[3] - # Draw segmentation - if not colors: - # Generate colors - colors = [np.array([0, 0, 0], np.uint8)] - for i in range(1, numClasses): - colors.append((colors[i - 1] + np.random.randint(0, 256, [3], np.uint8)) / 2) - classIds = np.argmax(score[0], axis=0) - segm = np.stack([colors[idx] for idx in classIds.flatten()]) - segm = segm.reshape(height, width, 3) + # Load a network + engine = cv.dnn.ENGINE_AUTO + if args.backend != "default" or args.target != "cpu": + engine = cv.dnn.ENGINE_CLASSIC + net = cv.dnn.readNetFromONNX(args.model, engine) + net.setPreferableBackend(get_backend_id(args.backend)) + net.setPreferableTarget(get_target_id(args.target)) - segm = cv.resize(segm, (frameWidth, frameHeight), 
interpolation=cv.INTER_NEAREST) - frame = (0.1 * frame + 0.9 * segm).astype(np.uint8) + winName = 'Deep learning semantic segmentation in OpenCV' + cv.namedWindow(winName, cv.WINDOW_AUTOSIZE) - showLegend(classes) + cap = cv.VideoCapture(cv.samples.findFile(args.input) if args.input else 0) + legend = None + while cv.waitKey(1) < 0: + hasFrame, frame = cap.read() + if not hasFrame: + cv.waitKey() + break + if imgWidth == -1: + imgWidth = max(frame.shape[:2]) + fontSize = min(fontSize, (stdSize*imgWidth)/stdImgSize) + fontThickness = max(fontThickness,(stdWeight*imgWidth)//stdImgSize) - # Put efficiency information. - t, _ = net.getPerfProfile() - label = 'Inference time: %.2f ms' % (t * 1000.0 / cv.getTickFrequency()) - cv.putText(frame, label, (0, 15), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0)) + cv.imshow("Original Image", frame) + frameHeight = frame.shape[0] + frameWidth = frame.shape[1] + # Create a 4D blob from a frame. + inpWidth = args.width if args.width else frameWidth + inpHeight = args.height if args.height else frameHeight - cv.imshow(winName, frame) \ No newline at end of file + blob = cv.dnn.blobFromImage(frame, args.scale, (inpWidth, inpHeight), args.mean, args.rgb, crop=False) + net.setInput(blob) + + if args.alias == 'u2netp': + output = net.forward(net.getUnconnectedOutLayersNames()) + pred = output[0][0, 0, :, :] + mask = (pred * 255).astype(np.uint8) + mask = cv.resize(mask, (frame.shape[1], frame.shape[0]), interpolation=cv.INTER_AREA) + # Create overlays for foreground and background + foreground_overlay = np.zeros_like(frame, dtype=np.uint8) + # Set foreground (object) to red and background to blue + foreground_overlay[:, :, 2] = mask # Red foreground + # Blend the overlays with the original frame + frame = cv.addWeighted(frame, 0.25, foreground_overlay, 0.75, 0) + else: + score = net.forward() + + numClasses = score.shape[1] + height = score.shape[2] + width = score.shape[3] + # Draw segmentation + if not colors: + # Generate colors + colors = [np.array([0, 0, 0], np.uint8)] + for i in range(1, numClasses): + colors.append((colors[i - 1] + np.random.randint(0, 256, [3], np.uint8)) / 2) + classIds = np.argmax(score[0], axis=0) + segm = np.stack([colors[idx] for idx in classIds.flatten()]) + segm = segm.reshape(height, width, 3) + + segm = cv.resize(segm, (frameWidth, frameHeight), interpolation=cv.INTER_NEAREST) + frame = (0.1 * frame + 0.9 * segm).astype(np.uint8) + + showLegend(labels, colors, legend) + + # Put efficiency information. + t, _ = net.getPerfProfile() + label = 'Inference time: %.2f ms' % (t * 1000.0 / cv.getTickFrequency()) + labelSize, _ = cv.getTextSize(label, cv.FONT_HERSHEY_SIMPLEX, fontSize, fontThickness) + cv.rectangle(frame, (0, 0), (labelSize[0]+10, labelSize[1]), (255,255,255), cv.FILLED) + cv.putText(frame, label, (10, int(25*fontSize)), cv.FONT_HERSHEY_SIMPLEX, fontSize, (0, 0, 0), fontThickness) + + cv.imshow(winName, frame) + +if __name__ == "__main__": + main() \ No newline at end of file
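Reviewer note (illustrative, not part of the patch): both files map the new string-valued --backend/--target options to OpenCV enum values through helpers that this diff only calls (getBackendID/getTargetID in C++, get_backend_id/get_target_id in Python, presumably provided by the samples' common module). The Python sketch below shows what such a mapping could look like; the function names, lookup tables, and the fallback behaviour are assumptions, not code from the patch.

import cv2 as cv

# Assumed lookup tables: option string -> cv.dnn enum (strings mirror the help text above).
BACKENDS = {
    "default": cv.dnn.DNN_BACKEND_DEFAULT,
    "openvino": cv.dnn.DNN_BACKEND_INFERENCE_ENGINE,
    "opencv": cv.dnn.DNN_BACKEND_OPENCV,
    "vkcom": cv.dnn.DNN_BACKEND_VKCOM,
    "cuda": cv.dnn.DNN_BACKEND_CUDA,
}
TARGETS = {
    "cpu": cv.dnn.DNN_TARGET_CPU,
    "opencl": cv.dnn.DNN_TARGET_OPENCL,
    "opencl_fp16": cv.dnn.DNN_TARGET_OPENCL_FP16,
    "ncs2_vpu": cv.dnn.DNN_TARGET_MYRIAD,
    "hddl_vpu": cv.dnn.DNN_TARGET_HDDL,
    "vulkan": cv.dnn.DNN_TARGET_VULKAN,
    "cuda": cv.dnn.DNN_TARGET_CUDA,
    "cuda_fp16": cv.dnn.DNN_TARGET_CUDA_FP16,
}

def get_backend_id(name):
    # Unknown strings fall back to the default backend (assumption).
    return BACKENDS.get(name, cv.dnn.DNN_BACKEND_DEFAULT)

def get_target_id(name):
    # Unknown strings fall back to the CPU target (assumption).
    return TARGETS.get(name, cv.dnn.DNN_TARGET_CPU)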
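Reviewer note (illustrative, not part of the patch): for models other than u2netp, both samples turn the (1, C, H, W) score blob into a color image by taking an argmax over the class channel and indexing a per-class color table. The self-contained NumPy sketch below reproduces that step on random data, so it runs without a model; the tensor sizes are arbitrary.

import numpy as np

rng = np.random.default_rng(324)
num_classes, height, width = 5, 4, 6
score = rng.random((1, num_classes, height, width), dtype=np.float32)  # stand-in for net.forward()

# One BGR color per class: black for class 0, random colors for the rest
# (the samples blend each color with the previous one; plain random is enough here).
colors = np.stack([np.zeros(3, np.uint8)] +
                  [rng.integers(0, 256, 3, dtype=np.uint8) for _ in range(num_classes - 1)])

class_ids = np.argmax(score[0], axis=0)   # (H, W) map of the winning class per pixel
segm = colors[class_ids]                  # (H, W, 3) color-coded segmentation
print(segm.shape)                         # -> (4, 6, 3)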