diff --git a/samples/dnn/segmentation.cpp b/samples/dnn/segmentation.cpp index d017c15c68..1c9e976017 100644 --- a/samples/dnn/segmentation.cpp +++ b/samples/dnn/segmentation.cpp @@ -12,81 +12,176 @@ using namespace cv; using namespace std; using namespace dnn; +const string about = + "Use this script to run semantic segmentation deep learning networks using OpenCV.\n\n" + "First, download the required models using `download_models.py` (if not already done). Set the environment variable OPENCV_DOWNLOAD_CACHE_DIR to specify where models should be downloaded. Also, point OPENCV_SAMPLES_DATA_PATH to opencv/samples/data.\n" + "To run:\n" + "\t ./example_dnn_segmentation modelName (e.g. u2netp) --input=$OPENCV_SAMPLES_DATA_PATH/butterfly.jpg (or omit this argument to use the device camera)\n" + "The model path can also be specified using the --model argument."; + const string param_keys = - "{ help h | | Print help message. }" - "{ @alias | | An alias name of model to extract preprocessing parameters from models.yml file. }" - "{ zoo | models.yml | An optional path to file with preprocessing parameters }" - "{ device | 0 | camera device number. }" - "{ input i | | Path to input image or video file. Skip this argument to capture frames from a camera. }" - "{ classes | | Optional path to a text file with names of classes. }" - "{ colors | | Optional path to a text file with colors for an every class. " + "{ help h | | Print help message. }" + "{ @alias | | An alias name of model to extract preprocessing parameters from models.yml file. }" + "{ zoo | ../dnn/models.yml | An optional path to file with preprocessing parameters }" + "{ device | 0 | camera device number. }" + "{ input i | | Path to input image or video file. Skip this argument to capture frames from a camera. }" + "{ colors | | Optional path to a text file with colors for every class. " "Every color is represented with three values from 0 to 255 in BGR channels order. 
}"; const string backend_keys = format( - "{ backend | 0 | Choose one of computation backends: " - "%d: automatically (by default), " - "%d: Intel's Deep Learning Inference Engine (https://software.intel.com/openvino-toolkit), " - "%d: OpenCV implementation, " - "%d: VKCOM, " - "%d: CUDA }", - DNN_BACKEND_DEFAULT, DNN_BACKEND_INFERENCE_ENGINE, DNN_BACKEND_OPENCV, DNN_BACKEND_VKCOM, DNN_BACKEND_CUDA); + "{ backend | default | Choose one of computation backends: " + "default: automatically (by default), " + "openvino: Intel's Deep Learning Inference Engine (https://software.intel.com/openvino-toolkit), " + "opencv: OpenCV implementation, " + "vkcom: VKCOM, " + "cuda: CUDA, " + "webnn: WebNN }"); const string target_keys = format( - "{ target | 0 | Choose one of target computation devices: " - "%d: CPU target (by default), " - "%d: OpenCL, " - "%d: OpenCL fp16 (half-float precision), " - "%d: VPU, " - "%d: Vulkan, " - "%d: CUDA, " - "%d: CUDA fp16 (half-float preprocess) }", - DNN_TARGET_CPU, DNN_TARGET_OPENCL, DNN_TARGET_OPENCL_FP16, DNN_TARGET_MYRIAD, DNN_TARGET_VULKAN, DNN_TARGET_CUDA, DNN_TARGET_CUDA_FP16); + "{ target | cpu | Choose one of target computation devices: " + "cpu: CPU target (by default), " + "opencl: OpenCL, " + "opencl_fp16: OpenCL fp16 (half-float precision), " + "vpu: VPU, " + "vulkan: Vulkan, " + "cuda: CUDA, " + "cuda_fp16: CUDA fp16 (half-float preprocess) }"); string keys = param_keys + backend_keys + target_keys; -vector classes; +vector labels; vector colors; -void showLegend(); -void colorizeSegmentation(const Mat &score, Mat &segm); +static void colorizeSegmentation(const Mat &score, Mat &segm) +{ + const int rows = score.size[2]; + const int cols = score.size[3]; + const int chns = score.size[1]; + + if (colors.empty()) + { + // Generate colors. 
+ colors.push_back(Vec3b()); + for (int i = 1; i < chns; ++i) + { + Vec3b color; + for (int j = 0; j < 3; ++j) + color[j] = (colors[i - 1][j] + rand() % 256) / 2; + colors.push_back(color); + } + } + else if (chns != (int)colors.size()) + { + CV_Error(Error::StsError, format("Number of output labels does not match " + "number of colors (%d != %zu)", + chns, colors.size())); + } + + Mat maxCl = Mat::zeros(rows, cols, CV_8UC1); + Mat maxVal(rows, cols, CV_32FC1, score.data); + for (int ch = 1; ch < chns; ch++) + { + for (int row = 0; row < rows; row++) + { + const float *ptrScore = score.ptr<float>(0, ch, row); + uint8_t *ptrMaxCl = maxCl.ptr<uint8_t>(row); + float *ptrMaxVal = maxVal.ptr<float>(row); + for (int col = 0; col < cols; col++) + { + if (ptrScore[col] > ptrMaxVal[col]) + { + ptrMaxVal[col] = ptrScore[col]; + ptrMaxCl[col] = (uchar)ch; + } + } + } + } + segm.create(rows, cols, CV_8UC3); + for (int row = 0; row < rows; row++) + { + const uchar *ptrMaxCl = maxCl.ptr<uchar>(row); + Vec3b *ptrSegm = segm.ptr<Vec3b>(row); + for (int col = 0; col < cols; col++) + { + ptrSegm[col] = colors[ptrMaxCl[col]]; + } + } +} + +static void showLegend(FontFace fontFace) +{ + static const int kBlockHeight = 30; + static Mat legend; + if (legend.empty()) + { + const int numClasses = (int)labels.size(); + if ((int)colors.size() != numClasses) + { + CV_Error(Error::StsError, format("Number of colors does not match " + "number of labels (%zu != %zu)", + colors.size(), labels.size())); + } + legend.create(kBlockHeight * numClasses, 200, CV_8UC3); + for (int i = 0; i < numClasses; i++) + { + Mat block = legend.rowRange(i * kBlockHeight, (i + 1) * kBlockHeight); + block.setTo(colors[i]); + Rect r = getTextSize(Size(), labels[i], Point(), fontFace, 15, 400); + r.height += 15; // padding + r.width += 10; // padding + rectangle(block, r, Scalar::all(255), FILLED); + putText(block, labels[i], Point(10, kBlockHeight/2), Scalar(0,0,0), fontFace, 15, 400); + } + namedWindow("Legend", WINDOW_AUTOSIZE); + imshow("Legend", legend); + } +} int main(int argc, char **argv) { CommandLineParser parser(argc, argv, keys); const string modelName = parser.get<String>("@alias"); - const string zooFile = parser.get<String>("zoo"); + const string zooFile = findFile(parser.get<String>("zoo")); keys += genPreprocArguments(modelName, zooFile); parser = CommandLineParser(argc, argv, keys); - parser.about("Use this script to run semantic segmentation deep learning networks using OpenCV."); - if (argc == 1 || parser.has("help")) + parser.about(about); + if (!parser.has("@alias") || parser.has("help")) { parser.printMessage(); return 0; } + string sha1 = parser.get<String>("sha1"); float scale = parser.get<float>("scale"); Scalar mean = parser.get<Scalar>("mean"); bool swapRB = parser.get<bool>("rgb"); int inpWidth = parser.get<int>("width"); int inpHeight = parser.get<int>("height"); - String model = findFile(parser.get<String>("model")); - int backendId = parser.get<int>("backend"); - int targetId = parser.get<int>("target"); + String model = findModel(parser.get<String>("model"), sha1); + const string backend = parser.get<String>("backend"); + const string target = parser.get<String>("target"); + int stdSize = 20; + int stdWeight = 400; + int stdImgSize = 512; + int imgWidth = -1; // Initialization + int fontSize = 50; + int fontWeight = 500; + FontFace fontFace("sans"); - // Open file with classes names. - if (parser.has("classes")) + // Open file with label names.
+ if (parser.has("labels")) { - string file = findFile(parser.get<String>("classes")); + string file = findFile(parser.get<String>("labels")); ifstream ifs(file.c_str()); if (!ifs.is_open()) CV_Error(Error::StsError, "File " + file + " not found"); string line; while (getline(ifs, line)) { - classes.push_back(line); + labels.push_back(line); } } // Open file with colors. @@ -116,13 +211,17 @@ int main(int argc, char **argv) CV_Assert(!model.empty()); //! [Read and initialize network] - Net net = readNetFromONNX(model); - net.setPreferableBackend(backendId); - net.setPreferableTarget(targetId); + EngineType engine = ENGINE_AUTO; + if (backend != "default" || target != "cpu"){ + engine = ENGINE_CLASSIC; + } + Net net = readNetFromONNX(model, engine); + net.setPreferableBackend(getBackendID(backend)); + net.setPreferableTarget(getTargetID(target)); //! [Read and initialize network] // Create a window static const string kWinName = "Deep learning semantic segmentation in OpenCV"; - namedWindow(kWinName, WINDOW_NORMAL); + namedWindow(kWinName, WINDOW_AUTOSIZE); //! [Open a video file or an image file or a camera stream] VideoCapture cap; @@ -141,6 +240,11 @@ int main(int argc, char **argv) waitKey(); break; } + if (imgWidth == -1){ + imgWidth = max(frame.rows, frame.cols); + fontSize = min(fontSize, (stdSize*imgWidth)/stdImgSize); + fontWeight = min(fontWeight, (stdWeight*imgWidth)/stdImgSize); + } imshow("Original Image", frame); //! [Create a 4D blob from a frame] blobFromImage(frame, blob, scale, Size(inpWidth, inpHeight), mean, swapRB, false); @@ -185,92 +289,15 @@ int main(int argc, char **argv) double freq = getTickFrequency() / 1000; double t = net.getPerfProfile(layersTimes) / freq; string label = format("Inference time: %.2f ms", t); - putText(frame, label, Point(0, 15), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 255, 0)); + Rect r = getTextSize(Size(), label, Point(), fontFace, fontSize, fontWeight); + r.height += fontSize; // padding + r.width += 10; // padding + rectangle(frame, r, Scalar::all(255), FILLED); + putText(frame, label, Point(10, fontSize), Scalar(0,0,0), fontFace, fontSize, fontWeight); imshow(kWinName, frame); - if (!classes.empty()) - showLegend(); + if (!labels.empty()) + showLegend(fontFace); } return 0; } - -void colorizeSegmentation(const Mat &score, Mat &segm) -{ - const int rows = score.size[2]; - const int cols = score.size[3]; - const int chns = score.size[1]; - - if (colors.empty()) - { - // Generate colors.
- colors.push_back(Vec3b()); - for (int i = 1; i < chns; ++i) - { - Vec3b color; - for (int j = 0; j < 3; ++j) - color[j] = (colors[i - 1][j] + rand() % 256) / 2; - colors.push_back(color); - } - } - else if (chns != (int)colors.size()) - { - CV_Error(Error::StsError, format("Number of output classes does not match " - "number of colors (%d != %zu)", - chns, colors.size())); - } - - Mat maxCl = Mat::zeros(rows, cols, CV_8UC1); - Mat maxVal(rows, cols, CV_32FC1, score.data); - for (int ch = 1; ch < chns; ch++) - { - for (int row = 0; row < rows; row++) - { - const float *ptrScore = score.ptr<float>(0, ch, row); - uint8_t *ptrMaxCl = maxCl.ptr<uint8_t>(row); - float *ptrMaxVal = maxVal.ptr<float>(row); - for (int col = 0; col < cols; col++) - { - if (ptrScore[col] > ptrMaxVal[col]) - { - ptrMaxVal[col] = ptrScore[col]; - ptrMaxCl[col] = (uchar)ch; - } - } - } - } - segm.create(rows, cols, CV_8UC3); - for (int row = 0; row < rows; row++) - { - const uchar *ptrMaxCl = maxCl.ptr<uchar>(row); - Vec3b *ptrSegm = segm.ptr<Vec3b>(row); - for (int col = 0; col < cols; col++) - { - ptrSegm[col] = colors[ptrMaxCl[col]]; - } - } -} - -void showLegend() -{ - static const int kBlockHeight = 30; - static Mat legend; - if (legend.empty()) - { - const int numClasses = (int)classes.size(); - if ((int)colors.size() != numClasses) - { - CV_Error(Error::StsError, format("Number of output classes does not match " - "number of labels (%zu != %zu)", - colors.size(), classes.size())); - } - legend.create(kBlockHeight * numClasses, 200, CV_8UC3); - for (int i = 0; i < numClasses; i++) - { - Mat block = legend.rowRange(i * kBlockHeight, (i + 1) * kBlockHeight); - block.setTo(colors[i]); - putText(block, classes[i], Point(0, kBlockHeight / 2), FONT_HERSHEY_SIMPLEX, 0.5, Vec3b(255, 255, 255)); - } - namedWindow("Legend", WINDOW_NORMAL); - imshow("Legend", legend); - } -} diff --git a/samples/dnn/segmentation.py b/samples/dnn/segmentation.py index bca11bc1d8..39b5edc8b5 100644 --- a/samples/dnn/segmentation.py +++ b/samples/dnn/segmentation.py @@ -1,140 +1,176 @@ import cv2 as cv import argparse import numpy as np -import sys from common import * -backends = (cv.dnn.DNN_BACKEND_DEFAULT, cv.dnn.DNN_BACKEND_INFERENCE_ENGINE, cv.dnn.DNN_BACKEND_OPENCV, - cv.dnn.DNN_BACKEND_VKCOM, cv.dnn.DNN_BACKEND_CUDA) -targets = (cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_OPENCL, cv.dnn.DNN_TARGET_OPENCL_FP16, cv.dnn.DNN_TARGET_MYRIAD, cv.dnn.DNN_TARGET_HDDL, - cv.dnn.DNN_TARGET_VULKAN, cv.dnn.DNN_TARGET_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16) +def help(): + print( + ''' + First, download the required models using `download_models.py` (if not already done). Set the environment variable OPENCV_DOWNLOAD_CACHE_DIR to specify where models should be downloaded. Also, point OPENCV_SAMPLES_DATA_PATH to opencv/samples/data.\n -parser = argparse.ArgumentParser(add_help=False) -parser.add_argument('--zoo', default=os.path.join(os.path.dirname(os.path.abspath(__file__)), 'models.yml'), - help='An optional path to file with preprocessing parameters.') -parser.add_argument('--input', help='Path to input image or video file. Skip this argument to capture frames from a camera.') -parser.add_argument('--colors', help='Optional path to a text file with colors for an every class. ' - 'An every color is represented with three values from 0 to 255 in BGR channels order.') -parser.add_argument('--backend', choices=backends, default=cv.dnn.DNN_BACKEND_DEFAULT, type=int, + To run: + python segmentation.py model_name (e.g. 
u2netp) --input=path/to/your/input/image/or/video (omit the --input flag to use the device camera) + + The model path can also be specified using the --model argument + ''' + ) + +def get_args_parser(func_args): + backends = ("default", "openvino", "opencv", "vkcom", "cuda") + targets = ("cpu", "opencl", "opencl_fp16", "ncs2_vpu", "hddl_vpu", "vulkan", "cuda", "cuda_fp16") + + parser = argparse.ArgumentParser(add_help=False) + parser.add_argument('--zoo', default=os.path.join(os.path.dirname(os.path.abspath(__file__)), 'models.yml'), + help='An optional path to file with preprocessing parameters.') + parser.add_argument('--input', help='Path to input image or video file. Skip this argument to capture frames from a camera.') + parser.add_argument('--colors', help='Optional path to a text file with colors for every class. ' + 'Every color is represented with three values from 0 to 255 in BGR channels order.') + parser.add_argument('--backend', default="default", type=str, choices=backends, help="Choose one of computation backends: " - "%d: automatically (by default), " - "%d: Intel's Deep Learning Inference Engine (https://software.intel.com/openvino-toolkit), " - "%d: OpenCV implementation, " - "%d: VKCOM, " - "%d: CUDA"% backends) -parser.add_argument('--target', choices=targets, default=cv.dnn.DNN_TARGET_CPU, type=int, - help='Choose one of target computation devices: ' - '%d: CPU target (by default), ' - '%d: OpenCL, ' - '%d: OpenCL fp16 (half-float precision), ' - '%d: NCS2 VPU, ' - '%d: HDDL VPU, ' - '%d: Vulkan, ' - '%d: CUDA, ' - '%d: CUDA fp16 (half-float preprocess)'% targets) -args, _ = parser.parse_known_args() -add_preproc_args(args.zoo, parser, 'segmentation') -parser = argparse.ArgumentParser(parents=[parser], - description='Use this script to run semantic segmentation deep learning networks using OpenCV.', - formatter_class=argparse.ArgumentDefaultsHelpFormatter) -args = parser.parse_args() + "default: automatically (by default), " + "openvino: Intel's Deep Learning Inference Engine (https://software.intel.com/openvino-toolkit), " + "opencv: OpenCV implementation, " + "vkcom: VKCOM, " + "cuda: CUDA, " + "webnn: WebNN") + parser.add_argument('--target', default="cpu", type=str, choices=targets, + help="Choose one of target computation devices: " + "cpu: CPU target (by default), " + "opencl: OpenCL, " + "opencl_fp16: OpenCL fp16 (half-float precision), " + "ncs2_vpu: NCS2 VPU, " + "hddl_vpu: HDDL VPU, " + "vulkan: Vulkan, " + "cuda: CUDA, " + "cuda_fp16: CUDA fp16 (half-float precision)") -args.model = findFile(args.model) -args.classes = findFile(args.classes) + args, _ = parser.parse_known_args() + add_preproc_args(args.zoo, parser, 'segmentation') + parser = argparse.ArgumentParser(parents=[parser], + description='Use this script to run semantic segmentation deep learning networks using OpenCV.', + formatter_class=argparse.ArgumentDefaultsHelpFormatter) + return parser.parse_args(func_args) -np.random.seed(324) - -# Load names of classes -classes = None -if args.classes: - with open(args.classes, 'rt') as f: - classes = f.read().rstrip('\n').split('\n') - -# Load colors -colors = None -if args.colors: - with open(args.colors, 'rt') as f: - colors = [np.array(color.split(' '), np.uint8) for color in f.read().rstrip('\n').split('\n')] - -legend = None -def showLegend(classes): - global legend - if not classes is None and legend is None: +def showLegend(labels, colors, legend): + if labels is not None and legend is None: blockHeight = 30 - assert(len(classes) == 
len(colors)) + assert(len(labels) == len(colors)) legend = np.zeros((blockHeight * len(colors), 200, 3), np.uint8) - for i in range(len(classes)): + for i in range(len(labels)): block = legend[i * blockHeight:(i + 1) * blockHeight] block[:,:] = colors[i] - cv.putText(block, classes[i], (0, blockHeight//2), cv.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255)) + cv.putText(block, labels[i], (0, blockHeight//2), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0)) - cv.namedWindow('Legend', cv.WINDOW_NORMAL) + cv.namedWindow('Legend', cv.WINDOW_AUTOSIZE) cv.imshow('Legend', legend) - classes = None + labels = None -# Load a network -net = cv.dnn.readNetFromONNX(args.model) -net.setPreferableBackend(args.backend) -net.setPreferableTarget(args.target) +def main(func_args=None): + args = get_args_parser(func_args) + if args.alias is None or hasattr(args, 'help'): + help() + exit(1) -winName = 'Deep learning semantic segmentation in OpenCV' -cv.namedWindow(winName, cv.WINDOW_NORMAL) + args.model = findModel(args.model, args.sha1) + if args.labels is not None: + args.labels = findFile(args.labels) -cap = cv.VideoCapture(cv.samples.findFile(args.input) if args.input else 0) -legend = None -while cv.waitKey(1) < 0: - hasFrame, frame = cap.read() - if not hasFrame: - cv.waitKey() - break + np.random.seed(324) - cv.imshow("Original Image", frame) - frameHeight = frame.shape[0] - frameWidth = frame.shape[1] - # Create a 4D blob from a frame. - inpWidth = args.width if args.width else frameWidth - inpHeight = args.height if args.height else frameHeight + stdSize = 0.8 + stdWeight = 2 + stdImgSize = 512 + imgWidth = -1 # Initialization + fontSize = 1.5 + fontThickness = 1 - blob = cv.dnn.blobFromImage(frame, args.scale, (inpWidth, inpHeight), args.mean, args.rgb, crop=False) - net.setInput(blob) + # Load names of labels + labels = None + if args.labels: + with open(args.labels, 'rt') as f: + labels = f.read().rstrip('\n').split('\n') - if args.alias == 'u2netp': - output = net.forward(net.getUnconnectedOutLayersNames()) - pred = output[0][0, 0, :, :] - mask = (pred * 255).astype(np.uint8) - mask = cv.resize(mask, (frame.shape[1], frame.shape[0]), interpolation=cv.INTER_AREA) - # Create overlays for foreground and background - foreground_overlay = np.zeros_like(frame, dtype=np.uint8) - # Set foreground (object) to red and background to blue - foreground_overlay[:, :, 2] = mask # Red foreground - # Blend the overlays with the original frame - frame = cv.addWeighted(frame, 0.25, foreground_overlay, 0.75, 0) - else: - score = net.forward() + # Load colors + colors = None + if args.colors: + with open(args.colors, 'rt') as f: + colors = [np.array(color.split(' '), np.uint8) for color in f.read().rstrip('\n').split('\n')] - numClasses = score.shape[1] - height = score.shape[2] - width = score.shape[3] - # Draw segmentation - if not colors: - # Generate colors - colors = [np.array([0, 0, 0], np.uint8)] - for i in range(1, numClasses): - colors.append((colors[i - 1] + np.random.randint(0, 256, [3], np.uint8)) / 2) - classIds = np.argmax(score[0], axis=0) - segm = np.stack([colors[idx] for idx in classIds.flatten()]) - segm = segm.reshape(height, width, 3) + # Load a network + engine = cv.dnn.ENGINE_AUTO + if args.backend != "default" or args.target != "cpu": + engine = cv.dnn.ENGINE_CLASSIC + net = cv.dnn.readNetFromONNX(args.model, engine) + net.setPreferableBackend(get_backend_id(args.backend)) + net.setPreferableTarget(get_target_id(args.target)) - segm = cv.resize(segm, (frameWidth, frameHeight), 
interpolation=cv.INTER_NEAREST) - frame = (0.1 * frame + 0.9 * segm).astype(np.uint8) + winName = 'Deep learning semantic segmentation in OpenCV' + cv.namedWindow(winName, cv.WINDOW_AUTOSIZE) - showLegend(classes) + cap = cv.VideoCapture(cv.samples.findFile(args.input) if args.input else 0) + legend = None + while cv.waitKey(1) < 0: + hasFrame, frame = cap.read() + if not hasFrame: + cv.waitKey() + break + if imgWidth == -1: + imgWidth = max(frame.shape[:2]) + fontSize = min(fontSize, (stdSize*imgWidth)/stdImgSize) + fontThickness = max(fontThickness,(stdWeight*imgWidth)//stdImgSize) - # Put efficiency information. - t, _ = net.getPerfProfile() - label = 'Inference time: %.2f ms' % (t * 1000.0 / cv.getTickFrequency()) - cv.putText(frame, label, (0, 15), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0)) + cv.imshow("Original Image", frame) + frameHeight = frame.shape[0] + frameWidth = frame.shape[1] + # Create a 4D blob from a frame. + inpWidth = args.width if args.width else frameWidth + inpHeight = args.height if args.height else frameHeight - cv.imshow(winName, frame) \ No newline at end of file + blob = cv.dnn.blobFromImage(frame, args.scale, (inpWidth, inpHeight), args.mean, args.rgb, crop=False) + net.setInput(blob) + + if args.alias == 'u2netp': + output = net.forward(net.getUnconnectedOutLayersNames()) + pred = output[0][0, 0, :, :] + mask = (pred * 255).astype(np.uint8) + mask = cv.resize(mask, (frame.shape[1], frame.shape[0]), interpolation=cv.INTER_AREA) + # Create overlays for foreground and background + foreground_overlay = np.zeros_like(frame, dtype=np.uint8) + # Set foreground (object) to red and background to blue + foreground_overlay[:, :, 2] = mask # Red foreground + # Blend the overlays with the original frame + frame = cv.addWeighted(frame, 0.25, foreground_overlay, 0.75, 0) + else: + score = net.forward() + + numClasses = score.shape[1] + height = score.shape[2] + width = score.shape[3] + # Draw segmentation + if not colors: + # Generate colors + colors = [np.array([0, 0, 0], np.uint8)] + for i in range(1, numClasses): + colors.append((colors[i - 1] + np.random.randint(0, 256, [3], np.uint8)) / 2) + classIds = np.argmax(score[0], axis=0) + segm = np.stack([colors[idx] for idx in classIds.flatten()]) + segm = segm.reshape(height, width, 3) + + segm = cv.resize(segm, (frameWidth, frameHeight), interpolation=cv.INTER_NEAREST) + frame = (0.1 * frame + 0.9 * segm).astype(np.uint8) + + showLegend(labels, colors, legend) + + # Put efficiency information. + t, _ = net.getPerfProfile() + label = 'Inference time: %.2f ms' % (t * 1000.0 / cv.getTickFrequency()) + labelSize, _ = cv.getTextSize(label, cv.FONT_HERSHEY_SIMPLEX, fontSize, fontThickness) + cv.rectangle(frame, (0, 0), (labelSize[0]+10, labelSize[1]), (255,255,255), cv.FILLED) + cv.putText(frame, label, (10, int(25*fontSize)), cv.FONT_HERSHEY_SIMPLEX, fontSize, (0, 0, 0), fontThickness) + + cv.imshow(winName, frame) + +if __name__ == "__main__": + main() \ No newline at end of file
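Reviewer note (illustrative, not part of the patch): both files map the new string-valued --backend/--target options to OpenCV enum values through helpers that this diff only calls (getBackendID/getTargetID in C++, get_backend_id/get_target_id in Python, presumably provided by the samples' common module). The Python sketch below shows what such a mapping could look like; the function names, lookup tables, and the fallback behaviour are assumptions, not code from the patch.

import cv2 as cv

# Assumed lookup tables: option string -> cv.dnn enum (strings mirror the help text above).
BACKENDS = {
    "default": cv.dnn.DNN_BACKEND_DEFAULT,
    "openvino": cv.dnn.DNN_BACKEND_INFERENCE_ENGINE,
    "opencv": cv.dnn.DNN_BACKEND_OPENCV,
    "vkcom": cv.dnn.DNN_BACKEND_VKCOM,
    "cuda": cv.dnn.DNN_BACKEND_CUDA,
}
TARGETS = {
    "cpu": cv.dnn.DNN_TARGET_CPU,
    "opencl": cv.dnn.DNN_TARGET_OPENCL,
    "opencl_fp16": cv.dnn.DNN_TARGET_OPENCL_FP16,
    "ncs2_vpu": cv.dnn.DNN_TARGET_MYRIAD,
    "hddl_vpu": cv.dnn.DNN_TARGET_HDDL,
    "vulkan": cv.dnn.DNN_TARGET_VULKAN,
    "cuda": cv.dnn.DNN_TARGET_CUDA,
    "cuda_fp16": cv.dnn.DNN_TARGET_CUDA_FP16,
}

def get_backend_id(name):
    # Unknown strings fall back to the default backend (assumption).
    return BACKENDS.get(name, cv.dnn.DNN_BACKEND_DEFAULT)

def get_target_id(name):
    # Unknown strings fall back to the CPU target (assumption).
    return TARGETS.get(name, cv.dnn.DNN_TARGET_CPU)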
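Reviewer note (illustrative, not part of the patch): for models other than u2netp, both samples turn the (1, C, H, W) score blob into a color image by taking an argmax over the class channel and indexing a per-class color table. The self-contained NumPy sketch below reproduces that step on random data, so it runs without a model; the tensor sizes are arbitrary.

import numpy as np

rng = np.random.default_rng(324)
num_classes, height, width = 5, 4, 6
score = rng.random((1, num_classes, height, width), dtype=np.float32)  # stand-in for net.forward()

# One BGR color per class: black for class 0, random colors for the rest
# (the samples blend each color with the previous one; plain random is enough here).
colors = np.stack([np.zeros(3, np.uint8)] +
                  [rng.integers(0, 256, 3, dtype=np.uint8) for _ in range(num_classes - 1)])

class_ids = np.argmax(score[0], axis=0)   # (H, W) map of the winning class per pixel
segm = colors[class_ids]                  # (H, W, 3) color-coded segmentation
print(segm.shape)                         # -> (4, 6, 3)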