diff --git a/modules/core/include/opencv2/core.hpp b/modules/core/include/opencv2/core.hpp
index 2f8039dc47..4ab85a4fe3 100644
--- a/modules/core/include/opencv2/core.hpp
+++ b/modules/core/include/opencv2/core.hpp
@@ -3159,7 +3159,7 @@ protected:
 struct Param {
     enum { INT=0, BOOLEAN=1, REAL=2, STRING=3, MAT=4, MAT_VECTOR=5, ALGORITHM=6, FLOAT=7,
-           UNSIGNED_INT=8, UINT64=9, UCHAR=11 };
+           UNSIGNED_INT=8, UINT64=9, UCHAR=11, SCALAR=12 };
 };
 
@@ -3252,6 +3252,14 @@ template<> struct ParamType<uchar>
     enum { type = Param::UCHAR };
 };
 
+template<> struct ParamType<Scalar>
+{
+    typedef const Scalar& const_param_type;
+    typedef Scalar member_type;
+
+    enum { type = Param::SCALAR };
+};
+
 //! @} core_basic
 
 } //namespace cv
diff --git a/modules/core/src/command_line_parser.cpp b/modules/core/src/command_line_parser.cpp
index 6529bfd704..7d5c6cc03a 100644
--- a/modules/core/src/command_line_parser.cpp
+++ b/modules/core/src/command_line_parser.cpp
@@ -104,6 +104,12 @@ static void from_str(const String& str, int type, void* dst)
         ss >> *(double*)dst;
     else if( type == Param::STRING )
         *(String*)dst = str;
+    else if( type == Param::SCALAR)
+    {
+        Scalar& scalar = *(Scalar*)dst;
+        for (int i = 0; i < 4 && !ss.eof(); ++i)
+            ss >> scalar[i];
+    }
     else
         CV_Error(Error::StsBadArg, "unknown/unsupported parameter type");
diff --git a/samples/dnn/classification_classes_ILSVRC2012.txt b/samples/data/dnn/classification_classes_ILSVRC2012.txt
similarity index 100%
rename from samples/dnn/classification_classes_ILSVRC2012.txt
rename to samples/data/dnn/classification_classes_ILSVRC2012.txt
diff --git a/samples/data/dnn/enet-classes.txt b/samples/data/dnn/enet-classes.txt
new file mode 100644
index 0000000000..1616563ac1
--- /dev/null
+++ b/samples/data/dnn/enet-classes.txt
@@ -0,0 +1,20 @@
+Unlabeled
+Road
+Sidewalk
+Building
+Wall
+Fence
+Pole
+TrafficLight
+TrafficSign
+Vegetation
+Terrain
+Sky
+Person
+Rider
+Car
+Truck
+Bus
+Train
+Motorcycle
+Bicycle
diff --git a/samples/dnn/object_detection_classes_coco.txt b/samples/data/dnn/object_detection_classes_coco.txt
similarity index 100%
rename from samples/dnn/object_detection_classes_coco.txt
rename to samples/data/dnn/object_detection_classes_coco.txt
diff --git a/samples/dnn/object_detection_classes_pascal_voc.txt b/samples/data/dnn/object_detection_classes_pascal_voc.txt
similarity index 100%
rename from samples/dnn/object_detection_classes_pascal_voc.txt
rename to samples/data/dnn/object_detection_classes_pascal_voc.txt
diff --git a/samples/dnn/README.md b/samples/dnn/README.md
index fea2025bda..121b703093 100644
--- a/samples/dnn/README.md
+++ b/samples/dnn/README.md
@@ -20,7 +20,14 @@
 | GoogLeNet | `1.0` | `224x224` | `104 117 123` | BGR |
 | [SqueezeNet](https://github.com/DeepScale/SqueezeNet) | `1.0` | `227x227` | `0 0 0` | BGR |
 
+### Semantic segmentation
+| Model | Scale | Size WxH | Mean subtraction | Channels order |
+|---------------|-------|-----------|--------------------|-------|
+| [ENet](https://github.com/e-lab/ENet-training) | `0.00392 (1/255)` | `1024x512` | `0 0 0` | RGB |
+| FCN8s | `1.0` | `500x500` | `0 0 0` | BGR |
+
 ## References
 * [Models downloading script](https://github.com/opencv/opencv_extra/blob/master/testdata/dnn/download_models.py)
 * [Configuration files adopted for OpenCV](https://github.com/opencv/opencv_extra/tree/master/testdata/dnn)
 * [How to import models from TensorFlow Object Detection API](https://github.com/opencv/opencv/wiki/TensorFlow-Object-Detection-API)
+* [Names of classes from different datasets](https://github.com/opencv/opencv/tree/master/samples/data/dnn)
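The `Param::SCALAR` branch in `from_str` above reads up to four space-delimited numbers into a `cv::Scalar`, which is what lets the samples below drop their hand-rolled mean parsing. A minimal sketch of the resulting usage, assuming the patch is applied; the key string and its default value are illustrative, not part of this patch:

    #include <opencv2/core/utility.hpp>
    #include <iostream>

    int main(int argc, char** argv)
    {
        // Hypothetical option; the dnn samples below use the same pattern for "mean".
        const char* keys = "{ mean | 104 117 123 | BGR mean values, space-delimited }";
        cv::CommandLineParser parser(argc, argv, keys);

        // from_str() fills up to four components; any missing ones stay zero.
        cv::Scalar mean = parser.get<cv::Scalar>("mean");
        std::cout << mean << std::endl;  // [104, 117, 123, 0]
        return 0;
    }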
diff --git a/samples/dnn/classification.cpp b/samples/dnn/classification.cpp
index d3ae08ee6c..9407326831 100644
--- a/samples/dnn/classification.cpp
+++ b/samples/dnn/classification.cpp
@@ -1,5 +1,4 @@
 #include <fstream>
-#include <sstream>
 #include <iostream>
 
 #include <opencv2/dnn.hpp>
@@ -17,17 +16,17 @@
     "{ framework f | | Optional name of an origin framework of the model. Detect it automatically if it does not set. }"
     "{ classes     | | Optional path to a text file with names of classes. }"
     "{ mean        | | Preprocess input image by subtracting mean values. Mean values should be in BGR order and delimited by spaces. }"
-    "{ scale       |  1 | Preprocess input image by multiplying on a scale factor. }"
-    "{ width       | -1 | Preprocess input image by resizing to a specific width. }"
-    "{ height      | -1 | Preprocess input image by resizing to a specific height. }"
-    "{ rgb         |    | Indicate that model works with RGB input images instead BGR ones. }"
-    "{ backend     |  0 | Choose one of computation backends: "
-                         "0: default C++ backend, "
-                         "1: Halide language (http://halide-lang.org/), "
-                         "2: Intel's Deep Learning Inference Engine (https://software.seek.intel.com/deep-learning-deployment)}"
-    "{ target      |  0 | Choose one of target computation devices: "
-                         "0: CPU target (by default),"
-                         "1: OpenCL }";
+    "{ scale       |  1 | Preprocess input image by multiplying on a scale factor. }"
+    "{ width       |    | Preprocess input image by resizing to a specific width. }"
+    "{ height      |    | Preprocess input image by resizing to a specific height. }"
+    "{ rgb         |    | Indicate that model works with RGB input images instead BGR ones. }"
+    "{ backend     |  0 | Choose one of computation backends: "
+                         "0: default C++ backend, "
+                         "1: Halide language (http://halide-lang.org/), "
+                         "2: Intel's Deep Learning Inference Engine (https://software.seek.intel.com/deep-learning-deployment)}"
+    "{ target      |  0 | Choose one of target computation devices: "
+                         "0: CPU target (by default),"
+                         "1: OpenCL }";
 
 using namespace cv;
 using namespace dnn;
@@ -45,7 +44,9 @@ int main(int argc, char** argv)
     }
 
     float scale = parser.get<float>("scale");
+    Scalar mean = parser.get<Scalar>("mean");
     bool swapRB = parser.get<bool>("rgb");
+    CV_Assert(parser.has("width"), parser.has("height"));
     int inpWidth = parser.get<int>("width");
     int inpHeight = parser.get<int>("height");
     String model = parser.get<String>("model");
@@ -54,19 +55,6 @@ int main(int argc, char** argv)
     int backendId = parser.get<int>("backend");
     int targetId = parser.get<int>("target");
 
-    // Parse mean values.
-    Scalar mean;
-    if (parser.has("mean"))
-    {
-        std::istringstream meanStr(parser.get<String>("mean"));
-        std::vector<float> meanValues;
-        float val;
-        while (meanStr >> val)
-            meanValues.push_back(val);
-        CV_Assert(meanValues.size() == 3);
-        mean = Scalar(meanValues[0], meanValues[1], meanValues[2]);
-    }
-
     // Open file with classes names.
     if (parser.has("classes"))
     {
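Worth noting how the README table rows map onto `blobFromImage` arguments. For the ENet row (scale `0.00392`, input `1024x512`, zero mean, RGB order) the preprocessing boils down to the sketch below; the input file name is an assumption for illustration:

    #include <opencv2/dnn.hpp>
    #include <opencv2/imgcodecs.hpp>

    using namespace cv;

    int main()
    {
        Mat frame = imread("city.png");  // any BGR test image; the name is illustrative
        // ENet row: scale 0.00392 (1/255), 1024x512 input, mean 0 0 0, RGB channel order.
        Mat blob = dnn::blobFromImage(frame, 0.00392, Size(1024, 512), Scalar(),
                                      /*swapRB=*/true, /*crop=*/false);
        return 0;
    }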
diff --git a/samples/dnn/fcn_semsegm.cpp b/samples/dnn/fcn_semsegm.cpp
deleted file mode 100644
index 7789d0db80..0000000000
--- a/samples/dnn/fcn_semsegm.cpp
+++ /dev/null
@@ -1,138 +0,0 @@
-#include <opencv2/dnn.hpp>
-#include <opencv2/imgproc.hpp>
-#include <opencv2/highgui.hpp>
-using namespace cv;
-using namespace cv::dnn;
-
-#include <fstream>
-#include <iostream>
-#include <cstdlib>
-using namespace std;
-
-static const string fcnType = "fcn8s";
-
-static vector<cv::Vec3b> readColors(const string &filename = "pascal-classes.txt")
-{
-    vector<cv::Vec3b> colors;
-
-    ifstream fp(filename.c_str());
-    if (!fp.is_open())
-    {
-        cerr << "File with colors not found: " << filename << endl;
-        exit(-1);
-    }
-
-    string line;
-    while (!fp.eof())
-    {
-        getline(fp, line);
-        if (line.length())
-        {
-            stringstream ss(line);
-
-            string name; ss >> name;
-            int temp;
-            cv::Vec3b color;
-            ss >> temp; color[0] = (uchar)temp;
-            ss >> temp; color[1] = (uchar)temp;
-            ss >> temp; color[2] = (uchar)temp;
-            colors.push_back(color);
-        }
-    }
-
-    fp.close();
-    return colors;
-}
-
-static void colorizeSegmentation(const Mat &score, const vector<cv::Vec3b> &colors, cv::Mat &segm)
-{
-    const int rows = score.size[2];
-    const int cols = score.size[3];
-    const int chns = score.size[1];
-
-    cv::Mat maxCl = cv::Mat::zeros(rows, cols, CV_8UC1);
-    cv::Mat maxVal(rows, cols, CV_32FC1, cv::Scalar(-FLT_MAX));
-    for (int ch = 0; ch < chns; ch++)
-    {
-        for (int row = 0; row < rows; row++)
-        {
-            const float *ptrScore = score.ptr<float>(0, ch, row);
-            uchar *ptrMaxCl = maxCl.ptr<uchar>(row);
-            float *ptrMaxVal = maxVal.ptr<float>(row);
-            for (int col = 0; col < cols; col++)
-            {
-                if (ptrScore[col] > ptrMaxVal[col])
-                {
-                    ptrMaxVal[col] = ptrScore[col];
-                    ptrMaxCl[col] = (uchar)ch;
-                }
-            }
-        }
-    }
-
-    segm.create(rows, cols, CV_8UC3);
-    for (int row = 0; row < rows; row++)
-    {
-        const uchar *ptrMaxCl = maxCl.ptr<uchar>(row);
-        cv::Vec3b *ptrSegm = segm.ptr<cv::Vec3b>(row);
-        for (int col = 0; col < cols; col++)
-        {
-            ptrSegm[col] = colors[ptrMaxCl[col]];
-        }
-    }
-
-}
-
-int main(int argc, char **argv)
-{
-    String modelTxt = fcnType + "-heavy-pascal.prototxt";
-    String modelBin = fcnType + "-heavy-pascal.caffemodel";
-    String imageFile = (argc > 1) ? argv[1] : "rgb.jpg";
-
-    vector<cv::Vec3b> colors = readColors();
-
-    //! [Initialize network]
-    dnn::Net net = readNetFromCaffe(modelTxt, modelBin);
-    //! [Initialize network]
-
-    if (net.empty())
-    {
-        cerr << "Can't load network by using the following files: " << endl;
-        cerr << "prototxt:   " << modelTxt << endl;
-        cerr << "caffemodel: " << modelBin << endl;
-        cerr << fcnType << "-heavy-pascal.caffemodel can be downloaded here:" << endl;
-        cerr << "http://dl.caffe.berkeleyvision.org/" << fcnType << "-heavy-pascal.caffemodel" << endl;
-        exit(-1);
-    }
-
-    //! [Prepare blob]
-    Mat img = imread(imageFile);
-    if (img.empty())
-    {
-        cerr << "Can't read image from the file: " << imageFile << endl;
-        exit(-1);
-    }
-
-    resize(img, img, Size(500, 500), 0, 0, INTER_LINEAR_EXACT); //FCN accepts 500x500 BGR-images
-    Mat inputBlob = blobFromImage(img, 1, Size(), Scalar(), false);   //Convert Mat to batch of images
-    //! [Prepare blob]
-
-    //! [Set input blob]
-    net.setInput(inputBlob, "data");        //set the network input
-    //! [Set input blob]
-
-    //! [Make forward pass]
-    double t = (double)cv::getTickCount();
-    Mat score = net.forward("score");                          //compute output
-    t = (double)cv::getTickCount() - t;
-    printf("processing time: %.1fms\n", t*1000./getTickFrequency());
-    //! [Make forward pass]
-
-    Mat colorize;
-    colorizeSegmentation(score, colors, colorize);
-    Mat show;
-    addWeighted(img, 0.4, colorize, 0.6, 0.0, show);
-    imshow("show", show);
-    waitKey(0);
-    return 0;
-} //main
diff --git a/samples/dnn/object_detection.cpp b/samples/dnn/object_detection.cpp
index 81575d2d59..52b08fd009 100644
--- a/samples/dnn/object_detection.cpp
+++ b/samples/dnn/object_detection.cpp
@@ -1,5 +1,4 @@
 #include <fstream>
-#include <sstream>
 #include <iostream>
 
 #include <opencv2/dnn.hpp>
@@ -54,23 +53,11 @@ int main(int argc, char** argv)
     confThreshold = parser.get<float>("thr");
     float scale = parser.get<float>("scale");
+    Scalar mean = parser.get<Scalar>("mean");
     bool swapRB = parser.get<bool>("rgb");
     int inpWidth = parser.get<int>("width");
     int inpHeight = parser.get<int>("height");
 
-    // Parse mean values.
-    Scalar mean;
-    if (parser.has("mean"))
-    {
-        std::istringstream meanStr(parser.get<String>("mean"));
-        std::vector<float> meanValues;
-        float val;
-        while (meanStr >> val)
-            meanValues.push_back(val);
-        CV_Assert(meanValues.size() == 3);
-        mean = Scalar(meanValues[0], meanValues[1], meanValues[2]);
-    }
-
     // Open file with classes names.
     if (parser.has("classes"))
     {
diff --git a/samples/dnn/segmentation.cpp b/samples/dnn/segmentation.cpp
new file mode 100644
index 0000000000..a0eb15bc86
--- /dev/null
+++ b/samples/dnn/segmentation.cpp
@@ -0,0 +1,237 @@
+#include <fstream>
+#include <sstream>
+
+#include <opencv2/dnn.hpp>
+#include <opencv2/imgproc.hpp>
+#include <opencv2/highgui.hpp>
+
+const char* keys =
+    "{ help  h     | | Print help message. }"
+    "{ input i     | | Path to input image or video file. Skip this argument to capture frames from a camera.}"
+    "{ model m     | | Path to a binary file of model contains trained weights. "
+                      "It could be a file with extensions .caffemodel (Caffe), "
+                      ".pb (TensorFlow), .t7 or .net (Torch), .weights (Darknet) }"
+    "{ config c    | | Path to a text file of model contains network configuration. "
+                      "It could be a file with extensions .prototxt (Caffe), .pbtxt (TensorFlow), .cfg (Darknet) }"
+    "{ framework f | | Optional name of an origin framework of the model. Detect it automatically if it does not set. }"
+    "{ classes     | | Optional path to a text file with names of classes. }"
+    "{ colors      | | Optional path to a text file with colors for an every class. "
+                      "An every color is represented with three values from 0 to 255 in BGR channels order. }"
+    "{ mean        | | Preprocess input image by subtracting mean values. Mean values should be in BGR order and delimited by spaces. }"
+    "{ scale       |  1 | Preprocess input image by multiplying on a scale factor. }"
+    "{ width       |    | Preprocess input image by resizing to a specific width. }"
+    "{ height      |    | Preprocess input image by resizing to a specific height. }"
+    "{ rgb         |    | Indicate that model works with RGB input images instead BGR ones. }"
}" + "{ backend | 0 | Choose one of computation backends: " + "0: default C++ backend, " + "1: Halide language (http://halide-lang.org/), " + "2: Intel's Deep Learning Inference Engine (https://software.seek.intel.com/deep-learning-deployment)}" + "{ target | 0 | Choose one of target computation devices: " + "0: CPU target (by default)," + "1: OpenCL }"; + +using namespace cv; +using namespace dnn; + +std::vector classes; +std::vector colors; + +void showLegend(); + +void colorizeSegmentation(const Mat &score, Mat &segm); + +int main(int argc, char** argv) +{ + CommandLineParser parser(argc, argv, keys); + parser.about("Use this script to run semantic segmentation deep learning networks using OpenCV."); + if (argc == 1 || parser.has("help")) + { + parser.printMessage(); + return 0; + } + + float scale = parser.get("scale"); + Scalar mean = parser.get("mean"); + bool swapRB = parser.get("rgb"); + CV_Assert(parser.has("width"), parser.has("height")); + int inpWidth = parser.get("width"); + int inpHeight = parser.get("height"); + String model = parser.get("model"); + String config = parser.get("config"); + String framework = parser.get("framework"); + int backendId = parser.get("backend"); + int targetId = parser.get("target"); + + // Open file with classes names. + if (parser.has("classes")) + { + std::string file = parser.get("classes"); + std::ifstream ifs(file.c_str()); + if (!ifs.is_open()) + CV_Error(Error::StsError, "File " + file + " not found"); + std::string line; + while (std::getline(ifs, line)) + { + classes.push_back(line); + } + } + + // Open file with colors. + if (parser.has("colors")) + { + std::string file = parser.get("colors"); + std::ifstream ifs(file.c_str()); + if (!ifs.is_open()) + CV_Error(Error::StsError, "File " + file + " not found"); + std::string line; + while (std::getline(ifs, line)) + { + std::istringstream colorStr(line.c_str()); + + Vec3b color; + for (int i = 0; i < 3 && !colorStr.eof(); ++i) + colorStr >> color[i]; + colors.push_back(color); + } + } + + CV_Assert(parser.has("model")); + //! [Read and initialize network] + Net net = readNet(model, config, framework); + net.setPreferableBackend(backendId); + net.setPreferableTarget(targetId); + //! [Read and initialize network] + + // Create a window + static const std::string kWinName = "Deep learning semantic segmentation in OpenCV"; + namedWindow(kWinName, WINDOW_NORMAL); + + //! [Open a video file or an image file or a camera stream] + VideoCapture cap; + if (parser.has("input")) + cap.open(parser.get("input")); + else + cap.open(0); + //! [Open a video file or an image file or a camera stream] + + // Process frames. + Mat frame, blob; + while (waitKey(1) < 0) + { + cap >> frame; + if (frame.empty()) + { + waitKey(); + break; + } + + //! [Create a 4D blob from a frame] + blobFromImage(frame, blob, scale, Size(inpWidth, inpHeight), mean, swapRB, false); + //! [Create a 4D blob from a frame] + + //! [Set input blob] + net.setInput(blob); + //! [Set input blob] + //! [Make forward pass] + Mat score = net.forward(); + //! [Make forward pass] + + Mat segm; + colorizeSegmentation(score, segm); + + resize(segm, segm, frame.size(), 0, 0, INTER_NEAREST); + addWeighted(frame, 0.1, segm, 0.9, 0.0, frame); + + // Put efficiency information. 
+        std::vector<double> layersTimes;
+        double freq = getTickFrequency() / 1000;
+        double t = net.getPerfProfile(layersTimes) / freq;
+        std::string label = format("Inference time: %.2f ms", t);
+        putText(frame, label, Point(0, 15), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 255, 0));
+
+        imshow(kWinName, frame);
+        if (!classes.empty())
+            showLegend();
+    }
+    return 0;
+}
+
+void colorizeSegmentation(const Mat &score, Mat &segm)
+{
+    const int rows = score.size[2];
+    const int cols = score.size[3];
+    const int chns = score.size[1];
+
+    if (colors.empty())
+    {
+        // Generate colors.
+        colors.push_back(Vec3b());
+        for (int i = 1; i < chns; ++i)
+        {
+            Vec3b color;
+            for (int j = 0; j < 3; ++j)
+                color[j] = (colors[i - 1][j] + rand() % 256) / 2;
+            colors.push_back(color);
+        }
+    }
+    else if (chns != (int)colors.size())
+    {
+        CV_Error(Error::StsError, format("Number of output classes does not match "
+                                         "number of colors (%d != %d)", chns, (int)colors.size()));
+    }
+
+    Mat maxCl = Mat::zeros(rows, cols, CV_8UC1);
+    Mat maxVal(rows, cols, CV_32FC1, score.data);
+    for (int ch = 1; ch < chns; ch++)
+    {
+        for (int row = 0; row < rows; row++)
+        {
+            const float *ptrScore = score.ptr<float>(0, ch, row);
+            uint8_t *ptrMaxCl = maxCl.ptr<uint8_t>(row);
+            float *ptrMaxVal = maxVal.ptr<float>(row);
+            for (int col = 0; col < cols; col++)
+            {
+                if (ptrScore[col] > ptrMaxVal[col])
+                {
+                    ptrMaxVal[col] = ptrScore[col];
+                    ptrMaxCl[col] = (uchar)ch;
+                }
+            }
+        }
+    }
+
+    segm.create(rows, cols, CV_8UC3);
+    for (int row = 0; row < rows; row++)
+    {
+        const uchar *ptrMaxCl = maxCl.ptr<uchar>(row);
+        Vec3b *ptrSegm = segm.ptr<Vec3b>(row);
+        for (int col = 0; col < cols; col++)
+        {
+            ptrSegm[col] = colors[ptrMaxCl[col]];
+        }
+    }
+}
+
+void showLegend()
+{
+    static const int kBlockHeight = 30;
+    static Mat legend;
+    if (legend.empty())
+    {
+        const int numClasses = (int)classes.size();
+        if ((int)colors.size() != numClasses)
+        {
+            CV_Error(Error::StsError, format("Number of output classes does not match "
+                                             "number of labels (%d != %d)", (int)colors.size(), (int)classes.size()));
+        }
+        legend.create(kBlockHeight * numClasses, 200, CV_8UC3);
+        for (int i = 0; i < numClasses; i++)
+        {
+            Mat block = legend.rowRange(i * kBlockHeight, (i + 1) * kBlockHeight);
+            block.setTo(colors[i]);
+            putText(block, classes[i], Point(0, kBlockHeight / 2), FONT_HERSHEY_SIMPLEX, 0.5, Vec3b(255, 255, 255));
+        }
+        namedWindow("Legend", WINDOW_NORMAL);
+        imshow("Legend", legend);
+    }
+}
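With the keys above, the new C++ sample is driven entirely from the command line, and the preprocessing flags come straight from the README table. A hypothetical ENet invocation as a usage example: the binary name assumes OpenCV's usual sample naming convention, and `model_best.net` is the file name mentioned in the help text of the deleted `torch_enet.cpp`, so treat both as assumptions:

    ./example_dnn_segmentation --model=model_best.net --scale=0.00392 \
        --width=1024 --height=512 --rgb \
        --classes=enet-classes.txt --input=city.png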
diff --git a/samples/dnn/segmentation.py b/samples/dnn/segmentation.py
new file mode 100644
index 0000000000..1a3c5b4553
--- /dev/null
+++ b/samples/dnn/segmentation.py
@@ -0,0 +1,125 @@
+import cv2 as cv
+import argparse
+import numpy as np
+import sys
+
+backends = (cv.dnn.DNN_BACKEND_DEFAULT, cv.dnn.DNN_BACKEND_HALIDE, cv.dnn.DNN_BACKEND_INFERENCE_ENGINE)
+targets = (cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_OPENCL)
+
+parser = argparse.ArgumentParser(description='Use this script to run semantic segmentation deep learning networks using OpenCV.')
+parser.add_argument('--input', help='Path to input image or video file. Skip this argument to capture frames from a camera.')
+parser.add_argument('--model', required=True,
+                    help='Path to a binary file of model contains trained weights. '
+                         'It could be a file with extensions .caffemodel (Caffe), '
+                         '.pb (TensorFlow), .t7 or .net (Torch), .weights (Darknet)')
+parser.add_argument('--config',
+                    help='Path to a text file of model contains network configuration. '
+                         'It could be a file with extensions .prototxt (Caffe), .pbtxt (TensorFlow), .cfg (Darknet)')
+parser.add_argument('--framework', choices=['caffe', 'tensorflow', 'torch', 'darknet'],
+                    help='Optional name of an origin framework of the model. '
+                         'Detect it automatically if it does not set.')
+parser.add_argument('--classes', help='Optional path to a text file with names of classes.')
+parser.add_argument('--colors', help='Optional path to a text file with colors for an every class. '
+                                     'An every color is represented with three values from 0 to 255 in BGR channels order.')
+parser.add_argument('--mean', nargs='+', type=float, default=[0, 0, 0],
+                    help='Preprocess input image by subtracting mean values. '
+                         'Mean values should be in BGR order.')
+parser.add_argument('--scale', type=float, default=1.0,
+                    help='Preprocess input image by multiplying on a scale factor.')
+parser.add_argument('--width', type=int, required=True,
+                    help='Preprocess input image by resizing to a specific width.')
+parser.add_argument('--height', type=int, required=True,
+                    help='Preprocess input image by resizing to a specific height.')
+parser.add_argument('--rgb', action='store_true',
+                    help='Indicate that model works with RGB input images instead BGR ones.')
+parser.add_argument('--backend', choices=backends, default=cv.dnn.DNN_BACKEND_DEFAULT, type=int,
+                    help="Choose one of computation backends: "
+                         "%d: default C++ backend, "
+                         "%d: Halide language (http://halide-lang.org/), "
+                         "%d: Intel's Deep Learning Inference Engine (https://software.seek.intel.com/deep-learning-deployment)" % backends)
+parser.add_argument('--target', choices=targets, default=cv.dnn.DNN_TARGET_CPU, type=int,
+                    help='Choose one of target computation devices: '
+                         '%d: CPU target (by default), '
+                         '%d: OpenCL' % targets)
+args = parser.parse_args()
+
+np.random.seed(324)
+
+# Load names of classes
+classes = None
+if args.classes:
+    with open(args.classes, 'rt') as f:
+        classes = f.read().rstrip('\n').split('\n')
+
+# Load colors
+colors = None
+if args.colors:
+    with open(args.colors, 'rt') as f:
+        colors = [np.array(color.split(' '), np.uint8) for color in f.read().rstrip('\n').split('\n')]
+
+legend = None
+def showLegend(classes):
+    global legend
+    if not classes is None and legend is None:
+        blockHeight = 30
+        assert(len(classes) == len(colors))
+
+        legend = np.zeros((blockHeight * len(colors), 200, 3), np.uint8)
+        for i in range(len(classes)):
+            block = legend[i * blockHeight:(i + 1) * blockHeight]
+            block[:,:] = colors[i]
+            cv.putText(block, classes[i], (0, blockHeight // 2), cv.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255))
+
+        cv.namedWindow('Legend', cv.WINDOW_NORMAL)
+        cv.imshow('Legend', legend)
+        classes = None
+
+# Load a network
+net = cv.dnn.readNet(args.model, args.config, args.framework)
+net.setPreferableBackend(args.backend)
+net.setPreferableTarget(args.target)
+
+winName = 'Deep learning semantic segmentation in OpenCV'
+cv.namedWindow(winName, cv.WINDOW_NORMAL)
+
+cap = cv.VideoCapture(args.input if args.input else 0)
+legend = None
+while cv.waitKey(1) < 0:
+    hasFrame, frame = cap.read()
+    if not hasFrame:
+        cv.waitKey()
+        break
+
+    # Create a 4D blob from a frame.
+    blob = cv.dnn.blobFromImage(frame, args.scale, (args.width, args.height), args.mean, args.rgb, crop=False)
+
+    # Run a model
+    net.setInput(blob)
+    score = net.forward()
+
+    numClasses = score.shape[1]
+    height = score.shape[2]
+    width = score.shape[3]
+
+    # Draw segmentation
+    if not colors:
+        # Generate colors
+        colors = [np.array([0, 0, 0], np.uint8)]
+        for i in range(1, numClasses):
+            colors.append((colors[i - 1] + np.random.randint(0, 256, [3], np.uint8)) / 2)
+
+    classIds = np.argmax(score[0], axis=0)
+    segm = np.stack([colors[idx] for idx in classIds.flatten()])
+    segm = segm.reshape(height, width, 3)
+
+    segm = cv.resize(segm, (frame.shape[1], frame.shape[0]), interpolation=cv.INTER_NEAREST)
+    frame = (0.1 * frame + 0.9 * segm).astype(np.uint8)
+
+    # Put efficiency information.
+    t, _ = net.getPerfProfile()
+    label = 'Inference time: %.2f ms' % (t * 1000.0 / cv.getTickFrequency())
+    cv.putText(frame, label, (0, 15), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0))
+
+    showLegend(classes)
+
+    cv.imshow(winName, frame)
diff --git a/samples/dnn/torch_enet.cpp b/samples/dnn/torch_enet.cpp
deleted file mode 100644
index 37a5db8da4..0000000000
--- a/samples/dnn/torch_enet.cpp
+++ /dev/null
@@ -1,175 +0,0 @@
-/*
-Sample of using OpenCV dnn module with Torch ENet model.
-*/
-
-#include <opencv2/dnn.hpp>
-#include <opencv2/imgproc.hpp>
-#include <opencv2/highgui.hpp>
-using namespace cv;
-using namespace cv::dnn;
-
-#include <fstream>
-#include <iostream>
-#include <cstdlib>
-#include <sstream>
-using namespace std;
-
-const String keys =
-    "{help h    || Sample app for loading ENet Torch model. "
-                   "The model and class names list can be downloaded here: "
-                   "https://www.dropbox.com/sh/dywzk3gyb12hpe5/AAD5YkUa8XgMpHs2gCRgmCVCa }"
-    "{model m   || path to Torch .net model file (model_best.net) }"
-    "{image i   || path to image file }"
-    "{result r  || path to save output blob (optional, binary format, NCHW order) }"
-    "{show s    || whether to show all output channels or not}"
-    "{o_blob    || output blob's name. If empty, last blob's name in net is used}";
-
-static const int kNumClasses = 20;
-
-static const String classes[] = {
-    "Background", "Road", "Sidewalk", "Building", "Wall", "Fence", "Pole",
-    "TrafficLight", "TrafficSign", "Vegetation", "Terrain", "Sky", "Person",
-    "Rider", "Car", "Truck", "Bus", "Train", "Motorcycle", "Bicycle"
-};
-
-static const Vec3b colors[] = {
-    Vec3b(0, 0, 0), Vec3b(244, 126, 205), Vec3b(254, 83, 132), Vec3b(192, 200, 189),
-    Vec3b(50, 56, 251), Vec3b(65, 199, 228), Vec3b(240, 178, 193), Vec3b(201, 67, 188),
-    Vec3b(85, 32, 33), Vec3b(116, 25, 18), Vec3b(162, 33, 72), Vec3b(101, 150, 210),
-    Vec3b(237, 19, 16), Vec3b(149, 197, 72), Vec3b(80, 182, 21), Vec3b(141, 5, 207),
-    Vec3b(189, 156, 39), Vec3b(235, 170, 186), Vec3b(133, 109, 144), Vec3b(231, 160, 96)
-};
-
-static void showLegend();
-
-static void colorizeSegmentation(const Mat &score, Mat &segm);
-
-int main(int argc, char **argv)
-{
-    CommandLineParser parser(argc, argv, keys);
-
-    if (parser.has("help") || argc == 1)
-    {
-        parser.printMessage();
-        return 0;
-    }
-
-    String modelFile = parser.get<String>("model");
-    String imageFile = parser.get<String>("image");
-
-    if (!parser.check())
-    {
-        parser.printErrors();
-        return 0;
-    }
-
-    String resultFile = parser.get<String>("result");
-
-    //! [Read model and initialize network]
-    dnn::Net net = dnn::readNetFromTorch(modelFile);
-
-    //! [Prepare blob]
-    Mat img = imread(imageFile), input;
-    if (img.empty())
-    {
-        std::cerr << "Can't read image from the file: " << imageFile << std::endl;
-        exit(-1);
-    }
-
-    Mat inputBlob = blobFromImage(img, 1./255, Size(1024, 512), Scalar(), true, false);   //Convert Mat to batch of images
-    //! [Prepare blob]
-
-    //! [Set input blob]
-    net.setInput(inputBlob);        //set the network input
-    //! [Set input blob]
-
-    TickMeter tm;
-
-    String oBlob = net.getLayerNames().back();
-    if (!parser.get<String>("o_blob").empty())
-    {
-        oBlob = parser.get<String>("o_blob");
-    }
-
-    //! [Make forward pass]
-    tm.start();
-    Mat result = net.forward(oBlob);
-    tm.stop();
-
-    if (!resultFile.empty()) {
-        CV_Assert(result.isContinuous());
-
-        ofstream fout(resultFile.c_str(), ios::out | ios::binary);
-        fout.write((char*)result.data, result.total() * sizeof(float));
-        fout.close();
-    }
-
-    std::cout << "Output blob: " << result.size[0] << " x " << result.size[1] << " x " << result.size[2] << " x " << result.size[3] << "\n";
-    std::cout << "Inference time, ms: " << tm.getTimeMilli() << std::endl;
-
-    if (parser.has("show"))
-    {
-        Mat segm, show;
-        colorizeSegmentation(result, segm);
-        showLegend();
-
-        cv::resize(segm, segm, img.size(), 0, 0, cv::INTER_NEAREST);
-        addWeighted(img, 0.1, segm, 0.9, 0.0, show);
-
-        imshow("Result", show);
-        waitKey();
-    }
-    return 0;
-} //main
-
-static void showLegend()
-{
-    static const int kBlockHeight = 30;
-
-    cv::Mat legend(kBlockHeight * kNumClasses, 200, CV_8UC3);
-    for(int i = 0; i < kNumClasses; i++)
-    {
-        cv::Mat block = legend.rowRange(i * kBlockHeight, (i + 1) * kBlockHeight);
-        block.setTo(colors[i]);
-        putText(block, classes[i], Point(0, kBlockHeight / 2), FONT_HERSHEY_SIMPLEX, 0.5, Vec3b(255, 255, 255));
-    }
-    imshow("Legend", legend);
-}
-
-static void colorizeSegmentation(const Mat &score, Mat &segm)
-{
-    const int rows = score.size[2];
-    const int cols = score.size[3];
-    const int chns = score.size[1];
-
-    Mat maxCl = Mat::zeros(rows, cols, CV_8UC1);
-    Mat maxVal(rows, cols, CV_32FC1, score.data);
-    for (int ch = 1; ch < chns; ch++)
-    {
-        for (int row = 0; row < rows; row++)
-        {
-            const float *ptrScore = score.ptr<float>(0, ch, row);
-            uint8_t *ptrMaxCl = maxCl.ptr<uint8_t>(row);
-            float *ptrMaxVal = maxVal.ptr<float>(row);
-            for (int col = 0; col < cols; col++)
-            {
-                if (ptrScore[col] > ptrMaxVal[col])
-                {
-                    ptrMaxVal[col] = ptrScore[col];
-                    ptrMaxCl[col] = (uchar)ch;
-                }
-            }
-        }
-    }
-
-    segm.create(rows, cols, CV_8UC3);
-    for (int row = 0; row < rows; row++)
-    {
-        const uchar *ptrMaxCl = maxCl.ptr<uchar>(row);
-        Vec3b *ptrSegm = segm.ptr<Vec3b>(row);
-        for (int col = 0; col < cols; col++)
-        {
-            ptrSegm[col] = colors[ptrMaxCl[col]];
-        }
-    }
-}