Merge pull request #13486 from allnes:dnn_rework

2024-11-29 13:47:32 +08:00 · 2018-12-25 14:27:45 +00:00 · 2018-12-25 14:27:45 +00:00 · 148aee31e4
commit 148aee31e4
parent d9089741bc a208c3bf04
2 changed files with 53 additions and 42 deletions
--- a/samples/dnn/openpose.cpp
+++ b/samples/dnn/openpose.cpp
@ -57,21 +57,26 @@ const int POSE_PAIRS[3][20][2] = {
 int main(int argc, char **argv)
 {
    CommandLineParser parser(argc, argv,
-        "{ h help           | false | print this help message }"
-        "{ p proto          |       | (required) model configuration, e.g. hand/pose.prototxt }"
-        "{ m model          |       | (required) model weights, e.g. hand/pose_iter_102000.caffemodel }"
-        "{ i image          |       | (required) path to image file (containing a single person, or hand) }"
-        "{ width            |  368  | Preprocess input image by resizing to a specific width. }"
-        "{ height           |  368  | Preprocess input image by resizing to a specific height. }"
-        "{ t threshold      |  0.1  | threshold or confidence value for the heatmap }"
+        "{ h help           | false     | print this help message }"
+        "{ p proto          |           | (required) model configuration, e.g. hand/pose.prototxt }"
+        "{ m model          |           | (required) model weights, e.g. hand/pose_iter_102000.caffemodel }"
+        "{ i image          |           | (required) path to image file (containing a single person, or hand) }"
+        "{ d dataset        |           | specify what kind of model was trained. It could be (COCO, MPI, HAND) depends on dataset. }"
+        "{ width            |  368      | Preprocess input image by resizing to a specific width. }"
+        "{ height           |  368      | Preprocess input image by resizing to a specific height. }"
+        "{ t threshold      |  0.1      | threshold or confidence value for the heatmap }"
+        "{ s scale          |  0.003922 | scale for blob }"
    );

    String modelTxt = samples::findFile(parser.get<string>("proto"));
    String modelBin = samples::findFile(parser.get<string>("model"));
    String imageFile = samples::findFile(parser.get<String>("image"));
+    String dataset = parser.get<String>("dataset");
    int W_in = parser.get<int>("width");
    int H_in = parser.get<int>("height");
    float thresh = parser.get<float>("threshold");
+    float scale  = parser.get<float>("scale");
+
    if (parser.get<bool>("help") || modelTxt.empty() || modelBin.empty() || imageFile.empty())
    {
        cout << "A sample app to demonstrate human or hand pose detection with a pretrained OpenPose dnn." << endl;
@ -79,9 +84,18 @@ int main(int argc, char **argv)
        return 0;
    }

-    // read the network model
-    Net net = readNetFromCaffe(modelTxt, modelBin);
+    int midx, npairs, nparts;
+         if (!dataset.compare("COCO")) {  midx = 0; npairs = 17; nparts = 18; }
+    else if (!dataset.compare("MPI"))  {  midx = 1; npairs = 14; nparts = 16; }
+    else if (!dataset.compare("HAND")) {  midx = 2; npairs = 20; nparts = 22; }
+    else
+    {
+        std::cerr << "Can't interpret dataset parameter: " << dataset << std::endl;
+        exit(-1);
+    }

+    // read the network model
+    Net net = readNet(modelBin, modelTxt);
    // and the image
    Mat img = imread(imageFile);
    if (img.empty())
@ -91,39 +105,14 @@ int main(int argc, char **argv)
    }

    // send it through the network
-    Mat inputBlob = blobFromImage(img, 1.0 / 255, Size(W_in, H_in), Scalar(0, 0, 0), false, false);
+    Mat inputBlob = blobFromImage(img, scale, Size(W_in, H_in), Scalar(0, 0, 0), false, false);
    net.setInput(inputBlob);
    Mat result = net.forward();
    // the result is an array of "heatmaps", the probability of a body part being in location x,y

-    int midx, npairs;
-    int nparts = result.size[1];
    int H = result.size[2];
    int W = result.size[3];

-    // find out, which model we have
-    if (nparts == 19)
-    {   // COCO body
-        midx   = 0;
-        npairs = 17;
-        nparts = 18; // skip background
-    }
-    else if (nparts == 16)
-    {   // MPI body
-        midx   = 1;
-        npairs = 14;
-    }
-    else if (nparts == 22)
-    {   // hand
-        midx   = 2;
-        npairs = 20;
-    }
-    else
-    {
-        cerr << "there should be 19 parts for the COCO model, 16 for MPI, or 22 for the hand one, but this model has " << nparts << " parts." << endl;
-        return (0);
-    }
-
    // find the position of the body parts
    vector<Point> points(22);
    for (int n=0; n<nparts; n++)
--- a/samples/dnn/openpose.py
+++ b/samples/dnn/openpose.py
@ -1,5 +1,5 @@
 # To use Inference Engine backend, specify location of plugins:
-# export LD_LIBRARY_PATH=/opt/intel/deeplearning_deploymenttoolkit/deployment_tools/external/mklml_lnx/lib:$LD_LIBRARY_PATH
+# source /opt/intel/computer_vision_sdk/bin/setupvars.sh
 import cv2 as cv
 import numpy as np
 import argparse
@ -12,10 +12,11 @@ parser.add_argument('--input', help='Path to image or video. Skip to capture fra
 parser.add_argument('--proto', help='Path to .prototxt')
 parser.add_argument('--model', help='Path to .caffemodel')
 parser.add_argument('--dataset', help='Specify what kind of model was trained. '
-                                      'It could be (COCO, MPI) depends on dataset.')
+                                      'It could be (COCO, MPI, HAND) depends on dataset.')
 parser.add_argument('--thr', default=0.1, type=float, help='Threshold value for pose parts heat map')
 parser.add_argument('--width', default=368, type=int, help='Resize input to specific width.')
 parser.add_argument('--height', default=368, type=int, help='Resize input to specific height.')
+parser.add_argument('--scale', default=0.003922, type=float, help='Scale for blob.')

 args = parser.parse_args()

@ -30,8 +31,7 @@ if args.dataset == 'COCO':
                   ["Neck", "RHip"], ["RHip", "RKnee"], ["RKnee", "RAnkle"], ["Neck", "LHip"],
                   ["LHip", "LKnee"], ["LKnee", "LAnkle"], ["Neck", "Nose"], ["Nose", "REye"],
                   ["REye", "REar"], ["Nose", "LEye"], ["LEye", "LEar"] ]
-else:
-    assert(args.dataset == 'MPI')
+elif args.dataset == 'MPI':
    BODY_PARTS = { "Head": 0, "Neck": 1, "RShoulder": 2, "RElbow": 3, "RWrist": 4,
                   "LShoulder": 5, "LElbow": 6, "LWrist": 7, "RHip": 8, "RKnee": 9,
                   "RAnkle": 10, "LHip": 11, "LKnee": 12, "LAnkle": 13, "Chest": 14,
@ -41,11 +41,33 @@ else:
                   ["RElbow", "RWrist"], ["Neck", "LShoulder"], ["LShoulder", "LElbow"],
                   ["LElbow", "LWrist"], ["Neck", "Chest"], ["Chest", "RHip"], ["RHip", "RKnee"],
                   ["RKnee", "RAnkle"], ["Chest", "LHip"], ["LHip", "LKnee"], ["LKnee", "LAnkle"] ]
+else:
+    assert(args.dataset == 'HAND')
+    BODY_PARTS = { "Wrist": 0,
+                   "ThumbMetacarpal": 1, "ThumbProximal": 2, "ThumbMiddle": 3, "ThumbDistal": 4,
+                   "IndexFingerMetacarpal": 5, "IndexFingerProximal": 6, "IndexFingerMiddle": 7, "IndexFingerDistal": 8,
+                   "MiddleFingerMetacarpal": 9, "MiddleFingerProximal": 10, "MiddleFingerMiddle": 11, "MiddleFingerDistal": 12,
+                   "RingFingerMetacarpal": 13, "RingFingerProximal": 14, "RingFingerMiddle": 15, "RingFingerDistal": 16,
+                   "LittleFingerMetacarpal": 17, "LittleFingerProximal": 18, "LittleFingerMiddle": 19, "LittleFingerDistal": 20,
+                 }
+
+    POSE_PAIRS = [ ["Wrist", "ThumbMetacarpal"], ["ThumbMetacarpal", "ThumbProximal"],
+                   ["ThumbProximal", "ThumbMiddle"], ["ThumbMiddle", "ThumbDistal"],
+                   ["Wrist", "IndexFingerMetacarpal"], ["IndexFingerMetacarpal", "IndexFingerProximal"],
+                   ["IndexFingerProximal", "IndexFingerMiddle"], ["IndexFingerMiddle", "IndexFingerDistal"],
+                   ["Wrist", "MiddleFingerMetacarpal"], ["MiddleFingerMetacarpal", "MiddleFingerProximal"],
+                   ["MiddleFingerProximal", "MiddleFingerMiddle"], ["MiddleFingerMiddle", "MiddleFingerDistal"],
+                   ["Wrist", "RingFingerMetacarpal"], ["RingFingerMetacarpal", "RingFingerProximal"],
+                   ["RingFingerProximal", "RingFingerMiddle"], ["RingFingerMiddle", "RingFingerDistal"],
+                   ["Wrist", "LittleFingerMetacarpal"], ["LittleFingerMetacarpal", "LittleFingerProximal"],
+                   ["LittleFingerProximal", "LittleFingerMiddle"], ["LittleFingerMiddle", "LittleFingerDistal"] ]
+

 inWidth = args.width
 inHeight = args.height
+inScale = args.scale

-net = cv.dnn.readNetFromCaffe(cv.samples.findFile(args.proto), cv.samples.findFile(args.model))
+net = cv.dnn.readNet(cv.samples.findFile(args.proto), cv.samples.findFile(args.model))

 cap = cv.VideoCapture(args.input if args.input else 0)

@ -57,12 +79,12 @@ while cv.waitKey(1) < 0:

    frameWidth = frame.shape[1]
    frameHeight = frame.shape[0]
-    inp = cv.dnn.blobFromImage(frame, 1.0 / 255, (inWidth, inHeight),
+    inp = cv.dnn.blobFromImage(frame, inScale, (inWidth, inHeight),
                              (0, 0, 0), swapRB=False, crop=False)
    net.setInput(inp)
    out = net.forward()

-    assert(len(BODY_PARTS) == out.shape[1])
+    assert(len(BODY_PARTS) <= out.shape[1])

    points = []
    for i in range(len(BODY_PARTS)):