mirror of
https://github.com/opencv/opencv.git
synced 2025-06-07 17:44:04 +08:00
Set output layers names and types for models in DLDT's intermediate representation
This commit is contained in:
parent
e4b51fa8ad
commit
346871e27f
@ -1993,11 +1993,17 @@ Net Net::readFromModelOptimizer(const String& xml, const String& bin)
|
|||||||
backendNode->net = Ptr<InfEngineBackendNet>(new InfEngineBackendNet(ieNet));
|
backendNode->net = Ptr<InfEngineBackendNet>(new InfEngineBackendNet(ieNet));
|
||||||
for (auto& it : ieNet.getOutputsInfo())
|
for (auto& it : ieNet.getOutputsInfo())
|
||||||
{
|
{
|
||||||
|
Ptr<Layer> cvLayer(new InfEngineBackendLayer(it.second));
|
||||||
|
InferenceEngine::CNNLayerPtr ieLayer = ieNet.getLayerByName(it.first.c_str());
|
||||||
|
CV_Assert(ieLayer);
|
||||||
|
|
||||||
LayerParams lp;
|
LayerParams lp;
|
||||||
int lid = cvNet.addLayer(it.first, "", lp);
|
int lid = cvNet.addLayer(it.first, "", lp);
|
||||||
|
|
||||||
LayerData& ld = cvNet.impl->layers[lid];
|
LayerData& ld = cvNet.impl->layers[lid];
|
||||||
ld.layerInstance = Ptr<Layer>(new InfEngineBackendLayer(it.second));
|
cvLayer->name = it.first;
|
||||||
|
cvLayer->type = ieLayer->type;
|
||||||
|
ld.layerInstance = cvLayer;
|
||||||
ld.backendNodes[DNN_BACKEND_INFERENCE_ENGINE] = backendNode;
|
ld.backendNodes[DNN_BACKEND_INFERENCE_ENGINE] = backendNode;
|
||||||
|
|
||||||
for (int i = 0; i < inputsNames.size(); ++i)
|
for (int i = 0; i < inputsNames.size(); ++i)
|
||||||
|
@ -925,6 +925,10 @@ TEST(Layer_Test_Convolution_DLDT, Accuracy)
|
|||||||
Mat out = net.forward();
|
Mat out = net.forward();
|
||||||
|
|
||||||
normAssert(outDefault, out);
|
normAssert(outDefault, out);
|
||||||
|
|
||||||
|
std::vector<int> outLayers = net.getUnconnectedOutLayers();
|
||||||
|
ASSERT_EQ(net.getLayer(outLayers[0])->name, "output_merge");
|
||||||
|
ASSERT_EQ(net.getLayer(outLayers[0])->type, "Concat");
|
||||||
}
|
}
|
||||||
|
|
||||||
// 1. Create a .prototxt file with the following network:
|
// 1. Create a .prototxt file with the following network:
|
||||||
|
@ -22,6 +22,7 @@ const char* keys =
|
|||||||
"{ height | -1 | Preprocess input image by resizing to a specific height. }"
|
"{ height | -1 | Preprocess input image by resizing to a specific height. }"
|
||||||
"{ rgb | | Indicate that model works with RGB input images instead BGR ones. }"
|
"{ rgb | | Indicate that model works with RGB input images instead BGR ones. }"
|
||||||
"{ thr | .5 | Confidence threshold. }"
|
"{ thr | .5 | Confidence threshold. }"
|
||||||
|
"{ thr | .4 | Non-maximum suppression threshold. }"
|
||||||
"{ backend | 0 | Choose one of computation backends: "
|
"{ backend | 0 | Choose one of computation backends: "
|
||||||
"0: automatically (by default), "
|
"0: automatically (by default), "
|
||||||
"1: Halide language (http://halide-lang.org/), "
|
"1: Halide language (http://halide-lang.org/), "
|
||||||
@ -37,7 +38,7 @@ const char* keys =
|
|||||||
using namespace cv;
|
using namespace cv;
|
||||||
using namespace dnn;
|
using namespace dnn;
|
||||||
|
|
||||||
float confThreshold;
|
float confThreshold, nmsThreshold;
|
||||||
std::vector<std::string> classes;
|
std::vector<std::string> classes;
|
||||||
|
|
||||||
void postprocess(Mat& frame, const std::vector<Mat>& out, Net& net);
|
void postprocess(Mat& frame, const std::vector<Mat>& out, Net& net);
|
||||||
@ -59,6 +60,7 @@ int main(int argc, char** argv)
|
|||||||
}
|
}
|
||||||
|
|
||||||
confThreshold = parser.get<float>("thr");
|
confThreshold = parser.get<float>("thr");
|
||||||
|
nmsThreshold = parser.get<float>("nms");
|
||||||
float scale = parser.get<float>("scale");
|
float scale = parser.get<float>("scale");
|
||||||
Scalar mean = parser.get<Scalar>("mean");
|
Scalar mean = parser.get<Scalar>("mean");
|
||||||
bool swapRB = parser.get<bool>("rgb");
|
bool swapRB = parser.get<bool>("rgb");
|
||||||
@ -144,6 +146,9 @@ void postprocess(Mat& frame, const std::vector<Mat>& outs, Net& net)
|
|||||||
static std::vector<int> outLayers = net.getUnconnectedOutLayers();
|
static std::vector<int> outLayers = net.getUnconnectedOutLayers();
|
||||||
static std::string outLayerType = net.getLayer(outLayers[0])->type;
|
static std::string outLayerType = net.getLayer(outLayers[0])->type;
|
||||||
|
|
||||||
|
std::vector<int> classIds;
|
||||||
|
std::vector<float> confidences;
|
||||||
|
std::vector<Rect> boxes;
|
||||||
if (net.getLayer(0)->outputNameToIndex("im_info") != -1) // Faster-RCNN or R-FCN
|
if (net.getLayer(0)->outputNameToIndex("im_info") != -1) // Faster-RCNN or R-FCN
|
||||||
{
|
{
|
||||||
// Network produces output blob with a shape 1x1xNx7 where N is a number of
|
// Network produces output blob with a shape 1x1xNx7 where N is a number of
|
||||||
@ -160,8 +165,11 @@ void postprocess(Mat& frame, const std::vector<Mat>& outs, Net& net)
|
|||||||
int top = (int)data[i + 4];
|
int top = (int)data[i + 4];
|
||||||
int right = (int)data[i + 5];
|
int right = (int)data[i + 5];
|
||||||
int bottom = (int)data[i + 6];
|
int bottom = (int)data[i + 6];
|
||||||
int classId = (int)(data[i + 1]) - 1; // Skip 0th background class id.
|
int width = right - left + 1;
|
||||||
drawPred(classId, confidence, left, top, right, bottom, frame);
|
int height = bottom - top + 1;
|
||||||
|
classIds.push_back((int)(data[i + 1]) - 1); // Skip 0th background class id.
|
||||||
|
boxes.push_back(Rect(left, top, width, height));
|
||||||
|
confidences.push_back(confidence);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -181,16 +189,16 @@ void postprocess(Mat& frame, const std::vector<Mat>& outs, Net& net)
|
|||||||
int top = (int)(data[i + 4] * frame.rows);
|
int top = (int)(data[i + 4] * frame.rows);
|
||||||
int right = (int)(data[i + 5] * frame.cols);
|
int right = (int)(data[i + 5] * frame.cols);
|
||||||
int bottom = (int)(data[i + 6] * frame.rows);
|
int bottom = (int)(data[i + 6] * frame.rows);
|
||||||
int classId = (int)(data[i + 1]) - 1; // Skip 0th background class id.
|
int width = right - left + 1;
|
||||||
drawPred(classId, confidence, left, top, right, bottom, frame);
|
int height = bottom - top + 1;
|
||||||
|
classIds.push_back((int)(data[i + 1]) - 1); // Skip 0th background class id.
|
||||||
|
boxes.push_back(Rect(left, top, width, height));
|
||||||
|
confidences.push_back(confidence);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (outLayerType == "Region")
|
else if (outLayerType == "Region")
|
||||||
{
|
{
|
||||||
std::vector<int> classIds;
|
|
||||||
std::vector<float> confidences;
|
|
||||||
std::vector<Rect> boxes;
|
|
||||||
for (size_t i = 0; i < outs.size(); ++i)
|
for (size_t i = 0; i < outs.size(); ++i)
|
||||||
{
|
{
|
||||||
// Network produces output blob with a shape NxC where N is a number of
|
// Network produces output blob with a shape NxC where N is a number of
|
||||||
@ -218,18 +226,19 @@ void postprocess(Mat& frame, const std::vector<Mat>& outs, Net& net)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
std::vector<int> indices;
|
|
||||||
NMSBoxes(boxes, confidences, confThreshold, 0.4f, indices);
|
|
||||||
for (size_t i = 0; i < indices.size(); ++i)
|
|
||||||
{
|
|
||||||
int idx = indices[i];
|
|
||||||
Rect box = boxes[idx];
|
|
||||||
drawPred(classIds[idx], confidences[idx], box.x, box.y,
|
|
||||||
box.x + box.width, box.y + box.height, frame);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
CV_Error(Error::StsNotImplemented, "Unknown output layer type: " + outLayerType);
|
CV_Error(Error::StsNotImplemented, "Unknown output layer type: " + outLayerType);
|
||||||
|
|
||||||
|
std::vector<int> indices;
|
||||||
|
NMSBoxes(boxes, confidences, confThreshold, nmsThreshold, indices);
|
||||||
|
for (size_t i = 0; i < indices.size(); ++i)
|
||||||
|
{
|
||||||
|
int idx = indices[i];
|
||||||
|
Rect box = boxes[idx];
|
||||||
|
drawPred(classIds[idx], confidences[idx], box.x, box.y,
|
||||||
|
box.x + box.width, box.y + box.height, frame);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void drawPred(int classId, float conf, int left, int top, int right, int bottom, Mat& frame)
|
void drawPred(int classId, float conf, int left, int top, int right, int bottom, Mat& frame)
|
||||||
|
@ -31,6 +31,7 @@ parser.add_argument('--height', type=int,
|
|||||||
parser.add_argument('--rgb', action='store_true',
|
parser.add_argument('--rgb', action='store_true',
|
||||||
help='Indicate that model works with RGB input images instead BGR ones.')
|
help='Indicate that model works with RGB input images instead BGR ones.')
|
||||||
parser.add_argument('--thr', type=float, default=0.5, help='Confidence threshold')
|
parser.add_argument('--thr', type=float, default=0.5, help='Confidence threshold')
|
||||||
|
parser.add_argument('--nms', type=float, default=0.4, help='Non-maximum suppression threshold')
|
||||||
parser.add_argument('--backend', choices=backends, default=cv.dnn.DNN_BACKEND_DEFAULT, type=int,
|
parser.add_argument('--backend', choices=backends, default=cv.dnn.DNN_BACKEND_DEFAULT, type=int,
|
||||||
help="Choose one of computation backends: "
|
help="Choose one of computation backends: "
|
||||||
"%d: automatically (by default), "
|
"%d: automatically (by default), "
|
||||||
@ -57,6 +58,7 @@ net.setPreferableBackend(args.backend)
|
|||||||
net.setPreferableTarget(args.target)
|
net.setPreferableTarget(args.target)
|
||||||
|
|
||||||
confThreshold = args.thr
|
confThreshold = args.thr
|
||||||
|
nmsThreshold = args.nms
|
||||||
|
|
||||||
def getOutputsNames(net):
|
def getOutputsNames(net):
|
||||||
layersNames = net.getLayerNames()
|
layersNames = net.getLayerNames()
|
||||||
@ -86,36 +88,43 @@ def postprocess(frame, outs):
|
|||||||
lastLayerId = net.getLayerId(layerNames[-1])
|
lastLayerId = net.getLayerId(layerNames[-1])
|
||||||
lastLayer = net.getLayer(lastLayerId)
|
lastLayer = net.getLayer(lastLayerId)
|
||||||
|
|
||||||
|
classIds = []
|
||||||
|
confidences = []
|
||||||
|
boxes = []
|
||||||
if net.getLayer(0).outputNameToIndex('im_info') != -1: # Faster-RCNN or R-FCN
|
if net.getLayer(0).outputNameToIndex('im_info') != -1: # Faster-RCNN or R-FCN
|
||||||
# Network produces output blob with a shape 1x1xNx7 where N is a number of
|
# Network produces output blob with a shape 1x1xNx7 where N is a number of
|
||||||
# detections and an every detection is a vector of values
|
# detections and an every detection is a vector of values
|
||||||
# [batchId, classId, confidence, left, top, right, bottom]
|
# [batchId, classId, confidence, left, top, right, bottom]
|
||||||
assert(len(outs) == 1)
|
for out in outs:
|
||||||
out = outs[0]
|
for detection in out[0, 0]:
|
||||||
for detection in out[0, 0]:
|
confidence = detection[2]
|
||||||
confidence = detection[2]
|
if confidence > confThreshold:
|
||||||
if confidence > confThreshold:
|
left = int(detection[3])
|
||||||
left = int(detection[3])
|
top = int(detection[4])
|
||||||
top = int(detection[4])
|
right = int(detection[5])
|
||||||
right = int(detection[5])
|
bottom = int(detection[6])
|
||||||
bottom = int(detection[6])
|
width = right - left + 1
|
||||||
classId = int(detection[1]) - 1 # Skip background label
|
height = bottom - top + 1
|
||||||
drawPred(classId, confidence, left, top, right, bottom)
|
classIds.append(int(detection[1]) - 1) # Skip background label
|
||||||
|
confidences.append(float(confidence))
|
||||||
|
boxes.append([left, top, width, height])
|
||||||
elif lastLayer.type == 'DetectionOutput':
|
elif lastLayer.type == 'DetectionOutput':
|
||||||
# Network produces output blob with a shape 1x1xNx7 where N is a number of
|
# Network produces output blob with a shape 1x1xNx7 where N is a number of
|
||||||
# detections and an every detection is a vector of values
|
# detections and an every detection is a vector of values
|
||||||
# [batchId, classId, confidence, left, top, right, bottom]
|
# [batchId, classId, confidence, left, top, right, bottom]
|
||||||
assert(len(outs) == 1)
|
for out in outs:
|
||||||
out = outs[0]
|
for detection in out[0, 0]:
|
||||||
for detection in out[0, 0]:
|
confidence = detection[2]
|
||||||
confidence = detection[2]
|
if confidence > confThreshold:
|
||||||
if confidence > confThreshold:
|
left = int(detection[3] * frameWidth)
|
||||||
left = int(detection[3] * frameWidth)
|
top = int(detection[4] * frameHeight)
|
||||||
top = int(detection[4] * frameHeight)
|
right = int(detection[5] * frameWidth)
|
||||||
right = int(detection[5] * frameWidth)
|
bottom = int(detection[6] * frameHeight)
|
||||||
bottom = int(detection[6] * frameHeight)
|
width = right - left + 1
|
||||||
classId = int(detection[1]) - 1 # Skip background label
|
height = bottom - top + 1
|
||||||
drawPred(classId, confidence, left, top, right, bottom)
|
classIds.append(int(detection[1]) - 1) # Skip background label
|
||||||
|
confidences.append(float(confidence))
|
||||||
|
boxes.append([left, top, width, height])
|
||||||
elif lastLayer.type == 'Region':
|
elif lastLayer.type == 'Region':
|
||||||
# Network produces output blob with a shape NxC where N is a number of
|
# Network produces output blob with a shape NxC where N is a number of
|
||||||
# detected objects and C is a number of classes + 4 where the first 4
|
# detected objects and C is a number of classes + 4 where the first 4
|
||||||
@ -138,15 +147,19 @@ def postprocess(frame, outs):
|
|||||||
classIds.append(classId)
|
classIds.append(classId)
|
||||||
confidences.append(float(confidence))
|
confidences.append(float(confidence))
|
||||||
boxes.append([left, top, width, height])
|
boxes.append([left, top, width, height])
|
||||||
indices = cv.dnn.NMSBoxes(boxes, confidences, confThreshold, 0.4)
|
else:
|
||||||
for i in indices:
|
print('Unknown output layer type: ' + lastLayer.type)
|
||||||
i = i[0]
|
exit()
|
||||||
box = boxes[i]
|
|
||||||
left = box[0]
|
indices = cv.dnn.NMSBoxes(boxes, confidences, confThreshold, nmsThreshold)
|
||||||
top = box[1]
|
for i in indices:
|
||||||
width = box[2]
|
i = i[0]
|
||||||
height = box[3]
|
box = boxes[i]
|
||||||
drawPred(classIds[i], confidences[i], left, top, left + width, top + height)
|
left = box[0]
|
||||||
|
top = box[1]
|
||||||
|
width = box[2]
|
||||||
|
height = box[3]
|
||||||
|
drawPred(classIds[i], confidences[i], left, top, left + width, top + height)
|
||||||
|
|
||||||
# Process inputs
|
# Process inputs
|
||||||
winName = 'Deep learning object detection in OpenCV'
|
winName = 'Deep learning object detection in OpenCV'
|
||||||
|
Loading…
Reference in New Issue
Block a user