Set output layer names and types for models in DLDT's intermediate representation
commit 346871e27f
parent e4b51fa8ad
@@ -1993,11 +1993,17 @@ Net Net::readFromModelOptimizer(const String& xml, const String& bin)
     backendNode->net = Ptr<InfEngineBackendNet>(new InfEngineBackendNet(ieNet));
     for (auto& it : ieNet.getOutputsInfo())
     {
+        Ptr<Layer> cvLayer(new InfEngineBackendLayer(it.second));
+        InferenceEngine::CNNLayerPtr ieLayer = ieNet.getLayerByName(it.first.c_str());
+        CV_Assert(ieLayer);
+
         LayerParams lp;
         int lid = cvNet.addLayer(it.first, "", lp);

         LayerData& ld = cvNet.impl->layers[lid];
-        ld.layerInstance = Ptr<Layer>(new InfEngineBackendLayer(it.second));
+        cvLayer->name = it.first;
+        cvLayer->type = ieLayer->type;
+        ld.layerInstance = cvLayer;
         ld.backendNodes[DNN_BACKEND_INFERENCE_ENGINE] = backendNode;
     }
     for (int i = 0; i < inputsNames.size(); ++i)
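For orientation, here is a minimal Python sketch (assuming an OpenCV build with the Inference Engine backend and hypothetical IR files model.xml / model.bin) of how this change surfaces through the public API: the unconnected output layers of a network read from DLDT's intermediate representation now report the layer names and types taken from the IR, which is what the test below asserts.

import cv2 as cv

# Hypothetical file names produced by the Model Optimizer; substitute your own IR.
net = cv.dnn.readNetFromModelOptimizer('model.xml', 'model.bin')

# With this commit the output layers carry real names and types instead of placeholders.
for layerId in net.getUnconnectedOutLayers():
    layer = net.getLayer(int(layerId))
    print(layer.name, layer.type)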
@@ -925,6 +925,10 @@ TEST(Layer_Test_Convolution_DLDT, Accuracy)
     Mat out = net.forward();

     normAssert(outDefault, out);
+
+    std::vector<int> outLayers = net.getUnconnectedOutLayers();
+    ASSERT_EQ(net.getLayer(outLayers[0])->name, "output_merge");
+    ASSERT_EQ(net.getLayer(outLayers[0])->type, "Concat");
 }

 // 1. Create a .prototxt file with the following network:
@@ -22,6 +22,7 @@ const char* keys =
     "{ height  | -1 | Preprocess input image by resizing to a specific height. }"
     "{ rgb     |    | Indicate that model works with RGB input images instead BGR ones. }"
     "{ thr     | .5 | Confidence threshold. }"
+    "{ nms     | .4 | Non-maximum suppression threshold. }"
     "{ backend |  0 | Choose one of computation backends: "
                      "0: automatically (by default), "
                      "1: Halide language (http://halide-lang.org/), "
@@ -37,7 +38,7 @@ const char* keys =
 using namespace cv;
 using namespace dnn;

-float confThreshold;
+float confThreshold, nmsThreshold;
 std::vector<std::string> classes;

 void postprocess(Mat& frame, const std::vector<Mat>& out, Net& net);
@@ -59,6 +60,7 @@ int main(int argc, char** argv)
     }

     confThreshold = parser.get<float>("thr");
+    nmsThreshold = parser.get<float>("nms");
     float scale = parser.get<float>("scale");
     Scalar mean = parser.get<Scalar>("mean");
     bool swapRB = parser.get<bool>("rgb");
@@ -144,6 +146,9 @@ void postprocess(Mat& frame, const std::vector<Mat>& outs, Net& net)
     static std::vector<int> outLayers = net.getUnconnectedOutLayers();
     static std::string outLayerType = net.getLayer(outLayers[0])->type;

+    std::vector<int> classIds;
+    std::vector<float> confidences;
+    std::vector<Rect> boxes;
     if (net.getLayer(0)->outputNameToIndex("im_info") != -1)  // Faster-RCNN or R-FCN
     {
         // Network produces output blob with a shape 1x1xNx7 where N is a number of
@@ -160,8 +165,11 @@ void postprocess(Mat& frame, const std::vector<Mat>& outs, Net& net)
                 int top = (int)data[i + 4];
                 int right = (int)data[i + 5];
                 int bottom = (int)data[i + 6];
-                int classId = (int)(data[i + 1]) - 1;  // Skip 0th background class id.
-                drawPred(classId, confidence, left, top, right, bottom, frame);
+                int width = right - left + 1;
+                int height = bottom - top + 1;
+                classIds.push_back((int)(data[i + 1]) - 1);  // Skip 0th background class id.
+                boxes.push_back(Rect(left, top, width, height));
+                confidences.push_back(confidence);
             }
         }
     }
@@ -181,16 +189,16 @@ void postprocess(Mat& frame, const std::vector<Mat>& outs, Net& net)
                 int top = (int)(data[i + 4] * frame.rows);
                 int right = (int)(data[i + 5] * frame.cols);
                 int bottom = (int)(data[i + 6] * frame.rows);
-                int classId = (int)(data[i + 1]) - 1;  // Skip 0th background class id.
-                drawPred(classId, confidence, left, top, right, bottom, frame);
+                int width = right - left + 1;
+                int height = bottom - top + 1;
+                classIds.push_back((int)(data[i + 1]) - 1);  // Skip 0th background class id.
+                boxes.push_back(Rect(left, top, width, height));
+                confidences.push_back(confidence);
             }
         }
     }
     else if (outLayerType == "Region")
     {
-        std::vector<int> classIds;
-        std::vector<float> confidences;
-        std::vector<Rect> boxes;
         for (size_t i = 0; i < outs.size(); ++i)
         {
             // Network produces output blob with a shape NxC where N is a number of
@@ -218,8 +226,12 @@ void postprocess(Mat& frame, const std::vector<Mat>& outs, Net& net)
                 }
             }
         }
+    }
+    else
+        CV_Error(Error::StsNotImplemented, "Unknown output layer type: " + outLayerType);
+
     std::vector<int> indices;
-    NMSBoxes(boxes, confidences, confThreshold, 0.4f, indices);
+    NMSBoxes(boxes, confidences, confThreshold, nmsThreshold, indices);
     for (size_t i = 0; i < indices.size(); ++i)
     {
         int idx = indices[i];
@@ -228,9 +240,6 @@ void postprocess(Mat& frame, const std::vector<Mat>& outs, Net& net)
                  box.x + box.width, box.y + box.height, frame);
     }
-    }
-    else
-        CV_Error(Error::StsNotImplemented, "Unknown output layer type: " + outLayerType);
 }

 void drawPred(int classId, float conf, int left, int top, int right, int bottom, Mat& frame)
 {
@@ -31,6 +31,7 @@ parser.add_argument('--height', type=int,
 parser.add_argument('--rgb', action='store_true',
                     help='Indicate that model works with RGB input images instead BGR ones.')
 parser.add_argument('--thr', type=float, default=0.5, help='Confidence threshold')
+parser.add_argument('--nms', type=float, default=0.4, help='Non-maximum suppression threshold')
 parser.add_argument('--backend', choices=backends, default=cv.dnn.DNN_BACKEND_DEFAULT, type=int,
                     help="Choose one of computation backends: "
                          "%d: automatically (by default), "
@@ -57,6 +58,7 @@ net.setPreferableBackend(args.backend)
 net.setPreferableTarget(args.target)

 confThreshold = args.thr
+nmsThreshold = args.nms

 def getOutputsNames(net):
     layersNames = net.getLayerNames()
@@ -86,12 +88,14 @@ def postprocess(frame, outs):
     lastLayerId = net.getLayerId(layerNames[-1])
     lastLayer = net.getLayer(lastLayerId)

+    classIds = []
+    confidences = []
+    boxes = []
     if net.getLayer(0).outputNameToIndex('im_info') != -1:  # Faster-RCNN or R-FCN
         # Network produces output blob with a shape 1x1xNx7 where N is a number of
         # detections and an every detection is a vector of values
         # [batchId, classId, confidence, left, top, right, bottom]
-        assert(len(outs) == 1)
-        out = outs[0]
+        for out in outs:
             for detection in out[0, 0]:
                 confidence = detection[2]
                 if confidence > confThreshold:
@@ -99,14 +103,16 @@ def postprocess(frame, outs):
                     top = int(detection[4])
                     right = int(detection[5])
                     bottom = int(detection[6])
-                    classId = int(detection[1]) - 1  # Skip background label
-                    drawPred(classId, confidence, left, top, right, bottom)
+                    width = right - left + 1
+                    height = bottom - top + 1
+                    classIds.append(int(detection[1]) - 1)  # Skip background label
+                    confidences.append(float(confidence))
+                    boxes.append([left, top, width, height])
     elif lastLayer.type == 'DetectionOutput':
         # Network produces output blob with a shape 1x1xNx7 where N is a number of
         # detections and an every detection is a vector of values
         # [batchId, classId, confidence, left, top, right, bottom]
-        assert(len(outs) == 1)
-        out = outs[0]
+        for out in outs:
             for detection in out[0, 0]:
                 confidence = detection[2]
                 if confidence > confThreshold:
@@ -114,8 +120,11 @@ def postprocess(frame, outs):
                     top = int(detection[4] * frameHeight)
                     right = int(detection[5] * frameWidth)
                     bottom = int(detection[6] * frameHeight)
-                    classId = int(detection[1]) - 1  # Skip background label
-                    drawPred(classId, confidence, left, top, right, bottom)
+                    width = right - left + 1
+                    height = bottom - top + 1
+                    classIds.append(int(detection[1]) - 1)  # Skip background label
+                    confidences.append(float(confidence))
+                    boxes.append([left, top, width, height])
     elif lastLayer.type == 'Region':
         # Network produces output blob with a shape NxC where N is a number of
         # detected objects and C is a number of classes + 4 where the first 4
@@ -138,7 +147,11 @@ def postprocess(frame, outs):
                     classIds.append(classId)
                     confidences.append(float(confidence))
                     boxes.append([left, top, width, height])
-        indices = cv.dnn.NMSBoxes(boxes, confidences, confThreshold, 0.4)
+    else:
+        print('Unknown output layer type: ' + lastLayer.type)
+        exit()
+
+    indices = cv.dnn.NMSBoxes(boxes, confidences, confThreshold, nmsThreshold)
     for i in indices:
         i = i[0]
         box = boxes[i]
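The Python sample ends up with the same structure as the C++ one: each branch only fills classIds, confidences, and boxes, and a single NMS pass decides what gets drawn. A condensed sketch of that final step (the helper name suppress_and_draw and the drawPred callback are illustrative, not part of the sample):

import cv2 as cv
import numpy as np

def suppress_and_draw(classIds, confidences, boxes, confThreshold, nmsThreshold, drawPred):
    # One NMS pass over everything collected by the detector-specific branches.
    indices = cv.dnn.NMSBoxes(boxes, confidences, confThreshold, nmsThreshold)
    for i in np.array(indices).reshape(-1):  # indices may come back flat or as Nx1
        left, top, width, height = boxes[i]
        drawPred(classIds[i], confidences[i], left, top, left + width, top + height)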