mirror of
https://github.com/opencv/opencv.git
synced 2025-06-07 17:44:04 +08:00
Update a script to generate text graphs for Faster-RCNN networks from TensorFlow
This commit is contained in:
parent
e75576e1ab
commit
dc9e6d3af8
@ -1794,44 +1794,46 @@ struct Net::Impl
|
|||||||
}
|
}
|
||||||
|
|
||||||
// fuse convolution layer followed by eltwise + relu
|
// fuse convolution layer followed by eltwise + relu
|
||||||
if ( IS_DNN_OPENCL_TARGET(preferableTarget) )
|
if ( IS_DNN_OPENCL_TARGET(preferableTarget) && ld.layerInstance->type == "Convolution" )
|
||||||
{
|
{
|
||||||
Ptr<EltwiseLayer> nextEltwiseLayer;
|
Ptr<EltwiseLayer> nextEltwiseLayer;
|
||||||
if( nextData )
|
if( nextData )
|
||||||
nextEltwiseLayer = nextData->layerInstance.dynamicCast<EltwiseLayer>();
|
nextEltwiseLayer = nextData->layerInstance.dynamicCast<EltwiseLayer>();
|
||||||
|
|
||||||
if( !nextEltwiseLayer.empty() && pinsToKeep.count(lpNext) == 0 )
|
if( !nextEltwiseLayer.empty() && pinsToKeep.count(lpNext) == 0 &&
|
||||||
|
nextData->inputBlobsId.size() == 2 )
|
||||||
{
|
{
|
||||||
LayerData *eltwiseData = nextData;
|
LayerData *eltwiseData = nextData;
|
||||||
// go down from the second input and find the first non-skipped layer.
|
|
||||||
LayerData *downLayerData = &layers[eltwiseData->inputBlobsId[1].lid];
|
|
||||||
CV_Assert(downLayerData);
|
|
||||||
while (downLayerData->skip)
|
|
||||||
{
|
|
||||||
downLayerData = &layers[downLayerData->inputBlobsId[0].lid];
|
|
||||||
}
|
|
||||||
CV_Assert(downLayerData);
|
|
||||||
|
|
||||||
// second input layer is current layer.
|
// Eltwise layer has two inputs. We need to determine which
|
||||||
if ( ld.id == downLayerData->id )
|
// is a base convolution layer and which could be used as its bias.
|
||||||
|
LayerData* biasLayerData = 0;
|
||||||
|
for (int i = 0; i < 2; ++i)
|
||||||
{
|
{
|
||||||
// go down from the first input and find the first non-skipped layer
|
LayerData *downLayerData = &layers[eltwiseData->inputBlobsId[i].lid];
|
||||||
downLayerData = &layers[eltwiseData->inputBlobsId[0].lid];
|
CV_Assert(downLayerData);
|
||||||
while (downLayerData->skip)
|
while (downLayerData->skip)
|
||||||
{
|
{
|
||||||
if ( !downLayerData->type.compare("Eltwise") )
|
if (downLayerData->inputBlobsId.size() == 1)
|
||||||
downLayerData = &layers[downLayerData->inputBlobsId[1].lid];
|
|
||||||
else
|
|
||||||
downLayerData = &layers[downLayerData->inputBlobsId[0].lid];
|
downLayerData = &layers[downLayerData->inputBlobsId[0].lid];
|
||||||
|
else
|
||||||
|
{
|
||||||
|
downLayerData = 0;
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
if (downLayerData && ld.id == downLayerData->id)
|
||||||
Ptr<ConvolutionLayer> convLayer = downLayerData->layerInstance.dynamicCast<ConvolutionLayer>();
|
{
|
||||||
|
biasLayerData = &layers[eltwiseData->inputBlobsId[1 - i].lid];
|
||||||
// first input layer is convolution layer
|
break;
|
||||||
if( !convLayer.empty() && eltwiseData->consumers.size() == 1 )
|
}
|
||||||
|
}
|
||||||
|
CV_Assert(biasLayerData);
|
||||||
|
{
|
||||||
|
if( eltwiseData->consumers.size() == 1 )
|
||||||
{
|
{
|
||||||
// fuse eltwise + activation layer
|
// fuse eltwise + activation layer
|
||||||
LayerData *firstConvLayerData = downLayerData;
|
if (biasLayerData->id < ld.id)
|
||||||
{
|
{
|
||||||
nextData = &layers[eltwiseData->consumers[0].lid];
|
nextData = &layers[eltwiseData->consumers[0].lid];
|
||||||
lpNext = LayerPin(eltwiseData->consumers[0].lid, 0);
|
lpNext = LayerPin(eltwiseData->consumers[0].lid, 0);
|
||||||
@ -1845,8 +1847,8 @@ struct Net::Impl
|
|||||||
!nextData->type.compare("Power")) &&
|
!nextData->type.compare("Power")) &&
|
||||||
currLayer->setActivation(nextActivLayer) )
|
currLayer->setActivation(nextActivLayer) )
|
||||||
{
|
{
|
||||||
CV_Assert(firstConvLayerData->outputBlobsWrappers.size() == 1 && ld.inputBlobsWrappers.size() == 1);
|
CV_Assert_N(biasLayerData->outputBlobsWrappers.size() == 1, ld.inputBlobsWrappers.size() == 1);
|
||||||
ld.inputBlobsWrappers.push_back(firstConvLayerData->outputBlobsWrappers[0]);
|
ld.inputBlobsWrappers.push_back(biasLayerData->outputBlobsWrappers[0]);
|
||||||
printf_(("\tfused with %s\n", nextEltwiseLayer->name.c_str()));
|
printf_(("\tfused with %s\n", nextEltwiseLayer->name.c_str()));
|
||||||
printf_(("\tfused with %s\n", nextActivLayer->name.c_str()));
|
printf_(("\tfused with %s\n", nextActivLayer->name.c_str()));
|
||||||
eltwiseData->skip = true;
|
eltwiseData->skip = true;
|
||||||
@ -1897,9 +1899,6 @@ struct Net::Impl
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (preferableBackend != DNN_BACKEND_OPENCV)
|
|
||||||
continue; // Go to the next layer.
|
|
||||||
|
|
||||||
// the optimization #2. if there is no layer that takes max pooling layer's computed
|
// the optimization #2. if there is no layer that takes max pooling layer's computed
|
||||||
// max indices (and only some semantic segmentation networks might need this;
|
// max indices (and only some semantic segmentation networks might need this;
|
||||||
// many others only take the maximum values), then we switch the max pooling
|
// many others only take the maximum values), then we switch the max pooling
|
||||||
|
@ -95,7 +95,6 @@ public:
|
|||||||
else if (params.has("pooled_w") || params.has("pooled_h"))
|
else if (params.has("pooled_w") || params.has("pooled_h"))
|
||||||
{
|
{
|
||||||
type = ROI;
|
type = ROI;
|
||||||
computeMaxIdx = false;
|
|
||||||
pooledSize.width = params.get<uint32_t>("pooled_w", 1);
|
pooledSize.width = params.get<uint32_t>("pooled_w", 1);
|
||||||
pooledSize.height = params.get<uint32_t>("pooled_h", 1);
|
pooledSize.height = params.get<uint32_t>("pooled_h", 1);
|
||||||
}
|
}
|
||||||
@ -141,6 +140,7 @@ public:
|
|||||||
#ifdef HAVE_OPENCL
|
#ifdef HAVE_OPENCL
|
||||||
poolOp.release();
|
poolOp.release();
|
||||||
#endif
|
#endif
|
||||||
|
computeMaxIdx = type == MAX;
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual bool supportBackend(int backendId) CV_OVERRIDE
|
virtual bool supportBackend(int backendId) CV_OVERRIDE
|
||||||
@ -190,19 +190,14 @@ public:
|
|||||||
poolOp = Ptr<OCL4DNNPool<float> >(new OCL4DNNPool<float>(config));
|
poolOp = Ptr<OCL4DNNPool<float> >(new OCL4DNNPool<float>(config));
|
||||||
}
|
}
|
||||||
|
|
||||||
for (size_t ii = 0; ii < inputs.size(); ii++)
|
CV_Assert_N(inputs.size() == 1, !outputs.empty(), !computeMaxIdx || outputs.size() == 2);
|
||||||
{
|
UMat& inpMat = inputs[0];
|
||||||
UMat& inpMat = inputs[ii];
|
UMat& outMat = outputs[0];
|
||||||
int out_index = (type == MAX) ? 2 : 1;
|
UMat maskMat = computeMaxIdx ? outputs[1] : UMat();
|
||||||
UMat& outMat = outputs[out_index * ii];
|
|
||||||
UMat maskMat = (type == MAX) ? outputs[2 * ii + 1] : UMat();
|
|
||||||
|
|
||||||
CV_Assert(inpMat.offset == 0 && outMat.offset == 0);
|
CV_Assert(inpMat.offset == 0 && outMat.offset == 0);
|
||||||
|
|
||||||
if (!poolOp->Forward(inpMat, outMat, maskMat))
|
return poolOp->Forward(inpMat, outMat, maskMat);
|
||||||
return false;
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -229,9 +224,12 @@ public:
|
|||||||
switch (type)
|
switch (type)
|
||||||
{
|
{
|
||||||
case MAX:
|
case MAX:
|
||||||
CV_Assert_N(inputs.size() == 1, outputs.size() == 2);
|
{
|
||||||
maxPooling(inputs[0], outputs[0], outputs[1]);
|
CV_Assert_N(inputs.size() == 1, !computeMaxIdx || outputs.size() == 2);
|
||||||
|
Mat mask = computeMaxIdx ? outputs[1] : Mat();
|
||||||
|
maxPooling(inputs[0], outputs[0], mask);
|
||||||
break;
|
break;
|
||||||
|
}
|
||||||
case AVE:
|
case AVE:
|
||||||
CV_Assert_N(inputs.size() == 1, outputs.size() == 1);
|
CV_Assert_N(inputs.size() == 1, outputs.size() == 1);
|
||||||
avePooling(inputs[0], outputs[0]);
|
avePooling(inputs[0], outputs[0]);
|
||||||
@ -912,7 +910,10 @@ public:
|
|||||||
dims[0] = inputs[1][0]; // Number of proposals;
|
dims[0] = inputs[1][0]; // Number of proposals;
|
||||||
dims[1] = psRoiOutChannels;
|
dims[1] = psRoiOutChannels;
|
||||||
}
|
}
|
||||||
outputs.assign(type == MAX ? 2 : 1, shape(dims, 4));
|
|
||||||
|
int numOutputs = requiredOutputs ? requiredOutputs : (type == MAX ? 2 : 1);
|
||||||
|
CV_Assert(numOutputs == 1 || (numOutputs == 2 && type == MAX));
|
||||||
|
outputs.assign(numOutputs, shape(dims, 4));
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -358,7 +358,7 @@ TEST_P(Test_TensorFlow_nets, Faster_RCNN)
|
|||||||
(backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16))
|
(backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16))
|
||||||
throw SkipTestException("");
|
throw SkipTestException("");
|
||||||
|
|
||||||
for (int i = 1; i < 2; ++i)
|
for (int i = 0; i < 2; ++i)
|
||||||
{
|
{
|
||||||
std::string proto = findDataFile("dnn/" + names[i] + ".pbtxt", false);
|
std::string proto = findDataFile("dnn/" + names[i] + ".pbtxt", false);
|
||||||
std::string model = findDataFile("dnn/" + names[i] + ".pb", false);
|
std::string model = findDataFile("dnn/" + names[i] + ".pb", false);
|
||||||
|
@ -32,6 +32,8 @@ def createFasterRCNNGraph(modelPath, configPath, outputPath):
|
|||||||
width_stride = float(grid_anchor_generator['width_stride'][0])
|
width_stride = float(grid_anchor_generator['width_stride'][0])
|
||||||
height_stride = float(grid_anchor_generator['height_stride'][0])
|
height_stride = float(grid_anchor_generator['height_stride'][0])
|
||||||
features_stride = float(config['feature_extractor'][0]['first_stage_features_stride'][0])
|
features_stride = float(config['feature_extractor'][0]['first_stage_features_stride'][0])
|
||||||
|
first_stage_nms_iou_threshold = float(config['first_stage_nms_iou_threshold'][0])
|
||||||
|
first_stage_max_proposals = int(config['first_stage_max_proposals'][0])
|
||||||
|
|
||||||
print('Number of classes: %d' % num_classes)
|
print('Number of classes: %d' % num_classes)
|
||||||
print('Scales: %s' % str(scales))
|
print('Scales: %s' % str(scales))
|
||||||
@ -47,7 +49,8 @@ def createFasterRCNNGraph(modelPath, configPath, outputPath):
|
|||||||
removeIdentity(graph_def)
|
removeIdentity(graph_def)
|
||||||
|
|
||||||
def to_remove(name, op):
|
def to_remove(name, op):
|
||||||
return name.startswith(scopesToIgnore) or not name.startswith(scopesToKeep)
|
return name.startswith(scopesToIgnore) or not name.startswith(scopesToKeep) or \
|
||||||
|
(name.startswith('CropAndResize') and op != 'CropAndResize')
|
||||||
|
|
||||||
removeUnusedNodesAndAttrs(to_remove, graph_def)
|
removeUnusedNodesAndAttrs(to_remove, graph_def)
|
||||||
|
|
||||||
@ -114,10 +117,10 @@ def createFasterRCNNGraph(modelPath, configPath, outputPath):
|
|||||||
detectionOut.addAttr('num_classes', 2)
|
detectionOut.addAttr('num_classes', 2)
|
||||||
detectionOut.addAttr('share_location', True)
|
detectionOut.addAttr('share_location', True)
|
||||||
detectionOut.addAttr('background_label_id', 0)
|
detectionOut.addAttr('background_label_id', 0)
|
||||||
detectionOut.addAttr('nms_threshold', 0.7)
|
detectionOut.addAttr('nms_threshold', first_stage_nms_iou_threshold)
|
||||||
detectionOut.addAttr('top_k', 6000)
|
detectionOut.addAttr('top_k', 6000)
|
||||||
detectionOut.addAttr('code_type', "CENTER_SIZE")
|
detectionOut.addAttr('code_type', "CENTER_SIZE")
|
||||||
detectionOut.addAttr('keep_top_k', 100)
|
detectionOut.addAttr('keep_top_k', first_stage_max_proposals)
|
||||||
detectionOut.addAttr('clip', False)
|
detectionOut.addAttr('clip', False)
|
||||||
|
|
||||||
graph_def.node.extend([detectionOut])
|
graph_def.node.extend([detectionOut])
|
||||||
@ -147,9 +150,11 @@ def createFasterRCNNGraph(modelPath, configPath, outputPath):
|
|||||||
'SecondStageBoxPredictor/Reshape_1/Reshape', [1, -1], graph_def)
|
'SecondStageBoxPredictor/Reshape_1/Reshape', [1, -1], graph_def)
|
||||||
|
|
||||||
# Replace the Flatten subgraph with a single node.
|
# Replace the Flatten subgraph with a single node.
|
||||||
|
cropAndResizeNodeName = ''
|
||||||
for i in reversed(range(len(graph_def.node))):
|
for i in reversed(range(len(graph_def.node))):
|
||||||
if graph_def.node[i].op == 'CropAndResize':
|
if graph_def.node[i].op == 'CropAndResize':
|
||||||
graph_def.node[i].input.insert(1, 'detection_out/clip_by_value')
|
graph_def.node[i].input.insert(1, 'detection_out/clip_by_value')
|
||||||
|
cropAndResizeNodeName = graph_def.node[i].name
|
||||||
|
|
||||||
if graph_def.node[i].name == 'SecondStageBoxPredictor/Reshape':
|
if graph_def.node[i].name == 'SecondStageBoxPredictor/Reshape':
|
||||||
addConstNode('SecondStageBoxPredictor/Reshape/shape2', [1, -1, 4], graph_def)
|
addConstNode('SecondStageBoxPredictor/Reshape/shape2', [1, -1, 4], graph_def)
|
||||||
@ -159,11 +164,15 @@ def createFasterRCNNGraph(modelPath, configPath, outputPath):
|
|||||||
|
|
||||||
if graph_def.node[i].name in ['SecondStageBoxPredictor/Flatten/flatten/Shape',
|
if graph_def.node[i].name in ['SecondStageBoxPredictor/Flatten/flatten/Shape',
|
||||||
'SecondStageBoxPredictor/Flatten/flatten/strided_slice',
|
'SecondStageBoxPredictor/Flatten/flatten/strided_slice',
|
||||||
'SecondStageBoxPredictor/Flatten/flatten/Reshape/shape']:
|
'SecondStageBoxPredictor/Flatten/flatten/Reshape/shape',
|
||||||
|
'SecondStageBoxPredictor/Flatten_1/flatten/Shape',
|
||||||
|
'SecondStageBoxPredictor/Flatten_1/flatten/strided_slice',
|
||||||
|
'SecondStageBoxPredictor/Flatten_1/flatten/Reshape/shape']:
|
||||||
del graph_def.node[i]
|
del graph_def.node[i]
|
||||||
|
|
||||||
for node in graph_def.node:
|
for node in graph_def.node:
|
||||||
if node.name == 'SecondStageBoxPredictor/Flatten/flatten/Reshape':
|
if node.name == 'SecondStageBoxPredictor/Flatten/flatten/Reshape' or \
|
||||||
|
node.name == 'SecondStageBoxPredictor/Flatten_1/flatten/Reshape':
|
||||||
node.op = 'Flatten'
|
node.op = 'Flatten'
|
||||||
node.input.pop()
|
node.input.pop()
|
||||||
|
|
||||||
@ -171,6 +180,11 @@ def createFasterRCNNGraph(modelPath, configPath, outputPath):
|
|||||||
'SecondStageBoxPredictor/BoxEncodingPredictor/MatMul']:
|
'SecondStageBoxPredictor/BoxEncodingPredictor/MatMul']:
|
||||||
node.addAttr('loc_pred_transposed', True)
|
node.addAttr('loc_pred_transposed', True)
|
||||||
|
|
||||||
|
if node.name.startswith('MaxPool2D'):
|
||||||
|
assert(node.op == 'MaxPool')
|
||||||
|
assert(cropAndResizeNodeName)
|
||||||
|
node.input = [cropAndResizeNodeName]
|
||||||
|
|
||||||
################################################################################
|
################################################################################
|
||||||
### Postprocessing
|
### Postprocessing
|
||||||
################################################################################
|
################################################################################
|
||||||
|
Loading…
Reference in New Issue
Block a user