From faa6c4e1e16ebfcf1415168acb159f2fc723bc36 Mon Sep 17 00:00:00 2001 From: Dmitry Kurtaev Date: Tue, 24 Jul 2018 19:12:58 +0300 Subject: [PATCH] Faster-RCNN and RFCN models on CPU using Intel's Inference Engine backend. Enable Torch layers tests with Intel's Inference Engine backend. --- modules/dnn/include/opencv2/dnn/dnn.hpp | 2 +- modules/dnn/src/dnn.cpp | 8 + .../dnn/src/layers/detection_output_layer.cpp | 11 +- modules/dnn/src/layers/pooling_layer.cpp | 69 +++++-- modules/dnn/src/layers/proposal_layer.cpp | 55 ++++- modules/dnn/src/ocl4dnn/src/ocl4dnn_pool.cpp | 3 +- modules/dnn/src/opencl/ocl4dnn_pooling.cl | 4 +- modules/dnn/src/torch/torch_importer.cpp | 10 + modules/dnn/test/test_caffe_importer.cpp | 105 ++++++---- modules/dnn/test/test_layers.cpp | 10 +- modules/dnn/test/test_torch_importer.cpp | 189 ++++++++++-------- samples/dnn/object_detection.py | 2 +- 12 files changed, 309 insertions(+), 159 deletions(-) diff --git a/modules/dnn/include/opencv2/dnn/dnn.hpp b/modules/dnn/include/opencv2/dnn/dnn.hpp index 0809891942..c737177128 100644 --- a/modules/dnn/include/opencv2/dnn/dnn.hpp +++ b/modules/dnn/include/opencv2/dnn/dnn.hpp @@ -201,7 +201,7 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN * @param[out] outputs allocated output blobs, which will store results of the computation. * @param[out] internals allocated internal blobs */ - virtual void forward(InputArrayOfArrays inputs, OutputArrayOfArrays outputs, OutputArrayOfArrays internals) = 0; + virtual void forward(InputArrayOfArrays inputs, OutputArrayOfArrays outputs, OutputArrayOfArrays internals); /** @brief Given the @p input blobs, computes the output @p blobs. * @param[in] inputs the input blobs. 
diff --git a/modules/dnn/src/dnn.cpp b/modules/dnn/src/dnn.cpp index 5014365fdd..202be4d2c0 100644 --- a/modules/dnn/src/dnn.cpp +++ b/modules/dnn/src/dnn.cpp @@ -3071,6 +3071,14 @@ std::vector Layer::finalize(const std::vector &inputs) return outputs; } +void Layer::forward(InputArrayOfArrays inputs, OutputArrayOfArrays outputs, OutputArrayOfArrays internals) +{ + CV_TRACE_FUNCTION(); + CV_TRACE_ARG_VALUE(name, "name", name.c_str()); + + Layer::forward_fallback(inputs, outputs, internals); +} + void Layer::forward_fallback(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) { CV_TRACE_FUNCTION(); diff --git a/modules/dnn/src/layers/detection_output_layer.cpp b/modules/dnn/src/layers/detection_output_layer.cpp index f4d4d2b822..fdcaab02e3 100644 --- a/modules/dnn/src/layers/detection_output_layer.cpp +++ b/modules/dnn/src/layers/detection_output_layer.cpp @@ -196,7 +196,7 @@ public: virtual bool supportBackend(int backendId) CV_OVERRIDE { return backendId == DNN_BACKEND_OPENCV || - backendId == DNN_BACKEND_INFERENCE_ENGINE && haveInfEngine() && !_locPredTransposed; + backendId == DNN_BACKEND_INFERENCE_ENGINE && !_locPredTransposed && _bboxesNormalized; } bool getMemoryShapes(const std::vector &inputs, @@ -411,9 +411,12 @@ public: CV_TRACE_FUNCTION(); CV_TRACE_ARG_VALUE(name, "name", name.c_str()); - CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) && - OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()), - forward_ocl(inputs_arr, outputs_arr, internals_arr)) + if (_bboxesNormalized) + { + CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) && + OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()), + forward_ocl(inputs_arr, outputs_arr, internals_arr)) + } Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr); } diff --git a/modules/dnn/src/layers/pooling_layer.cpp b/modules/dnn/src/layers/pooling_layer.cpp index 775a044b44..4e0fea21d8 100644 --- a/modules/dnn/src/layers/pooling_layer.cpp +++ 
b/modules/dnn/src/layers/pooling_layer.cpp @@ -135,10 +135,17 @@ public: virtual bool supportBackend(int backendId) CV_OVERRIDE { - return backendId == DNN_BACKEND_OPENCV || - backendId == DNN_BACKEND_HALIDE && haveHalide() && - (type == MAX || type == AVE && !pad.width && !pad.height) || - backendId == DNN_BACKEND_INFERENCE_ENGINE && haveInfEngine() && (type == MAX || type == AVE); + if (backendId == DNN_BACKEND_INFERENCE_ENGINE) + { + if (preferableTarget == DNN_TARGET_MYRIAD) + return type == MAX || type == AVE; + else + return type != STOCHASTIC; + } + else + return backendId == DNN_BACKEND_OPENCV || + backendId == DNN_BACKEND_HALIDE && haveHalide() && + (type == MAX || type == AVE && !pad.width && !pad.height); } #ifdef HAVE_OPENCL @@ -192,8 +199,11 @@ public: CV_TRACE_FUNCTION(); CV_TRACE_ARG_VALUE(name, "name", name.c_str()); - CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget), - forward_ocl(inputs_arr, outputs_arr, internals_arr)) + if (type == MAX || type == AVE || type == STOCHASTIC) + { + CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget), + forward_ocl(inputs_arr, outputs_arr, internals_arr)) + } Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr); } @@ -238,22 +248,41 @@ public: #ifdef HAVE_INF_ENGINE InferenceEngine::LayerParams lp; lp.name = name; - lp.type = "Pooling"; lp.precision = InferenceEngine::Precision::FP32; - std::shared_ptr ieLayer(new InferenceEngine::PoolingLayer(lp)); - ieLayer->_kernel_x = kernel.width; - ieLayer->_kernel_y = kernel.height; - ieLayer->_stride_x = stride.width; - ieLayer->_stride_y = stride.height; - ieLayer->_padding_x = pad.width; - ieLayer->_padding_y = pad.height; - ieLayer->_exclude_pad = type == AVE && padMode == "SAME"; - ieLayer->params["rounding-type"] = ceilMode ? 
"ceil" : "floor"; - if (type == MAX) - ieLayer->_type = InferenceEngine::PoolingLayer::PoolType::MAX; - else if (type == AVE) - ieLayer->_type = InferenceEngine::PoolingLayer::PoolType::AVG; + std::shared_ptr ieLayer; + if (type == MAX || type == AVE) + { + lp.type = "Pooling"; + InferenceEngine::PoolingLayer* poolLayer = new InferenceEngine::PoolingLayer(lp); + poolLayer->_kernel_x = kernel.width; + poolLayer->_kernel_y = kernel.height; + poolLayer->_stride_x = stride.width; + poolLayer->_stride_y = stride.height; + poolLayer->_padding_x = pad.width; + poolLayer->_padding_y = pad.height; + poolLayer->_exclude_pad = type == AVE && padMode == "SAME"; + poolLayer->params["rounding-type"] = ceilMode ? "ceil" : "floor"; + poolLayer->_type = type == MAX ? InferenceEngine::PoolingLayer::PoolType::MAX : + InferenceEngine::PoolingLayer::PoolType::AVG; + ieLayer = std::shared_ptr(poolLayer); + } + else if (type == ROI) + { + lp.type = "ROIPooling"; + ieLayer = std::shared_ptr(new InferenceEngine::CNNLayer(lp)); + ieLayer->params["pooled_w"] = format("%d", pooledSize.width); + ieLayer->params["pooled_h"] = format("%d", pooledSize.height); + ieLayer->params["spatial_scale"] = format("%f", spatialScale); + } + else if (type == PSROI) + { + lp.type = "PSROIPooling"; + ieLayer = std::shared_ptr(new InferenceEngine::CNNLayer(lp)); + ieLayer->params["output_dim"] = format("%d", psRoiOutChannels); + ieLayer->params["group_size"] = format("%d", pooledSize.width); + ieLayer->params["spatial_scale"] = format("%f", spatialScale); + } else CV_Error(Error::StsNotImplemented, "Unsupported pooling type"); diff --git a/modules/dnn/src/layers/proposal_layer.cpp b/modules/dnn/src/layers/proposal_layer.cpp index 44671268a7..cdc5e2250a 100644 --- a/modules/dnn/src/layers/proposal_layer.cpp +++ b/modules/dnn/src/layers/proposal_layer.cpp @@ -6,6 +6,7 @@ // Third party copyrights are property of their respective owners. 
#include "../precomp.hpp" #include "layers_common.hpp" +#include "../op_inf_engine.hpp" namespace cv { namespace dnn { @@ -16,14 +17,14 @@ public: { setParamsFrom(params); - uint32_t featStride = params.get("feat_stride", 16); - uint32_t baseSize = params.get("base_size", 16); + featStride = params.get("feat_stride", 16); + baseSize = params.get("base_size", 16); // uint32_t minSize = params.get("min_size", 16); - uint32_t keepTopBeforeNMS = params.get("pre_nms_topn", 6000); + keepTopBeforeNMS = params.get("pre_nms_topn", 6000); keepTopAfterNMS = params.get("post_nms_topn", 300); - float nmsThreshold = params.get("nms_thresh", 0.7); - DictValue ratios = params.get("ratio"); - DictValue scales = params.get("scale"); + nmsThreshold = params.get("nms_thresh", 0.7); + ratios = params.get("ratio"); + scales = params.get("scale"); { LayerParams lp; @@ -83,6 +84,12 @@ public: } } + virtual bool supportBackend(int backendId) CV_OVERRIDE + { + return backendId == DNN_BACKEND_OPENCV || + backendId == DNN_BACKEND_INFERENCE_ENGINE && preferableTarget != DNN_TARGET_MYRIAD; + } + bool getMemoryShapes(const std::vector &inputs, const int requiredOutputs, std::vector &outputs, @@ -312,6 +319,38 @@ public: outputs[i].rowRange(numDets, keepTopAfterNMS).setTo(0); } + virtual Ptr initInfEngine(const std::vector >&) CV_OVERRIDE + { +#ifdef HAVE_INF_ENGINE + InferenceEngine::LayerParams lp; + lp.name = name; + lp.type = "Proposal"; + lp.precision = InferenceEngine::Precision::FP32; + std::shared_ptr ieLayer(new InferenceEngine::CNNLayer(lp)); + + ieLayer->params["base_size"] = format("%d", baseSize); + ieLayer->params["feat_stride"] = format("%d", featStride); + ieLayer->params["min_size"] = "16"; + ieLayer->params["nms_thresh"] = format("%f", nmsThreshold); + ieLayer->params["post_nms_topn"] = format("%d", keepTopAfterNMS); + ieLayer->params["pre_nms_topn"] = format("%d", keepTopBeforeNMS); + if (ratios.size()) + { + ieLayer->params["ratio"] = format("%f", ratios.get(0)); + for (int i 
= 1; i < ratios.size(); ++i) + ieLayer->params["ratio"] += format(",%f", ratios.get(i)); + } + if (scales.size()) + { + ieLayer->params["scale"] = format("%f", scales.get(0)); + for (int i = 1; i < scales.size(); ++i) + ieLayer->params["scale"] += format(",%f", scales.get(i)); + } + return Ptr(new InfEngineBackendNode(ieLayer)); +#endif // HAVE_INF_ENGINE + return Ptr(); + } + private: // A first half of channels are background scores. We need only a second one. static Mat getObjectScores(const Mat& m) @@ -342,8 +381,10 @@ private: Ptr deltasPermute; Ptr scoresPermute; - uint32_t keepTopAfterNMS; + uint32_t keepTopBeforeNMS, keepTopAfterNMS, featStride, baseSize; Mat fakeImageBlob; + float nmsThreshold; + DictValue ratios, scales; #ifdef HAVE_OPENCL UMat umat_fakeImageBlob; #endif diff --git a/modules/dnn/src/ocl4dnn/src/ocl4dnn_pool.cpp b/modules/dnn/src/ocl4dnn/src/ocl4dnn_pool.cpp index 8b74248b64..77cd3a6337 100644 --- a/modules/dnn/src/ocl4dnn/src/ocl4dnn_pool.cpp +++ b/modules/dnn/src/ocl4dnn/src/ocl4dnn_pool.cpp @@ -183,8 +183,9 @@ bool OCL4DNNPool::Forward(const UMat& bottom, ocl::Kernel oclk_sto_pool_forward( kname.c_str(), ocl::dnn::ocl4dnn_pooling_oclsrc, - format("-D KERNEL_STO_POOL=1 -D KERNEL_W=%d -D KERNEL_H=%d" + format(" -D Dtype=%s -D KERNEL_STO_POOL=1 -D KERNEL_W=%d -D KERNEL_H=%d" " -D STRIDE_W=%d -D STRIDE_H=%d", + (use_half) ? 
"half" : "float", kernel_w_, kernel_h_, stride_w_, stride_h_ )); diff --git a/modules/dnn/src/opencl/ocl4dnn_pooling.cl b/modules/dnn/src/opencl/ocl4dnn_pooling.cl index 501f5a5e87..77d2e5ba33 100644 --- a/modules/dnn/src/opencl/ocl4dnn_pooling.cl +++ b/modules/dnn/src/opencl/ocl4dnn_pooling.cl @@ -104,7 +104,7 @@ __kernel void #elif defined KERNEL_AVE_POOL __kernel void TEMPLATE(ave_pool_forward, Dtype)( - const int nthreads, __global const Dtype* const bottom_data, + const int nthreads, __global const Dtype* bottom_data, const int channels, const int height, const int width, const int pooled_height, const int pooled_width, __global Dtype* top_data) @@ -150,7 +150,7 @@ __kernel void TEMPLATE(ave_pool_forward, Dtype)( #elif defined KERNEL_STO_POOL __kernel void TEMPLATE(sto_pool_forward_test,Dtype)( - const int nthreads, __global const Dtype* const bottom_data, + const int nthreads, __global const Dtype* bottom_data, const int channels, const int height, const int width, const int pooled_height, const int pooled_width, __global Dtype* top_data) diff --git a/modules/dnn/src/torch/torch_importer.cpp b/modules/dnn/src/torch/torch_importer.cpp index 049c83f606..52bc0ce8a3 100644 --- a/modules/dnn/src/torch/torch_importer.cpp +++ b/modules/dnn/src/torch/torch_importer.cpp @@ -938,6 +938,16 @@ struct TorchImporter layerParams.set("end", DictValue::arrayInt(&ends[0], 4)); curModule->modules.push_back(newModule); } + else if (nnName == "SpatialUpSamplingNearest") + { + readTorchTable(scalarParams, tensorParams); + CV_Assert(scalarParams.has("scale_factor")); + int scale_factor = scalarParams.get("scale_factor"); + newModule->apiType = "Resize"; + layerParams.set("interpolation", "nearest"); + layerParams.set("zoom_factor", scale_factor); + curModule->modules.push_back(newModule); + } else { // Importer does not know how to map Torch's layer type to an OpenCV's one. 
diff --git a/modules/dnn/test/test_caffe_importer.cpp b/modules/dnn/test/test_caffe_importer.cpp index 5365b2a435..b957b8caf4 100644 --- a/modules/dnn/test/test_caffe_importer.cpp +++ b/modules/dnn/test/test_caffe_importer.cpp @@ -51,6 +51,33 @@ static std::string _tf(TString filename) return (getOpenCVExtraDir() + "/dnn/") + filename; } +class Test_Caffe_nets : public DNNTestLayer +{ +public: + void testFaster(const std::string& proto, const std::string& model, const Mat& ref, + double scoreDiff = 0.0, double iouDiff = 0.0) + { + checkBackend(); + Net net = readNetFromCaffe(findDataFile("dnn/" + proto, false), + findDataFile("dnn/" + model, false)); + net.setPreferableBackend(backend); + net.setPreferableTarget(target); + Mat img = imread(findDataFile("dnn/dog416.png", false)); + resize(img, img, Size(800, 600)); + Mat blob = blobFromImage(img, 1.0, Size(), Scalar(102.9801, 115.9465, 122.7717), false, false); + Mat imInfo = (Mat_(1, 3) << img.rows, img.cols, 1.6f); + + net.setInput(blob, "data"); + net.setInput(imInfo, "im_info"); + // Output has shape 1x1xNx7 where N - number of detections. + // An every detection is a vector of values [id, classId, confidence, left, top, right, bottom] + Mat out = net.forward(); + scoreDiff = scoreDiff ? scoreDiff : default_l1; + iouDiff = iouDiff ? 
iouDiff : default_lInf; + normAssertDetections(ref, out, ("model name: " + model).c_str(), 0.8, scoreDiff, iouDiff); + } +}; + TEST(Test_Caffe, memory_read) { const string proto = findDataFile("dnn/bvlc_googlenet.prototxt", false); @@ -344,9 +371,15 @@ TEST(Reproducibility_GoogLeNet_fp16, Accuracy) } // https://github.com/richzhang/colorization -TEST(Reproducibility_Colorization, Accuracy) +TEST_P(Test_Caffe_nets, Colorization) { - const float l1 = 3e-5; + checkBackend(); + if ((backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_OPENCL_FP16) || + (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD) || + (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16)) + throw SkipTestException(""); + + const float l1 = 4e-4; const float lInf = 3e-3; Mat inp = blobFromNPY(_tf("colorization_inp.npy")); @@ -356,7 +389,8 @@ TEST(Reproducibility_Colorization, Accuracy) const string proto = findDataFile("dnn/colorization_deploy_v2.prototxt", false); const string model = findDataFile("dnn/colorization_release_v2.caffemodel", false); Net net = readNetFromCaffe(proto, model); - net.setPreferableBackend(DNN_BACKEND_OPENCV); + net.setPreferableBackend(backend); + net.setPreferableTarget(target); net.getLayer(net.getLayerId("class8_ab"))->blobs.push_back(kernel); net.getLayer(net.getLayerId("conv8_313_rh"))->blobs.push_back(Mat(1, 313, CV_32F, 2.606)); @@ -447,39 +481,40 @@ INSTANTIATE_TEST_CASE_P(Test_Caffe, opencv_face_detector, ) ); -TEST(Test_Caffe, FasterRCNN_and_RFCN) +TEST_P(Test_Caffe_nets, FasterRCNN_vgg16) { - std::string models[] = {"VGG16_faster_rcnn_final.caffemodel", "ZF_faster_rcnn_final.caffemodel", - "resnet50_rfcn_final.caffemodel"}; - std::string protos[] = {"faster_rcnn_vgg16.prototxt", "faster_rcnn_zf.prototxt", - "rfcn_pascal_voc_resnet50.prototxt"}; - Mat refs[] = {(Mat_(3, 7) << 0, 2, 0.949398, 99.2454, 210.141, 601.205, 462.849, - 0, 7, 0.997022, 481.841, 92.3218, 722.685, 175.953, - 0, 12, 0.993028, 133.221, 
189.377, 350.994, 563.166), - (Mat_(3, 7) << 0, 2, 0.90121, 120.407, 115.83, 570.586, 528.395, - 0, 7, 0.988779, 469.849, 75.1756, 718.64, 186.762, - 0, 12, 0.967198, 138.588, 206.843, 329.766, 553.176), - (Mat_(2, 7) << 0, 7, 0.991359, 491.822, 81.1668, 702.573, 178.234, - 0, 12, 0.94786, 132.093, 223.903, 338.077, 566.16)}; - for (int i = 0; i < 3; ++i) - { - std::string proto = findDataFile("dnn/" + protos[i], false); - std::string model = findDataFile("dnn/" + models[i], false); - - Net net = readNetFromCaffe(proto, model); - net.setPreferableBackend(DNN_BACKEND_OPENCV); - Mat img = imread(findDataFile("dnn/dog416.png", false)); - resize(img, img, Size(800, 600)); - Mat blob = blobFromImage(img, 1.0, Size(), Scalar(102.9801, 115.9465, 122.7717), false, false); - Mat imInfo = (Mat_(1, 3) << img.rows, img.cols, 1.6f); - - net.setInput(blob, "data"); - net.setInput(imInfo, "im_info"); - // Output has shape 1x1xNx7 where N - number of detections. - // An every detection is a vector of values [id, classId, confidence, left, top, right, bottom] - Mat out = net.forward(); - normAssertDetections(refs[i], out, ("model name: " + models[i]).c_str(), 0.8); - } + if ((backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD) || + (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16)) + throw SkipTestException(""); + static Mat ref = (Mat_(3, 7) << 0, 2, 0.949398, 99.2454, 210.141, 601.205, 462.849, + 0, 7, 0.997022, 481.841, 92.3218, 722.685, 175.953, + 0, 12, 0.993028, 133.221, 189.377, 350.994, 563.166); + testFaster("faster_rcnn_vgg16.prototxt", "VGG16_faster_rcnn_final.caffemodel", ref); } +TEST_P(Test_Caffe_nets, FasterRCNN_zf) +{ + if ((backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_OPENCL_FP16) || + (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD) || + (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16)) + throw SkipTestException(""); + static Mat ref = (Mat_(3, 7) << 0, 2, 0.90121, 
120.407, 115.83, 570.586, 528.395, + 0, 7, 0.988779, 469.849, 75.1756, 718.64, 186.762, + 0, 12, 0.967198, 138.588, 206.843, 329.766, 553.176); + testFaster("faster_rcnn_zf.prototxt", "ZF_faster_rcnn_final.caffemodel", ref); +} + +TEST_P(Test_Caffe_nets, RFCN) +{ + if ((backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_OPENCL_FP16) || + (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD) || + (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16)) + throw SkipTestException(""); + static Mat ref = (Mat_(2, 7) << 0, 7, 0.991359, 491.822, 81.1668, 702.573, 178.234, + 0, 12, 0.94786, 132.093, 223.903, 338.077, 566.16); + testFaster("rfcn_pascal_voc_resnet50.prototxt", "resnet50_rfcn_final.caffemodel", ref); +} + +INSTANTIATE_TEST_CASE_P(/**/, Test_Caffe_nets, dnnBackendsAndTargets()); + }} // namespace diff --git a/modules/dnn/test/test_layers.cpp b/modules/dnn/test/test_layers.cpp index 3ebb4172d9..77a326417c 100644 --- a/modules/dnn/test/test_layers.cpp +++ b/modules/dnn/test/test_layers.cpp @@ -1205,14 +1205,6 @@ public: } } - void forward(InputArrayOfArrays inputs, OutputArrayOfArrays outputs, OutputArrayOfArrays internals) CV_OVERRIDE - { - CV_TRACE_FUNCTION(); - CV_TRACE_ARG_VALUE(name, "name", name.c_str()); - - Layer::forward_fallback(inputs, outputs, internals); - } - private: int outWidth, outHeight, zoomFactor; }; @@ -1225,7 +1217,7 @@ TEST_P(Test_Caffe_layers, DISABLED_Interp) // requires patched protobuf (availa { if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD) throw SkipTestException(""); - // Test a cusom layer. + // Test a custom layer. 
CV_DNN_REGISTER_LAYER_CLASS(Interp, CustomInterpLayer); try { diff --git a/modules/dnn/test/test_torch_importer.cpp b/modules/dnn/test/test_torch_importer.cpp index 37966a1f93..c07c5b39d8 100644 --- a/modules/dnn/test/test_torch_importer.cpp +++ b/modules/dnn/test/test_torch_importer.cpp @@ -69,100 +69,119 @@ TEST(Torch_Importer, simple_read) ASSERT_FALSE(net.empty()); } -static void runTorchNet(String prefix, int targetId = DNN_TARGET_CPU, String outLayerName = "", - bool check2ndBlob = false, bool isBinary = false) +class Test_Torch_layers : public DNNTestLayer { - String suffix = (isBinary) ? ".dat" : ".txt"; - - Net net = readNetFromTorch(_tf(prefix + "_net" + suffix), isBinary); - ASSERT_FALSE(net.empty()); - - net.setPreferableBackend(DNN_BACKEND_OPENCV); - net.setPreferableTarget(targetId); - - Mat inp, outRef; - ASSERT_NO_THROW( inp = readTorchBlob(_tf(prefix + "_input" + suffix), isBinary) ); - ASSERT_NO_THROW( outRef = readTorchBlob(_tf(prefix + "_output" + suffix), isBinary) ); - - if (outLayerName.empty()) - outLayerName = net.getLayerNames().back(); - - net.setInput(inp); - std::vector outBlobs; - net.forward(outBlobs, outLayerName); - normAssert(outRef, outBlobs[0]); - - if (check2ndBlob) +public: + void runTorchNet(const String& prefix, String outLayerName = "", + bool check2ndBlob = false, bool isBinary = false, + double l1 = 0.0, double lInf = 0.0) { - Mat out2 = outBlobs[1]; - Mat ref2 = readTorchBlob(_tf(prefix + "_output_2" + suffix), isBinary); - normAssert(out2, ref2); - } -} + String suffix = (isBinary) ? 
".dat" : ".txt"; -typedef testing::TestWithParam Test_Torch_layers; + Mat inp, outRef; + ASSERT_NO_THROW( inp = readTorchBlob(_tf(prefix + "_input" + suffix), isBinary) ); + ASSERT_NO_THROW( outRef = readTorchBlob(_tf(prefix + "_output" + suffix), isBinary) ); + + checkBackend(backend, target, &inp, &outRef); + + Net net = readNetFromTorch(_tf(prefix + "_net" + suffix), isBinary); + ASSERT_FALSE(net.empty()); + + net.setPreferableBackend(backend); + net.setPreferableTarget(target); + + if (outLayerName.empty()) + outLayerName = net.getLayerNames().back(); + + net.setInput(inp); + std::vector outBlobs; + net.forward(outBlobs, outLayerName); + l1 = l1 ? l1 : default_l1; + lInf = lInf ? lInf : default_lInf; + normAssert(outRef, outBlobs[0], "", l1, lInf); + + if (check2ndBlob && backend != DNN_BACKEND_INFERENCE_ENGINE) + { + Mat out2 = outBlobs[1]; + Mat ref2 = readTorchBlob(_tf(prefix + "_output_2" + suffix), isBinary); + normAssert(out2, ref2, "", l1, lInf); + } + } +}; TEST_P(Test_Torch_layers, run_convolution) { - runTorchNet("net_conv", GetParam(), "", false, true); + if ((backend == DNN_BACKEND_INFERENCE_ENGINE && target != DNN_TARGET_CPU) || + (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16)) + throw SkipTestException(""); + runTorchNet("net_conv", "", false, true); } TEST_P(Test_Torch_layers, run_pool_max) { - runTorchNet("net_pool_max", GetParam(), "", true); + if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16) + throw SkipTestException(""); + runTorchNet("net_pool_max", "", true); } TEST_P(Test_Torch_layers, run_pool_ave) { - runTorchNet("net_pool_ave", GetParam()); + runTorchNet("net_pool_ave"); } TEST_P(Test_Torch_layers, run_reshape) { - int targetId = GetParam(); - runTorchNet("net_reshape", targetId); - runTorchNet("net_reshape_batch", targetId); - runTorchNet("net_reshape_single_sample", targetId); - runTorchNet("net_reshape_channels", targetId, "", false, true); + runTorchNet("net_reshape"); + 
runTorchNet("net_reshape_batch"); + runTorchNet("net_reshape_channels", "", false, true); +} + +TEST_P(Test_Torch_layers, run_reshape_single_sample) +{ + if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_OPENCL_FP16) + throw SkipTestException(""); + runTorchNet("net_reshape_single_sample", "", false, false, + (target == DNN_TARGET_MYRIAD || target == DNN_TARGET_OPENCL_FP16) ? 0.0052 : 0.0); } TEST_P(Test_Torch_layers, run_linear) { - runTorchNet("net_linear_2d", GetParam()); + if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16) + throw SkipTestException(""); + runTorchNet("net_linear_2d"); } TEST_P(Test_Torch_layers, run_concat) { - int targetId = GetParam(); - runTorchNet("net_concat", targetId, "l5_torchMerge"); - runTorchNet("net_depth_concat", targetId, "", false, true); + runTorchNet("net_concat", "l5_torchMerge"); + runTorchNet("net_depth_concat", "", false, true, 0.0, + target == DNN_TARGET_OPENCL_FP16 ? 0.021 : 0.0); } TEST_P(Test_Torch_layers, run_deconv) { - runTorchNet("net_deconv", GetParam()); + runTorchNet("net_deconv"); } TEST_P(Test_Torch_layers, run_batch_norm) { - runTorchNet("net_batch_norm", GetParam(), "", false, true); + runTorchNet("net_batch_norm", "", false, true); } TEST_P(Test_Torch_layers, net_prelu) { - runTorchNet("net_prelu", GetParam()); + runTorchNet("net_prelu"); } TEST_P(Test_Torch_layers, net_cadd_table) { - runTorchNet("net_cadd_table", GetParam()); + runTorchNet("net_cadd_table"); } TEST_P(Test_Torch_layers, net_softmax) { - int targetId = GetParam(); - runTorchNet("net_softmax", targetId); - runTorchNet("net_softmax_spatial", targetId); + runTorchNet("net_softmax"); + runTorchNet("net_softmax_spatial"); } TEST_P(Test_Torch_layers, net_logsoftmax) @@ -173,40 +192,55 @@ TEST_P(Test_Torch_layers, net_logsoftmax) TEST_P(Test_Torch_layers, net_lp_pooling) { - int targetId = GetParam(); - runTorchNet("net_lp_pooling_square", targetId, "", false, true); - runTorchNet("net_lp_pooling_power", 
targetId, "", false, true); + runTorchNet("net_lp_pooling_square", "", false, true); + runTorchNet("net_lp_pooling_power", "", false, true); } TEST_P(Test_Torch_layers, net_conv_gemm_lrn) { - runTorchNet("net_conv_gemm_lrn", GetParam(), "", false, true); + if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD) + throw SkipTestException(""); + runTorchNet("net_conv_gemm_lrn", "", false, true, + target == DNN_TARGET_OPENCL_FP16 ? 0.046 : 0.0, + target == DNN_TARGET_OPENCL_FP16 ? 0.023 : 0.0); } TEST_P(Test_Torch_layers, net_inception_block) { - runTorchNet("net_inception_block", GetParam(), "", false, true); + runTorchNet("net_inception_block", "", false, true); } TEST_P(Test_Torch_layers, net_normalize) { - runTorchNet("net_normalize", GetParam(), "", false, true); + runTorchNet("net_normalize", "", false, true); } TEST_P(Test_Torch_layers, net_padding) { - int targetId = GetParam(); - runTorchNet("net_padding", targetId, "", false, true); - runTorchNet("net_spatial_zero_padding", targetId, "", false, true); - runTorchNet("net_spatial_reflection_padding", targetId, "", false, true); + runTorchNet("net_padding", "", false, true); + runTorchNet("net_spatial_zero_padding", "", false, true); + runTorchNet("net_spatial_reflection_padding", "", false, true); } TEST_P(Test_Torch_layers, net_non_spatial) { - runTorchNet("net_non_spatial", GetParam(), "", false, true); + if (backend == DNN_BACKEND_INFERENCE_ENGINE && + (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16)) + throw SkipTestException(""); + runTorchNet("net_non_spatial", "", false, true); } -INSTANTIATE_TEST_CASE_P(/**/, Test_Torch_layers, availableDnnTargets()); +TEST_P(Test_Torch_layers, run_paralel) +{ + if (backend != DNN_BACKEND_OPENCV || target != DNN_TARGET_CPU) + throw SkipTestException(""); + runTorchNet("net_parallel", "l5_torchMerge"); +} + +TEST_P(Test_Torch_layers, net_residual) +{ + runTorchNet("net_residual", "", false, true); +} typedef testing::TestWithParam 
Test_Torch_nets; @@ -313,21 +347,6 @@ TEST_P(Test_Torch_nets, FastNeuralStyle_accuracy) INSTANTIATE_TEST_CASE_P(/**/, Test_Torch_nets, availableDnnTargets()); -// TODO: fix OpenCL and add to the rest of tests -TEST(Torch_Importer, run_paralel) -{ - runTorchNet("net_parallel", DNN_TARGET_CPU, "l5_torchMerge"); -} - -TEST(Torch_Importer, DISABLED_run_paralel) -{ - runTorchNet("net_parallel", DNN_TARGET_OPENCL, "l5_torchMerge"); -} - -TEST(Torch_Importer, net_residual) -{ - runTorchNet("net_residual", DNN_TARGET_CPU, "", false, true); -} // Test a custom layer // https://github.com/torch/nn/blob/master/doc/convolution.md#nn.SpatialUpSamplingNearest @@ -374,17 +393,29 @@ public: } } - virtual void forward(InputArrayOfArrays, OutputArrayOfArrays, OutputArrayOfArrays) CV_OVERRIDE {} - private: int scale; }; -TEST(Torch_Importer, upsampling_nearest) +TEST_P(Test_Torch_layers, upsampling_nearest) { + // Test a custom layer. CV_DNN_REGISTER_LAYER_CLASS(SpatialUpSamplingNearest, SpatialUpSamplingNearestLayer); - runTorchNet("net_spatial_upsampling_nearest", DNN_TARGET_CPU, "", false, true); + try + { + runTorchNet("net_spatial_upsampling_nearest", "", false, true); + } + catch (...) + { + LayerFactory::unregisterLayer("SpatialUpSamplingNearest"); + throw; + } LayerFactory::unregisterLayer("SpatialUpSamplingNearest"); + + // Test an implemented layer. 
+ runTorchNet("net_spatial_upsampling_nearest", "", false, true); } +INSTANTIATE_TEST_CASE_P(/**/, Test_Torch_layers, dnnBackendsAndTargets()); + } diff --git a/samples/dnn/object_detection.py b/samples/dnn/object_detection.py index 386e02890d..329c349e49 100644 --- a/samples/dnn/object_detection.py +++ b/samples/dnn/object_detection.py @@ -190,7 +190,7 @@ while cv.waitKey(1) < 0: net.setInput(blob) if net.getLayer(0).outputNameToIndex('im_info') != -1: # Faster-RCNN or R-FCN frame = cv.resize(frame, (inpWidth, inpHeight)) - net.setInput(np.array([inpHeight, inpWidth, 1.6], dtype=np.float32), 'im_info') + net.setInput(np.array([[inpHeight, inpWidth, 1.6]], dtype=np.float32), 'im_info') outs = net.forward(getOutputsNames(net)) postprocess(frame, outs)