diff --git a/modules/dnn/misc/python/test/test_dnn.py b/modules/dnn/misc/python/test/test_dnn.py index 5c91aae56f..a06c02ad2d 100644 --- a/modules/dnn/misc/python/test/test_dnn.py +++ b/modules/dnn/misc/python/test/test_dnn.py @@ -191,10 +191,10 @@ class dnn_test(NewOpenCVTests): def test_model(self): img_path = self.find_dnn_file("dnn/street.png") - weights = self.find_dnn_file("dnn/MobileNetSSD_deploy.caffemodel", required=False) - config = self.find_dnn_file("dnn/MobileNetSSD_deploy.prototxt", required=False) + weights = self.find_dnn_file("dnn/MobileNetSSD_deploy_19e3ec3.caffemodel", required=False) + config = self.find_dnn_file("dnn/MobileNetSSD_deploy_19e3ec3.prototxt", required=False) if weights is None or config is None: - raise unittest.SkipTest("Missing DNN test files (dnn/MobileNetSSD_deploy.{prototxt/caffemodel}). Verify OPENCV_DNN_TEST_DATA_PATH configuration parameter.") + raise unittest.SkipTest("Missing DNN test files (dnn/MobileNetSSD_deploy_19e3ec3.{prototxt/caffemodel}). 
Verify OPENCV_DNN_TEST_DATA_PATH configuration parameter.") frame = cv.imread(img_path) model = cv.dnn_DetectionModel(weights, config) diff --git a/modules/dnn/perf/perf_caffe.cpp b/modules/dnn/perf/perf_caffe.cpp index 370f06dba2..f1ba26afcc 100644 --- a/modules/dnn/perf/perf_caffe.cpp +++ b/modules/dnn/perf/perf_caffe.cpp @@ -101,8 +101,8 @@ PERF_TEST(SqueezeNet_v1_1_caffe, CaffePerfTest) PERF_TEST(MobileNet_SSD, CaffePerfTest) { - caffe::Net<float>* net = initNet("dnn/MobileNetSSD_deploy.prototxt", - "dnn/MobileNetSSD_deploy.caffemodel"); + caffe::Net<float>* net = initNet("dnn/MobileNetSSD_deploy_19e3ec3.prototxt", + "dnn/MobileNetSSD_deploy_19e3ec3.caffemodel"); TEST_CYCLE() net->Forward(); SANITY_CHECK_NOTHING(); } diff --git a/modules/dnn/perf/perf_net.cpp b/modules/dnn/perf/perf_net.cpp index cfbb45b173..7f852e8f7b 100644 --- a/modules/dnn/perf/perf_net.cpp +++ b/modules/dnn/perf/perf_net.cpp @@ -141,7 +141,7 @@ PERF_TEST_P_(DNNTestNetwork, MobileNet_SSD_Caffe) { if (backend == DNN_BACKEND_HALIDE) throw SkipTestException(""); - processNet("dnn/MobileNetSSD_deploy.caffemodel", "dnn/MobileNetSSD_deploy.prototxt", "", + processNet("dnn/MobileNetSSD_deploy_19e3ec3.caffemodel", "dnn/MobileNetSSD_deploy_19e3ec3.prototxt", "", Mat(cv::Size(300, 300), CV_32FC3)); } diff --git a/modules/dnn/src/layers/convolution_layer.cpp b/modules/dnn/src/layers/convolution_layer.cpp index 0ed2bb7feb..0488dc462d 100644 --- a/modules/dnn/src/layers/convolution_layer.cpp +++ b/modules/dnn/src/layers/convolution_layer.cpp @@ -1069,7 +1069,7 @@ public: config.pads = pads; config.stride = stride; config.dilation = dilation; - if (inputs[0].dims != 4 && inputs[0].dims != umat_blobs[0].dims) + if (inputs[0].dims != 4 && inputs[0].dims != (blobs.empty() ? 
umat_blobs[0].dims : blobs[0].dims)) { static bool bypassCheck = utils::getConfigurationParameterBool("OPENCV_OCL4DNN_CONVOLUTION_IGNORE_INPUT_DIMS_4_CHECK", false); if (!bypassCheck) @@ -1081,7 +1081,7 @@ public: return false; } } - config.group = inputs[0].size[1] / umat_blobs[0].size[1]; + config.group = inputs[0].size[1] / (blobs.empty() ? umat_blobs[0].size[1] : blobs[0].size[1]); if (config.group < 1) // config.group == 0 causes div by zero in ocl4dnn code { CV_LOG_WARNING(NULL, "DNN/OpenCL: Unsupported config.group=" << config.group diff --git a/modules/dnn/src/opencl/gemm_buffer.cl b/modules/dnn/src/opencl/gemm_buffer.cl index b345983aee..70028b0eec 100644 --- a/modules/dnn/src/opencl/gemm_buffer.cl +++ b/modules/dnn/src/opencl/gemm_buffer.cl @@ -453,14 +453,14 @@ __kernel void TEMPLATE(gemm_buffer_NT, Dtype)( int w; for(int b_tile = 0; b_tile < K; b_tile += SLM_BLOCK) { barrier(CLK_LOCAL_MEM_FENCE); - vstore4(vload4(0, (__global float *)(src1_read0 + mad24(0, K, local_index))), 0, (__local float *)(slm_brow + mad24(0, SLM_BLOCK, local_index))); - vstore4(vload4(0, (__global float *)(src1_read0 + mad24(1, K, local_index))), 0, (__local float *)(slm_brow + mad24(1, SLM_BLOCK, local_index))); - vstore4(vload4(0, (__global float *)(src1_read0 + mad24(2, K, local_index))), 0, (__local float *)(slm_brow + mad24(2, SLM_BLOCK, local_index))); - vstore4(vload4(0, (__global float *)(src1_read0 + mad24(3, K, local_index))), 0, (__local float *)(slm_brow + mad24(3, SLM_BLOCK, local_index))); - vstore4(vload4(0, (__global float *)(src1_read0 + mad24(4, K, local_index))), 0, (__local float *)(slm_brow + mad24(4, SLM_BLOCK, local_index))); - vstore4(vload4(0, (__global float *)(src1_read0 + mad24(5, K, local_index))), 0, (__local float *)(slm_brow + mad24(5, SLM_BLOCK, local_index))); - vstore4(vload4(0, (__global float *)(src1_read0 + mad24(6, K, local_index))), 0, (__local float *)(slm_brow + mad24(6, SLM_BLOCK, local_index))); - vstore4(vload4(0, (__global float 
*)(src1_read0 + mad24(7, K, local_index))), 0, (__local float *)(slm_brow + mad24(7, SLM_BLOCK, local_index))); + vstore8(vload8(0, src1_read0 + mad24(0, K, local_index)), 0, slm_brow + mad24(0, SLM_BLOCK, local_index)); + vstore8(vload8(0, src1_read0 + mad24(1, K, local_index)), 0, slm_brow + mad24(1, SLM_BLOCK, local_index)); + vstore8(vload8(0, src1_read0 + mad24(2, K, local_index)), 0, slm_brow + mad24(2, SLM_BLOCK, local_index)); + vstore8(vload8(0, src1_read0 + mad24(3, K, local_index)), 0, slm_brow + mad24(3, SLM_BLOCK, local_index)); + vstore8(vload8(0, src1_read0 + mad24(4, K, local_index)), 0, slm_brow + mad24(4, SLM_BLOCK, local_index)); + vstore8(vload8(0, src1_read0 + mad24(5, K, local_index)), 0, slm_brow + mad24(5, SLM_BLOCK, local_index)); + vstore8(vload8(0, src1_read0 + mad24(6, K, local_index)), 0, slm_brow + mad24(6, SLM_BLOCK, local_index)); + vstore8(vload8(0, src1_read0 + mad24(7, K, local_index)), 0, slm_brow + mad24(7, SLM_BLOCK, local_index)); barrier(CLK_LOCAL_MEM_FENCE); slm_brow0 = slm_brow + local_x * (TILE_K / 8); @@ -469,17 +469,17 @@ __kernel void TEMPLATE(gemm_buffer_NT, Dtype)( while( w + TILE_K <= end_w ) { Dtype8 arow; - brow0 = as_half8(vload4(0, (__local float *)(slm_brow0 + 0 * SLM_BLOCK))); - brow1 = as_half8(vload4(0, (__local float *)(slm_brow0 + 1 * SLM_BLOCK))); - brow2 = as_half8(vload4(0, (__local float *)(slm_brow0 + 2 * SLM_BLOCK))); - brow3 = as_half8(vload4(0, (__local float *)(slm_brow0 + 3 * SLM_BLOCK))); - brow4 = as_half8(vload4(0, (__local float *)(slm_brow0 + 4 * SLM_BLOCK))); - brow5 = as_half8(vload4(0, (__local float *)(slm_brow0 + 5 * SLM_BLOCK))); - brow6 = as_half8(vload4(0, (__local float *)(slm_brow0 + 6 * SLM_BLOCK))); - brow7 = as_half8(vload4(0, (__local float *)(slm_brow0 + 7 * SLM_BLOCK))); + brow0 = vload8(0, slm_brow0 + 0 * SLM_BLOCK); + brow1 = vload8(0, slm_brow0 + 1 * SLM_BLOCK); + brow2 = vload8(0, slm_brow0 + 2 * SLM_BLOCK); + brow3 = vload8(0, slm_brow0 + 3 * SLM_BLOCK); + brow4 = 
vload8(0, slm_brow0 + 4 * SLM_BLOCK); + brow5 = vload8(0, slm_brow0 + 5 * SLM_BLOCK); + brow6 = vload8(0, slm_brow0 + 6 * SLM_BLOCK); + brow7 = vload8(0, slm_brow0 + 7 * SLM_BLOCK); #define MM_DOT_PRODUCT( _row, _dot ) \ - arow = as_half8(vload4(0, (__global float *)(src0_read + _row * K))); \ + arow = vload8(0, src0_read + _row * K); \ _dot = mad( (Dtype8)(arow.s0), (Dtype8)(brow0.s0, brow1.s0, brow2.s0, brow3.s0, brow4.s0, brow5.s0, brow6.s0, brow7.s0), _dot ); \ _dot = mad( (Dtype8)(arow.s1), (Dtype8)(brow0.s1, brow1.s1, brow2.s1, brow3.s1, brow4.s1, brow5.s1, brow6.s1, brow7.s1), _dot ); \ _dot = mad( (Dtype8)(arow.s2), (Dtype8)(brow0.s2, brow1.s2, brow2.s2, brow3.s2, brow4.s2, brow5.s2, brow6.s2, brow7.s2), _dot ); \ @@ -510,7 +510,7 @@ __kernel void TEMPLATE(gemm_buffer_NT, Dtype)( Dtype8 arow; #define READ_BROW(_brow, _row) \ - _brow = as_half8(vload4(0, (__local float *)(slm_brow0 + _row * SLM_BLOCK))); \ + _brow = vload8(0, slm_brow0 + _row * SLM_BLOCK); \ _brow.s0 = (mad24(local_x, 8, w) < K) ? _brow.s0 : 0.0f; \ _brow.s1 = (mad24(local_x, 8, w + 1) < K) ? _brow.s1 : 0.0f; \ _brow.s2 = (mad24(local_x, 8, w + 2) < K) ? _brow.s2 : 0.0f; \ @@ -532,7 +532,7 @@ __kernel void TEMPLATE(gemm_buffer_NT, Dtype)( #undef READ_BROW #define MM_DOT_PRODUCT( _row, _dot ) \ - arow = as_half8(vload4(0, (__global float *)(src0_read + _row * K))); \ + arow = vload8(0, src0_read + _row * K); \ arow.s0 = (mad24(local_x, 8, w) < K) ? arow.s0 : 0.0f; \ arow.s1 = (mad24(local_x, 8, w + 1) < K) ? arow.s1 : 0.0f; \ arow.s2 = (mad24(local_x, 8, w + 2) < K) ? arow.s2 : 0.0f; \ diff --git a/modules/dnn/test/test_backends.cpp b/modules/dnn/test/test_backends.cpp index da666ace01..9570355b4f 100644 --- a/modules/dnn/test/test_backends.cpp +++ b/modules/dnn/test/test_backends.cpp @@ -194,7 +194,7 @@ TEST_P(DNNTestNetwork, MobileNet_SSD_Caffe) float scoreDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD || target == DNN_TARGET_CPU_FP16) ? 
1.5e-2 : 0.0; float iouDiff = (target == DNN_TARGET_MYRIAD) ? 0.063 : 0.0; float detectionConfThresh = (target == DNN_TARGET_MYRIAD) ? 0.262 : FLT_MIN; - processNet("dnn/MobileNetSSD_deploy.caffemodel", "dnn/MobileNetSSD_deploy.prototxt", + processNet("dnn/MobileNetSSD_deploy_19e3ec3.caffemodel", "dnn/MobileNetSSD_deploy_19e3ec3.prototxt", inp, "detection_out", "", scoreDiff, iouDiff, detectionConfThresh); expectNoFallbacksFromIE(net); } @@ -237,7 +237,7 @@ TEST_P(DNNTestNetwork, MobileNet_SSD_Caffe_Different_Width_Height) scoreDiff = 0.03; iouDiff = 0.08; } - processNet("dnn/MobileNetSSD_deploy.caffemodel", "dnn/MobileNetSSD_deploy.prototxt", + processNet("dnn/MobileNetSSD_deploy_19e3ec3.caffemodel", "dnn/MobileNetSSD_deploy_19e3ec3.prototxt", inp, "detection_out", "", scoreDiff, iouDiff); expectNoFallbacksFromIE(net); } diff --git a/modules/dnn/test/test_caffe_importer.cpp b/modules/dnn/test/test_caffe_importer.cpp index 708e353aac..3f5458a873 100644 --- a/modules/dnn/test/test_caffe_importer.cpp +++ b/modules/dnn/test/test_caffe_importer.cpp @@ -290,8 +290,8 @@ TEST(Reproducibility_SSD, Accuracy) typedef testing::TestWithParam<tuple<Backend, Target> > Reproducibility_MobileNet_SSD; TEST_P(Reproducibility_MobileNet_SSD, Accuracy) { - const string proto = findDataFile("dnn/MobileNetSSD_deploy.prototxt", false); - const string model = findDataFile("dnn/MobileNetSSD_deploy.caffemodel", false); + const string proto = findDataFile("dnn/MobileNetSSD_deploy_19e3ec3.prototxt", false); + const string model = findDataFile("dnn/MobileNetSSD_deploy_19e3ec3.caffemodel", false); Net net = readNetFromCaffe(proto, model); int backendId = get<0>(GetParam()); int targetId = get<1>(GetParam()); diff --git a/modules/dnn/test/test_halide_layers.cpp b/modules/dnn/test/test_halide_layers.cpp index d8a16d3efa..3629f720fb 100644 --- a/modules/dnn/test/test_halide_layers.cpp +++ b/modules/dnn/test/test_halide_layers.cpp @@ -407,15 +407,16 @@ INSTANTIATE_TEST_CASE_P(Layer_Test_Halide, MaxPooling, Combine( 
//////////////////////////////////////////////////////////////////////////////// // Fully-connected //////////////////////////////////////////////////////////////////////////////// -typedef TestWithParam<tuple<int, Size, int, bool, tuple<Backend, Target> > > FullyConnected; +typedef TestWithParam<tuple<int, int, Size, int, bool, tuple<Backend, Target> > > FullyConnected; TEST_P(FullyConnected, Accuracy) { - int inChannels = get<0>(GetParam()); - Size inSize = get<1>(GetParam()); - int outChannels = get<2>(GetParam()); - bool hasBias = get<3>(GetParam()); - Backend backendId = get<0>(get<4>(GetParam())); - Target targetId = get<1>(get<4>(GetParam())); + int batch = get<0>(GetParam()); + int inChannels = get<1>(GetParam()); + Size inSize = get<2>(GetParam()); + int outChannels = get<3>(GetParam()); + bool hasBias = get<4>(GetParam()); + Backend backendId = get<0>(get<5>(GetParam())); + Target targetId = get<1>(get<5>(GetParam())); #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2021040000) if ((backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && (targetId == DNN_TARGET_OPENCL_FP16 || @@ -439,7 +440,7 @@ TEST_P(FullyConnected, Accuracy) lp.type = "InnerProduct"; lp.name = "testLayer"; - int sz[] = {1, inChannels, inSize.height, inSize.width}; + int sz[] = {batch, inChannels, inSize.height, inSize.width}; Mat input(4, &sz[0], CV_32F); double l1 = 0.0; @@ -467,6 +468,7 @@ TEST_P(FullyConnected, Accuracy) } INSTANTIATE_TEST_CASE_P(Layer_Test_Halide, FullyConnected, Combine( +/*batch*/ Values(1, 2, 4, 8, 16), /*in channels*/ Values(3, 4), /*in size*/ Values(Size(5, 4), Size(4, 5), Size(1, 1)), /*out channels*/ Values(3, 4), diff --git a/modules/dnn/test/test_int8_layers.cpp b/modules/dnn/test/test_int8_layers.cpp index 8b3cd01f29..caba112516 100644 --- a/modules/dnn/test/test_int8_layers.cpp +++ b/modules/dnn/test/test_int8_layers.cpp @@ -878,14 +878,14 @@ TEST_P(Test_Int8_nets, MobileNet_SSD) if (target == DNN_TARGET_OPENCL && !ocl::Device::getDefault().isIntel()) 
applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL); - Net net = readNetFromCaffe(findDataFile("dnn/MobileNetSSD_deploy.prototxt", false), - findDataFile("dnn/MobileNetSSD_deploy.caffemodel", false)); + Net net = readNetFromCaffe(findDataFile("dnn/MobileNetSSD_deploy_19e3ec3.prototxt", false), + findDataFile("dnn/MobileNetSSD_deploy_19e3ec3.caffemodel", false)); Mat inp = imread(_tf("street.png")); Mat blob = blobFromImage(inp, 1.0 / 127.5, Size(300, 300), Scalar(127.5, 127.5, 127.5), false); Mat ref = blobFromNPY(_tf("mobilenet_ssd_caffe_out.npy")); - float confThreshold = FLT_MIN, scoreDiff = 0.059, iouDiff = 0.11; + float confThreshold = FLT_MIN, scoreDiff = 0.084, iouDiff = 0.43; testDetectionNet(net, blob, ref, confThreshold, scoreDiff, iouDiff); } diff --git a/modules/dnn/test/test_model.cpp b/modules/dnn/test/test_model.cpp index a19923bf28..59b51c4bc0 100644 --- a/modules/dnn/test/test_model.cpp +++ b/modules/dnn/test/test_model.cpp @@ -490,8 +490,8 @@ TEST_P(Test_Model, DetectionMobilenetSSD) refBoxes.emplace_back(left, top, width, height); } - std::string weights_file = _tf("MobileNetSSD_deploy.caffemodel", false); - std::string config_file = _tf("MobileNetSSD_deploy.prototxt"); + std::string weights_file = _tf("MobileNetSSD_deploy_19e3ec3.caffemodel", false); + std::string config_file = _tf("MobileNetSSD_deploy_19e3ec3.prototxt"); Scalar mean = Scalar(127.5, 127.5, 127.5); double scale = 1.0 / 127.5; @@ -511,7 +511,7 @@ TEST_P(Test_Model, DetectionMobilenetSSD) } else if (target == DNN_TARGET_CUDA_FP16) { - scoreDiff = 0.0021; + scoreDiff = 0.0028; iouDiff = 1e-2; } float confThreshold = FLT_MIN; @@ -595,8 +595,8 @@ TEST_P(Test_Model, Detection_normalized) std::vector<float> refConfidences = {0.999222f}; std::vector<Rect2d> refBoxes = {Rect2d(0, 4, 227, 222)}; - std::string weights_file = _tf("MobileNetSSD_deploy.caffemodel", false); - std::string config_file = _tf("MobileNetSSD_deploy.prototxt"); + std::string weights_file = _tf("MobileNetSSD_deploy_19e3ec3.caffemodel", false); 
+ std::string config_file = _tf("MobileNetSSD_deploy_19e3ec3.prototxt"); Scalar mean = Scalar(127.5, 127.5, 127.5); double scale = 1.0 / 127.5;