mirror of
https://github.com/opencv/opencv.git
synced 2025-01-22 01:13:11 +08:00
5229312ad2
DNN: FP16 support on Convolution 2D #22275 ## FP16 support on ARM platform This PR proposes an FP16 backend for Convolution. For now, FP16 is only supported on ARM aarch64. In addition to FP16, this patch also adds the `seperateIm2col` optimization. ## How to use FP16 to speed up convolution? ``` Net net = readNet(modelPath); net.setPreferableTarget(DNN_TARGET_CPU_FP16); net.setInput(blob); Mat output = net.forward(); ``` ### TODO List | Task | Status | Remarks | |:-------:|:--------:|:------------:| | Convolution 2D FP16 | ✔️ | Done | | Winograd FP16 | | Because the current modification has reached 2k lines, Winograd FP16 will be completed in the next PR. | | Accuracy Test | ✔️ | Done | | Performance Test | ✔️ | Done | | Compiler bug | ✔️ | Done | ### Speed Test for FP16. **Test on M1 chip, 4 threads.** | Model Name | FP32 (Conv+Wino) | Conv(FP16) + Wino(FP32) | |:-------:|:--------:|:------------:| | ResNet 50 | 26.0 ms | **18.05 ms (25% speed up)** | | MobileNet V2 | 4.17 ms | **3.09 ms (29% speed up)** | ### Speed Test for `seperateIm2col` trick on X86. 
**Test on AMD 5600X, 12 threads.** | Model Name | 4.x | Patch | |:-------:|:--------:|:------------:| | MobileNet V2 | 5.6 ms | **3.0 ms (46% speed up)** | ### Performance Test #### Performance Test of X86 platform: AMD 5600X, with `--perf_threads=1` |Name of Test|4.x|patch|patch vs 4.x (x-factor)| |---|:-:|:-:|:-:| |Name of Test|4.x 0|fp16pr final|fp16pr final vs 4.x 0 (x-factor)| |---|:-:|:-:|:-:| |conv1d::Conv1D::(GFLOPS=0.000, K=[3], IN={1, 2, 19}, OCN=2, G=2, S=2, P=(1, 1), BIAS, OCV/CPU)|0.001|0.001|1.00| |conv1d::Conv1D::(GFLOPS=0.000, K=[3], IN={1, 2, 25}, OCN=2, G=2, P=(2, 2), PM=SAME, OCV/CPU)|0.001|0.001|1.03| |conv1d::Conv1D::(GFLOPS=0.000, K=[3], IN={1, 6, 10}, OCN=6, PM=VALID, BIAS, OCV/CPU)|0.001|0.001|0.92| |conv3d::Conv3D::(GFLOPS=0.000, K=[1 x 1 x 1], IN={1, 4, 9, 10, 10}, OCN=4, S=[1 x 1 x 2], P=(1, 1) x (1, 1) x (1, 1), PM=VALID, OCV/CPU)|0.002|0.003|0.95| |conv3d::Conv3D::(GFLOPS=0.000, K=[1 x 1 x 1], IN={1, 8, 1, 10, 10}, OCN=8, G=8, P=(1, 1) x (1, 1) x (1, 1), BIAS, OCV/CPU)|0.006|0.006|1.00| |conv3d::Conv3D::(GFLOPS=0.000, K=[3 x 3 x 3], IN={1, 2, 19, 19, 19}, OCN=2, G=2, S=[2 x 2 x 2], P=(1, 1) x (1, 1) x (1, 1), BIAS, OCV/CPU)|0.045|0.033|1.39| |conv3d::Conv3D::(GFLOPS=0.000, K=[3 x 4 x 2], IN={1, 4, 8, 10, 10}, OCN=4, G=4, S=[1 x 2 x 1], BIAS, OCV/CPU)|0.011|0.009|1.17| |conv3d::Conv3D::(GFLOPS=0.001, K=[3 x 3 x 3], IN={1, 2, 25, 19, 19}, OCN=2, G=2, S=[1 x 2 x 2], P=(2, 2) x (2, 2) x (2, 2), PM=SAME, OCV/CPU)|0.109|0.078|1.39| |conv3d::Conv3D::(GFLOPS=0.002, K=[3 x 1 x 4], IN={1, 14, 5, 10, 10}, OCN=14, PM=SAME, OCV/CPU)|0.040|0.042|0.94| |conv3d::Conv3D::(GFLOPS=0.006, K=[5 x 5 x 5], IN={1, 4, 50, 19, 19}, OCN=4, S=[2 x 2 x 2], P=(1, 1) x (1, 1) x (1, 1), PM=VALID, OCV/CPU)|0.326|0.342|0.95| |conv3d::Conv3D::(GFLOPS=0.027, K=[3 x 3 x 3], IN={1, 6, 10, 38, 50}, OCN=6, PM=VALID, BIAS, OCV/CPU)|0.580|0.589|0.99| |conv3d::Conv3D::(GFLOPS=0.030, K=[5 x 5 x 5], IN={1, 6, 19, 19, 19}, OCN=6, G=2, OCV/CPU)|1.293|1.382|0.94| 
|conv3d::Conv3D::(GFLOPS=0.045, K=[7 x 7 x 7], IN={1, 2, 38, 38, 38}, OCN=2, S=[1 x 2 x 1], OCV/CPU)|3.590|3.710|0.97| |conv3d::Conv3D::(GFLOPS=0.053, K=[3 x 3 x 3], IN={1, 10, 98, 10, 10}, OCN=10, PM=SAME, OCV/CPU)|1.120|1.191|0.94| |conv3d::Conv3D::(GFLOPS=0.071, K=[7 x 7 x 7], IN={1, 6, 15, 19, 19}, OCN=6, S=[2 x 1 x 1], P=(3, 3) x (3, 3) x (3, 3), PM=SAME, BIAS, OCV/CPU)|2.576|2.872|0.90| |conv3d::Conv3D::(GFLOPS=0.093, K=[5 x 5 x 5], IN={1, 4, 40, 75, 75}, OCN=4, S=[2 x 2 x 2], OCV/CPU)|4.599|4.670|0.98| |conv3d::Conv3D::(GFLOPS=0.116, K=[5 x 5 x 5], IN={1, 2, 21, 75, 100}, OCN=2, BIAS, OCV/CPU)|9.230|9.582|0.96| |conv3d::Conv3D::(GFLOPS=1.267, K=[5 x 5 x 5], IN={1, 3, 75, 75, 100}, OCN=3, PM=SAME, BIAS, OCV/CPU)|65.946|69.381|0.95| |conv3d::Conv3D::(GFLOPS=1.343, K=[3 x 3 x 3], IN={1, 11, 9, 150, 200}, OCN=11, PM=VALID, BIAS, OCV/CPU)|18.915|19.289|0.98| |conv::Conv::(GFLOPS=0.177, K=[1 x 1], IN={1, 512, 26, 26}, OCN=256, OCV/CPU)|1.404|1.457|0.96| |conv::Conv::(GFLOPS=0.177, K=[1 x 1], IN={1, 1024, 13, 13}, OCN=512, OCV/CPU)|2.060|1.501|1.37| |conv::Conv::(GFLOPS=0.178, K=[1 x 1], IN={1, 256, 52, 52}, OCN=128, OCV/CPU)|1.409|1.464|0.96| |conv::Conv::(GFLOPS=0.210, K=[1 x 1], IN={1, 576, 38, 50}, OCN=96, PM=SAME, BIAS, OCV/CPU)|1.793|1.838|0.98| |conv::Conv::(GFLOPS=0.231, K=[3 x 3], IN={1, 128, 56, 56}, OCN=32, P=[1 x 1], OCV/CPU)|1.207|1.199|1.01| |conv::Conv::(GFLOPS=0.231, K=[3 x 3], IN={1, 256, 14, 14}, OCN=256, P=[1 x 1], OCV/CPU)|1.277|1.275|1.00| |conv::Conv::(GFLOPS=0.280, K=[1 x 1], IN={1, 576, 38, 50}, OCN=128, PM=SAME, BIAS, OCV/CPU)|2.319|2.370|0.98| |conv::Conv::(GFLOPS=0.302, K=[3 x 3], IN={1, 64, 64, 64}, OCN=64, PM=SAME, OCV/CPU)|1.351|1.346|1.00| |conv::Conv::(GFLOPS=0.357, K=[1 x 1], IN={1, 64, 208, 208}, OCN=64, OCV/CPU)|3.520|3.612|0.97| |conv::Conv::(GFLOPS=0.420, K=[3 x 3], IN={1, 96, 38, 50}, OCN=128, PM=SAME, BIAS, OCV/CPU)|1.876|1.880|1.00| |conv::Conv::(GFLOPS=0.472, K=[3 x 3], IN={1, 128, 40, 40}, OCN=128, PM=SAME, 
OCV/CPU)|1.981|1.995|0.99| |conv::Conv::(GFLOPS=0.472, K=[3 x 3], IN={1, 256, 20, 20}, OCN=256, PM=SAME, OCV/CPU)|2.620|2.627|1.00| |conv::Conv::(GFLOPS=0.472, K=[3 x 3], IN={1, 512, 10, 10}, OCN=512, PM=SAME, OCV/CPU)|4.202|4.123|1.02| |conv::Conv::(GFLOPS=0.561, K=[3 x 3], IN={1, 128, 38, 50}, OCN=128, PM=SAME, BIAS, OCV/CPU)|2.429|2.445|0.99| |conv::Conv::(GFLOPS=0.624, K=[3 x 3], IN={1, 128, 46, 46}, OCN=128, P=[1 x 1], BIAS, OCV/CPU)|2.591|2.576|1.01| |conv::Conv::(GFLOPS=0.701, K=[3 x 3], IN={1, 128, 38, 50}, OCN=160, PM=SAME, BIAS, OCV/CPU)|3.005|2.998|1.00| |conv::Conv::(GFLOPS=0.798, K=[3 x 3], IN={1, 64, 104, 104}, OCN=64, P=[1 x 1], OCV/CPU)|3.515|3.532|1.00| |conv::Conv::(GFLOPS=0.798, K=[3 x 3], IN={1, 128, 52, 52}, OCN=128, P=[1 x 1], OCV/CPU)|3.115|3.134|0.99| |conv::Conv::(GFLOPS=0.798, K=[3 x 3], IN={1, 256, 26, 26}, OCN=256, P=[1 x 1], OCV/CPU)|3.937|3.899|1.01| |conv::Conv::(GFLOPS=0.798, K=[3 x 3], IN={1, 512, 13, 13}, OCN=512, P=[1 x 1], OCV/CPU)|5.533|5.471|1.01| |conv::Conv::(GFLOPS=0.830, K=[3 x 3], IN={1, 64, 75, 100}, OCN=96, PM=SAME, BIAS, OCV/CPU)|3.472|3.464|1.00| |conv::Conv::(GFLOPS=0.958, K=[3 x 3], IN={1, 192, 38, 38}, OCN=192, PM=SAME, OCV/CPU)|4.302|4.322|1.00| |conv::Conv::(GFLOPS=0.958, K=[3 x 3], IN={1, 384, 19, 19}, OCN=384, PM=SAME, OCV/CPU)|6.100|6.035|1.01| |conv::Conv::(GFLOPS=1.022, K=[3 x 3], IN={1, 576, 19, 19}, OCN=273, PM=SAME, BIAS, OCV/CPU)|6.580|6.484|1.01| |conv::Conv::(GFLOPS=1.112, K=[3 x 3], IN={1, 512, 10, 10}, OCN=1206, P=[1 x 1], BIAS, OCV/CPU)|9.741|9.634|1.01| |conv::Conv::(GFLOPS=1.181, K=[3 x 3], IN={1, 64, 160, 200}, OCN=128, S=[2 x 2], P=[1 x 1], BIAS, OCV/CPU)|10.131|10.156|1.00| |conv::Conv::(GFLOPS=1.182, K=[3 x 3], IN={1, 32, 320, 400}, OCN=64, S=[2 x 2], P=[1 x 1], BIAS, OCV/CPU)|12.391|12.350|1.00| |conv::Conv::(GFLOPS=1.195, K=[9 x 9], IN={1, 32, 240, 320}, OCN=3, P=[4 x 4], BIAS, OCV/CPU)|91.074|87.893|1.04| |conv::Conv::(GFLOPS=1.196, K=[3 x 3], IN={1, 384, 26, 26}, OCN=256, P=[1 x 1], 
OCV/CPU)|5.903|5.903|1.00| |conv::Conv::(GFLOPS=1.210, K=[3 x 3], IN={1, 32, 256, 256}, OCN=32, PM=SAME, OCV/CPU)|6.890|6.794|1.01| |conv::Conv::(GFLOPS=1.245, K=[3 x 3], IN={1, 64, 75, 75}, OCN=192, PM=SAME, BIAS, OCV/CPU)|5.160|5.131|1.01| |conv::Conv::(GFLOPS=1.245, K=[3 x 3], IN={1, 96, 75, 100}, OCN=96, PM=SAME, BIAS, OCV/CPU)|4.970|5.036|0.99| |conv::Conv::(GFLOPS=1.248, K=[3 x 3], IN={1, 256, 46, 46}, OCN=128, P=[1 x 1], BIAS, OCV/CPU)|5.045|5.015|1.01| |conv::Conv::(GFLOPS=1.258, K=[3 x 3], IN={1, 1280, 10, 10}, OCN=546, PM=SAME, BIAS, OCV/CPU)|11.583|11.343|1.02| |conv::Conv::(GFLOPS=1.261, K=[3 x 3], IN={1, 192, 38, 50}, OCN=192, PM=SAME, BIAS, OCV/CPU)|5.348|5.320|1.01| |conv::Conv::(GFLOPS=1.416, K=[3 x 3], IN={1, 128, 62, 82}, OCN=128, BIAS, OCV/CPU)|5.357|5.396|0.99| |conv::Conv::(GFLOPS=1.500, K=[3 x 3], IN={1, 128, 64, 84}, OCN=128, BIAS, OCV/CPU)|6.050|6.006|1.01| |conv::Conv::(GFLOPS=1.586, K=[3 x 3], IN={1, 128, 66, 86}, OCN=128, BIAS, OCV/CPU)|5.952|5.953|1.00| |conv::Conv::(GFLOPS=1.595, K=[3 x 3], IN={1, 256, 26, 26}, OCN=512, P=[1 x 1], OCV/CPU)|8.014|8.014|1.00| |conv::Conv::(GFLOPS=1.595, K=[3 x 3], IN={1, 256, 52, 52}, OCN=512, S=[2 x 2], P=[1 x 1], OCV/CPU)|12.472|12.577|0.99| |conv::Conv::(GFLOPS=1.595, K=[3 x 3], IN={1, 512, 13, 13}, OCN=1024, P=[1 x 1], OCV/CPU)|10.803|10.655|1.01| |conv::Conv::(GFLOPS=1.595, K=[3 x 3], IN={1, 512, 26, 26}, OCN=1024, S=[2 x 2], P=[1 x 1], OCV/CPU)|18.429|13.405|1.37| |conv::Conv::(GFLOPS=1.596, K=[3 x 3], IN={1, 64, 104, 104}, OCN=128, P=[1 x 1], OCV/CPU)|6.659|6.647|1.00| |conv::Conv::(GFLOPS=1.596, K=[3 x 3], IN={1, 64, 208, 208}, OCN=128, S=[2 x 2], P=[1 x 1], OCV/CPU)|14.192|13.819|1.03| |conv::Conv::(GFLOPS=1.596, K=[3 x 3], IN={1, 128, 52, 52}, OCN=256, P=[1 x 1], OCV/CPU)|6.045|6.068|1.00| |conv::Conv::(GFLOPS=1.596, K=[3 x 3], IN={1, 128, 104, 104}, OCN=256, S=[2 x 2], P=[1 x 1], OCV/CPU)|12.742|12.828|0.99| |conv::Conv::(GFLOPS=1.598, K=[3 x 3], IN={1, 32, 208, 208}, OCN=64, P=[1 x 1], 
OCV/CPU)|8.046|7.773|1.04| |conv::Conv::(GFLOPS=1.598, K=[3 x 3], IN={1, 32, 416, 416}, OCN=64, S=[2 x 2], P=[1 x 1], OCV/CPU)|17.440|17.192|1.01| |conv::Conv::(GFLOPS=1.659, K=[3 x 3], IN={1, 960, 10, 10}, OCN=960, PM=SAME, OCV/CPU)|15.418|14.972|1.03| |conv::Conv::(GFLOPS=1.660, K=[3 x 3], IN={1, 128, 75, 75}, OCN=128, G=128, P=[1 x 1], BIAS, OCV/CPU)|0.430|0.430|1.00| |conv::Conv::(GFLOPS=1.660, K=[3 x 3], IN={1, 128, 75, 75}, OCN=128, PM=SAME, OCV/CPU)|6.692|6.663|1.00| |conv::Conv::(GFLOPS=1.675, K=[3 x 3], IN={1, 128, 68, 88}, OCN=128, BIAS, OCV/CPU)|6.350|6.347|1.00| |conv::Conv::(GFLOPS=1.704, K=[3 x 3], IN={1, 256, 38, 38}, OCN=256, G=256, P=[1 x 1], BIAS, OCV/CPU)|0.267|0.265|1.01| |conv::Conv::(GFLOPS=1.704, K=[3 x 3], IN={1, 256, 38, 38}, OCN=256, PM=SAME, OCV/CPU)|7.755|7.558|1.03| |conv::Conv::(GFLOPS=1.704, K=[3 x 3], IN={1, 512, 19, 19}, OCN=512, G=512, P=[1 x 1], BIAS, OCV/CPU)|0.203|0.202|1.00| |conv::Conv::(GFLOPS=1.704, K=[3 x 3], IN={1, 512, 19, 19}, OCN=512, P=[1 x 1], BIAS, OCV/CPU)|10.663|10.576|1.01| |conv::Conv::(GFLOPS=1.704, K=[3 x 3], IN={1, 512, 19, 19}, OCN=512, PM=SAME, OCV/CPU)|10.827|10.614|1.02| |conv::Conv::(GFLOPS=1.766, K=[3 x 3], IN={1, 128, 70, 90}, OCN=128, BIAS, OCV/CPU)|7.049|6.947|1.01| |conv::Conv::(GFLOPS=1.859, K=[3 x 3], IN={1, 128, 72, 92}, OCN=128, BIAS, OCV/CPU)|6.900|6.901|1.00| |conv::Conv::(GFLOPS=1.888, K=[3 x 3], IN={1, 1024, 10, 10}, OCN=1024, G=1024, P=[1 x 1], BIAS, OCV/CPU)|0.165|0.165|1.00| |conv::Conv::(GFLOPS=1.888, K=[3 x 3], IN={1, 1024, 10, 10}, OCN=1024, PM=SAME, OCV/CPU)|17.953|17.251|1.04| |conv::Conv::(GFLOPS=1.954, K=[3 x 3], IN={1, 128, 74, 94}, OCN=128, BIAS, OCV/CPU)|7.430|7.320|1.01| |conv::Conv::(GFLOPS=1.995, K=[9 x 9], IN={1, 3, 320, 400}, OCN=32, P=[4 x 4], BIAS, OCV/CPU)|22.187|21.705|1.02| |conv::Conv::(GFLOPS=2.052, K=[3 x 3], IN={1, 128, 76, 96}, OCN=128, BIAS, OCV/CPU)|8.349|8.126|1.03| |conv::Conv::(GFLOPS=2.100, K=[3 x 3], IN={1, 144, 75, 75}, OCN=144, PM=SAME, 
OCV/CPU)|8.273|8.297|1.00| |conv::Conv::(GFLOPS=2.153, K=[3 x 3], IN={1, 128, 78, 98}, OCN=128, BIAS, OCV/CPU)|8.169|8.094|1.01| |conv::Conv::(GFLOPS=2.156, K=[3 x 3], IN={1, 576, 19, 19}, OCN=576, PM=SAME, OCV/CPU)|13.602|13.359|1.02| |conv::Conv::(GFLOPS=2.255, K=[3 x 3], IN={1, 128, 80, 100}, OCN=128, BIAS, OCV/CPU)|8.633|8.584|1.01| |conv::Conv::(GFLOPS=2.719, K=[3 x 3], IN={1, 96, 256, 256}, OCN=96, S=[2 x 2], PM=SAME, OCV/CPU)|29.339|28.897|1.02| |conv::Conv::(GFLOPS=3.319, K=[3 x 3], IN={1, 128, 75, 75}, OCN=256, P=[1 x 1], BIAS, OCV/CPU)|13.000|12.920|1.01| |conv::Conv::(GFLOPS=3.321, K=[3 x 3], IN={1, 64, 150, 150}, OCN=128, P=[1 x 1], BIAS, OCV/CPU)|14.262|13.319|1.07| |conv::Conv::(GFLOPS=3.398, K=[7 x 7], IN={1, 128, 46, 46}, OCN=128, P=[3 x 3], BIAS, OCV/CPU)|27.453|27.253|1.01| |conv::Conv::(GFLOPS=3.407, K=[3 x 3], IN={1, 512, 19, 19}, OCN=1024, D=[6 x 6], P=[6 x 6], BIAS, OCV/CPU)|32.052|27.269|1.18| |conv::Conv::(GFLOPS=3.408, K=[3 x 3], IN={1, 256, 38, 38}, OCN=512, P=[1 x 1], BIAS, OCV/CPU)|15.363|15.208|1.01| |conv::Conv::(GFLOPS=4.247, K=[3 x 3], IN={1, 480, 32, 32}, OCN=480, PM=SAME, OCV/CPU)|18.543|18.434|1.01| |conv::Conv::(GFLOPS=4.247, K=[5 x 5], IN={1, 144, 128, 128}, OCN=144, S=[2 x 2], PM=SAME, OCV/CPU)|39.114|37.954|1.03| |conv::Conv::(GFLOPS=4.566, K=[7 x 7], IN={1, 172, 46, 46}, OCN=128, P=[3 x 3], BIAS, OCV/CPU)|36.271|36.972|0.98| |conv::Conv::(GFLOPS=4.993, K=[3 x 3], IN={1, 256, 46, 46}, OCN=512, P=[1 x 1], BIAS, OCV/CPU)|19.262|19.427|0.99| |conv::Conv::(GFLOPS=4.993, K=[3 x 3], IN={1, 512, 46, 46}, OCN=256, P=[1 x 1], BIAS, OCV/CPU)|19.298|19.349|1.00| |conv::Conv::(GFLOPS=4.994, K=[3 x 3], IN={1, 128, 92, 92}, OCN=256, P=[1 x 1], BIAS, OCV/CPU)|20.261|19.847|1.02| |conv::Conv::(GFLOPS=4.997, K=[3 x 3], IN={1, 64, 184, 184}, OCN=128, P=[1 x 1], BIAS, OCV/CPU)|21.867|21.525|1.02| |conv::Conv::(GFLOPS=5.780, K=[5 x 5], IN={1, 672, 32, 32}, OCN=672, S=[2 x 2], PM=SAME, OCV/CPU)|51.756|49.979|1.04| |conv::Conv::(GFLOPS=6.116, K=[3 
x 3], IN={1, 1152, 16, 16}, OCN=1152, PM=SAME, OCV/CPU)|28.133|27.060|1.04| |conv::Conv::(GFLOPS=6.118, K=[3 x 3], IN={1, 144, 128, 128}, OCN=144, PM=SAME, OCV/CPU)|25.035|24.980|1.00| |conv::Conv::(GFLOPS=6.637, K=[3 x 3], IN={1, 256, 75, 75}, OCN=256, P=[1 x 1], BIAS, OCV/CPU)|25.858|25.821|1.00| |conv::Conv::(GFLOPS=6.638, K=[3 x 3], IN={1, 128, 150, 150}, OCN=128, P=[1 x 1], BIAS, OCV/CPU)|27.313|27.149|1.01| |conv::Conv::(GFLOPS=6.641, K=[3 x 3], IN={1, 64, 150, 200}, OCN=192, PM=SAME, BIAS, OCV/CPU)|28.219|28.111|1.00| |conv::Conv::(GFLOPS=6.641, K=[3 x 3], IN={1, 64, 300, 300}, OCN=64, P=[1 x 1], BIAS, OCV/CPU)|46.025|46.674|0.99| |conv::Conv::(GFLOPS=6.814, K=[3 x 3], IN={1, 512, 38, 38}, OCN=512, P=[1 x 1], BIAS, OCV/CPU)|30.220|29.446|1.03| |conv::Conv::(GFLOPS=8.025, K=[3 x 3], IN={1, 1024, 19, 19}, OCN=1206, P=[1 x 1], BIAS, OCV/CPU)|49.410|48.708|1.01| |conv::Conv::(GFLOPS=9.986, K=[3 x 3], IN={1, 512, 46, 46}, OCN=512, P=[1 x 1], BIAS, OCV/CPU)|38.203|38.001|1.01| |conv::Conv::(GFLOPS=9.987, K=[3 x 3], IN={1, 256, 92, 92}, OCN=256, P=[1 x 1], BIAS, OCV/CPU)|39.961|39.021|1.02| |conv::Conv::(GFLOPS=9.989, K=[3 x 3], IN={1, 128, 184, 184}, OCN=128, P=[1 x 1], BIAS, OCV/CPU)|48.685|47.075|1.03| |conv::Conv::(GFLOPS=9.993, K=[3 x 3], IN={1, 64, 368, 368}, OCN=64, P=[1 x 1], BIAS, OCV/CPU)|75.114|72.586|1.03| |conv::Conv::(GFLOPS=10.087, K=[3 x 3], IN={1, 576, 38, 50}, OCN=512, PM=SAME, BIAS, OCV/CPU)|41.222|41.144|1.00| |conv::Conv::(GFLOPS=10.701, K=[3 x 3], IN={1, 512, 38, 38}, OCN=804, P=[1 x 1], BIAS, OCV/CPU)|46.220|46.353|1.00| |conv::Conv::(GFLOPS=11.797, K=[5 x 5], IN={1, 240, 64, 64}, OCN=240, PM=SAME, OCV/CPU)|98.201|98.771|0.99| |conv::Conv::(GFLOPS=11.797, K=[5 x 5], IN={1, 480, 32, 32}, OCN=480, PM=SAME, OCV/CPU)|100.106|96.971|1.03| |conv::Conv::(GFLOPS=16.987, K=[5 x 5], IN={1, 1152, 16, 16}, OCN=1152, PM=SAME, OCV/CPU)|146.977|140.445|1.05| |conv::Conv::(GFLOPS=23.122, K=[5 x 5], IN={1, 672, 32, 32}, OCN=672, PM=SAME, 
OCV/CPU)|198.618|194.665|1.02| #### Performance Test of ARM platform: Apple M1, with `--perf_threads=1` Min (ms) |Name of Test|4.x|patch|4.x vs patch (x-factor)| |---|:-:|:-:|:-:| |conv1d::Conv1D::(GFLOPS=0.000, K=[3], IN={1, 2, 19}, OCN=2, G=2, S=2, P=(1, 1), BIAS, OCV/CPU)|0.001|0.001|1.07| |conv1d::Conv1D::(GFLOPS=0.000, K=[3], IN={1, 2, 25}, OCN=2, G=2, P=(2, 2), PM=SAME, OCV/CPU)|0.001|0.001|1.10| |conv1d::Conv1D::(GFLOPS=0.000, K=[3], IN={1, 6, 10}, OCN=6, PM=VALID, BIAS, OCV/CPU)|0.002|0.002|0.97| |conv3d::Conv3D::(GFLOPS=0.000, K=[1 x 1 x 1], IN={1, 4, 9, 10, 10}, OCN=4, S=[1 x 1 x 2], P=(1, 1) x (1, 1) x (1, 1), PM=VALID, OCV/CPU)|0.003|0.003|0.84| |conv3d::Conv3D::(GFLOPS=0.000, K=[1 x 1 x 1], IN={1, 8, 1, 10, 10}, OCN=8, G=8, P=(1, 1) x (1, 1) x (1, 1), BIAS, OCV/CPU)|0.009|0.009|1.00| |conv3d::Conv3D::(GFLOPS=0.000, K=[3 x 3 x 3], IN={1, 2, 19, 19, 19}, OCN=2, G=2, S=[2 x 2 x 2], P=(1, 1) x (1, 1) x (1, 1), BIAS, OCV/CPU)|0.027|0.030|0.90| |conv3d::Conv3D::(GFLOPS=0.000, K=[3 x 4 x 2], IN={1, 4, 8, 10, 10}, OCN=4, G=4, S=[1 x 2 x 1], BIAS, OCV/CPU)|0.008|0.007|1.07| |conv3d::Conv3D::(GFLOPS=0.001, K=[3 x 3 x 3], IN={1, 2, 25, 19, 19}, OCN=2, G=2, S=[1 x 2 x 2], P=(2, 2) x (2, 2) x (2, 2), PM=SAME, OCV/CPU)|0.066|0.072|0.91| |conv3d::Conv3D::(GFLOPS=0.002, K=[3 x 1 x 4], IN={1, 14, 5, 10, 10}, OCN=14, PM=SAME, OCV/CPU)|0.090|0.054|1.68| |conv3d::Conv3D::(GFLOPS=0.006, K=[5 x 5 x 5], IN={1, 4, 50, 19, 19}, OCN=4, S=[2 x 2 x 2], P=(1, 1) x (1, 1) x (1, 1), PM=VALID, OCV/CPU)|0.328|0.409|0.80| |conv3d::Conv3D::(GFLOPS=0.027, K=[3 x 3 x 3], IN={1, 6, 10, 38, 50}, OCN=6, PM=VALID, BIAS, OCV/CPU)|0.659|0.697|0.95| |conv3d::Conv3D::(GFLOPS=0.030, K=[5 x 5 x 5], IN={1, 6, 19, 19, 19}, OCN=6, G=2, OCV/CPU)|1.266|1.403|0.90| |conv3d::Conv3D::(GFLOPS=0.045, K=[7 x 7 x 7], IN={1, 2, 38, 38, 38}, OCN=2, S=[1 x 2 x 1], OCV/CPU)|3.550|4.145|0.86| |conv3d::Conv3D::(GFLOPS=0.053, K=[3 x 3 x 3], IN={1, 10, 98, 10, 10}, OCN=10, PM=SAME, OCV/CPU)|1.188|1.375|0.86| 
|conv3d::Conv3D::(GFLOPS=0.071, K=[7 x 7 x 7], IN={1, 6, 15, 19, 19}, OCN=6, S=[2 x 1 x 1], P=(3, 3) x (3, 3) x (3, 3), PM=SAME, BIAS, OCV/CPU)|2.683|3.236|0.83| |conv3d::Conv3D::(GFLOPS=0.093, K=[5 x 5 x 5], IN={1, 4, 40, 75, 75}, OCN=4, S=[2 x 2 x 2], OCV/CPU)|4.491|5.501|0.82| |conv3d::Conv3D::(GFLOPS=0.116, K=[5 x 5 x 5], IN={1, 2, 21, 75, 100}, OCN=2, BIAS, OCV/CPU)|8.916|10.181|0.88| |conv3d::Conv3D::(GFLOPS=1.267, K=[5 x 5 x 5], IN={1, 3, 75, 75, 100}, OCN=3, PM=SAME, BIAS, OCV/CPU)|69.995|72.296|0.97| |conv3d::Conv3D::(GFLOPS=1.343, K=[3 x 3 x 3], IN={1, 11, 9, 150, 200}, OCN=11, PM=VALID, BIAS, OCV/CPU)|22.531|23.139|0.97| |conv::Conv::(GFLOPS=0.177, K=[1 x 1], IN={1, 512, 26, 26}, OCN=256, OCV/CPU)|2.239|1.933|1.16| |conv::Conv::(GFLOPS=0.177, K=[1 x 1], IN={1, 512, 26, 26}, OCN=256, OCV/CPU_FP16)|-|1.010|-| |conv::Conv::(GFLOPS=0.177, K=[1 x 1], IN={1, 1024, 13, 13}, OCN=512, OCV/CPU)|3.134|2.068|1.52| |conv::Conv::(GFLOPS=0.177, K=[1 x 1], IN={1, 1024, 13, 13}, OCN=512, OCV/CPU_FP16)|-|1.062|-| |conv::Conv::(GFLOPS=0.178, K=[1 x 1], IN={1, 256, 52, 52}, OCN=128, OCV/CPU)|1.918|1.920|1.00| |conv::Conv::(GFLOPS=0.178, K=[1 x 1], IN={1, 256, 52, 52}, OCN=128, OCV/CPU_FP16)|-|1.014|-| |conv::Conv::(GFLOPS=0.210, K=[1 x 1], IN={1, 576, 38, 50}, OCN=96, PM=SAME, BIAS, OCV/CPU)|2.340|2.352|0.99| |conv::Conv::(GFLOPS=0.210, K=[1 x 1], IN={1, 576, 38, 50}, OCN=96, PM=SAME, BIAS, OCV/CPU_FP16)|-|1.247|-| |conv::Conv::(GFLOPS=0.231, K=[3 x 3], IN={1, 128, 56, 56}, OCN=32, P=[1 x 1], OCV/CPU)|1.116|1.111|1.00| |conv::Conv::(GFLOPS=0.231, K=[3 x 3], IN={1, 128, 56, 56}, OCN=32, P=[1 x 1], OCV/CPU_FP16)|-|1.114|-| |conv::Conv::(GFLOPS=0.231, K=[3 x 3], IN={1, 256, 14, 14}, OCN=256, P=[1 x 1], OCV/CPU)|1.116|1.112|1.00| |conv::Conv::(GFLOPS=0.231, K=[3 x 3], IN={1, 256, 14, 14}, OCN=256, P=[1 x 1], OCV/CPU_FP16)|-|1.113|-| |conv::Conv::(GFLOPS=0.280, K=[1 x 1], IN={1, 576, 38, 50}, OCN=128, PM=SAME, BIAS, OCV/CPU)|3.067|3.085|0.99| |conv::Conv::(GFLOPS=0.280, K=[1 x 
1], IN={1, 576, 38, 50}, OCN=128, PM=SAME, BIAS, OCV/CPU_FP16)|-|1.622|-| |conv::Conv::(GFLOPS=0.302, K=[3 x 3], IN={1, 64, 64, 64}, OCN=64, PM=SAME, OCV/CPU)|1.153|1.187|0.97| |conv::Conv::(GFLOPS=0.302, K=[3 x 3], IN={1, 64, 64, 64}, OCN=64, PM=SAME, OCV/CPU_FP16)|-|1.150|-| |conv::Conv::(GFLOPS=0.357, K=[1 x 1], IN={1, 64, 208, 208}, OCN=64, OCV/CPU)|4.804|4.849|0.99| |conv::Conv::(GFLOPS=0.357, K=[1 x 1], IN={1, 64, 208, 208}, OCN=64, OCV/CPU_FP16)|-|2.922|-| |conv::Conv::(GFLOPS=0.420, K=[3 x 3], IN={1, 96, 38, 50}, OCN=128, PM=SAME, BIAS, OCV/CPU)|1.463|1.469|1.00| |conv::Conv::(GFLOPS=0.420, K=[3 x 3], IN={1, 96, 38, 50}, OCN=128, PM=SAME, BIAS, OCV/CPU_FP16)|-|1.459|-| |conv::Conv::(GFLOPS=0.472, K=[3 x 3], IN={1, 128, 40, 40}, OCN=128, PM=SAME, OCV/CPU)|1.577|1.580|1.00| |conv::Conv::(GFLOPS=0.472, K=[3 x 3], IN={1, 128, 40, 40}, OCN=128, PM=SAME, OCV/CPU_FP16)|-|1.580|-| |conv::Conv::(GFLOPS=0.472, K=[3 x 3], IN={1, 256, 20, 20}, OCN=256, PM=SAME, OCV/CPU)|1.826|1.818|1.00| |conv::Conv::(GFLOPS=0.472, K=[3 x 3], IN={1, 256, 20, 20}, OCN=256, PM=SAME, OCV/CPU_FP16)|-|1.817|-| |conv::Conv::(GFLOPS=0.472, K=[3 x 3], IN={1, 512, 10, 10}, OCN=512, PM=SAME, OCV/CPU)|6.541|5.081|1.29| |conv::Conv::(GFLOPS=0.472, K=[3 x 3], IN={1, 512, 10, 10}, OCN=512, PM=SAME, OCV/CPU_FP16)|-|2.809|-| |conv::Conv::(GFLOPS=0.561, K=[3 x 3], IN={1, 128, 38, 50}, OCN=128, PM=SAME, BIAS, OCV/CPU)|1.912|1.919|1.00| |conv::Conv::(GFLOPS=0.561, K=[3 x 3], IN={1, 128, 38, 50}, OCN=128, PM=SAME, BIAS, OCV/CPU_FP16)|-|1.919|-| |conv::Conv::(GFLOPS=0.624, K=[3 x 3], IN={1, 128, 46, 46}, OCN=128, P=[1 x 1], BIAS, OCV/CPU)|1.961|1.971|0.99| |conv::Conv::(GFLOPS=0.624, K=[3 x 3], IN={1, 128, 46, 46}, OCN=128, P=[1 x 1], BIAS, OCV/CPU_FP16)|-|1.961|-| |conv::Conv::(GFLOPS=0.701, K=[3 x 3], IN={1, 128, 38, 50}, OCN=160, PM=SAME, BIAS, OCV/CPU)|2.317|2.329|0.99| |conv::Conv::(GFLOPS=0.701, K=[3 x 3], IN={1, 128, 38, 50}, OCN=160, PM=SAME, BIAS, OCV/CPU_FP16)|-|2.322|-| 
|conv::Conv::(GFLOPS=0.798, K=[3 x 3], IN={1, 64, 104, 104}, OCN=64, P=[1 x 1], OCV/CPU)|2.920|2.947|0.99| |conv::Conv::(GFLOPS=0.798, K=[3 x 3], IN={1, 64, 104, 104}, OCN=64, P=[1 x 1], OCV/CPU_FP16)|-|2.924|-| |conv::Conv::(GFLOPS=0.798, K=[3 x 3], IN={1, 128, 52, 52}, OCN=128, P=[1 x 1], OCV/CPU)|2.467|2.466|1.00| |conv::Conv::(GFLOPS=0.798, K=[3 x 3], IN={1, 128, 52, 52}, OCN=128, P=[1 x 1], OCV/CPU_FP16)|-|2.496|-| |conv::Conv::(GFLOPS=0.798, K=[3 x 3], IN={1, 256, 26, 26}, OCN=256, P=[1 x 1], OCV/CPU)|3.028|2.997|1.01| |conv::Conv::(GFLOPS=0.798, K=[3 x 3], IN={1, 256, 26, 26}, OCN=256, P=[1 x 1], OCV/CPU_FP16)|-|2.986|-| |conv::Conv::(GFLOPS=0.798, K=[3 x 3], IN={1, 512, 13, 13}, OCN=512, P=[1 x 1], OCV/CPU)|4.353|4.355|1.00| |conv::Conv::(GFLOPS=0.798, K=[3 x 3], IN={1, 512, 13, 13}, OCN=512, P=[1 x 1], OCV/CPU_FP16)|-|4.355|-| |conv::Conv::(GFLOPS=0.830, K=[3 x 3], IN={1, 64, 75, 100}, OCN=96, PM=SAME, BIAS, OCV/CPU)|2.762|2.793|0.99| |conv::Conv::(GFLOPS=0.830, K=[3 x 3], IN={1, 64, 75, 100}, OCN=96, PM=SAME, BIAS, OCV/CPU_FP16)|-|2.797|-| |conv::Conv::(GFLOPS=0.958, K=[3 x 3], IN={1, 192, 38, 38}, OCN=192, PM=SAME, OCV/CPU)|3.428|3.226|1.06| |conv::Conv::(GFLOPS=0.958, K=[3 x 3], IN={1, 192, 38, 38}, OCN=192, PM=SAME, OCV/CPU_FP16)|-|3.223|-| |conv::Conv::(GFLOPS=0.958, K=[3 x 3], IN={1, 384, 19, 19}, OCN=384, PM=SAME, OCV/CPU)|3.967|3.957|1.00| |conv::Conv::(GFLOPS=0.958, K=[3 x 3], IN={1, 384, 19, 19}, OCN=384, PM=SAME, OCV/CPU_FP16)|-|3.960|-| |conv::Conv::(GFLOPS=1.022, K=[3 x 3], IN={1, 576, 19, 19}, OCN=273, PM=SAME, BIAS, OCV/CPU)|4.806|4.387|1.10| |conv::Conv::(GFLOPS=1.022, K=[3 x 3], IN={1, 576, 19, 19}, OCN=273, PM=SAME, BIAS, OCV/CPU_FP16)|-|4.366|-| |conv::Conv::(GFLOPS=1.112, K=[3 x 3], IN={1, 512, 10, 10}, OCN=1206, P=[1 x 1], BIAS, OCV/CPU)|14.509|11.756|1.23| |conv::Conv::(GFLOPS=1.112, K=[3 x 3], IN={1, 512, 10, 10}, OCN=1206, P=[1 x 1], BIAS, OCV/CPU_FP16)|-|6.510|-| |conv::Conv::(GFLOPS=1.181, K=[3 x 3], IN={1, 64, 160, 200}, OCN=128, 
S=[2 x 2], P=[1 x 1], BIAS, OCV/CPU)|13.718|13.287|1.03| |conv::Conv::(GFLOPS=1.181, K=[3 x 3], IN={1, 64, 160, 200}, OCN=128, S=[2 x 2], P=[1 x 1], BIAS, OCV/CPU_FP16)|-|7.190|-| |conv::Conv::(GFLOPS=1.182, K=[3 x 3], IN={1, 32, 320, 400}, OCN=64, S=[2 x 2], P=[1 x 1], BIAS, OCV/CPU)|15.133|14.853|1.02| |conv::Conv::(GFLOPS=1.182, K=[3 x 3], IN={1, 32, 320, 400}, OCN=64, S=[2 x 2], P=[1 x 1], BIAS, OCV/CPU_FP16)|-|8.671|-| |conv::Conv::(GFLOPS=1.195, K=[9 x 9], IN={1, 32, 240, 320}, OCN=3, P=[4 x 4], BIAS, OCV/CPU)|41.928|43.328|0.97| |conv::Conv::(GFLOPS=1.195, K=[9 x 9], IN={1, 32, 240, 320}, OCN=3, P=[4 x 4], BIAS, OCV/CPU_FP16)|-|38.072|-| |conv::Conv::(GFLOPS=1.196, K=[3 x 3], IN={1, 384, 26, 26}, OCN=256, P=[1 x 1], OCV/CPU)|4.409|4.428|1.00| |conv::Conv::(GFLOPS=1.196, K=[3 x 3], IN={1, 384, 26, 26}, OCN=256, P=[1 x 1], OCV/CPU_FP16)|-|4.427|-| |conv::Conv::(GFLOPS=1.210, K=[3 x 3], IN={1, 32, 256, 256}, OCN=32, PM=SAME, OCV/CPU)|6.144|5.363|1.15| |conv::Conv::(GFLOPS=1.210, K=[3 x 3], IN={1, 32, 256, 256}, OCN=32, PM=SAME, OCV/CPU_FP16)|-|5.368|-| |conv::Conv::(GFLOPS=1.245, K=[3 x 3], IN={1, 64, 75, 75}, OCN=192, PM=SAME, BIAS, OCV/CPU)|3.926|3.932|1.00| |conv::Conv::(GFLOPS=1.245, K=[3 x 3], IN={1, 64, 75, 75}, OCN=192, PM=SAME, BIAS, OCV/CPU_FP16)|-|3.938|-| |conv::Conv::(GFLOPS=1.245, K=[3 x 3], IN={1, 96, 75, 100}, OCN=96, PM=SAME, BIAS, OCV/CPU)|3.920|3.915|1.00| |conv::Conv::(GFLOPS=1.245, K=[3 x 3], IN={1, 96, 75, 100}, OCN=96, PM=SAME, BIAS, OCV/CPU_FP16)|-|3.950|-| |conv::Conv::(GFLOPS=1.248, K=[3 x 3], IN={1, 256, 46, 46}, OCN=128, P=[1 x 1], BIAS, OCV/CPU)|3.767|3.764|1.00| |conv::Conv::(GFLOPS=1.248, K=[3 x 3], IN={1, 256, 46, 46}, OCN=128, P=[1 x 1], BIAS, OCV/CPU_FP16)|-|3.762|-| |conv::Conv::(GFLOPS=1.258, K=[3 x 3], IN={1, 1280, 10, 10}, OCN=546, PM=SAME, BIAS, OCV/CPU)|19.959|13.875|1.44| |conv::Conv::(GFLOPS=1.258, K=[3 x 3], IN={1, 1280, 10, 10}, OCN=546, PM=SAME, BIAS, OCV/CPU_FP16)|-|7.781|-| |conv::Conv::(GFLOPS=1.261, K=[3 x 3], 
IN={1, 192, 38, 50}, OCN=192, PM=SAME, BIAS, OCV/CPU)|3.951|3.955|1.00| |conv::Conv::(GFLOPS=1.261, K=[3 x 3], IN={1, 192, 38, 50}, OCN=192, PM=SAME, BIAS, OCV/CPU_FP16)|-|3.969|-| |conv::Conv::(GFLOPS=1.416, K=[3 x 3], IN={1, 128, 62, 82}, OCN=128, BIAS, OCV/CPU)|4.050|4.034|1.00| |conv::Conv::(GFLOPS=1.416, K=[3 x 3], IN={1, 128, 62, 82}, OCN=128, BIAS, OCV/CPU_FP16)|-|4.093|-| |conv::Conv::(GFLOPS=1.500, K=[3 x 3], IN={1, 128, 64, 84}, OCN=128, BIAS, OCV/CPU)|4.923|4.506|1.09| |conv::Conv::(GFLOPS=1.500, K=[3 x 3], IN={1, 128, 64, 84}, OCN=128, BIAS, OCV/CPU_FP16)|-|4.509|-| |conv::Conv::(GFLOPS=1.586, K=[3 x 3], IN={1, 128, 66, 86}, OCN=128, BIAS, OCV/CPU)|4.759|4.476|1.06| |conv::Conv::(GFLOPS=1.586, K=[3 x 3], IN={1, 128, 66, 86}, OCN=128, BIAS, OCV/CPU_FP16)|-|4.447|-| |conv::Conv::(GFLOPS=1.595, K=[3 x 3], IN={1, 256, 26, 26}, OCN=512, P=[1 x 1], OCV/CPU)|6.079|5.628|1.08| |conv::Conv::(GFLOPS=1.595, K=[3 x 3], IN={1, 256, 26, 26}, OCN=512, P=[1 x 1], OCV/CPU_FP16)|-|5.625|-| |conv::Conv::(GFLOPS=1.595, K=[3 x 3], IN={1, 256, 52, 52}, OCN=512, S=[2 x 2], P=[1 x 1], OCV/CPU)|19.843|17.523|1.13| |conv::Conv::(GFLOPS=1.595, K=[3 x 3], IN={1, 256, 52, 52}, OCN=512, S=[2 x 2], P=[1 x 1], OCV/CPU_FP16)|-|8.917|-| |conv::Conv::(GFLOPS=1.595, K=[3 x 3], IN={1, 512, 13, 13}, OCN=1024, P=[1 x 1], OCV/CPU)|8.334|8.247|1.01| |conv::Conv::(GFLOPS=1.595, K=[3 x 3], IN={1, 512, 13, 13}, OCN=1024, P=[1 x 1], OCV/CPU_FP16)|-|8.246|-| |conv::Conv::(GFLOPS=1.595, K=[3 x 3], IN={1, 512, 26, 26}, OCN=1024, S=[2 x 2], P=[1 x 1], OCV/CPU)|23.164|18.199|1.27| |conv::Conv::(GFLOPS=1.595, K=[3 x 3], IN={1, 512, 26, 26}, OCN=1024, S=[2 x 2], P=[1 x 1], OCV/CPU_FP16)|-|9.305|-| |conv::Conv::(GFLOPS=1.596, K=[3 x 3], IN={1, 64, 104, 104}, OCN=128, P=[1 x 1], OCV/CPU)|5.184|5.178|1.00| |conv::Conv::(GFLOPS=1.596, K=[3 x 3], IN={1, 64, 104, 104}, OCN=128, P=[1 x 1], OCV/CPU_FP16)|-|5.149|-| |conv::Conv::(GFLOPS=1.596, K=[3 x 3], IN={1, 64, 208, 208}, OCN=128, S=[2 x 2], P=[1 x 1], 
OCV/CPU)|17.990|18.103|0.99| |conv::Conv::(GFLOPS=1.596, K=[3 x 3], IN={1, 64, 208, 208}, OCN=128, S=[2 x 2], P=[1 x 1], OCV/CPU_FP16)|-|9.777|-| |conv::Conv::(GFLOPS=1.596, K=[3 x 3], IN={1, 128, 52, 52}, OCN=256, P=[1 x 1], OCV/CPU)|4.831|4.522|1.07| |conv::Conv::(GFLOPS=1.596, K=[3 x 3], IN={1, 128, 52, 52}, OCN=256, P=[1 x 1], OCV/CPU_FP16)|-|4.523|-| |conv::Conv::(GFLOPS=1.596, K=[3 x 3], IN={1, 128, 104, 104}, OCN=256, S=[2 x 2], P=[1 x 1], OCV/CPU)|17.328|17.319|1.00| |conv::Conv::(GFLOPS=1.596, K=[3 x 3], IN={1, 128, 104, 104}, OCN=256, S=[2 x 2], P=[1 x 1], OCV/CPU_FP16)|-|8.948|-| |conv::Conv::(GFLOPS=1.598, K=[3 x 3], IN={1, 32, 208, 208}, OCN=64, P=[1 x 1], OCV/CPU)|5.944|5.961|1.00| |conv::Conv::(GFLOPS=1.598, K=[3 x 3], IN={1, 32, 208, 208}, OCN=64, P=[1 x 1], OCV/CPU_FP16)|-|5.936|-| |conv::Conv::(GFLOPS=1.598, K=[3 x 3], IN={1, 32, 416, 416}, OCN=64, S=[2 x 2], P=[1 x 1], OCV/CPU)|19.811|20.064|0.99| |conv::Conv::(GFLOPS=1.598, K=[3 x 3], IN={1, 32, 416, 416}, OCN=64, S=[2 x 2], P=[1 x 1], OCV/CPU_FP16)|-|11.705|-| |conv::Conv::(GFLOPS=1.659, K=[3 x 3], IN={1, 960, 10, 10}, OCN=960, PM=SAME, OCV/CPU)|22.398|17.686|1.27| |conv::Conv::(GFLOPS=1.659, K=[3 x 3], IN={1, 960, 10, 10}, OCN=960, PM=SAME, OCV/CPU_FP16)|-|9.859|-| |conv::Conv::(GFLOPS=1.660, K=[3 x 3], IN={1, 128, 75, 75}, OCN=128, G=128, P=[1 x 1], BIAS, OCV/CPU)|0.416|0.416|1.00| |conv::Conv::(GFLOPS=1.660, K=[3 x 3], IN={1, 128, 75, 75}, OCN=128, G=128, P=[1 x 1], BIAS, OCV/CPU_FP16)|-|0.417|-| |conv::Conv::(GFLOPS=1.660, K=[3 x 3], IN={1, 128, 75, 75}, OCN=128, PM=SAME, OCV/CPU)|5.356|5.110|1.05| |conv::Conv::(GFLOPS=1.660, K=[3 x 3], IN={1, 128, 75, 75}, OCN=128, PM=SAME, OCV/CPU_FP16)|-|5.114|-| |conv::Conv::(GFLOPS=1.675, K=[3 x 3], IN={1, 128, 68, 88}, OCN=128, BIAS, OCV/CPU)|5.092|4.748|1.07| |conv::Conv::(GFLOPS=1.675, K=[3 x 3], IN={1, 128, 68, 88}, OCN=128, BIAS, OCV/CPU_FP16)|-|4.754|-| |conv::Conv::(GFLOPS=1.704, K=[3 x 3], IN={1, 256, 38, 38}, OCN=256, G=256, P=[1 x 1], BIAS, 
OCV/CPU)|0.260|0.229|1.13| |conv::Conv::(GFLOPS=1.704, K=[3 x 3], IN={1, 256, 38, 38}, OCN=256, G=256, P=[1 x 1], BIAS, OCV/CPU_FP16)|-|0.229|-| |conv::Conv::(GFLOPS=1.704, K=[3 x 3], IN={1, 256, 38, 38}, OCN=256, PM=SAME, OCV/CPU)|5.872|5.460|1.08| |conv::Conv::(GFLOPS=1.704, K=[3 x 3], IN={1, 256, 38, 38}, OCN=256, PM=SAME, OCV/CPU_FP16)|-|5.460|-| |conv::Conv::(GFLOPS=1.704, K=[3 x 3], IN={1, 512, 19, 19}, OCN=512, G=512, P=[1 x 1], BIAS, OCV/CPU)|0.161|0.161|1.00| |conv::Conv::(GFLOPS=1.704, K=[3 x 3], IN={1, 512, 19, 19}, OCN=512, G=512, P=[1 x 1], BIAS, OCV/CPU_FP16)|-|0.161|-| |conv::Conv::(GFLOPS=1.704, K=[3 x 3], IN={1, 512, 19, 19}, OCN=512, P=[1 x 1], BIAS, OCV/CPU)|7.176|7.175|1.00| |conv::Conv::(GFLOPS=1.704, K=[3 x 3], IN={1, 512, 19, 19}, OCN=512, P=[1 x 1], BIAS, OCV/CPU_FP16)|-|7.162|-| |conv::Conv::(GFLOPS=1.704, K=[3 x 3], IN={1, 512, 19, 19}, OCN=512, PM=SAME, OCV/CPU)|7.174|7.185|1.00| |conv::Conv::(GFLOPS=1.704, K=[3 x 3], IN={1, 512, 19, 19}, OCN=512, PM=SAME, OCV/CPU_FP16)|-|7.157|-| |conv::Conv::(GFLOPS=1.766, K=[3 x 3], IN={1, 128, 70, 90}, OCN=128, BIAS, OCV/CPU)|5.400|5.180|1.04| |conv::Conv::(GFLOPS=1.766, K=[3 x 3], IN={1, 128, 70, 90}, OCN=128, BIAS, OCV/CPU_FP16)|-|5.201|-| |conv::Conv::(GFLOPS=1.859, K=[3 x 3], IN={1, 128, 72, 92}, OCN=128, BIAS, OCV/CPU)|5.330|5.188|1.03| |conv::Conv::(GFLOPS=1.859, K=[3 x 3], IN={1, 128, 72, 92}, OCN=128, BIAS, OCV/CPU_FP16)|-|5.177|-| |conv::Conv::(GFLOPS=1.888, K=[3 x 3], IN={1, 1024, 10, 10}, OCN=1024, G=1024, P=[1 x 1], BIAS, OCV/CPU)|0.115|0.115|1.00| |conv::Conv::(GFLOPS=1.888, K=[3 x 3], IN={1, 1024, 10, 10}, OCN=1024, G=1024, P=[1 x 1], BIAS, OCV/CPU_FP16)|-|0.115|-| |conv::Conv::(GFLOPS=1.888, K=[3 x 3], IN={1, 1024, 10, 10}, OCN=1024, PM=SAME, OCV/CPU)|26.156|20.222|1.29| |conv::Conv::(GFLOPS=1.888, K=[3 x 3], IN={1, 1024, 10, 10}, OCN=1024, PM=SAME, OCV/CPU_FP16)|-|11.203|-| |conv::Conv::(GFLOPS=1.954, K=[3 x 3], IN={1, 128, 74, 94}, OCN=128, BIAS, OCV/CPU)|5.627|5.543|1.02| 
|conv::Conv::(GFLOPS=1.954, K=[3 x 3], IN={1, 128, 74, 94}, OCN=128, BIAS, OCV/CPU_FP16)|-|5.506|-| |conv::Conv::(GFLOPS=1.995, K=[9 x 9], IN={1, 3, 320, 400}, OCN=32, P=[4 x 4], BIAS, OCV/CPU)|27.925|27.741|1.01| |conv::Conv::(GFLOPS=1.995, K=[9 x 9], IN={1, 3, 320, 400}, OCN=32, P=[4 x 4], BIAS, OCV/CPU_FP16)|-|17.217|-| |conv::Conv::(GFLOPS=2.052, K=[3 x 3], IN={1, 128, 76, 96}, OCN=128, BIAS, OCV/CPU)|6.359|6.062|1.05| |conv::Conv::(GFLOPS=2.052, K=[3 x 3], IN={1, 128, 76, 96}, OCN=128, BIAS, OCV/CPU_FP16)|-|6.048|-| |conv::Conv::(GFLOPS=2.100, K=[3 x 3], IN={1, 144, 75, 75}, OCN=144, PM=SAME, OCV/CPU)|6.559|6.322|1.04| |conv::Conv::(GFLOPS=2.100, K=[3 x 3], IN={1, 144, 75, 75}, OCN=144, PM=SAME, OCV/CPU_FP16)|-|6.280|-| |conv::Conv::(GFLOPS=2.153, K=[3 x 3], IN={1, 128, 78, 98}, OCN=128, BIAS, OCV/CPU)|6.412|6.200|1.03| |conv::Conv::(GFLOPS=2.153, K=[3 x 3], IN={1, 128, 78, 98}, OCN=128, BIAS, OCV/CPU_FP16)|-|6.197|-| |conv::Conv::(GFLOPS=2.156, K=[3 x 3], IN={1, 576, 19, 19}, OCN=576, PM=SAME, OCV/CPU)|9.167|8.624|1.06| |conv::Conv::(GFLOPS=2.156, K=[3 x 3], IN={1, 576, 19, 19}, OCN=576, PM=SAME, OCV/CPU_FP16)|-|8.626|-| |conv::Conv::(GFLOPS=2.255, K=[3 x 3], IN={1, 128, 80, 100}, OCN=128, BIAS, OCV/CPU)|6.755|6.491|1.04| |conv::Conv::(GFLOPS=2.255, K=[3 x 3], IN={1, 128, 80, 100}, OCN=128, BIAS, OCV/CPU_FP16)|-|6.520|-| |conv::Conv::(GFLOPS=2.719, K=[3 x 3], IN={1, 96, 256, 256}, OCN=96, S=[2 x 2], PM=SAME, OCV/CPU)|35.664|34.752|1.03| |conv::Conv::(GFLOPS=2.719, K=[3 x 3], IN={1, 96, 256, 256}, OCN=96, S=[2 x 2], PM=SAME, OCV/CPU_FP16)|-|20.260|-| |conv::Conv::(GFLOPS=3.319, K=[3 x 3], IN={1, 128, 75, 75}, OCN=256, P=[1 x 1], BIAS, OCV/CPU)|9.514|9.414|1.01| |conv::Conv::(GFLOPS=3.319, K=[3 x 3], IN={1, 128, 75, 75}, OCN=256, P=[1 x 1], BIAS, OCV/CPU_FP16)|-|9.462|-| |conv::Conv::(GFLOPS=3.321, K=[3 x 3], IN={1, 64, 150, 150}, OCN=128, P=[1 x 1], BIAS, OCV/CPU)|10.631|9.963|1.07| |conv::Conv::(GFLOPS=3.321, K=[3 x 3], IN={1, 64, 150, 150}, OCN=128, P=[1 x 
1], BIAS, OCV/CPU_FP16)|-|9.935|-| |conv::Conv::(GFLOPS=3.398, K=[7 x 7], IN={1, 128, 46, 46}, OCN=128, P=[3 x 3], BIAS, OCV/CPU)|37.465|36.798|1.02| |conv::Conv::(GFLOPS=3.398, K=[7 x 7], IN={1, 128, 46, 46}, OCN=128, P=[3 x 3], BIAS, OCV/CPU_FP16)|-|19.569|-| |conv::Conv::(GFLOPS=3.407, K=[3 x 3], IN={1, 512, 19, 19}, OCN=1024, D=[6 x 6], P=[6 x 6], BIAS, OCV/CPU)|38.157|36.157|1.06| |conv::Conv::(GFLOPS=3.407, K=[3 x 3], IN={1, 512, 19, 19}, OCN=1024, D=[6 x 6], P=[6 x 6], BIAS, OCV/CPU_FP16)|-|18.902|-| |conv::Conv::(GFLOPS=3.408, K=[3 x 3], IN={1, 256, 38, 38}, OCN=512, P=[1 x 1], BIAS, OCV/CPU)|10.356|10.401|1.00| |conv::Conv::(GFLOPS=3.408, K=[3 x 3], IN={1, 256, 38, 38}, OCN=512, P=[1 x 1], BIAS, OCV/CPU_FP16)|-|10.360|-| |conv::Conv::(GFLOPS=4.247, K=[3 x 3], IN={1, 480, 32, 32}, OCN=480, PM=SAME, OCV/CPU)|12.641|12.150|1.04| |conv::Conv::(GFLOPS=4.247, K=[3 x 3], IN={1, 480, 32, 32}, OCN=480, PM=SAME, OCV/CPU_FP16)|-|12.162|-| |conv::Conv::(GFLOPS=4.247, K=[5 x 5], IN={1, 144, 128, 128}, OCN=144, S=[2 x 2], PM=SAME, OCV/CPU)|50.545|50.505|1.00| |conv::Conv::(GFLOPS=4.247, K=[5 x 5], IN={1, 144, 128, 128}, OCN=144, S=[2 x 2], PM=SAME, OCV/CPU_FP16)|-|27.950|-| |conv::Conv::(GFLOPS=4.566, K=[7 x 7], IN={1, 172, 46, 46}, OCN=128, P=[3 x 3], BIAS, OCV/CPU)|54.233|49.603|1.09| |conv::Conv::(GFLOPS=4.566, K=[7 x 7], IN={1, 172, 46, 46}, OCN=128, P=[3 x 3], BIAS, OCV/CPU_FP16)|-|26.515|-| |conv::Conv::(GFLOPS=4.993, K=[3 x 3], IN={1, 256, 46, 46}, OCN=512, P=[1 x 1], BIAS, OCV/CPU)|13.779|12.968|1.06| |conv::Conv::(GFLOPS=4.993, K=[3 x 3], IN={1, 256, 46, 46}, OCN=512, P=[1 x 1], BIAS, OCV/CPU_FP16)|-|12.984|-| |conv::Conv::(GFLOPS=4.993, K=[3 x 3], IN={1, 512, 46, 46}, OCN=256, P=[1 x 1], BIAS, OCV/CPU)|15.809|15.329|1.03| |conv::Conv::(GFLOPS=4.993, K=[3 x 3], IN={1, 512, 46, 46}, OCN=256, P=[1 x 1], BIAS, OCV/CPU_FP16)|-|15.433|-| |conv::Conv::(GFLOPS=4.994, K=[3 x 3], IN={1, 128, 92, 92}, OCN=256, P=[1 x 1], BIAS, OCV/CPU)|14.563|14.527|1.00| 
|conv::Conv::(GFLOPS=4.994, K=[3 x 3], IN={1, 128, 92, 92}, OCN=256, P=[1 x 1], BIAS, OCV/CPU_FP16)|-|14.480|-| |conv::Conv::(GFLOPS=4.997, K=[3 x 3], IN={1, 64, 184, 184}, OCN=128, P=[1 x 1], BIAS, OCV/CPU)|16.714|16.484|1.01| |conv::Conv::(GFLOPS=4.997, K=[3 x 3], IN={1, 64, 184, 184}, OCN=128, P=[1 x 1], BIAS, OCV/CPU_FP16)|-|16.362|-| |conv::Conv::(GFLOPS=5.780, K=[5 x 5], IN={1, 672, 32, 32}, OCN=672, S=[2 x 2], PM=SAME, OCV/CPU)|77.832|65.729|1.18| |conv::Conv::(GFLOPS=5.780, K=[5 x 5], IN={1, 672, 32, 32}, OCN=672, S=[2 x 2], PM=SAME, OCV/CPU_FP16)|-|32.065|-| |conv::Conv::(GFLOPS=6.116, K=[3 x 3], IN={1, 1152, 16, 16}, OCN=1152, PM=SAME, OCV/CPU)|21.903|20.386|1.07| |conv::Conv::(GFLOPS=6.116, K=[3 x 3], IN={1, 1152, 16, 16}, OCN=1152, PM=SAME, OCV/CPU_FP16)|-|20.416|-| |conv::Conv::(GFLOPS=6.118, K=[3 x 3], IN={1, 144, 128, 128}, OCN=144, PM=SAME, OCV/CPU)|20.405|18.148|1.12| |conv::Conv::(GFLOPS=6.118, K=[3 x 3], IN={1, 144, 128, 128}, OCN=144, PM=SAME, OCV/CPU_FP16)|-|18.128|-| |conv::Conv::(GFLOPS=6.637, K=[3 x 3], IN={1, 256, 75, 75}, OCN=256, P=[1 x 1], BIAS, OCV/CPU)|20.334|18.521|1.10| |conv::Conv::(GFLOPS=6.637, K=[3 x 3], IN={1, 256, 75, 75}, OCN=256, P=[1 x 1], BIAS, OCV/CPU_FP16)|-|18.495|-| |conv::Conv::(GFLOPS=6.638, K=[3 x 3], IN={1, 128, 150, 150}, OCN=128, P=[1 x 1], BIAS, OCV/CPU)|21.527|19.584|1.10| |conv::Conv::(GFLOPS=6.638, K=[3 x 3], IN={1, 128, 150, 150}, OCN=128, P=[1 x 1], BIAS, OCV/CPU_FP16)|-|19.630|-| |conv::Conv::(GFLOPS=6.641, K=[3 x 3], IN={1, 64, 150, 200}, OCN=192, PM=SAME, BIAS, OCV/CPU)|22.715|20.057|1.13| |conv::Conv::(GFLOPS=6.641, K=[3 x 3], IN={1, 64, 150, 200}, OCN=192, PM=SAME, BIAS, OCV/CPU_FP16)|-|20.068|-| |conv::Conv::(GFLOPS=6.641, K=[3 x 3], IN={1, 64, 300, 300}, OCN=64, P=[1 x 1], BIAS, OCV/CPU)|26.228|24.992|1.05| |conv::Conv::(GFLOPS=6.641, K=[3 x 3], IN={1, 64, 300, 300}, OCN=64, P=[1 x 1], BIAS, OCV/CPU_FP16)|-|24.957|-| |conv::Conv::(GFLOPS=6.814, K=[3 x 3], IN={1, 512, 38, 38}, OCN=512, P=[1 x 1], BIAS, 
OCV/CPU)|21.524|21.581|1.00| |conv::Conv::(GFLOPS=6.814, K=[3 x 3], IN={1, 512, 38, 38}, OCN=512, P=[1 x 1], BIAS, OCV/CPU_FP16)|-|21.782|-| |conv::Conv::(GFLOPS=8.025, K=[3 x 3], IN={1, 1024, 19, 19}, OCN=1206, P=[1 x 1], BIAS, OCV/CPU)|34.094|31.964|1.07| |conv::Conv::(GFLOPS=8.025, K=[3 x 3], IN={1, 1024, 19, 19}, OCN=1206, P=[1 x 1], BIAS, OCV/CPU_FP16)|-|31.925|-| |conv::Conv::(GFLOPS=9.986, K=[3 x 3], IN={1, 512, 46, 46}, OCN=512, P=[1 x 1], BIAS, OCV/CPU)|28.677|27.813|1.03| |conv::Conv::(GFLOPS=9.986, K=[3 x 3], IN={1, 512, 46, 46}, OCN=512, P=[1 x 1], BIAS, OCV/CPU_FP16)|-|27.808|-| |conv::Conv::(GFLOPS=9.987, K=[3 x 3], IN={1, 256, 92, 92}, OCN=256, P=[1 x 1], BIAS, OCV/CPU)|31.274|27.892|1.12| |conv::Conv::(GFLOPS=9.987, K=[3 x 3], IN={1, 256, 92, 92}, OCN=256, P=[1 x 1], BIAS, OCV/CPU_FP16)|-|27.910|-| |conv::Conv::(GFLOPS=9.989, K=[3 x 3], IN={1, 128, 184, 184}, OCN=128, P=[1 x 1], BIAS, OCV/CPU)|30.533|30.007|1.02| |conv::Conv::(GFLOPS=9.989, K=[3 x 3], IN={1, 128, 184, 184}, OCN=128, P=[1 x 1], BIAS, OCV/CPU_FP16)|-|30.089|-| |conv::Conv::(GFLOPS=9.993, K=[3 x 3], IN={1, 64, 368, 368}, OCN=64, P=[1 x 1], BIAS, OCV/CPU)|39.837|38.312|1.04| |conv::Conv::(GFLOPS=9.993, K=[3 x 3], IN={1, 64, 368, 368}, OCN=64, P=[1 x 1], BIAS, OCV/CPU_FP16)|-|38.477|-| |conv::Conv::(GFLOPS=10.087, K=[3 x 3], IN={1, 576, 38, 50}, OCN=512, PM=SAME, BIAS, OCV/CPU)|32.480|29.237|1.11| |conv::Conv::(GFLOPS=10.087, K=[3 x 3], IN={1, 576, 38, 50}, OCN=512, PM=SAME, BIAS, OCV/CPU_FP16)|-|29.452|-| |conv::Conv::(GFLOPS=10.701, K=[3 x 3], IN={1, 512, 38, 38}, OCN=804, P=[1 x 1], BIAS, OCV/CPU)|33.544|32.832|1.02| |conv::Conv::(GFLOPS=10.701, K=[3 x 3], IN={1, 512, 38, 38}, OCN=804, P=[1 x 1], BIAS, OCV/CPU_FP16)|-|32.784|-| |conv::Conv::(GFLOPS=11.797, K=[5 x 5], IN={1, 240, 64, 64}, OCN=240, PM=SAME, OCV/CPU)|134.481|130.678|1.03| |conv::Conv::(GFLOPS=11.797, K=[5 x 5], IN={1, 240, 64, 64}, OCN=240, PM=SAME, OCV/CPU_FP16)|-|70.134|-| |conv::Conv::(GFLOPS=11.797, K=[5 x 5], IN={1, 
480, 32, 32}, OCN=480, PM=SAME, OCV/CPU)|127.930|126.530|1.01| |conv::Conv::(GFLOPS=11.797, K=[5 x 5], IN={1, 480, 32, 32}, OCN=480, PM=SAME, OCV/CPU_FP16)|-|65.261|-| |conv::Conv::(GFLOPS=16.987, K=[5 x 5], IN={1, 1152, 16, 16}, OCN=1152, PM=SAME, OCV/CPU)|201.346|187.007|1.08| |conv::Conv::(GFLOPS=16.987, K=[5 x 5], IN={1, 1152, 16, 16}, OCN=1152, PM=SAME, OCV/CPU_FP16)|-|91.525|-| |conv::Conv::(GFLOPS=23.122, K=[5 x 5], IN={1, 672, 32, 32}, OCN=672, PM=SAME, OCV/CPU)|252.038|245.587|1.03| |conv::Conv::(GFLOPS=23.122, K=[5 x 5], IN={1, 672, 32, 32}, OCN=672, PM=SAME, OCV/CPU_FP16)|-|125.477|-| ### Pull Request Readiness Checklist See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [x] I agree to contribute to the project under Apache 2 License. - [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [x] The PR is proposed to the proper branch - [ ] There is a reference to the original bug report and related work - [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. - [ ] The feature is well documented and sample code can be built with the project CMake
2717 lines
92 KiB
C++
2717 lines
92 KiB
C++
/*M///////////////////////////////////////////////////////////////////////////////////////
|
|
//
|
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
|
//
|
|
// By downloading, copying, installing or using the software you agree to this license.
|
|
// If you do not agree to this license, do not download, install,
|
|
// copy or use the software.
|
|
//
|
|
//
|
|
// License Agreement
|
|
// For Open Source Computer Vision Library
|
|
//
|
|
// Copyright (C) 2017, Intel Corporation, all rights reserved.
|
|
// Third party copyrights are property of their respective owners.
|
|
//
|
|
// Redistribution and use in source and binary forms, with or without modification,
|
|
// are permitted provided that the following conditions are met:
|
|
//
|
|
// * Redistribution's of source code must retain the above copyright notice,
|
|
// this list of conditions and the following disclaimer.
|
|
//
|
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
|
// this list of conditions and the following disclaimer in the documentation
|
|
// and/or other materials provided with the distribution.
|
|
//
|
|
// * The name of the copyright holders may not be used to endorse or promote products
|
|
// derived from this software without specific prior written permission.
|
|
//
|
|
// This software is provided by the copyright holders and contributors "as is" and
|
|
// any express or implied warranties, including, but not limited to, the implied
|
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
|
// indirect, incidental, special, exemplary, or consequential damages
|
|
// (including, but not limited to, procurement of substitute goods or services;
|
|
// loss of use, data, or profits; or business interruption) however caused
|
|
// and on any theory of liability, whether in contract, strict liability,
|
|
// or tort (including negligence or otherwise) arising in any way out of
|
|
// the use of this software, even if advised of the possibility of such damage.
|
|
//
|
|
//M*/
|
|
|
|
#include "test_precomp.hpp"
|
|
#include <opencv2/core/ocl.hpp>
|
|
#include "npy_blob.hpp"
|
|
#include <opencv2/dnn/shape_utils.hpp>
|
|
#include <opencv2/dnn/all_layers.hpp>
|
|
#include <opencv2/dnn/layer.details.hpp> // CV_DNN_REGISTER_LAYER_CLASS
|
|
|
|
#ifdef HAVE_INF_ENGINE
|
|
#include <thread>
|
|
#endif
|
|
|
|
namespace opencv_test { namespace {
|
|
|
|
template<typename TString>
|
|
static String _tf(TString filename)
|
|
{
|
|
String basetestdir = getOpenCVExtraDir();
|
|
size_t len = basetestdir.size();
|
|
if(len > 0 && basetestdir[len-1] != '/' && basetestdir[len-1] != '\\')
|
|
return (basetestdir + "/dnn/layers") + filename;
|
|
return (basetestdir + "dnn/layers/") + filename;
|
|
}
|
|
|
|
/**
 * Runs a stand-alone layer on copies of the given blobs.
 *
 * Output and internal buffers are allocated as CV_32F using the shapes reported by
 * Layer::getMemoryShapes(); the layer is then finalized and forwarded once.
 *
 * @param layer    layer instance to execute.
 * @param inpBlobs input blobs; each one is cloned before being handed to the layer.
 * @param outBlobs receives the output blobs produced by the layer.
 */
void runLayer(Ptr<Layer> layer, std::vector<Mat> &inpBlobs, std::vector<Mat> &outBlobs)
{
    std::vector<Mat> layerInputs, layerOutputs, layerInternals;
    std::vector<MatShape> inShapes, outShapes, internalShapes;

    // Work on clones so that the layer never sees the caller's buffers.
    for (size_t idx = 0; idx < inpBlobs.size(); ++idx)
    {
        layerInputs.push_back(inpBlobs[idx].clone());
        inShapes.push_back(shape(layerInputs.back()));
    }

    layer->getMemoryShapes(inShapes, 0, outShapes, internalShapes);

    for (size_t idx = 0; idx < outShapes.size(); ++idx)
        layerOutputs.push_back(Mat(outShapes[idx], CV_32F));

    for (size_t idx = 0; idx < internalShapes.size(); ++idx)
        layerInternals.push_back(Mat(internalShapes[idx], CV_32F));

    layer->finalize(layerInputs, layerOutputs);
    layer->forward(layerInputs, layerOutputs, layerInternals);

    // Hand the produced blobs back to the caller.
    outBlobs.assign(layerOutputs.begin(), layerOutputs.end());
}
|
|
|
|
class Test_Caffe_layers : public DNNTestLayer
|
|
{
|
|
public:
|
|
void testLayerUsingCaffeModels(const String& basename, bool useCaffeModel = false,
|
|
bool useCommonInputBlob = true, double l1 = 0.0, double lInf = 0.0,
|
|
int numInps = 1, int numOuts = 1)
|
|
{
|
|
CV_Assert_N(numInps >= 1, numInps <= 10, numOuts >= 1, numOuts <= 10);
|
|
String prototxt = _tf(basename + ".prototxt");
|
|
String caffemodel = _tf(basename + ".caffemodel");
|
|
|
|
std::vector<Mat> inps, refs, outs;
|
|
|
|
if (numInps > 1)
|
|
{
|
|
for (int i = 0; i < numInps; i++)
|
|
{
|
|
String inpfile = _tf(basename + cv::format(".input_%d.npy", i));
|
|
inps.push_back(blobFromNPY(inpfile));
|
|
}
|
|
}
|
|
else
|
|
{
|
|
String inpfile = (useCommonInputBlob) ? _tf("blob.npy") : _tf(basename + ".input.npy");
|
|
inps.push_back(blobFromNPY(inpfile));
|
|
}
|
|
|
|
if (numOuts > 1)
|
|
{
|
|
for (int i = 0; i < numOuts; i++)
|
|
{
|
|
String outfile = _tf(basename + cv::format("_%d.npy", i));
|
|
refs.push_back(blobFromNPY(outfile));
|
|
}
|
|
}
|
|
else
|
|
{
|
|
String outfile = _tf(basename + ".npy");
|
|
refs.push_back(blobFromNPY(outfile));
|
|
}
|
|
|
|
Net net = readNetFromCaffe(prototxt, (useCaffeModel) ? caffemodel : String());
|
|
ASSERT_FALSE(net.empty());
|
|
checkBackend(&inps[0], &refs[0]);
|
|
|
|
net.setPreferableBackend(backend);
|
|
net.setPreferableTarget(target);
|
|
|
|
String inp_name = "input";
|
|
if (numInps > 1)
|
|
{
|
|
for (int i = 0; i < numInps; i++)
|
|
{
|
|
net.setInput(inps[i], inp_name + cv::format("_%d", i));
|
|
}
|
|
}
|
|
else
|
|
{
|
|
net.setInput(inps.back(), inp_name);
|
|
}
|
|
|
|
net.forward(outs);
|
|
for (int i = 0; i < refs.size(); i++)
|
|
{
|
|
normAssert(refs[i], outs[i], "", l1 ? l1 : default_l1, lInf ? lInf : default_lInf);
|
|
}
|
|
}
|
|
};
|
|
|
|
// Runs the "layer_softmax" prototxt on the shared input blob and compares with its reference.
TEST_P(Test_Caffe_layers, Softmax)
{
    const String basename("layer_softmax");
    testLayerUsingCaffeModels(basename);
}
|
|
|
|
// Checks both LRN variants (spatial and across channels) against their references.
TEST_P(Test_Caffe_layers, LRN)
{
    // The OpenCL kernels use the native_ math functions which have
    // implementation defined accuracy, so we use relaxed thresholds. See
    // https://github.com/opencv/opencv/issues/9821 for more details.
    const bool relaxThresholds = (target == DNN_TARGET_OPENCL);
    // 0.0 makes the helper fall back to the default thresholds.
    const double l1 = relaxThresholds ? 0.01 : 0.0;
    const double lInf = relaxThresholds ? 0.01 : 0.0;

    const char* const basenames[] = { "layer_lrn_spatial", "layer_lrn_channels" };
    for (size_t k = 0; k < sizeof(basenames) / sizeof(basenames[0]); ++k)
        testLayerUsingCaffeModels(basenames[k], false, true, l1, lInf);
}
|
|
|
|
// Runs the convolution test network with weights loaded from its .caffemodel file.
TEST_P(Test_Caffe_layers, Convolution)
{
    const bool useCaffeModel = true;
    testLayerUsingCaffeModels("layer_convolution", useCaffeModel);
}
|
|
|
|
// Runs the deconvolution test network (own input blob, weights from .caffemodel).
TEST_P(Test_Caffe_layers, DeConvolution)
{
    // The CUDA FP16 target gets its skip tag applied before the model is run.
    if (target == DNN_TARGET_CUDA_FP16)
    {
        applyTestTag(CV_TEST_TAG_DNN_SKIP_CUDA_FP16);
    }

    const bool useCaffeModel = true;
    const bool useCommonInputBlob = false;
    testLayerUsingCaffeModels("layer_deconvolution", useCaffeModel, useCommonInputBlob);
}
|
|
|
|
// Runs the inner-product test network, after tagging the configurations listed below.
TEST_P(Test_Caffe_layers, InnerProduct)
{
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2021040000)
    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
    {
        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER);
    }
    else if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
    {
        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
    }
#endif
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000)
    // IE exception: Ngraph operation Reshape with name Reshape_4219609 has dynamic output shape on 0 port, but CPU plug-in supports only static shape
    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
    {
        if (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16)
        {
            applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16,
                         CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
        }
    }
#endif

    // Half-precision targets of the built-in backend are tagged as well.
    if (backend == DNN_BACKEND_OPENCV)
    {
        if (target == DNN_TARGET_OPENCL_FP16)
            applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16);
        else if (target == DNN_TARGET_CPU_FP16)
            applyTestTag(CV_TEST_TAG_DNN_SKIP_CPU_FP16);
    }

    const bool useCaffeModel = true;
    testLayerUsingCaffeModels("layer_inner_product", useCaffeModel);
}
|
|
|
|
// Runs the max-pooling test network on the shared input blob.
TEST_P(Test_Caffe_layers, Pooling_max)
{
    const String basename("layer_pooling_max");
    testLayerUsingCaffeModels(basename);
}
|
|
|
|
// Runs the average-pooling test network on the shared input blob.
TEST_P(Test_Caffe_layers, Pooling_ave)
{
    const String basename("layer_pooling_ave");
    testLayerUsingCaffeModels(basename);
}
|
|
|
|
// Runs the MVN test network on the shared input blob.
TEST_P(Test_Caffe_layers, MVN)
{
    const bool isCuda = (backend == DNN_BACKEND_CUDA);
    if (isCuda)
    {
        applyTestTag(CV_TEST_TAG_DNN_SKIP_CUDA); /* MVN is unsupported */
    }

    testLayerUsingCaffeModels("layer_mvn");
}
|
|
|
|
void testReshape(const MatShape& inputShape, const MatShape& targetShape,
|
|
int axis = 0, int num_axes = -1,
|
|
MatShape mask = MatShape())
|
|
{
|
|
LayerParams params;
|
|
params.set("axis", axis);
|
|
params.set("num_axes", num_axes);
|
|
if (!mask.empty())
|
|
{
|
|
params.set("dim", DictValue::arrayInt<int*>(&mask[0], mask.size()));
|
|
}
|
|
|
|
Mat inp(inputShape.size(), &inputShape[0], CV_32F);
|
|
std::vector<Mat> inpVec(1, inp);
|
|
std::vector<Mat> outVec, intVec;
|
|
|
|
Ptr<Layer> rl = LayerFactory::createLayerInstance("Reshape", params);
|
|
runLayer(rl, inpVec, outVec);
|
|
|
|
Mat& out = outVec[0];
|
|
MatShape shape(out.size.p, out.size.p + out.dims);
|
|
EXPECT_EQ(shape, targetShape);
|
|
}
|
|
|
|
// Exercises testReshape() with three configurations: an axis/num_axes reshape without a
// mask, a mask containing -1, and a fully specified mask.
TEST(Layer_Test_Reshape, Accuracy)
{
    {
        // No mask: axis = 2, num_axes = 1.
        int srcDims[] = {4, 3, 1, 2};
        int dstDims[] = {4, 3, 2};
        const MatShape srcShape(srcDims, srcDims + 4);
        const MatShape dstShape(dstDims, dstDims + 3);
        testReshape(srcShape, dstShape, 2, 1);
    }
    {
        // Mask with a -1 entry.
        int srcDims[] = {1, 128, 4, 4};
        int dstDims[] = {1, 2048};
        int maskDims[] = {-1, 2048};
        const MatShape srcShape(srcDims, srcDims + 4);
        const MatShape dstShape(dstDims, dstDims + 2);
        const MatShape maskShape(maskDims, maskDims + 2);
        testReshape(srcShape, dstShape, 0, -1, maskShape);
    }
    {
        // Fully specified mask.
        int srcDims[] = {1, 2, 3};
        int dstDims[] = {3, 1, 2};
        int maskDims[] = {3, 1, 2};
        const MatShape srcShape(srcDims, srcDims + 3);
        const MatShape dstShape(dstDims, dstDims + 3);
        const MatShape maskShape(maskDims, maskDims + 3);
        testReshape(srcShape, dstShape, 0, -1, maskShape);
    }
}
|
|
|
|
// Runs two batch-normalization networks: one on the shared input blob and the
// "local stats" variant on its own input blob. Both load weights from .caffemodel.
TEST_P(Test_Caffe_layers, BatchNorm)
{
    const bool useCaffeModel = true;

    testLayerUsingCaffeModels("layer_batch_norm", useCaffeModel);

    const bool useCommonInputBlob = false;
    testLayerUsingCaffeModels("layer_batch_norm_local_stats", useCaffeModel, useCommonInputBlob);
}
|
|
|
|
// Runs the ReLU test network on the shared input blob.
TEST_P(Test_Caffe_layers, ReLU)
{
    const String basename("layer_relu");
    testLayerUsingCaffeModels(basename);
}
|
|
|
|
// Runs the dropout test network on the shared input blob.
TEST_P(Test_Caffe_layers, Dropout)
{
    const String basename("layer_dropout");
    testLayerUsingCaffeModels(basename);
}
|
|
|
|
// Runs three concatenation networks; some Inference Engine versions are tagged first.
TEST_P(Test_Caffe_layers, Concat)
{
#if defined(INF_ENGINE_RELEASE)
#if INF_ENGINE_VER_MAJOR_GE(2019010000) && INF_ENGINE_VER_MAJOR_LT(2019020000)
    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
    {
        if (target == DNN_TARGET_MYRIAD)
            applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
    }
#elif INF_ENGINE_VER_MAJOR_EQ(2019020000)
    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
    {
        if (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16)
            applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16,
                         CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
    }
#endif

#if INF_ENGINE_VER_MAJOR_LT(2021040000)
    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
    {
        if (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16)
            applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16,
                         CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
    }
#endif

#endif
    // Plain concat on the shared blob.
    testLayerUsingCaffeModels("layer_concat");

    // The remaining two networks load a .caffemodel and use their own input blobs.
    const bool useCaffeModel = true;
    const bool useCommonInputBlob = false;
    testLayerUsingCaffeModels("layer_concat_optim", useCaffeModel, useCommonInputBlob);
    testLayerUsingCaffeModels("layer_concat_shared_input", useCaffeModel, useCommonInputBlob);
}
|
|
|
|
// Builds a two-layer network by hand in which one AbsVal output feeds both inputs of a
// Concat layer, then checks that each half of the result equals the input.
TEST_P(Test_Caffe_layers, Fused_Concat)
{
    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
    {
        if (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16)
            applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16,
                         CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
    }

    checkBackend();

    // Test case
    // input
    //   |
    //   v
    // some_layer
    // |   |
    // v   v
    // concat
    Net net;

    LayerParams absParams;
    absParams.type = "AbsVal";
    absParams.name = "someLayer";
    const int absLayerId = net.addLayerToPrev(absParams.name, absParams.type, absParams);

    LayerParams concatParams;
    concatParams.set("axis", 1);
    concatParams.type = "Concat";
    concatParams.name = "testConcat";
    const int concatLayerId = net.addLayer(concatParams.name, concatParams.type, concatParams);
    // The same AbsVal output is wired to both Concat inputs.
    net.connect(absLayerId, 0, concatLayerId, 0);
    net.connect(absLayerId, 0, concatLayerId, 1);

    int inputDims[] = {1, 2, 3, 4};
    Mat input(4, inputDims, CV_32F);
    randu(input, 0.0f, 1.0f);  // [0, 1] to make AbsVal an identity transformation.

    net.setInput(input);
    net.setPreferableBackend(backend);
    net.setPreferableTarget(target);
    Mat out = net.forward();

    // Channels [0, 2) and [2, 4) of the output must each reproduce the input.
    Mat firstHalf = slice(out, Range::all(), Range(0, 2), Range::all(), Range::all());
    normAssert(firstHalf, input, "", default_l1, default_lInf);
    Mat secondHalf = slice(out, Range::all(), Range(2, 4), Range::all(), Range::all());
    normAssert(secondHalf, input, "", default_l1, default_lInf);
}
|
|
|
|
// Runs the element-wise test network on the shared input blob.
TEST_P(Test_Caffe_layers, Eltwise)
{
    const bool isNNBuilderOnMyriad =
        (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) && (target == DNN_TARGET_MYRIAD);
    if (isNNBuilderOnMyriad)
    {
        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD);
    }
    testLayerUsingCaffeModels("layer_eltwise");
}
|
|
|
|
// Runs the PReLU test network; three reduced-precision targets get a looser L-inf bound.
TEST_P(Test_Caffe_layers, PReLU)
{
    // 0.0 makes the helper fall back to the default threshold.
    double lInf = 0.0;
    if (target == DNN_TARGET_MYRIAD || target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_CPU_FP16)
    {
        lInf = 0.021;
    }
    testLayerUsingCaffeModels("layer_prelu", true, true, 0.0, lInf);
}
|
|
|
|
// TODO: fix an unstable test case
|
|
TEST_P(Test_Caffe_layers, layer_prelu_fc)
|
|
{
|
|
if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16)
|
|
applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16);
|
|
// Reference output values are in range [-0.0001, 10.3906]
|
|
double l1 = (target == DNN_TARGET_MYRIAD) ? 0.005 : 0.0;
|
|
double lInf = (target == DNN_TARGET_MYRIAD) ? 0.021 : 0.0;
|
|
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2020040000)
|
|
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL)
|
|
{
|
|
l1 = 0.006f; lInf = 0.05f;
|
|
}
|
|
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16)
|
|
{
|
|
l1 = 0.01f; lInf = 0.05f;
|
|
}
|
|
#endif
|
|
testLayerUsingCaffeModels("layer_prelu_fc", true, false, l1, lInf);
|
|
}
|
|
|
|
// Feeds a random 6x12 blob through "reshape_and_slice_routines.prototxt" and expects
// the blob named "output" to equal the input.
TEST_P(Test_Caffe_layers, Reshape_Split_Slice)
{
    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
    {
        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER);
    }
    else if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
    {
        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
    }

    const String prototxt = _tf("reshape_and_slice_routines.prototxt");
    Net net = readNetFromCaffe(prototxt);
    ASSERT_FALSE(net.empty());

    net.setPreferableBackend(backend);
    net.setPreferableTarget(target);

    // Fixed seed keeps the input reproducible between runs.
    RNG generator(0);
    Mat input(6, 12, CV_32F);
    generator.fill(input, RNG::UNIFORM, -1, 1);

    net.setInput(input, "input");
    Mat output = net.forward("output");

    normAssert(input, output, "", default_l1, default_lInf);
}
|
|
|
|
// Runs the TensorFlow model "layer_elu_model.pb" and compares its output with the
// stored reference blob.
TEST_P(Test_Caffe_layers, Conv_Elu)
{
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_RELEASE <= 2018050000
    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
    {
        if (target == DNN_TARGET_MYRIAD)
            applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
    }
#endif

    Net net = readNetFromTensorflow(_tf("layer_elu_model.pb"));
    ASSERT_FALSE(net.empty());

    Mat inputBlob = blobFromNPY(_tf("layer_elu_in.npy"));
    Mat expected = blobFromNPY(_tf("layer_elu_out.npy"));

    net.setInput(inputBlob, "input");
    net.setPreferableBackend(backend);
    net.setPreferableTarget(target);
    Mat actual = net.forward();

    // CUDA FP16 uses its own thresholds; every other target uses the fixture defaults.
    const bool isCudaFp16 = (target == DNN_TARGET_CUDA_FP16);
    const double l1 = isCudaFp16 ? 0.0002 : default_l1;
    const double lInf = isCudaFp16 ? 0.0005 : default_lInf;
    normAssert(expected, actual, "", l1, lInf);
}
|
|
|
|
class Layer_LSTM_Test : public ::testing::Test
|
|
{
|
|
public:
|
|
int numInp, numOut;
|
|
Mat Wh, Wx, b, h, c;
|
|
Ptr<LSTMLayer> layer;
|
|
std::vector<Mat> inputs, outputs;
|
|
|
|
Layer_LSTM_Test() {}
|
|
|
|
void init(const MatShape &inpShape_, const MatShape &outShape_,
|
|
bool produceCellOutput, bool useTimestampDim)
|
|
{
|
|
numInp = total(inpShape_);
|
|
numOut = total(outShape_);
|
|
|
|
Wh = Mat::ones(4 * numOut, numOut, CV_32F);
|
|
Wx = Mat::ones(4 * numOut, numInp, CV_32F);
|
|
b = Mat::ones(4 * numOut, 1, CV_32F);
|
|
h = Mat::ones(4, numOut, CV_32F);
|
|
c = Mat::ones(4, numOut, CV_32F);
|
|
|
|
LayerParams lp;
|
|
lp.blobs.resize(5);
|
|
lp.blobs[0] = Wh;
|
|
lp.blobs[1] = Wx;
|
|
lp.blobs[2] = b;
|
|
lp.blobs[3] = h;
|
|
lp.blobs[4] = c;
|
|
|
|
lp.set<bool>("produce_cell_output", produceCellOutput);
|
|
lp.set<bool>("use_timestamp_dim", useTimestampDim);
|
|
|
|
layer = LSTMLayer::create(lp);
|
|
layer->setOutShape(outShape_);
|
|
}
|
|
};
|
|
|
|
// Runs the fixture's LSTM layer with cell output enabled and checks the number and shapes
// of the outputs as well as the input/output name-to-index mapping.
TEST_F(Layer_LSTM_Test, get_set_test)
{
    const int TN = 4;
    MatShape inpShape = shape(5, 3, 2);
    MatShape outShape = shape(3, 1, 2);
    // Prepend the leading dimension TN to the per-sample shapes.
    MatShape inpResShape = concat(shape(TN), inpShape);
    MatShape outResShape = concat(shape(TN), outShape);

    init(inpShape, outShape, true, false);
    layer->setOutShape(outShape);

    Mat C((int)outResShape.size(), &outResShape[0], CV_32F);
    randu(C, -1., 1.);
    Mat H = C.clone();
    randu(H, -1., 1.);

    Mat inp((int)inpResShape.size(), &inpResShape[0], CV_32F);
    randu(inp, -1., 1.);

    inputs.push_back(inp);
    runLayer(layer, inputs, outputs);

    // Fatal check: the shape inspection below reads outputs[0] and outputs[1].
    ASSERT_EQ(2u, outputs.size());

    print(outResShape, "outResShape");
    print(shape(outputs[0]), "out0");
    // Print the second output under the "out1" label (the first one was printed twice before).
    print(shape(outputs[1]), "out1");

    EXPECT_EQ(outResShape, shape(outputs[0]));
    EXPECT_EQ(outResShape, shape(outputs[1]));

    EXPECT_EQ(0, layer->inputNameToIndex("x"));
    EXPECT_EQ(0, layer->outputNameToIndex("h"));
    EXPECT_EQ(1, layer->outputNameToIndex("c"));
}
|
|
|
|
// Builds an LSTM layer from stored weight blobs with zero initial states and compares
// its first output with the stored reference.
TEST(Layer_LSTM_Test_Accuracy_with_, CaffeRecurrent)
{
    LayerParams lstmParams;
    lstmParams.blobs.push_back(blobFromNPY(_tf("lstm.prototxt.w_2.npy")));  // Wh
    lstmParams.blobs.push_back(blobFromNPY(_tf("lstm.prototxt.w_0.npy")));  // Wx
    lstmParams.blobs.push_back(blobFromNPY(_tf("lstm.prototxt.w_1.npy")));  // bias
    lstmParams.blobs.push_back(Mat::zeros(2, 17, CV_32F));                  // h_0
    lstmParams.blobs.push_back(Mat::zeros(2, 17, CV_32F));                  // c_0
    Ptr<LSTMLayer> lstm = LSTMLayer::create(lstmParams);

    std::vector<Mat> layerInputs, layerOutputs;
    layerInputs.push_back(blobFromNPY(_tf("recurrent.input.npy")));
    runLayer(lstm, layerInputs, layerOutputs);

    Mat expectedH = blobFromNPY(_tf("lstm.prototxt.h_1.npy"));
    normAssert(expectedH, layerOutputs[0]);
}
|
|
|
|
// Builds an LSTM layer from stored weights and initial hidden/cell states, after summing
// the two bias halves and reordering the gate blocks, then compares the first output
// with the stored reference.
TEST(Layer_LSTM_Test_Accuracy_with_, HiddenParams)
{
    Mat Wx = blobFromNPY(_tf("lstm.hidden.W.npy"));
    Mat Wh = blobFromNPY(_tf("lstm.hidden.R.npy"));
    Mat b = blobFromNPY(_tf("lstm.hidden.B.npy"));
    Mat h0 = blobFromNPY(_tf("lstm.hidden.h0.npy"));
    Mat c0 = blobFromNPY(_tf("lstm.hidden.c0.npy"));

    const int numHidden = 3;
    const int numDirs = Wx.size[0];
    const int numFeatures = Wx.size[2];

    // The bias blob holds two halves side by side; the layer gets their sum.
    b = b.reshape(1, b.size[0]);
    const int halfCols = b.cols / 2;
    Mat bx = b.colRange(0, halfCols);
    Mat bh = b.colRange(halfCols, b.cols);
    b = bx + bh;

    // IFGO->IGFO
    for (int dir = 0; dir < numDirs; ++dir)
    {
        float* wxDir = Wx.ptr<float>(dir);
        float* whDir = Wh.ptr<float>(dir);
        float* biasDir = b.ptr<float>(dir);
        for (int row = 0; row < numHidden; ++row)
        {
            // Rows of the second and third gate blocks that trade places.
            const int secondGateRow = numHidden + row;
            const int thirdGateRow = numHidden * 2 + row;

            for (int col = 0; col < numFeatures; ++col)
            {
                std::swap(wxDir[secondGateRow * numFeatures + col],
                          wxDir[thirdGateRow * numFeatures + col]);
            }
            for (int col = 0; col < numHidden; ++col)
            {
                std::swap(whDir[secondGateRow * numHidden + col],
                          whDir[thirdGateRow * numHidden + col]);
            }
            std::swap(biasDir[secondGateRow], biasDir[thirdGateRow]);
        }
    }

    // Merge the two leading dimensions of each blob into rows.
    Wx = Wx.reshape(1, Wx.size[0] * Wx.size[1]);
    Wh = Wh.reshape(1, Wh.size[0] * Wh.size[1]);
    h0 = h0.reshape(1, h0.size[0] * h0.size[1]);
    c0 = c0.reshape(1, c0.size[0] * c0.size[1]);

    LayerParams lstmParams;
    // Blob order: Wh, Wx, bias, h0, c0.
    lstmParams.blobs.push_back(Wh);
    lstmParams.blobs.push_back(Wx);
    lstmParams.blobs.push_back(b);
    lstmParams.blobs.push_back(h0);
    lstmParams.blobs.push_back(c0);
    lstmParams.set("bidirectional", false);
    Ptr<LSTMLayer> lstm = LSTMLayer::create(lstmParams);

    std::vector<Mat> layerInputs, layerOutputs;
    layerInputs.push_back(blobFromNPY(_tf("lstm.hidden.input.npy")));
    runLayer(lstm, layerInputs, layerOutputs);

    Mat expectedH = blobFromNPY(_tf("lstm.hidden.output.npy"));
    normAssert(expectedH, layerOutputs[0]);
}
|
|
|
|
// Builds a unidirectional GRU layer from stored blobs and compares its first output
// with the stored reference.
TEST(Layer_GRU_Test_Accuracy_with_, Pytorch)
{
    Mat Wx = blobFromNPY(_tf("gru.W.npy"));
    Mat Wh = blobFromNPY(_tf("gru.R.npy"));
    Mat b = blobFromNPY(_tf("gru.B.npy"));
    Mat h0 = blobFromNPY(_tf("gru.h0.npy"));

    // Merge the two leading dimensions of the weights and the state into rows;
    // the bias keeps its first dimension as the row count.
    Wx = Wx.reshape(1, Wx.size[0] * Wx.size[1]);
    Wh = Wh.reshape(1, Wh.size[0] * Wh.size[1]);
    h0 = h0.reshape(1, h0.size[0] * h0.size[1]);
    b = b.reshape(1, b.size[0]);

    LayerParams gruParams;
    // Blob order: Wh, Wx, bias, h0.
    gruParams.blobs.push_back(Wh);
    gruParams.blobs.push_back(Wx);
    gruParams.blobs.push_back(b);
    gruParams.blobs.push_back(h0);
    gruParams.set("bidirectional", false);
    Ptr<GRULayer> gru = GRULayer::create(gruParams);

    std::vector<Mat> layerInputs, layerOutputs;
    layerInputs.push_back(blobFromNPY(_tf("gru.input.npy")));
    runLayer(gru, layerInputs, layerOutputs);

    Mat expectedH = blobFromNPY(_tf("gru.output.npy"));
    normAssert(expectedH, layerOutputs[0]);
}
|
|
|
|
// Loads five stored weight blobs into an RNN layer and compares its first output
// with the stored reference.
TEST(Layer_RNN_Test_Accuracy_with_, CaffeRecurrent)
{
    Ptr<RNNLayer> rnn = RNNLayer::create(LayerParams());

    // The blobs are passed to setWeights() in file order w_0 .. w_4.
    Mat w0 = blobFromNPY(_tf("rnn.prototxt.w_0.npy"));
    Mat w1 = blobFromNPY(_tf("rnn.prototxt.w_1.npy"));
    Mat w2 = blobFromNPY(_tf("rnn.prototxt.w_2.npy"));
    Mat w3 = blobFromNPY(_tf("rnn.prototxt.w_3.npy"));
    Mat w4 = blobFromNPY(_tf("rnn.prototxt.w_4.npy"));
    rnn->setWeights(w0, w1, w2, w3, w4);

    std::vector<Mat> layerInputs(1, blobFromNPY(_tf("recurrent.input.npy")));
    std::vector<Mat> layerOutputs;
    runLayer(rnn, layerInputs, layerOutputs);

    Mat expectedH = blobFromNPY(_tf("rnn.prototxt.h_1.npy"));
    normAssert(expectedH, layerOutputs[0]);
}
|
|
|
|
// Runs a hand-built single-unit LSTM with the "reverse" parameter enabled on a
// two-step input and checks both output values.
TEST(Layer_LSTM_Test_Accuracy_, Reverse)
{
    // This handcrafted setup calculates (approximately) the prefix sum of the
    // input, assuming the inputs are suitably small.
    cv::Mat input(2, 1, CV_32FC1);
    input.at<float>(0, 0) = 1e-5f;
    input.at<float>(1, 0) = 2e-5f;

    // Row order of the 4x1 blobs: input gate, forget gate, output gate, update signal.
    const int kUpdateRow = 3;

    // Only the update signal depends on the input.
    cv::Mat Wx = cv::Mat::zeros(4, 1, CV_32FC1);
    Wx.at<float>(kUpdateRow, 0) = 1.f;

    // Nothing depends on the previous hidden state.
    cv::Mat Wh = cv::Mat::zeros(4, 1, CV_32FC1);

    // Huge gate biases: always allow input to c, never forget anything on c,
    // always output everything. The update signal has no bias.
    cv::Mat bias = cv::Mat::zeros(4, 1, CV_32FC1);
    for (int gateRow = 0; gateRow < kUpdateRow; ++gateRow)
        bias.at<float>(gateRow, 0) = 1e10f;

    cv::Mat hInternal = cv::Mat::zeros(1, 1, CV_32FC1);
    cv::Mat cInternal = cv::Mat::zeros(1, 1, CV_32FC1);

    LayerParams lp;
    lp.set("reverse", true);
    lp.set("use_timestamp_dim", true);
    // Blob order: Wh, Wx, bias, h, c.
    lp.blobs.push_back(Wh);
    lp.blobs.push_back(Wx);
    lp.blobs.push_back(bias);
    lp.blobs.push_back(hInternal);
    lp.blobs.push_back(cInternal);

    cv::Ptr<cv::dnn::LSTMLayer> lstm = LSTMLayer::create(lp);
    std::vector<cv::Mat> layerInputs(1, input);
    std::vector<cv::Mat> layerOutputs;
    runLayer(lstm, layerInputs, layerOutputs);

    ASSERT_EQ(1, layerOutputs.size());
    cv::Mat out = layerOutputs[0];
    ASSERT_EQ(3, out.dims);
    ASSERT_EQ(shape(2, 1, 1), shape(out));

    // The first output element accumulates both inputs, the second only the last one.
    const float* values = out.ptr<float>();
    EXPECT_NEAR(std::tanh(1e-5f) + std::tanh(2e-5f), values[0], 1e-10);
    EXPECT_NEAR(std::tanh(2e-5f), values[1], 1e-10);
}
|
|
|
|
|
|
class Layer_RNN_Test : public ::testing::Test
|
|
{
|
|
public:
|
|
int nX, nH, nO, nT, nS;
|
|
Mat Whh, Wxh, bh, Who, bo;
|
|
Ptr<RNNLayer> layer;
|
|
|
|
std::vector<Mat> inputs, outputs;
|
|
|
|
Layer_RNN_Test()
|
|
{
|
|
nT = 3;
|
|
nS = 5;
|
|
nX = 31;
|
|
nH = 64;
|
|
nO = 100;
|
|
|
|
Whh = Mat::ones(nH, nH, CV_32F);
|
|
Wxh = Mat::ones(nH, nX, CV_32F);
|
|
bh = Mat::ones(nH, 1, CV_32F);
|
|
Who = Mat::ones(nO, nH, CV_32F);
|
|
bo = Mat::ones(nO, 1, CV_32F);
|
|
|
|
layer = RNNLayer::create(LayerParams());
|
|
layer->setProduceHiddenOutput(true);
|
|
layer->setWeights(Wxh, bh, Whh, Who, bo);
|
|
}
|
|
};
|
|
|
|
// Runs the fixture's RNN layer on a random {nT, nS, 1, nX} blob and checks the number
// and shapes of the outputs.
TEST_F(Layer_RNN_Test, get_set_test)
{
    int inputDims[] = { nT, nS, 1, nX };
    Mat randomInput(4, inputDims, CV_32F);
    randu(randomInput, -1., 1.);

    inputs.push_back(randomInput);
    runLayer(layer, inputs, outputs);

    EXPECT_EQ(outputs.size(), 2u);
    // Expected shapes: {nT, nS, nO} for output 0 and {nT, nS, nH} for output 1.
    const MatShape expectedShape0 = shape(nT, nS, nO);
    const MatShape expectedShape1 = shape(nT, nS, nH);
    EXPECT_EQ(shape(outputs[0]), expectedShape0);
    EXPECT_EQ(shape(outputs[1]), expectedShape1);
}
|
|
|
|
// Checks the "accum" and "accum_ref" Caffe models (two inputs each).
TEST_P(Test_Caffe_layers, Accum)
{
    // Tagged as skipped for the OpenCV backend on any non-CPU target.
    const bool openCvOnNonCpu = (backend == DNN_BACKEND_OPENCV) && (target != DNN_TARGET_CPU);
    if (openCvOnNonCpu)
    {
        applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL, CV_TEST_TAG_DNN_SKIP_OPENCL_FP16);
    }

    testLayerUsingCaffeModels("accum", false, false, 0.0, 0.0, 2);
    testLayerUsingCaffeModels("accum_ref", false, false, 0.0, 0.0, 2);
}
|
|
|
|
// Checks the "flow_warp" Caffe model (two inputs).
TEST_P(Test_Caffe_layers, FlowWarp)
{
    const bool openCvFp16 = (backend == DNN_BACKEND_OPENCV) && (target == DNN_TARGET_OPENCL_FP16);
    if (openCvFp16)
    {
        applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16);
    }

    testLayerUsingCaffeModels("flow_warp", false, false, 0.0, 0.0, 2);
}
|
|
|
|
// Checks the "channel_norm" Caffe model.
TEST_P(Test_Caffe_layers, ChannelNorm)
{
    const bool openCvFp16 = (backend == DNN_BACKEND_OPENCV) && (target == DNN_TARGET_OPENCL_FP16);
    if (openCvFp16)
    {
        applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16);
    }
    testLayerUsingCaffeModels("channel_norm", false, false);
}
|
|
|
|
// Checks the three "data_augmentation*" Caffe models, in a fixed order.
TEST_P(Test_Caffe_layers, DataAugmentation)
{
    const bool openCvFp16 = (backend == DNN_BACKEND_OPENCV) && (target == DNN_TARGET_OPENCL_FP16);
    if (openCvFp16)
    {
        applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16);
    }

    static const char* const kModels[] = {
        "data_augmentation",
        "data_augmentation_2x1",
        "data_augmentation_8x6"
    };
    for (int m = 0; m < 3; ++m)
    {
        testLayerUsingCaffeModels(kModels[m], true, false);
    }
}
|
|
|
|
// Checks the "nearest_2inps" (two inputs) and "nearest" Caffe models.
TEST_P(Test_Caffe_layers, Resample)
{
    // Tagged as skipped for every backend other than the OpenCV one.
    const bool isOpenCvBackend = (backend == DNN_BACKEND_OPENCV);
    if (!isOpenCvBackend)
    {
        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
    }
    testLayerUsingCaffeModels("nearest_2inps", false, false, 0.0, 0.0, 2);
    testLayerUsingCaffeModels("nearest", false, false);
}
|
|
|
|
// Checks the "correlation" Caffe model (two inputs).
TEST_P(Test_Caffe_layers, Correlation)
{
    const bool openCvFp16 = (backend == DNN_BACKEND_OPENCV) && (target == DNN_TARGET_OPENCL_FP16);
    if (openCvFp16)
    {
        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH,
                     CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER,
                     CV_TEST_TAG_DNN_SKIP_OPENCL,
                     CV_TEST_TAG_DNN_SKIP_OPENCL_FP16);
    }
    testLayerUsingCaffeModels("correlation", false, false, 0.0, 0.0, 2);
}
|
|
|
|
// Checks the "conv_2_inps" Caffe model, which is fed two inputs.
TEST_P(Test_Caffe_layers, Convolution2Inputs)
{
    const int numInputs = 2;
    testLayerUsingCaffeModels("conv_2_inps", true, false, 0.0, 0.0, numInputs);
}
|
|
|
|
// Runs the ROI pooling prototxt on stored inputs ("input" and "rois") and
// compares the result with the stored reference blob.
TEST_P(Test_Caffe_layers, ROIPooling_Accuracy)
{
    Net net = readNetFromCaffe(_tf("net_roi_pooling.prototxt"));
    ASSERT_FALSE(net.empty());

    Mat inp = blobFromNPY(_tf("net_roi_pooling.input.npy"));
    Mat rois = blobFromNPY(_tf("net_roi_pooling.rois.npy"));
    Mat ref = blobFromNPY(_tf("net_roi_pooling.npy"));

    checkBackend(&inp, &ref);

    net.setPreferableBackend(backend);
    net.setPreferableTarget(target);

    net.setInput(inp, "input");
    net.setInput(rois, "rois");

    Mat out = net.forward();

    // Tolerances: defaults first, then relaxed values for the reduced-precision targets.
    double l1 = 1e-5;
    double lInf = 1e-4;
    if (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD)
    {
        l1 = 1e-3;
        lInf = 1e-3;
    }
    else if (target == DNN_TARGET_CUDA_FP16)
    {
        l1 = 2e-4;
        lInf = 9e-4;
    }
    normAssert(out, ref, "", l1, lInf);
}
|
|
|
|
// Runs the Faster R-CNN proposal prototxt on stored scores/deltas and checks
// both outputs (ROIs and scores) against stored references. Rows of an output
// beyond the reference's detection count must be all zeros.
TEST_P(Test_Caffe_layers, FasterRCNN_Proposal)
{
    if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16)
    {
        applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16);
    }
    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
    {
        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER);
    }
    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
    {
        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
    }
    if (backend == DNN_BACKEND_CUDA)
    {
        applyTestTag(CV_TEST_TAG_DNN_SKIP_CUDA); /* Proposal layer is unsupported */
    }

    Net net = readNetFromCaffe(_tf("net_faster_rcnn_proposal.prototxt"));

    Mat scores = blobFromNPY(_tf("net_faster_rcnn_proposal.scores.npy"));
    Mat deltas = blobFromNPY(_tf("net_faster_rcnn_proposal.deltas.npy"));
    Mat imInfo = (Mat_<float>(1, 3) << 600, 800, 1.6f);

    net.setInput(scores, "rpn_cls_prob_reshape");
    net.setInput(deltas, "rpn_bbox_pred");
    net.setInput(imInfo, "im_info");

    std::vector<Mat> outs;
    net.setPreferableBackend(backend);
    net.setPreferableTarget(target);
    net.forward(outs, "output");

    // Reference file for each of the two outputs, in output order.
    static const char* const kRefFiles[] = {
        "net_faster_rcnn_proposal.out_rois.npy",
        "net_faster_rcnn_proposal.out_scores.npy"
    };
    for (int outIdx = 0; outIdx < 2; ++outIdx)
    {
        Mat ref = blobFromNPY(_tf(kRefFiles[outIdx]));
        const int numDets = ref.size[0];
        const int numRows = outs[outIdx].size[0];

        EXPECT_LE(numDets, numRows);
        normAssert(outs[outIdx].rowRange(0, numDets), ref);

        if (numDets < numRows)
        {
            EXPECT_EQ(countNonZero(outs[outIdx].rowRange(numDets, numRows)), 0);
        }
    }
}
|
|
|
|
typedef testing::TestWithParam<tuple<Vec4i, Vec2i, bool> > Scale_untrainable;
|
|
TEST_P(Scale_untrainable, Accuracy)
|
|
{
|
|
Vec4i inpShapeVec = get<0>(GetParam());
|
|
int axis = get<1>(GetParam())[0];
|
|
int weightsDims = get<1>(GetParam())[1];
|
|
bool testFusion = get<2>(GetParam());
|
|
const int inpShape[] = {inpShapeVec[0], inpShapeVec[1], inpShapeVec[2], inpShapeVec[3]};
|
|
|
|
// Create a network with two inputs. Scale layer multiplies a first input to
|
|
// a second one. See http://caffe.berkeleyvision.org/tutorial/layers/scale.html
|
|
Net net;
|
|
// Check that this version of Scale layer won't be fused with Convolution layer.
|
|
if (testFusion)
|
|
{
|
|
LayerParams lp;
|
|
lp.set("kernel_size", 1);
|
|
lp.set("num_output", 3);
|
|
lp.set("group", 3);
|
|
lp.set("bias_term", false);
|
|
lp.type = "Convolution";
|
|
lp.name = "testConv";
|
|
|
|
std::vector<int> weightsShape(4);
|
|
weightsShape[0] = 3; // #outChannels
|
|
weightsShape[1] = 1; // #inpChannels / group
|
|
weightsShape[2] = 1; // height
|
|
weightsShape[3] = 1; // width
|
|
Mat weights(weightsShape, CV_32F);
|
|
weights.setTo(1);
|
|
lp.blobs.push_back(weights);
|
|
net.addLayerToPrev(lp.name, lp.type, lp);
|
|
}
|
|
LayerParams lp;
|
|
lp.type = "Scale";
|
|
lp.name = "testLayer";
|
|
lp.set("axis", axis);
|
|
int id = net.addLayerToPrev(lp.name, lp.type, lp);
|
|
net.connect(0, 1, id, 1);
|
|
|
|
Mat input(4, inpShape, CV_32F);
|
|
Mat weights(weightsDims, &inpShape[axis], CV_32F);
|
|
randu(input, -1, 1);
|
|
randu(weights, -1, 1);
|
|
|
|
std::vector<String> inpNames(2);
|
|
inpNames[0] = "scale_input";
|
|
inpNames[1] = "scale_weights";
|
|
net.setInputsNames(inpNames);
|
|
net.setInput(input, inpNames[0]);
|
|
net.setInput(weights, inpNames[1]);
|
|
net.setPreferableBackend(DNN_BACKEND_OPENCV);
|
|
Mat out = net.forward();
|
|
|
|
Mat ref(input.dims, input.size, CV_32F);
|
|
float* inpData = (float*)input.data;
|
|
float* refData = (float*)ref.data;
|
|
float* weightsData = (float*)weights.data;
|
|
int spatialSize = 1;
|
|
for (int i = axis + weightsDims; i < 4; ++i)
|
|
spatialSize *= inpShape[i];
|
|
for (int i = 0; i < ref.total(); ++i)
|
|
{
|
|
float w = weightsData[(i / spatialSize) % weights.total()];
|
|
refData[i] = inpData[i] * w;
|
|
}
|
|
normAssert(out, ref);
|
|
}
|
|
|
|
INSTANTIATE_TEST_CASE_P(Layer_Test, Scale_untrainable, Combine(
|
|
/*input size*/ Values(Vec4i(2, 3, 4, 5)),
|
|
/*axis, #dims*/ Values(Vec2i(0, 1), Vec2i(0, 2), Vec2i(0, 3), Vec2i(0, 4),
|
|
Vec2i(1, 1), Vec2i(1, 2), Vec2i(1, 3),
|
|
Vec2i(2, 1), Vec2i(2, 2),
|
|
Vec2i(3, 1)),
|
|
/*conv fusion*/ testing::Bool()
|
|
));
|
|
|
|
typedef testing::TestWithParam<tuple<Vec4i, Vec4i, int, int, int> > Crop;
|
|
TEST_P(Crop, Accuracy)
|
|
{
|
|
Vec4i inpShapeVec = get<0>(GetParam());
|
|
Vec4i sizShapeVec = get<1>(GetParam());
|
|
int axis = get<2>(GetParam());
|
|
int numOffsets = get<3>(GetParam());
|
|
int offsetVal = get<4>(GetParam());
|
|
const int inpShape[] = {inpShapeVec[0], inpShapeVec[1], inpShapeVec[2], inpShapeVec[3]};
|
|
const int sizShape[] = {sizShapeVec[0], sizShapeVec[1], sizShapeVec[2], sizShapeVec[3]};
|
|
|
|
// Create a network with two inputs. Crop layer crops a first input to
|
|
// the size of a second one.
|
|
// See http://caffe.berkeleyvision.org/tutorial/layers/crop.html
|
|
Net net;
|
|
|
|
LayerParams lp;
|
|
lp.name = "testCrop";
|
|
lp.type = "Crop";
|
|
lp.set("axis", axis);
|
|
if (numOffsets > 0)
|
|
{
|
|
std::vector<int> offsets(numOffsets, offsetVal);
|
|
lp.set("offset", DictValue::arrayInt<int*>(&offsets[0], offsets.size()));
|
|
}
|
|
else
|
|
offsetVal = 0;
|
|
int id = net.addLayerToPrev(lp.name, lp.type, lp);
|
|
net.connect(0, 1, id, 1);
|
|
|
|
Mat inpImage(4, inpShape, CV_32F);
|
|
Mat sizImage(4, sizShape, CV_32F);
|
|
randu(inpImage, -1, 1);
|
|
randu(sizImage, -1, 1);
|
|
|
|
std::vector<String> inpNames(2);
|
|
inpNames[0] = "cropImage";
|
|
inpNames[1] = "sizImage";
|
|
net.setInputsNames(inpNames);
|
|
net.setInput(inpImage, inpNames[0]);
|
|
net.setInput(sizImage, inpNames[1]);
|
|
net.setPreferableBackend(DNN_BACKEND_OPENCV);
|
|
|
|
// There are a few conditions that represent invalid input to the crop
|
|
// layer, so in those cases we want to verify an exception is thrown.
|
|
|
|
bool shouldThrowException = false;
|
|
if (numOffsets > 1 && numOffsets != 4 - axis)
|
|
shouldThrowException = true;
|
|
else
|
|
for (int i = axis; i < 4; i++)
|
|
if (sizShape[i] + offsetVal > inpShape[i])
|
|
shouldThrowException = true;
|
|
|
|
Mat out;
|
|
if (shouldThrowException)
|
|
{
|
|
ASSERT_ANY_THROW(out = net.forward());
|
|
return;
|
|
}
|
|
else
|
|
out = net.forward();
|
|
|
|
// Finally, compare the cropped output blob from the DNN layer (out)
|
|
// to a reference blob (ref) that we compute here.
|
|
|
|
std::vector<Range> crop_range;
|
|
crop_range.resize(4, Range::all());
|
|
for (int i = axis; i < 4; i++)
|
|
crop_range[i] = Range(offsetVal, sizShape[i] + offsetVal);
|
|
|
|
Mat ref(sizImage.dims, sizImage.size, CV_32F);
|
|
inpImage(&crop_range[0]).copyTo(ref);
|
|
normAssert(out, ref);
|
|
}
|
|
|
|
INSTANTIATE_TEST_CASE_P(Layer_Test, Crop, Combine(
|
|
/*input blob shape*/ Values(Vec4i(1, 3, 20, 30)),
|
|
/*cropsize blob shape*/ Values(Vec4i(1, 3, 10, 12)),
|
|
/*start axis*/ Values(0, 1, 2),
|
|
/*number of offsets*/ Values(0, 1, 2, 4),
|
|
/*offset value*/ Values(3, 4)
|
|
));
|
|
|
|
// Check that by default average pooling layer should not count zero padded values
|
|
// into the normalization area.
|
|
TEST_P(Test_Caffe_layers, Average_pooling_kernel_area)
|
|
{
|
|
LayerParams lp;
|
|
lp.name = "testAvePool";
|
|
lp.type = "Pooling";
|
|
lp.set("kernel_size", 2);
|
|
lp.set("stride", 2);
|
|
lp.set("pool", "AVE");
|
|
|
|
Net net;
|
|
net.addLayerToPrev(lp.name, lp.type, lp);
|
|
// 1 2 | 3
|
|
// 4 5 | 6
|
|
// ----+--
|
|
// 7 8 | 9
|
|
Mat inp = (Mat_<float>(3, 3) << 1, 2, 3, 4, 5, 6, 7, 8, 9);
|
|
Mat ref = (Mat_<float>(2, 2) << (1 + 2 + 4 + 5) / 4.f, (3 + 6) / 2.f, (7 + 8) / 2.f, 9);
|
|
Mat tmp = blobFromImage(inp);
|
|
net.setInput(blobFromImage(inp));
|
|
net.setPreferableBackend(backend);
|
|
net.setPreferableTarget(target);
|
|
Mat out = net.forward();
|
|
normAssert(out, blobFromImage(ref));
|
|
}
|
|
|
|
// Runs "prior_box.prototxt" on random "data" and "shape" inputs and compares
// the output with the stored reference "priorbox_output.npy".
TEST_P(Test_Caffe_layers, PriorBox_repeated)
{
    Net net = readNet(_tf("prior_box.prototxt"));

    const int dataDims[] = {1, 3, 10, 10};
    const int shapeDims[] = {1, 2, 3, 4};

    Mat dataBlob(4, dataDims, CV_32F);
    randu(dataBlob, -1.0f, 1.0f);
    Mat shapeBlob(4, shapeDims, CV_32F);
    randu(shapeBlob, -1.0f, 1.0f);

    net.setInput(dataBlob, "data");
    net.setInput(shapeBlob, "shape");
    net.setPreferableBackend(backend);
    net.setPreferableTarget(target);
    Mat out = net.forward();
    Mat ref = blobFromNPY(_tf("priorbox_output.npy"));

    // Tolerances: defaults first, then relaxed values for the reduced-precision targets.
    double l1 = 1e-5;
    double lInf = 1e-4;
    if (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD)
    {
        l1 = 1e-3;
        lInf = 1e-3;
    }
    else if (target == DNN_TARGET_CUDA_FP16)
    {
        l1 = 7e-5;
        lInf = 0.0005;
    }
    normAssert(out, ref, "", l1, lInf);
}
|
|
|
|
// Test PriorBoxLayer in case of no aspect ratios (just squared proposals).
|
|
TEST_P(Test_Caffe_layers, PriorBox_squares)
|
|
{
|
|
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && target == DNN_TARGET_MYRIAD)
|
|
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER);
|
|
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD)
|
|
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
|
|
LayerParams lp;
|
|
lp.name = "testPriorBox";
|
|
lp.type = "PriorBox";
|
|
lp.set("min_size", 2);
|
|
lp.set("flip", true);
|
|
lp.set("clip", true);
|
|
float variance[] = {0.1f, 0.1f, 0.2f, 0.2f};
|
|
float aspectRatios[] = {1.0f}; // That should be ignored.
|
|
lp.set("variance", DictValue::arrayReal<float*>(&variance[0], 4));
|
|
lp.set("aspect_ratio", DictValue::arrayReal<float*>(&aspectRatios[0], 1));
|
|
|
|
Net net;
|
|
int id = net.addLayerToPrev(lp.name, lp.type, lp);
|
|
net.connect(0, 0, id, 1); // The second input is an input image. Shapes are used for boxes normalization.
|
|
Mat inp(1, 2, CV_32F);
|
|
randu(inp, -1, 1);
|
|
net.setInput(blobFromImage(inp));
|
|
net.setPreferableBackend(backend);
|
|
net.setPreferableTarget(target);
|
|
Mat out = net.forward();
|
|
|
|
Mat ref = (Mat_<float>(4, 4) << 0.0, 0.0, 0.75, 1.0,
|
|
0.25, 0.0, 1.0, 1.0,
|
|
0.1f, 0.1f, 0.2f, 0.2f,
|
|
0.1f, 0.1f, 0.2f, 0.2f);
|
|
double l1 = 1e-5;
|
|
if (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD || target == DNN_TARGET_CUDA_FP16)
|
|
l1 = 2e-5;
|
|
normAssert(out.reshape(1, 4), ref, "", l1);
|
|
}
|
|
|
|
typedef TestWithParam<tuple<int, int> > Layer_Test_DWconv_Prelu;
|
|
TEST_P(Layer_Test_DWconv_Prelu, Accuracy)
|
|
{
|
|
// Test case
|
|
// input img size 3x16x16 value all 1
|
|
// |
|
|
// v
|
|
// dw_conv weight[0]=-1 weight[1]=-2 weight[2]=-3 bias={1,2,3}
|
|
// |
|
|
// v
|
|
// prelu weight={1,2,3}
|
|
// |
|
|
// v
|
|
// output out size 3x14x14 if right: out[0]=-8 out[0]=-32 out[0]=-72
|
|
// but current opencv output: out[0]=-24 out[0]=-48 out[0]=-72
|
|
|
|
const int num_input = get<0>(GetParam()); //inpChannels
|
|
const int group = 3; //outChannels=group when group>1
|
|
const int num_output = get<1>(GetParam());
|
|
const int kernel_depth = num_input/group;
|
|
CV_Assert_N(num_output >= group, num_output % group == 0, num_input % group == 0);
|
|
|
|
Net net;
|
|
//layer 1: dwconv
|
|
LayerParams lp;
|
|
lp.name = "dwconv";
|
|
lp.type = "Convolution";
|
|
lp.set("kernel_size", 3);
|
|
lp.set("num_output", num_output);
|
|
lp.set("pad", 0);
|
|
lp.set("group", group);
|
|
lp.set("stride", 1);
|
|
lp.set("engine", "CAFFE");
|
|
lp.set("bias_term", "true");
|
|
|
|
std::vector<int> weightsShape(4);
|
|
weightsShape[0] = num_output; // #outChannels
|
|
weightsShape[1] = kernel_depth; // #inpChannels / group
|
|
weightsShape[2] = 3; // height
|
|
weightsShape[3] = 3; // width
|
|
Mat weights(weightsShape, CV_32F, Scalar(1));
|
|
|
|
//assign weights
|
|
for (int i = 0; i < weightsShape[0]; ++i)
|
|
{
|
|
for (int j = 0; j < weightsShape[1]; ++j)
|
|
{
|
|
for (int k = 0; k < weightsShape[2]; ++k)
|
|
{
|
|
for (int l = 0; l < weightsShape[3]; ++l)
|
|
{
|
|
weights.ptr<float>(i, j, k)[l]=-1*(i+1);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
lp.blobs.push_back(weights);
|
|
|
|
//assign bias
|
|
Mat bias(1, num_output, CV_32F, Scalar(1));
|
|
for (int i = 0; i < 1; ++i)
|
|
{
|
|
for (int j = 0; j < num_output; ++j)
|
|
{
|
|
bias.ptr<float>(i)[j]=j+1;
|
|
}
|
|
}
|
|
lp.blobs.push_back(bias);
|
|
net.addLayerToPrev(lp.name, lp.type, lp);
|
|
|
|
//layer 2: prelu
|
|
LayerParams lpr;
|
|
lpr.name = "dw_relu";
|
|
lpr.type = "PReLU";
|
|
Mat weightsp(1, num_output, CV_32F, Scalar(1));
|
|
|
|
//assign weights
|
|
for (int i = 0; i < 1; ++i)
|
|
{
|
|
for (int j = 0; j < num_output; ++j)
|
|
{
|
|
weightsp.ptr<float>(i)[j]=j+1;
|
|
}
|
|
}
|
|
|
|
lpr.blobs.push_back(weightsp);
|
|
net.addLayerToPrev(lpr.name, lpr.type, lpr);
|
|
|
|
int shape[] = {1, num_input, 16, 16};
|
|
Mat in_blob(4, &shape[0], CV_32FC1, Scalar(1));
|
|
|
|
net.setPreferableBackend(DNN_BACKEND_OPENCV);
|
|
net.enableWinograd(false);
|
|
net.setInput(in_blob);
|
|
Mat out = net.forward();
|
|
|
|
//assign target
|
|
std::vector<int> outShape(4);
|
|
outShape[0] = 1;
|
|
outShape[1] = num_output; // outChannels
|
|
outShape[2] = 14; // height
|
|
outShape[3] = 14; // width
|
|
Mat target(outShape, CV_32F, Scalar(1));
|
|
for (int i = 0; i < outShape[0]; ++i)
|
|
{
|
|
for (int j = 0; j < outShape[1]; ++j)
|
|
{
|
|
for (int k = 0; k < outShape[2]; ++k)
|
|
{
|
|
for (int l = 0; l < outShape[3]; ++l)
|
|
{
|
|
target.ptr<float>(i, j, k)[l]=(-9*kernel_depth*(j+1)+j+1)*(j+1);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
normAssert(out, target);
|
|
}
|
|
INSTANTIATE_TEST_CASE_P(/**/, Layer_Test_DWconv_Prelu, Combine(Values(3, 6), Values(3, 6)));
|
|
|
|
#ifdef HAVE_INF_ENGINE
|
|
// Using Intel's Model Optimizer generate .xml and .bin files:
|
|
// ./ModelOptimizer -w /path/to/caffemodel -d /path/to/prototxt \
|
|
// -p FP32 -i -b ${batch_size} -o /path/to/output/folder
|
|
typedef testing::TestWithParam<tuple<Backend, Target> > Layer_Test_Convolution_DLDT;
|
|
TEST_P(Layer_Test_Convolution_DLDT, Accuracy)
|
|
{
|
|
const Backend backendId = get<0>(GetParam());
|
|
const Target targetId = get<1>(GetParam());
|
|
|
|
if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && targetId == DNN_TARGET_MYRIAD)
|
|
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER);
|
|
|
|
if (backendId != DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && backendId != DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
|
|
throw SkipTestException("No support for async forward");
|
|
|
|
ASSERT_EQ(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, backendId);
|
|
|
|
Net netDefault = readNet(_tf("layer_convolution.caffemodel"), _tf("layer_convolution.prototxt"));
|
|
Net net = readNet(_tf("layer_convolution.xml"), _tf("layer_convolution.bin"));
|
|
|
|
Mat inp = blobFromNPY(_tf("blob.npy"));
|
|
|
|
netDefault.setInput(inp);
|
|
netDefault.setPreferableBackend(DNN_BACKEND_OPENCV);
|
|
Mat outDefault = netDefault.forward();
|
|
|
|
net.setInput(inp);
|
|
net.setPreferableBackend(backendId);
|
|
net.setPreferableTarget(targetId);
|
|
|
|
Mat out = net.forward();
|
|
|
|
double l1 = (targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD) ? 1.5e-3 : 1e-5;
|
|
double lInf = (targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD) ? 1.8e-2 : 1e-4;
|
|
normAssert(outDefault, out, "", l1, lInf);
|
|
|
|
std::vector<int> outLayers = net.getUnconnectedOutLayers();
|
|
ASSERT_EQ(net.getLayer(outLayers[0])->name, "output");
|
|
if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
|
|
ASSERT_EQ(net.getLayer(outLayers[0])->type, "Convolution");
|
|
else
|
|
ASSERT_EQ(net.getLayer(outLayers[0])->type, "Result");
|
|
}
|
|
|
|
// Feeds the same random data once as CV_8U and once converted to CV_32F;
// both runs must produce CV_32F outputs, which are compared with each other
// on every target except MYRIAD.
TEST_P(Layer_Test_Convolution_DLDT, setInput_uint8)
{
    const Backend backendId = get<0>(GetParam());
    const Target targetId = get<1>(GetParam());

    const bool isNNBuilder = (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019);
    const bool isNgraph = (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH);

    if (isNNBuilder && targetId == DNN_TARGET_MYRIAD)
        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER);

    if (!(isNNBuilder || isNgraph))
        throw SkipTestException("No support for async forward");

    ASSERT_EQ(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, backendId);

    int blobSize[] = {2, 6, 75, 113};
    Mat inputU8(4, &blobSize[0], CV_8U);
    Mat inputF32;

    randu(inputU8, 0, 255);
    inputU8.convertTo(inputF32, CV_32F);

    const Mat inputs[] = {inputU8, inputF32};
    Mat outs[2];
    for (int run = 0; run < 2; ++run)
    {
        // A fresh network is loaded for every input.
        Net net = readNet(_tf("layer_convolution.xml"), _tf("layer_convolution.bin"));
        net.setPreferableBackend(backendId);
        net.setPreferableTarget(targetId);
        net.setInput(inputs[run]);
        outs[run] = net.forward();
        ASSERT_EQ(outs[run].type(), CV_32F);
    }
    if (targetId != DNN_TARGET_MYRIAD)
        normAssert(outs[0], outs[1]);
}
|
|
|
|
// Runs two independently loaded copies of the same IR model from two threads
// and checks each result against the stored reference.
TEST_P(Layer_Test_Convolution_DLDT, multithreading)
{
    const Backend backendId = get<0>(GetParam());
    const Target targetId = get<1>(GetParam());

    const bool isNNBuilder = (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019);
    const bool isNgraph = (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH);

    if (isNNBuilder && targetId == DNN_TARGET_MYRIAD)
        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER);

    if (!(isNNBuilder || isNgraph))
        throw SkipTestException("No support for async forward");

    ASSERT_EQ(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, backendId);

    std::string xmlPath = _tf("layer_convolution.xml");
    std::string binPath = _tf("layer_convolution.bin");
    Net firstNet = readNet(xmlPath, binPath);
    Net secondNet = readNet(xmlPath, binPath);
    Mat inp = blobFromNPY(_tf("blob.npy"));

    firstNet.setInput(inp);
    secondNet.setInput(inp);
    firstNet.setPreferableBackend(backendId);
    firstNet.setPreferableTarget(targetId);
    secondNet.setPreferableBackend(backendId);
    secondNet.setPreferableTarget(targetId);

    // Each thread writes only its own output; both are joined before the
    // results are read.
    Mat out1, out2;
    std::thread worker1([&]{ out1 = firstNet.forward(); });
    std::thread worker2([&]{ out2 = secondNet.forward(); });

    worker1.join();
    worker2.join();

    Mat ref = blobFromNPY(_tf("layer_convolution.npy"));
    const bool reducedPrecision = (targetId == DNN_TARGET_OPENCL_FP16) || (targetId == DNN_TARGET_MYRIAD);
    const double l1 = reducedPrecision ? 1.5e-3 : 1e-5;
    const double lInf = reducedPrecision ? 1.8e-2 : 1e-4;
    normAssert(out1, ref, "first thread", l1, lInf);
    normAssert(out2, ref, "second thread", l1, lInf);
}
|
|
|
|
// Instantiate the DLDT convolution tests with the values from dnnBackendsAndTargetsIE().
INSTANTIATE_TEST_CASE_P(/**/, Layer_Test_Convolution_DLDT, dnnBackendsAndTargetsIE());
|
|
|
|
// 1. Create a .prototxt file with the following network:
|
|
// layer {
|
|
// type: "Input" name: "data" top: "data"
|
|
// input_param { shape { dim: 1 dim: 2 dim: 3 } }
|
|
// }
|
|
// layer {
|
|
// type: "Input" name: "second_input" top: "second_input"
|
|
// input_param { shape { dim: 1 dim: 2 dim: 3 } }
|
|
// }
|
|
// layer {
|
|
// type: "Eltwise" name: "output" top: "output"
|
|
// bottom: "data" bottom: "second_input"
|
|
// eltwise_param { operation: SUM }
|
|
// }
|
|
//
|
|
// 2. Create a .caffemodel file using Caffe:
|
|
//
|
|
// import caffe
|
|
// net = caffe.Net('/path/to/prototxt', caffe.TEST)
|
|
// net.save('/path/to/caffemodel')
|
|
//
|
|
// 3. Convert using ModelOptimizer.
|
|
typedef testing::TestWithParam<tuple<int, int, Target, std::vector<int> > > Test_DLDT_two_inputs_3dim;
|
|
TEST_P(Test_DLDT_two_inputs_3dim, as_IR)
|
|
{
|
|
int firstInpType = get<0>(GetParam());
|
|
int secondInpType = get<1>(GetParam());
|
|
Target targetId = get<2>(GetParam());
|
|
|
|
Net net = readNet(_tf("net_two_inputs.xml"), _tf("net_two_inputs.bin"));
|
|
std::vector<int> inpSize = get<3>(GetParam());
|
|
Mat firstInp(3, inpSize.data(), firstInpType);
|
|
Mat secondInp(3, inpSize.data(), secondInpType);
|
|
randu(firstInp, 0, 255);
|
|
randu(secondInp, 0, 255);
|
|
|
|
net.setInput(firstInp, "data");
|
|
net.setInput(secondInp, "second_input");
|
|
net.setPreferableTarget(targetId);
|
|
|
|
double l1 = ((targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD) &&
|
|
(firstInpType == CV_32F || secondInpType == CV_32F)) ? 0.06 : 0.0;
|
|
double lInf = ((targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD) &&
|
|
(firstInpType == CV_32F || secondInpType == CV_32F)) ? 0.23 : 0.0;
|
|
|
|
Mat out = net.forward();
|
|
|
|
Mat ref;
|
|
cv::add(firstInp, secondInp, ref, Mat(), CV_32F);
|
|
normAssert(out, ref, "", l1, lInf);
|
|
}
|
|
|
|
std::vector< std::vector<int> > list_sizes{ {1, 2, 3}, {3, 2, 1}, {5, 5, 5}, {13, 7, 11} };
|
|
|
|
INSTANTIATE_TEST_CASE_P(/*nothing*/, Test_DLDT_two_inputs_3dim, Combine(
|
|
Values(CV_8U, CV_32F), Values(CV_8U, CV_32F),
|
|
testing::ValuesIn(getAvailableTargets(DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)),
|
|
testing::ValuesIn(list_sizes)
|
|
));
|
|
|
|
class UnsupportedLayer : public Layer
|
|
{
|
|
public:
|
|
UnsupportedLayer(const LayerParams ¶ms) : Layer(params) {}
|
|
|
|
static Ptr<Layer> create(const LayerParams& params)
|
|
{
|
|
return Ptr<Layer>(new UnsupportedLayer(params));
|
|
}
|
|
|
|
virtual bool supportBackend(int backendId) CV_OVERRIDE
|
|
{
|
|
return backendId == DNN_BACKEND_OPENCV;
|
|
}
|
|
|
|
virtual void forward(cv::InputArrayOfArrays inputs, cv::OutputArrayOfArrays outputs, cv::OutputArrayOfArrays internals) CV_OVERRIDE {}
|
|
};
|
|
|
|
typedef DNNTestLayer Test_DLDT_layers;
|
|
|
|
static void test_dldt_fused_output(Backend backend, Target target)
|
|
{
|
|
static const int kNumChannels = 3;
|
|
Net net;
|
|
{
|
|
LayerParams lp;
|
|
lp.set("kernel_size", 1);
|
|
lp.set("num_output", 3);
|
|
lp.set("bias_term", false);
|
|
lp.type = "Convolution";
|
|
lp.name = "testConv";
|
|
lp.blobs.push_back(Mat({kNumChannels, 1, 1, 1}, CV_32F, Scalar(1)));
|
|
net.addLayerToPrev(lp.name, lp.type, lp);
|
|
}
|
|
{
|
|
LayerParams lp;
|
|
lp.set("bias_term", false);
|
|
lp.type = "Scale";
|
|
lp.name = "testScale";
|
|
lp.blobs.push_back(Mat({kNumChannels}, CV_32F, Scalar(1)));
|
|
net.addLayerToPrev(lp.name, lp.type, lp);
|
|
}
|
|
{
|
|
LayerParams lp;
|
|
net.addLayerToPrev("unsupported_layer", "Unsupported", lp);
|
|
}
|
|
net.setPreferableBackend(backend);
|
|
net.setPreferableTarget(target);
|
|
net.setInput(Mat({1, 1, 2, 3}, CV_32FC1, Scalar(1)));
|
|
net.forward();
|
|
}
|
|
|
|
// Registers the "Unsupported" layer type, runs test_dldt_fused_output() and
// reports any exception as a test failure. The layer type is unregistered
// afterwards in every case.
TEST_P(Test_DLDT_layers, fused_output)
{
    CV_DNN_REGISTER_LAYER_CLASS(Unsupported, UnsupportedLayer);
    try
    {
        test_dldt_fused_output(backend, target);
    }
    catch (const std::exception& ex)
    {
        ADD_FAILURE() << "Exception: " << ex.what();
    }
    catch (...)
    {
        ADD_FAILURE() << "Unknown exception";
    }
    LayerFactory::unregisterLayer("Unsupported");
}
|
|
|
|
// Creates two single-convolution networks whose only weight is 1 and 2
// respectively, and checks that the first one still gives a consistent
// result after the second one has been run.
TEST_P(Test_DLDT_layers, multiple_networks)
{
    Net nets[2];
    for (int idx = 0; idx < 2; ++idx)
    {
        Net& current = nets[idx];
        current.setInputsNames(std::vector<String>(1, format("input_%d", idx)));

        LayerParams convParams;
        convParams.set("kernel_size", 1);
        convParams.set("num_output", 1);
        convParams.set("bias_term", false);
        convParams.type = "Convolution";
        convParams.name = format("testConv_%d", idx);
        convParams.blobs.push_back(Mat({1, 1, 1, 1}, CV_32F, Scalar(1 + idx)));

        current.addLayerToPrev(convParams.name, convParams.type, convParams);
        current.setPreferableBackend(backend);
        current.setPreferableTarget(target);
        current.setInput(Mat({1, 1, 2, 3}, CV_32FC1, Scalar(1)));
    }
    Mat out_1 = nets[0].forward();
    Mat out_2 = nets[1].forward();
    // After the second model is initialized we try to receive an output from the first network again.
    out_1 = nets[0].forward();
    normAssert(2 * out_1, out_2);
}

INSTANTIATE_TEST_CASE_P(/*nothing*/, Test_DLDT_layers, dnnBackendsAndTargets());
|
|
|
|
#endif // HAVE_INF_ENGINE
|
|
|
|
// Test a custom layer.
|
|
class CustomInterpLayer CV_FINAL : public Layer
|
|
{
|
|
public:
|
|
CustomInterpLayer(const LayerParams ¶ms) : Layer(params)
|
|
{
|
|
zoomFactor = params.get<int>("zoom_factor", 0);
|
|
outWidth = params.get<int>("width", 0);
|
|
outHeight = params.get<int>("height", 0);
|
|
}
|
|
|
|
static Ptr<Layer> create(LayerParams& params)
|
|
{
|
|
return Ptr<Layer>(new CustomInterpLayer(params));
|
|
}
|
|
|
|
virtual bool getMemoryShapes(const std::vector<std::vector<int> > &inputs,
|
|
const int requiredOutputs,
|
|
std::vector<std::vector<int> > &outputs,
|
|
std::vector<std::vector<int> > &internals) const CV_OVERRIDE
|
|
{
|
|
const int batchSize = inputs[0][0];
|
|
const int numChannels = inputs[0][1];
|
|
const int inpHeight = inputs[0][2];
|
|
const int inpWidth = inputs[0][3];
|
|
|
|
std::vector<int> outShape(4);
|
|
outShape[0] = batchSize;
|
|
outShape[1] = numChannels;
|
|
outShape[2] = outHeight != 0 ? outHeight : (inpHeight + (inpHeight - 1) * (zoomFactor - 1));
|
|
outShape[3] = outWidth != 0 ? outWidth : (inpWidth + (inpWidth - 1) * (zoomFactor - 1));
|
|
outputs.assign(1, outShape);
|
|
return false;
|
|
}
|
|
|
|
virtual void finalize(InputArrayOfArrays, OutputArrayOfArrays outputs_arr) CV_OVERRIDE
|
|
{
|
|
std::vector<Mat> outputs;
|
|
outputs_arr.getMatVector(outputs);
|
|
|
|
if (!outWidth && !outHeight)
|
|
{
|
|
outHeight = outputs[0].size[2];
|
|
outWidth = outputs[0].size[3];
|
|
}
|
|
}
|
|
|
|
// Implementation of this custom layer is based on https://github.com/cdmh/deeplab-public/blob/master/src/caffe/layers/interp_layer.cpp
|
|
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
|
|
{
|
|
CV_TRACE_FUNCTION();
|
|
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
|
|
|
if (inputs_arr.depth() == CV_16S)
|
|
{
|
|
forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
|
return;
|
|
}
|
|
|
|
std::vector<Mat> inputs, outputs;
|
|
inputs_arr.getMatVector(inputs);
|
|
outputs_arr.getMatVector(outputs);
|
|
|
|
Mat& inp = inputs[0];
|
|
Mat& out = outputs[0];
|
|
const float* inpData = (float*)inp.data;
|
|
float* outData = (float*)out.data;
|
|
|
|
const int batchSize = inp.size[0];
|
|
const int numChannels = inp.size[1];
|
|
const int inpHeight = inp.size[2];
|
|
const int inpWidth = inp.size[3];
|
|
|
|
const float rheight = (outHeight > 1) ? static_cast<float>(inpHeight - 1) / (outHeight - 1) : 0.f;
|
|
const float rwidth = (outWidth > 1) ? static_cast<float>(inpWidth - 1) / (outWidth - 1) : 0.f;
|
|
for (int h2 = 0; h2 < outHeight; ++h2)
|
|
{
|
|
const float h1r = rheight * h2;
|
|
const int h1 = h1r;
|
|
const int h1p = (h1 < inpHeight - 1) ? 1 : 0;
|
|
const float h1lambda = h1r - h1;
|
|
const float h0lambda = 1.f - h1lambda;
|
|
for (int w2 = 0; w2 < outWidth; ++w2)
|
|
{
|
|
const float w1r = rwidth * w2;
|
|
const int w1 = w1r;
|
|
const int w1p = (w1 < inpWidth - 1) ? 1 : 0;
|
|
const float w1lambda = w1r - w1;
|
|
const float w0lambda = 1.f - w1lambda;
|
|
const float* pos1 = inpData + h1 * inpWidth + w1;
|
|
float* pos2 = outData + h2 * outWidth + w2;
|
|
for (int c = 0; c < batchSize * numChannels; ++c)
|
|
{
|
|
pos2[0] =
|
|
h0lambda * (w0lambda * pos1[0] + w1lambda * pos1[w1p]) +
|
|
h1lambda * (w0lambda * pos1[h1p * inpWidth] + w1lambda * pos1[h1p * inpWidth + w1p]);
|
|
pos1 += inpWidth * inpHeight;
|
|
pos2 += outWidth * outHeight;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
private:
|
|
int outWidth, outHeight, zoomFactor;
|
|
};
|
|
|
|
#ifndef OPENCV_DNN_EXTERNAL_PROTOBUF
|
|
TEST_P(Test_Caffe_layers, Interp)
|
|
#else
|
|
TEST_P(Test_Caffe_layers, DISABLED_Interp) // requires patched protobuf (available in OpenCV source tree only)
|
|
#endif
|
|
{
|
|
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021030000)
|
|
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD)
|
|
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); // exception
|
|
#endif
|
|
|
|
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && target == DNN_TARGET_MYRIAD)
|
|
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD);
|
|
|
|
// Test a custom layer.
|
|
CV_DNN_REGISTER_LAYER_CLASS(Interp, CustomInterpLayer);
|
|
try
|
|
{
|
|
testLayerUsingCaffeModels("layer_interp", false, false);
|
|
}
|
|
catch (...)
|
|
{
|
|
LayerFactory::unregisterLayer("Interp");
|
|
throw;
|
|
}
|
|
LayerFactory::unregisterLayer("Interp");
|
|
|
|
// Test an implemented layer.
|
|
testLayerUsingCaffeModels("layer_interp", false, false);
|
|
}
|
|
|
|
INSTANTIATE_TEST_CASE_P(/*nothing*/, Test_Caffe_layers, dnnBackendsAndTargets());
|
|
|
|
// Checks both outputs of a 2x2, stride-2 max pooling layer (maximum values
// and their flat indices in the 10x10 input) against a reference computed here.
TEST(Layer_Test_PoolingIndices, Accuracy)
{
    Net net;

    LayerParams poolParams;
    poolParams.set("pool", "max");
    poolParams.set("kernel_w", 2);
    poolParams.set("kernel_h", 2);
    poolParams.set("stride_w", 2);
    poolParams.set("stride_h", 2);
    poolParams.set("pad_w", 0);
    poolParams.set("pad_h", 0);
    poolParams.name = "testLayer.name";  // This test also checks that OpenCV lets use names with dots.
    poolParams.type = "Pooling";
    net.addLayerToPrev(poolParams.name, poolParams.type, poolParams);

    Mat inp(10, 10, CV_8U);
    randu(inp, 0, 255);

    // Reference: a pixel replaces the stored maximum of its 2x2 cell only
    // when it is strictly greater, so the first maximum in scan order wins.
    Mat maxValues(5, 5, CV_32F, Scalar(-1)), indices(5, 5, CV_32F, Scalar(-1));
    for (int row = 0; row < 10; ++row)
    {
        const int cellRow = row / 2;
        for (int col = 0; col < 10; ++col)
        {
            const int cellCol = col / 2;
            const uint8_t pixel = inp.at<uint8_t>(row, col);
            float& best = maxValues.at<float>(cellRow, cellCol);
            if (static_cast<float>(pixel) > best)
            {
                best = pixel;
                indices.at<float>(cellRow, cellCol) = row * 10 + col;
            }
        }
    }
    net.setPreferableBackend(DNN_BACKEND_OPENCV);
    net.setInput(blobFromImage(inp));

    std::vector<Mat> outputs;
    net.forward(outputs, poolParams.name);
    normAssert(maxValues, outputs[0].reshape(1, 5));
    normAssert(indices, outputs[1].reshape(1, 5));
}
|
|
|
|
typedef testing::TestWithParam<tuple<Vec4i, int, tuple<Backend, Target> > > Layer_Test_ShuffleChannel;
|
|
TEST_P(Layer_Test_ShuffleChannel, Accuracy)
|
|
{
|
|
Vec4i inpShapeVec = get<0>(GetParam());
|
|
int group = get<1>(GetParam());
|
|
ASSERT_EQ(inpShapeVec[1] % group, 0);
|
|
const int groupSize = inpShapeVec[1] / group;
|
|
int backendId = get<0>(get<2>(GetParam()));
|
|
int targetId = get<1>(get<2>(GetParam()));
|
|
|
|
Net net;
|
|
LayerParams lp;
|
|
lp.set("group", group);
|
|
lp.type = "ShuffleChannel";
|
|
lp.name = "testLayer";
|
|
net.addLayerToPrev(lp.name, lp.type, lp);
|
|
|
|
const int inpShape[] = {inpShapeVec[0], inpShapeVec[1], inpShapeVec[2], inpShapeVec[3]};
|
|
Mat inp(4, inpShape, CV_32F);
|
|
randu(inp, 0, 255);
|
|
|
|
net.setInput(inp);
|
|
net.setPreferableBackend(backendId);
|
|
net.setPreferableTarget(targetId);
|
|
Mat out = net.forward();
|
|
|
|
double l1 = 1e-5, lInf = 1e-4;
|
|
if (targetId == DNN_TARGET_OPENCL_FP16)
|
|
{
|
|
l1 = 5e-2;
|
|
lInf = 7e-2;
|
|
}
|
|
else if (targetId == DNN_TARGET_CUDA_FP16)
|
|
{
|
|
l1 = 0.06;
|
|
lInf = 0.07;
|
|
}
|
|
for (int n = 0; n < inpShapeVec[0]; ++n)
|
|
{
|
|
for (int c = 0; c < inpShapeVec[1]; ++c)
|
|
{
|
|
Mat outChannel = getPlane(out, n, c);
|
|
Mat inpChannel = getPlane(inp, n, groupSize * (c % group) + c / group);
|
|
normAssert(outChannel, inpChannel, "", l1, lInf);
|
|
}
|
|
}
|
|
}
|
|
INSTANTIATE_TEST_CASE_P(/**/, Layer_Test_ShuffleChannel, Combine(
|
|
/*input shape*/ Values(Vec4i(1, 6, 5, 7), Vec4i(3, 12, 1, 4)),
|
|
/*group*/ Values(1, 2, 3, 6), dnnBackendsAndTargets(/*with IE*/ false)
|
|
));
|
|
|
|
// Check if relu is not fused to convolution if we requested it's output
|
|
TEST(Layer_Test_Convolution, relu_fusion)
|
|
{
|
|
Net net;
|
|
{
|
|
LayerParams lp;
|
|
lp.set("kernel_size", 1);
|
|
lp.set("num_output", 1);
|
|
lp.set("bias_term", false);
|
|
lp.type = "Convolution";
|
|
lp.name = "testConv";
|
|
|
|
int weightsShape[] = {1, 1, 1, 1};
|
|
Mat weights(4, &weightsShape[0], CV_32F, Scalar(1));
|
|
lp.blobs.push_back(weights);
|
|
net.addLayerToPrev(lp.name, lp.type, lp);
|
|
}
|
|
{
|
|
LayerParams lp;
|
|
lp.type = "ReLU";
|
|
lp.name = "testReLU";
|
|
net.addLayerToPrev(lp.name, lp.type, lp);
|
|
}
|
|
int sz[] = {1, 1, 2, 3};
|
|
Mat input(4, &sz[0], CV_32F);
|
|
randu(input, -1.0, -0.1);
|
|
net.setInput(input);
|
|
net.setPreferableBackend(DNN_BACKEND_OPENCV);
|
|
Mat output = net.forward("testConv");
|
|
normAssert(input, output);
|
|
}
|
|
|
|
typedef testing::TestWithParam<tuple<bool, tuple<Backend, Target> > > Layer_Test_Eltwise_unequal;
|
|
TEST_P(Layer_Test_Eltwise_unequal, accuracy_input_0_truncate)
|
|
{
|
|
bool weighted = get<0>(GetParam());
|
|
int backendId = get<0>(get<1>(GetParam()));
|
|
int targetId = get<1>(get<1>(GetParam()));
|
|
|
|
if (backendId == DNN_BACKEND_CUDA && weighted)
|
|
applyTestTag(CV_TEST_TAG_DNN_SKIP_CUDA);
|
|
|
|
Net net;
|
|
LayerParams lp;
|
|
lp.type = "Eltwise";
|
|
lp.name = "testLayer";
|
|
lp.set<std::string>("output_channels_mode", "input_0_truncate");
|
|
|
|
const int inpShapes[][4] = {{1, 4, 2, 2}, {1, 5, 2, 2}, {1, 3, 2, 2}};
|
|
const int out_channels = inpShapes[0][1];
|
|
std::vector<String> inpNames(3);
|
|
std::vector<Mat> inputs(3);
|
|
|
|
std::vector<float> weights(3, 1);
|
|
if (weighted)
|
|
{
|
|
for (int i = 0; i < inputs.size(); ++i)
|
|
weights[i] = -0.125f + i * 0.25f;
|
|
lp.set("coeff", DictValue::arrayReal<float*>(&weights[0], weights.size()));
|
|
}
|
|
|
|
int eltwiseId = net.addLayer(lp.name, lp.type, lp);
|
|
for (int i = 0; i < inputs.size(); ++i)
|
|
{
|
|
inputs[i].create(4, inpShapes[i], CV_32F);
|
|
size_t total = inputs[i].total();
|
|
for (size_t j = 0; j < total; j++)
|
|
inputs[i].ptr<float>()[j] = j + i * 100;
|
|
inpNames[i] = format("input_%d", i);
|
|
net.connect(0, i, eltwiseId, i);
|
|
}
|
|
Mat ref(4, inpShapes[0], CV_32F, Scalar(0));
|
|
|
|
net.setInputsNames(inpNames);
|
|
for (int i = 0; i < inputs.size(); ++i)
|
|
{
|
|
//std::cout << ref.reshape(1,1) << endl;
|
|
net.setInput(inputs[i], inpNames[i]);
|
|
for (size_t batchId = 0; batchId < ref.size[0]; batchId++)
|
|
{
|
|
int input_channels = inputs[i].size[1];
|
|
Range ranges[4] = { Range(batchId, batchId + 1), Range(0, std::min(out_channels, input_channels)), Range::all(), Range::all() };
|
|
Mat ref_slice = ref(ranges);
|
|
Mat input_slice = inputs[i](ranges);
|
|
ref_slice += weights[i] * input_slice;
|
|
}
|
|
}
|
|
|
|
net.setPreferableBackend(backendId);
|
|
net.setPreferableTarget(targetId);
|
|
Mat out = net.forward();
|
|
normAssert(out, ref);
|
|
if (testing::Test::HasFailure())
|
|
{
|
|
std::cout << out.reshape(1,1) << endl;
|
|
std::cout << ref.reshape(1,1) << endl;
|
|
}
|
|
}
|
|
|
|
// Eltwise layer with "output_channels_mode" = "input_0" fed by three inputs whose channel
// counts differ (4, 2 and 3). The reference accumulates weights[i] * inputs[i] over the first
// min(out_channels, input_channels) channels of every input, where out_channels is the channel
// count of input 0. The weighted variant is tagged to be skipped on CUDA.
TEST_P(Layer_Test_Eltwise_unequal, accuracy_input_0)
{
    bool weighted = get<0>(GetParam());
    int backendId = get<0>(get<1>(GetParam()));
    int targetId = get<1>(get<1>(GetParam()));

    Net net;
    LayerParams lp;
    lp.type = "Eltwise";
    lp.name = "testLayer";
    lp.set<std::string>("output_channels_mode", "input_0");

    if (backendId == DNN_BACKEND_CUDA && weighted)
        applyTestTag(CV_TEST_TAG_DNN_SKIP_CUDA);

    const int inpShapes[][4] = {{1, 4, 2, 2}, {1, 2, 2, 2}, {1, 3, 2, 2}};
    // Derived from the shape table so the containers below cannot go out of sync with it.
    const int numInputs = static_cast<int>(sizeof(inpShapes) / sizeof(inpShapes[0]));
    const int out_channels = inpShapes[0][1];
    std::vector<String> inpNames(numInputs);
    std::vector<Mat> inputs(numInputs);

    std::vector<float> weights(numInputs, 1.0f);
    if (weighted)
    {
        for (int i = 0; i < numInputs; ++i)
            weights[i] = -0.125f + i * 0.25f;
        lp.set("coeff", DictValue::arrayReal<float*>(weights.data(), numInputs));
    }

    int eltwiseId = net.addLayer(lp.name, lp.type, lp);
    for (int i = 0; i < numInputs; ++i)
    {
        inputs[i].create(4, inpShapes[i], CV_32F);
        // Deterministic content: element j of input i holds j + 100 * i.
        float* data = inputs[i].ptr<float>();
        const size_t total = inputs[i].total();
        for (size_t j = 0; j < total; j++)
            data[j] = j + i * 100;
        inpNames[i] = format("input_%d", i);
        net.connect(0, i, eltwiseId, i);
    }
    Mat ref(4, inpShapes[0], CV_32F, Scalar(0));

    net.setInputsNames(inpNames);
    const int batchSize = ref.size[0];
    for (int i = 0; i < numInputs; ++i)
    {
        net.setInput(inputs[i], inpNames[i]);
        const int input_channels = inputs[i].size[1];
        const int usedChannels = std::min(out_channels, input_channels);
        for (int batchId = 0; batchId < batchSize; batchId++)
        {
            Range ranges[4] = { Range(batchId, batchId + 1), Range(0, usedChannels), Range::all(), Range::all() };
            Mat ref_slice = ref(ranges);
            Mat input_slice = inputs[i](ranges);
            ref_slice += weights[i] * input_slice;
        }
    }

    net.setPreferableBackend(backendId);
    net.setPreferableTarget(targetId);
    Mat out = net.forward();
    normAssert(out, ref);
    if (testing::Test::HasFailure())
    {
        // Dump both tensors to ease debugging of a mismatch.
        std::cout << out.reshape(1,1) << std::endl;
        std::cout << ref.reshape(1,1) << std::endl;
    }
}

INSTANTIATE_TEST_CASE_P(/**/, Layer_Test_Eltwise_unequal, Combine(
    testing::Bool(),
    dnnBackendsAndTargets()
));
|
|
|
|
|
|
// Fixture that brute-forces broadcasting of the NaryEltwise layer.
// Parameters: operation name, number of dimensions, (backend, target).
struct Layer_Test_Eltwise_bcast : testing::TestWithParam<tuple<string, int, tuple<Backend, Target>>>
{
public:
    // Runs the layer on every pair of shapes built from extents {1, 3}:
    //  - both operands with `dim` dimensions,
    //  - one operand with `dim` dimensions and the other with fewer (both argument orders).
    void test_bcast()
    {
        string op = get<0>(GetParam());
        int dim = get<1>(GetParam());
        tuple<Backend, Target> backend_target = get<2>(GetParam());
        int backend = get<0>(backend_target);
        int target = get<1>(backend_target);

        if (backend == DNN_BACKEND_CUDA && dim > 4)
            applyTestTag(CV_TEST_TAG_LONG);

        vector<vector<int>> dim_shape_list;
        get_all_arr(dim_shape_list, dim);
        replace(dim_shape_list, 1, 3);

        // same number of dimensions: every shape against every shape
        for (const auto &shp1: dim_shape_list)
            for (const auto &shp2: dim_shape_list)
                run(shp1, shp2, op, backend, target);

        // all shapes with fewer dimensions (1 .. dim - 1)
        vector<vector<int>> sub_shape_list;
        vector<vector<int>> tmp;
        for (int i = 1; i < dim; i++)
        {
            get_all_arr(tmp, i);
            replace(tmp, 1, 3);
            sub_shape_list.insert(sub_shape_list.end(), tmp.begin(), tmp.end());
        }

        // different number of dimensions: full-rank operand first
        for (const auto &shp1: dim_shape_list)
            for (const auto &shp2: sub_shape_list)
                run(shp1, shp2, op, backend, target);

        // different number of dimensions: lower-rank operand first
        for (const auto &shp1: sub_shape_list)
            for (const auto &shp2: dim_shape_list)
                run(shp1, shp2, op, backend, target);
    }

private:
    // Fills `arr` with all 2^n vectors of length n whose elements are 0 or 1
    // (row i holds the binary digits of i, most significant first).
    static void get_all_arr(vector<vector<int>> &arr, int n)
    {
        CV_Assert(n >= 0 && n < 31);  // keep `1 << n` within the range of int
        int total = 1 << n;
        arr.assign(total, vector<int>(n, -1));
        for (int i = 0; i < total; i++)
            for (int j = 0; j < n; j++)
                arr[i][j] = (i >> (n - j - 1)) & 1;
    }

    // Replaces every zero element with `zero` and every non-zero element with `one`.
    // Works on an empty list and on rows of different lengths.
    static void replace(vector<vector<int>> &arr, int zero, int one)
    {
        for (auto &row: arr)
            for (int &value: row)
                value = value ? one : zero;
    }

    // Forwards zeros(a_shape) `op` ones(b_shape) through a single NaryEltwise layer.
    // The forward pass must not throw; for "sum" every output element must be 1.
    static void run(const vector<int> &a_shape, const vector<int> &b_shape, const String &op, const int backend, const int target)
    {
        Mat a = Mat::zeros((int) a_shape.size(), a_shape.data(), CV_32FC1);
        Mat b = Mat::ones((int) b_shape.size(), b_shape.data(), CV_32FC1);

        Net net;
        LayerParams lp;
        lp.type = "NaryEltwise";
        lp.name = "testLayer";
        lp.set("operation", op);
        int id = net.addLayerToPrev(lp.name, lp.type, lp);
        net.connect(0, 1, id, 1);

        vector<String> inpNames(2);
        inpNames[0] = "a";
        inpNames[1] = "b";
        net.setInputsNames(inpNames);
        net.setInput(a, inpNames[0]);
        net.setInput(b, inpNames[1]);

        net.setPreferableBackend(backend);
        net.setPreferableTarget(target);

        Mat re;
        ASSERT_NO_THROW(re = net.forward()); // runtime error

        // Only "sum" has a known expected value here (0 + 1).
        if (op != "sum")
            return;

        // NOTE(review): reads the output as contiguous float data, as the original did - confirm.
        const float *ptr_re = re.ptr<float>();
        const size_t total = re.total();
        for (size_t i = 0; i < total; i++)
        {
            ASSERT_EQ(1.0f, ptr_re[i]); // sum result should be 1
        }
    }
};
|
|
|
|
// Delegates to the fixture, which forwards every generated pair of shapes.
TEST_P(Layer_Test_Eltwise_bcast, brute_force)
{
    test_bcast();
}

// This test verifies that unidirectional and bidirectional broadcasting, for tensors with the
// same and with different shapes, can be forwarded correctly, i.e. that the elementwise layer
// does not raise errors when forwarding.
//
// To find out in which cases a backend falls back to the CPU, replace the fallback statement,
// e.g. `return Ptr<BackendNode>();` in `initCUDA()`, with `throw std::runtime_error("fallback");`
//
// To test more operators, add them after "sum".
// By default only "sum" is tested, because in most cases the operators share the same implementation.
INSTANTIATE_TEST_CASE_P(/**/, Layer_Test_Eltwise_bcast, Combine(
    /* operation  */ Values("sum"),
    /* dimensions */ Values(1, 2, 3, 4, 5),
    dnnBackendsAndTargets()
));
|
|
|
|
typedef testing::TestWithParam<tuple<Backend, Target> > Layer_Test_Resize;
|
|
TEST_P(Layer_Test_Resize, change_input)
|
|
{
|
|
int backendId = get<0>(GetParam());
|
|
int targetId = get<1>(GetParam());
|
|
|
|
Net net;
|
|
LayerParams lp;
|
|
lp.type = "Resize";
|
|
lp.name = "testLayer";
|
|
lp.set("zoom_factor", 2);
|
|
lp.set("interpolation", "nearest");
|
|
net.addLayerToPrev(lp.name, lp.type, lp);
|
|
|
|
for (int i = 0; i < 2; ++i)
|
|
{
|
|
Mat inp(4 + i, 5 + i, CV_8UC3), ref;
|
|
randu(inp, 0, 255);
|
|
resize(inp, ref, Size(0, 0), 2, 2, INTER_NEAREST);
|
|
ref = blobFromImage(ref);
|
|
|
|
net.setInput(blobFromImage(inp));
|
|
net.setPreferableBackend(backendId);
|
|
net.setPreferableTarget(targetId);
|
|
Mat out = net.forward();
|
|
normAssert(out, ref);
|
|
}
|
|
}
|
|
|
|
INSTANTIATE_TEST_CASE_P(/**/, Layer_Test_Resize, dnnBackendsAndTargets());
|
|
|
|
// Fixture for Slice layer tests, parameterized by (backend, target).
struct Layer_Test_Slice : public testing::TestWithParam<tuple<Backend, Target> >
{
    // Slices a DIMS-dimensional float tensor of shape `inputShape`, filled with 0, 1, 2, ...,
    // by [begin[i], end[i]) along every axis and compares the layer output with the same
    // region taken directly from the input. The output is also expected to be non-zero.
    template<int DIMS>
    void test_slice(const int* inputShape, const int* begin, const int* end)
    {
        const int backendId = get<0>(GetParam());
        const int targetId = get<1>(GetParam());

        Mat input(DIMS, inputShape, CV_32FC1, Scalar::all(0));
        float* values = input.ptr<float>();
        const int count = (int)input.total();
        for (int idx = 0; idx < count; ++idx)
            values[idx] = (float)idx;

        std::vector<Range> range;
        range.reserve(DIMS);
        for (int axis = 0; axis < DIMS; ++axis)
            range.push_back(Range(begin[axis], end[axis]));

        LayerParams lp;
        lp.name = "testLayer";
        lp.type = "Slice";
        lp.set("begin", DictValue::arrayInt<int*>(const_cast<int*>(begin), DIMS));
        lp.set("end", DictValue::arrayInt<int*>(const_cast<int*>(end), DIMS));

        Net net;
        net.addLayerToPrev(lp.name, lp.type, lp);

        net.setInput(input);
        net.setPreferableBackend(backendId);
        net.setPreferableTarget(targetId);
        Mat out = net.forward();

        EXPECT_GT(cv::norm(out, NORM_INF), 0);
        normAssert(out, input(range));
#if 0
        cout << input(range).clone().reshape(1, 1) << endl;
        cout << out.reshape(1, 1) << endl;
#endif
    }
};
|
|
|
|
// 1x16x6x8 input: keep channels [4, 8), everything else untouched.
TEST_P(Layer_Test_Slice, slice_channels_17762)
{
    const int shape[4]      = {1, 16, 6, 8};
    const int sliceBegin[4] = {0,  4, 0, 0};
    const int sliceEnd[4]   = {1,  8, 6, 8};
    test_slice<4>(shape, sliceBegin, sliceEnd);
}
|
|
|
|
// 4x4x3x4 input: keep channels [1, 3) for the whole batch.
TEST_P(Layer_Test_Slice, slice_channels_with_batch_17762)
{
    const int shape[4]      = {4, 4, 3, 4};
    const int sliceBegin[4] = {0, 1, 0, 0};
    const int sliceEnd[4]   = {4, 3, 3, 4};
    test_slice<4>(shape, sliceBegin, sliceEnd);
}
|
|
|
|
// 4x4x3x4 input: keep batch items [2, 4) and channels [1, 3).
// Tagged to be skipped on the Inference Engine NN Builder backend.
TEST_P(Layer_Test_Slice, slice_channels_and_batch_17762)
{
    const int backendId = get<0>(GetParam());
    if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER, CV_TEST_TAG_DNN_SKIP_IE_VERSION);

    const int shape[4]      = {4, 4, 3, 4};
    const int sliceBegin[4] = {2, 1, 0, 0};
    const int sliceEnd[4]   = {4, 3, 3, 4};
    test_slice<4>(shape, sliceBegin, sliceEnd);
}
|
|
|
|
// 1x2x6x4 input: keep rows [4, 6).
TEST_P(Layer_Test_Slice, slice_rows)
{
    const int shape[4]      = {1, 2, 6, 4};
    const int sliceBegin[4] = {0, 0, 4, 0};
    const int sliceEnd[4]   = {1, 2, 6, 4};
    test_slice<4>(shape, sliceBegin, sliceEnd);
}
|
|
|
|
// 1x2x3x8 input: keep columns [4, 8).
TEST_P(Layer_Test_Slice, slice_cols)
{
    const int shape[4]      = {1, 2, 3, 8};
    const int sliceBegin[4] = {0, 0, 0, 4};
    const int sliceEnd[4]   = {1, 2, 3, 8};
    test_slice<4>(shape, sliceBegin, sliceEnd);
}
|
|
|
|
|
|
// 1x4x2x3 input: channel [2, 3), row [1, 2), columns [0, 2).
TEST_P(Layer_Test_Slice, slice_complex_1_unaligned)
{
    const int shape[4]      = {1, 4, 2, 3};
    const int sliceBegin[4] = {0, 2, 1, 0};
    const int sliceEnd[4]   = {1, 3, 2, 2};
    test_slice<4>(shape, sliceBegin, sliceEnd);
}
|
|
|
|
// 1x3x2x4 input: channel [2, 3), row [1, 2), columns [0, 2).
TEST_P(Layer_Test_Slice, slice_complex_2_x4)
{
    const int shape[4]      = {1, 3, 2, 4};
    const int sliceBegin[4] = {0, 2, 1, 0};
    const int sliceEnd[4]   = {1, 3, 2, 2};
    test_slice<4>(shape, sliceBegin, sliceEnd);
}
|
|
|
|
// 1x6x4x8 input: channels [2, 4), rows [1, 3), columns [4, 8).
TEST_P(Layer_Test_Slice, slice_complex_3)
{
    const int shape[4]      = {1, 6, 4, 8};
    const int sliceBegin[4] = {0, 2, 1, 4};
    const int sliceEnd[4]   = {1, 4, 3, 8};
    test_slice<4>(shape, sliceBegin, sliceEnd);
}
|
|
|
|
// Slice configured with begin = 0 and end = INT_MAX on all four axes. Two inputs of different
// sizes are forwarded through the same network and each output must equal its input.
TEST_P(Layer_Test_Slice, variable_input_shape)
{
    const int backendId = get<0>(GetParam());
    const int targetId = get<1>(GetParam());

    int sliceBegin[4] = {0, 0, 0, 0};
    int sliceEnd[4] = {INT_MAX, INT_MAX, INT_MAX, INT_MAX};

    LayerParams lp;
    lp.name = "testLayer";
    lp.type = "Slice";
    lp.set("begin", DictValue::arrayInt<int*>(sliceBegin, 4));
    lp.set("end", DictValue::arrayInt<int*>(sliceEnd, 4));

    Net net;
    net.addLayerToPrev(lp.name, lp.type, lp);

    for (int extra = 0; extra < 2; ++extra)
    {
        Mat image(4 + extra, 5 + extra, CV_8UC1);
        randu(image, 0, 255);
        Mat inp = blobFromImage(image);

        net.setInput(inp);
        net.setPreferableBackend(backendId);
        net.setPreferableTarget(targetId);
        Mat out = net.forward();

        normAssert(out, inp);
    }
}

INSTANTIATE_TEST_CASE_P(/**/, Layer_Test_Slice, dnnBackendsAndTargets());
|
|
|
|
typedef testing::TestWithParam<tuple<Backend, Target> > Layer_Test_BatchNorm;
|
|
TEST_P(Layer_Test_BatchNorm, fusion)
|
|
{
|
|
// This tests reinitializes network by forwarding different batch size input.
|
|
// We check BatchNorm layer weights restoring after fusion.
|
|
int backendId = get<0>(GetParam());
|
|
int targetId = get<1>(GetParam());
|
|
const int ch = 4;
|
|
|
|
Mat mean(1, ch, CV_32F), var(1, ch, CV_32F), weights(1, ch, CV_32F);
|
|
randu(mean, 0, 1);
|
|
randu(var, 0, 1);
|
|
randu(weights, 0, 1);
|
|
|
|
Net net;
|
|
{
|
|
LayerParams lp;
|
|
lp.type = "BatchNorm";
|
|
lp.name = "bn";
|
|
lp.set("has_weight", false);
|
|
lp.set("has_bias", false);
|
|
lp.blobs.push_back(mean);
|
|
lp.blobs.push_back(var);
|
|
net.addLayerToPrev(lp.name, lp.type, lp);
|
|
}
|
|
{
|
|
LayerParams lp;
|
|
lp.type = "Scale";
|
|
lp.name = "scale";
|
|
lp.set("has_bias", false);
|
|
lp.blobs.push_back(weights);
|
|
net.addLayerToPrev(lp.name, lp.type, lp);
|
|
}
|
|
|
|
Mat inp(4, 5, CV_32FC(ch));
|
|
randu(inp, 0, 1);
|
|
|
|
net.setPreferableBackend(backendId);
|
|
net.setPreferableTarget(targetId);
|
|
|
|
net.setInput(blobFromImage(inp));
|
|
Mat ref = net.forward();
|
|
|
|
net.setInput(blobFromImages(std::vector<Mat>(2, inp)));
|
|
Mat out = net.forward();
|
|
|
|
for (int i = 0; i < 2; ++i)
|
|
{
|
|
std::vector<Range> ranges(4, Range::all());
|
|
ranges[0].start = i;
|
|
ranges[0].end = i + 1;
|
|
normAssert(out(ranges), ref);
|
|
}
|
|
}
|
|
|
|
INSTANTIATE_TEST_CASE_P(/**/, Layer_Test_BatchNorm, dnnBackendsAndTargets());
|
|
|
|
class TestLayerFusion : public DNNTestLayer {
|
|
public:
|
|
static void makeDefaultTestConvolutionLayer(LayerParams& convParams, int in_channels, int num_filters, bool bias_term)
|
|
{
|
|
const int kernel_h = 3, kernel_w = 3;
|
|
const int pad_h = kernel_h / 2, pad_w = kernel_w / 2;
|
|
|
|
convParams.set("kernel_h", kernel_h);
|
|
convParams.set("kernel_w", kernel_w);
|
|
convParams.set("pad_h", pad_h);
|
|
convParams.set("pad_w", pad_w);
|
|
convParams.set("num_output", num_filters);
|
|
convParams.set("bias_term", bias_term);
|
|
convParams.type = "Convolution";
|
|
convParams.name = "convolution";
|
|
|
|
float conv_init_magnitude = 1.0f / in_channels / kernel_h / kernel_w;
|
|
int weightsShape[] = {num_filters, in_channels, kernel_h, kernel_w};
|
|
Mat weights(4, &weightsShape[0], CV_32F);
|
|
randu(weights, -conv_init_magnitude, conv_init_magnitude);
|
|
convParams.blobs.push_back(weights);
|
|
if (bias_term)
|
|
{
|
|
Mat bias(1, num_filters, CV_32F);
|
|
randu(bias, -1.0f, 1.0f);
|
|
convParams.blobs.push_back(bias);
|
|
}
|
|
}
|
|
|
|
static void makeDefaultTestActivationLayer(LayerParams& activationParams, const std::string& type, int in_channels)
|
|
{
|
|
activationParams.type = type;
|
|
activationParams.name = "activation";
|
|
if (activationParams.type == "ReLU")
|
|
activationParams.set("negative_slope", 0.1f);
|
|
else if (activationParams.type == "Power")
|
|
{
|
|
activationParams.set("power", 2.0f);
|
|
activationParams.set("scale", 0.5f);
|
|
activationParams.set("shift", 0.3f);
|
|
}
|
|
else if (activationParams.type == "ReLU6")
|
|
{
|
|
activationParams.set("min_value", -1.0f);
|
|
activationParams.set("max_value", 1.0f);
|
|
}
|
|
else if (activationParams.type == "ChannelsPReLU")
|
|
{
|
|
Mat scales(1, in_channels, CV_32F);
|
|
randu(scales, -1.0f, 1.0f);
|
|
activationParams.blobs.push_back(scales);
|
|
}
|
|
else if (activationParams.type == "Exp")
|
|
{
|
|
activationParams.set("base", -1.0f);
|
|
activationParams.set("scale", 0.3f);
|
|
activationParams.set("shift", 0.6f);
|
|
}
|
|
}
|
|
|
|
static void makeDefaultTestEltwiseLayer(LayerParams& eltwiseParams, const std::string& op, bool withCoefficients)
|
|
{
|
|
eltwiseParams.type = "Eltwise";
|
|
eltwiseParams.name = "eltwise";
|
|
eltwiseParams.set("operation", op);
|
|
if (withCoefficients)
|
|
{
|
|
float coeff[] = {0.3f, 0.5f};
|
|
eltwiseParams.set("coeff", DictValue::arrayReal<float*>(coeff, 2));
|
|
}
|
|
}
|
|
|
|
static void test(Mat& input, Net& net, Backend backendId, Target targetId, std::vector<int> expectedFusedLayers = std::vector<int>(), double l1 = 0.0, double lInf = 0.0)
|
|
{
|
|
DNNTestLayer::checkBackend(backendId, targetId);
|
|
|
|
net.enableFusion(false);
|
|
net.setPreferableBackend(DNN_BACKEND_OPENCV);
|
|
net.setPreferableTarget(DNN_TARGET_CPU);
|
|
net.setInput(input);
|
|
Mat outputReference = net.forward().clone();
|
|
std::vector<double> refTimings;
|
|
net.getPerfProfile(refTimings);
|
|
for (int i = 0; i < refTimings.size(); i++)
|
|
{
|
|
CV_Assert(refTimings[i] != 0.0);
|
|
}
|
|
|
|
net.enableFusion(true);
|
|
net.setPreferableBackend(backendId);
|
|
net.setPreferableTarget(targetId);
|
|
net.setInput(input);
|
|
Mat outputTest = net.forward().clone();
|
|
std::vector<double> testTimings;
|
|
net.getPerfProfile(testTimings);
|
|
for (int i = 0; i < testTimings.size(); i++)
|
|
{
|
|
if(std::find(expectedFusedLayers.begin(), expectedFusedLayers.end(), i + 1) != expectedFusedLayers.end())
|
|
{
|
|
EXPECT_EQ(testTimings[i], 0.0);
|
|
}
|
|
else
|
|
{
|
|
EXPECT_NE(testTimings[i], 0.0);
|
|
}
|
|
}
|
|
|
|
// double ref_max_value, ref_min_value;
|
|
// minMaxLoc(outputReference.reshape(1, 1), &ref_min_value, &ref_max_value);
|
|
// std::cout << "reference range: " << ref_min_value << ' ' << ref_max_value << std::endl;
|
|
|
|
double default_l1, default_lInf;
|
|
DNNTestLayer::getDefaultThresholds(backendId, targetId, &default_l1, &default_lInf);
|
|
if (l1 == 0.0)
|
|
l1 = default_l1;
|
|
if (lInf == 0.0)
|
|
lInf = default_lInf;
|
|
normAssert(outputReference, outputTest, "", l1, lInf);
|
|
}
|
|
|
|
static testing::internal::ParamGenerator<std::string> eltwiseOpList()
|
|
{
|
|
// TODO: automate list generation
|
|
return Values("sum", "max", "min", "prod", "div");
|
|
}
|
|
|
|
static testing::internal::ParamGenerator<std::string> activationLayersList()
|
|
{
|
|
// TODO: automate list generation
|
|
return Values("ReLU", "ReLU6", "ChannelsPReLU", "TanH", "Swish", "Mish", "Sigmoid", "ELU", "AbsVal", "BNLL", "Power", "Exp");
|
|
}
|
|
|
|
static testing::internal::ParamGenerator<tuple<Backend, Target> > dnnBackendsAndTargetsForFusionTests()
|
|
{
|
|
return dnnBackendsAndTargets(false, false, true, false, true, false); // OCV OpenCL + OCV CPU + CUDA
|
|
}
|
|
};
|
|
|
|
typedef TestWithParam<tuple<bool, std::string, tuple<Backend, Target> > > ConvolutionActivationFusion;
|
|
TEST_P(ConvolutionActivationFusion, Accuracy)
|
|
{
|
|
// input
|
|
// |
|
|
// -----------------------
|
|
// | convolution |
|
|
// -----------------------
|
|
// |
|
|
// -----------------------
|
|
// | activation |
|
|
// -----------------------
|
|
// |
|
|
// output
|
|
|
|
const int batch_size = 2, in_channels = 16;
|
|
const int in_height = 16, in_width = 16;
|
|
int inputShape[] = {batch_size, in_channels, in_height, in_width};
|
|
Mat input(4, &inputShape[0], CV_32F);
|
|
randu(input, 1.0f, 2.0f);
|
|
|
|
bool bias_term = get<0>(GetParam());
|
|
LayerParams convParams;
|
|
TestLayerFusion::makeDefaultTestConvolutionLayer(convParams, in_channels, in_channels, bias_term);
|
|
|
|
std::string actType = get<1>(GetParam());
|
|
LayerParams activationParams;
|
|
TestLayerFusion::makeDefaultTestActivationLayer(activationParams, actType, in_channels);
|
|
|
|
Backend backendId = get<0>(get<2>(GetParam()));
|
|
Target targetId = get<1>(get<2>(GetParam()));
|
|
|
|
Net net;
|
|
int convId = net.addLayer(convParams.name, convParams.type, convParams);
|
|
int activId = net.addLayerToPrev(activationParams.name, activationParams.type, activationParams);
|
|
net.connect(0, 0, convId, 0);
|
|
|
|
std::vector<int> expectedFusedLayers;
|
|
if (backendId == DNN_BACKEND_OPENCV)
|
|
{
|
|
if (targetId == DNN_TARGET_CPU || targetId == DNN_TARGET_CPU_FP16)
|
|
expectedFusedLayers.push_back(activId); // all activations are fused
|
|
else if (targetId == DNN_TARGET_OPENCL || targetId == DNN_TARGET_OPENCL_FP16)
|
|
{
|
|
if (actType == "ReLU" || actType == "ChannelsPReLU" || actType == "ReLU6" || actType == "TanH" /*|| actType == "Power"*/)
|
|
expectedFusedLayers.push_back(activId);
|
|
}
|
|
}
|
|
else if (backendId == DNN_BACKEND_CUDA)
|
|
{
|
|
if (actType == "ReLU" || actType == "ReLU6" || actType == "TanH" || actType == "Swish" ||
|
|
actType == "Mish" || actType == "Sigmoid" || actType == "Power")
|
|
expectedFusedLayers.push_back(activId);
|
|
}
|
|
TestLayerFusion::test(input, net, backendId, targetId, expectedFusedLayers);
|
|
}
|
|
INSTANTIATE_TEST_CASE_P(TestLayerFusion, ConvolutionActivationFusion, Combine(
|
|
/* bias */ testing::Bool(),
|
|
/* activation */ TestLayerFusion::activationLayersList(),
|
|
TestLayerFusion::dnnBackendsAndTargetsForFusionTests()
|
|
));
|
|
|
|
typedef TestWithParam<tuple<bool, std::string, bool, tuple<Backend, Target> > > ConvolutionEltwiseFusion;
|
|
TEST_P(ConvolutionEltwiseFusion, Accuracy)
|
|
{
|
|
// input
|
|
// |
|
|
// -------------------------------
|
|
// | |
|
|
// | ---------------
|
|
// | | convolution |
|
|
// | ---------------
|
|
// | |
|
|
// | ---------------- |
|
|
// --------| eltwise op |-------
|
|
// ----------------
|
|
// |
|
|
// output
|
|
|
|
const int batch_size = 2, in_channels = 16;
|
|
const int in_height = 16, in_width = 16;
|
|
int inputShape[] = {batch_size, in_channels, in_height, in_width};
|
|
Mat input(4, &inputShape[0], CV_32F);
|
|
randu(input, 1.0f, 2.0f); // avoid small values to test eltwise div
|
|
|
|
bool bias_term = get<0>(GetParam());
|
|
LayerParams convParams;
|
|
TestLayerFusion::makeDefaultTestConvolutionLayer(convParams, in_channels, in_channels, bias_term);
|
|
|
|
std::string eltwiseOp = get<1>(GetParam());
|
|
bool weightedEltwise = get<2>(GetParam());
|
|
if (eltwiseOp != "sum" && weightedEltwise)
|
|
throw SkipTestException("weighted eltwise not supported");
|
|
LayerParams eltwiseParams;
|
|
TestLayerFusion::makeDefaultTestEltwiseLayer(eltwiseParams, eltwiseOp, weightedEltwise);
|
|
|
|
Net net;
|
|
int convId = net.addLayer(convParams.name, convParams.type, convParams);
|
|
int eltwiseId = net.addLayer(eltwiseParams.name, eltwiseParams.type, eltwiseParams);
|
|
net.connect(0, 0, convId, 0);
|
|
net.connect(convId, 0, eltwiseId, 0);
|
|
net.connect(0, 0, eltwiseId, 1);
|
|
|
|
Backend backendId = get<0>(get<3>(GetParam()));
|
|
Target targetId = get<1>(get<3>(GetParam()));
|
|
|
|
std::vector<int> expectedFusedLayers;
|
|
if (backendId == DNN_BACKEND_CUDA && eltwiseOp == "sum" && !weightedEltwise)
|
|
expectedFusedLayers.push_back(eltwiseId);
|
|
TestLayerFusion::test(input, net, backendId, targetId, expectedFusedLayers);
|
|
}
|
|
INSTANTIATE_TEST_CASE_P(TestLayerFusion, ConvolutionEltwiseFusion, Combine(
|
|
/* bias */ testing::Bool(),
|
|
/* eltwise op */ TestLayerFusion::eltwiseOpList(),
|
|
/* eltwise weighted */ testing::Bool(),
|
|
TestLayerFusion::dnnBackendsAndTargetsForFusionTests()
|
|
));
|
|
|
|
typedef TestWithParam<tuple<bool, std::string, bool, std::string, tuple<Backend, Target> > > ConvolutionEltwiseActivationFusion;
|
|
TEST_P(ConvolutionEltwiseActivationFusion, Accuracy)
|
|
{
|
|
// input
|
|
// |
|
|
// -------------------------------
|
|
// | |
|
|
// | ---------------
|
|
// | | convolution |
|
|
// | ---------------
|
|
// | |
|
|
// | ---------------- |
|
|
// --------| eltwise op |-------
|
|
// ----------------
|
|
// |
|
|
// ----------------
|
|
// | activation |
|
|
// ----------------
|
|
// |
|
|
// output
|
|
|
|
const int batch_size = 2, in_channels = 16;
|
|
const int in_height = 16, in_width = 16;
|
|
int inputShape[] = {batch_size, in_channels, in_height, in_width};
|
|
Mat input(4, &inputShape[0], CV_32F);
|
|
randu(input, 1.0f, 2.0f); // avoid small values to test eltwise div
|
|
|
|
bool bias_term = get<0>(GetParam());
|
|
LayerParams convParams;
|
|
TestLayerFusion::makeDefaultTestConvolutionLayer(convParams, in_channels, in_channels, bias_term);
|
|
|
|
std::string eltwiseOp = get<1>(GetParam());
|
|
bool weightedEltwise = get<2>(GetParam());
|
|
if (eltwiseOp != "sum" && weightedEltwise)
|
|
throw SkipTestException("weighted eltwise not supported");
|
|
LayerParams eltwiseParams;
|
|
TestLayerFusion::makeDefaultTestEltwiseLayer(eltwiseParams, eltwiseOp, weightedEltwise);
|
|
|
|
std::string actType = get<3>(GetParam());
|
|
LayerParams activationParams;
|
|
TestLayerFusion::makeDefaultTestActivationLayer(activationParams, actType, in_channels);
|
|
|
|
Backend backendId = get<0>(get<4>(GetParam()));
|
|
Target targetId = get<1>(get<4>(GetParam()));
|
|
|
|
Net net;
|
|
int convId = net.addLayer(convParams.name, convParams.type, convParams);
|
|
int eltwiseId = net.addLayer(eltwiseParams.name, eltwiseParams.type, eltwiseParams);
|
|
int activId = net.addLayer(activationParams.name, activationParams.type, activationParams);
|
|
net.connect(0, 0, convId, 0);
|
|
net.connect(convId, 0, eltwiseId, 0);
|
|
net.connect(0, 0, eltwiseId, 1);
|
|
net.connect(eltwiseId, 0, activId, 0);
|
|
|
|
std::vector<int> expectedFusedLayers;
|
|
if (backendId == DNN_BACKEND_OPENCV)
|
|
{
|
|
if (targetId == DNN_TARGET_CPU || targetId == DNN_TARGET_CPU_FP16)
|
|
expectedFusedLayers.push_back(activId); // activation is fused with eltwise layer
|
|
else if (targetId == DNN_TARGET_OPENCL || targetId == DNN_TARGET_OPENCL_FP16)
|
|
{
|
|
if (eltwiseOp == "sum" && !weightedEltwise &&
|
|
(actType == "ReLU" || actType == "ChannelsPReLU" /*|| actType == "Power"*/)
|
|
)
|
|
{
|
|
expectedFusedLayers.push_back(eltwiseId);
|
|
expectedFusedLayers.push_back(activId);
|
|
}
|
|
}
|
|
}
|
|
else if(backendId == DNN_BACKEND_CUDA)
|
|
{
|
|
if (eltwiseOp == "sum" && !weightedEltwise)
|
|
{
|
|
expectedFusedLayers.push_back(eltwiseId);
|
|
if (actType == "ReLU" || actType == "ReLU6" || actType == "TanH" || actType == "Swish" ||
|
|
actType == "Mish" || actType == "Sigmoid" || actType == "Power")
|
|
expectedFusedLayers.push_back(activId);
|
|
}
|
|
}
|
|
TestLayerFusion::test(input, net, backendId, targetId, expectedFusedLayers);
|
|
}
|
|
INSTANTIATE_TEST_CASE_P(TestLayerFusion, ConvolutionEltwiseActivationFusion, Combine(
|
|
/* bias */ testing::Bool(),
|
|
/* eltwise op */ TestLayerFusion::eltwiseOpList(),
|
|
/* eltwise weighted */ testing::Bool(),
|
|
/* activation */ TestLayerFusion::activationLayersList(),
|
|
TestLayerFusion::dnnBackendsAndTargetsForFusionTests()
|
|
));
|
|
|
|
typedef TestWithParam<tuple<bool, std::string, std::string, bool, tuple<Backend, Target> > > ConvolutionActivationEltwiseFusion;
|
|
// Builds a three-layer network (convolution -> activation -> eltwise) in which
// the eltwise layer also receives the raw network input, then checks through
// TestLayerFusion::test() which layers are expected to be fused for the given
// backend/target.
TEST_P(ConvolutionActivationEltwiseFusion, Accuracy)
{
    // Network topology:
    //
    //                  input
    //                    |
    //     -------------------------------
    //     |                             |
    //     |                      ----------------
    //     |                      |  convolution |
    //     |                      ----------------
    //     |                             |
    //     |                      ----------------
    //     |                      |  activation  |
    //     |                      ----------------
    //     |                             |
    //     |       ----------------      |
    //     --------|    eltwise   |-------
    //             ----------------
    //                    |

    // Unpack the test parameters up front.
    bool hasBias = get<0>(GetParam());
    std::string actType = get<1>(GetParam());
    std::string eltwiseOp = get<2>(GetParam());
    bool weightedEltwise = get<3>(GetParam());
    Backend backendId = get<0>(get<4>(GetParam()));
    Target targetId = get<1>(get<4>(GetParam()));

    // Random 4-D input blob.
    const int numSamples = 2, numChannels = 16;
    const int rows = 16, cols = 16;
    int blobDims[] = {numSamples, numChannels, rows, cols};
    Mat input(4, blobDims, CV_32F);
    // Values are drawn between 1 and 2: small values are avoided so that the
    // eltwise "div" operation can be tested.
    randu(input, 1.0f, 2.0f);

    // Layer descriptions. The convolution keeps the channel count unchanged.
    LayerParams convLp;
    TestLayerFusion::makeDefaultTestConvolutionLayer(convLp, numChannels, numChannels, hasBias);

    LayerParams activLp;
    TestLayerFusion::makeDefaultTestActivationLayer(activLp, actType, numChannels);

    // Weights are only exercised together with the "sum" operation.
    if (weightedEltwise && eltwiseOp != "sum")
        throw SkipTestException("weighted eltwise not supported");

    LayerParams eltwiseLp;
    TestLayerFusion::makeDefaultTestEltwiseLayer(eltwiseLp, eltwiseOp, weightedEltwise);

    // Assemble the graph: input -> conv -> activation -> eltwise[0], input -> eltwise[1].
    Net net;
    int convId = net.addLayer(convLp.name, convLp.type, convLp);
    int activId = net.addLayer(activLp.name, activLp.type, activLp);
    int eltwiseId = net.addLayer(eltwiseLp.name, eltwiseLp.type, eltwiseLp);
    net.connect(0, 0, convId, 0);
    net.connect(convId, 0, activId, 0);
    net.connect(activId, 0, eltwiseId, 0);
    net.connect(0, 0, eltwiseId, 1);

    // Work out which layers are expected to be reported as fused.
    std::vector<int> expectedFusedLayers;
    if (backendId == DNN_BACKEND_OPENCV)
    {
        const bool cpuTarget = targetId == DNN_TARGET_CPU || targetId == DNN_TARGET_CPU_FP16;
        const bool oclTarget = targetId == DNN_TARGET_OPENCL || targetId == DNN_TARGET_OPENCL_FP16;
        // NOTE: "Power" is currently excluded from the OpenCL list.
        const bool oclFusableActivation = actType == "ReLU" || actType == "ChannelsPReLU" ||
                                          actType == "ReLU6" || actType == "TanH";

        // On CPU targets every activation is expected to be fused with the
        // convolution; on OpenCL targets only the activations listed above.
        if (cpuTarget || (oclTarget && oclFusableActivation))
            expectedFusedLayers.push_back(activId);
    }
    else if (backendId == DNN_BACKEND_CUDA)
    {
        const bool cudaFusableActivation = actType == "ReLU" || actType == "ReLU6" ||
                                           actType == "TanH" || actType == "Swish" ||
                                           actType == "Mish" || actType == "Sigmoid" ||
                                           actType == "Power";
        if (cudaFusableActivation)
        {
            expectedFusedLayers.push_back(activId);
            // The eltwise layer is additionally expected to be fused only for
            // an unweighted "sum".
            if (!weightedEltwise && eltwiseOp == "sum")
                expectedFusedLayers.push_back(eltwiseId);
        }
    }

    TestLayerFusion::test(input, net, backendId, targetId, expectedFusedLayers);
}
|
|
// Instantiates ConvolutionActivationEltwiseFusion over the cartesian product of
// the generators below. The generator order matches the fields of the fixture's
// parameter tuple: bias, activation, eltwise op, eltwise weighted, (backend, target).
INSTANTIATE_TEST_CASE_P(TestLayerFusion, ConvolutionActivationEltwiseFusion,
    Combine(
        testing::Bool(),                                        // bias
        TestLayerFusion::activationLayersList(),                // activation
        TestLayerFusion::eltwiseOpList(),                       // eltwise op
        testing::Bool(),                                        // eltwise weighted
        TestLayerFusion::dnnBackendsAndTargetsForFusionTests()  // (backend, target)
    )
);
|
|
|
|
}} // namespace
|