From 1aacb9bb1513315267b42ecdc1bb05eb3b38a38b Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Fri, 10 Sep 2021 12:42:28 +0000 Subject: [PATCH 01/12] dnn(perf): update convolution tests --- modules/dnn/perf/perf_convolution.cpp | 232 +++++++++++++++++++++++++- 1 file changed, 226 insertions(+), 6 deletions(-) diff --git a/modules/dnn/perf/perf_convolution.cpp b/modules/dnn/perf/perf_convolution.cpp index c2a3a66ab9..bb890c6a00 100644 --- a/modules/dnn/perf/perf_convolution.cpp +++ b/modules/dnn/perf/perf_convolution.cpp @@ -26,25 +26,90 @@ struct ConvParam_t { double declared_flops; }; // Details: #12142 +// Last update: 2021-09 static const ConvParam_t testConvolutionConfigs[] = { + /* GFLOPS 3.398 x 20 = 67.956 */ {{7, 7}, {{1, 128, 46, 46}}, 128, 1, {1, 1}, {1, 1}, {3, 3}, {0, 0}, "", true, 3397788160.}, + /* GFLOPS 16.987 x 3 = 50.962 */ {{5, 5}, {{1, 1152, 16, 16}}, 1152, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 16987226112.}, + /* GFLOPS 23.122 x 2 = 46.244 */ {{5, 5}, {{1, 672, 32, 32}}, 672, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 23121788928.}, + /* GFLOPS 9.987 x 3 = 29.960 */ {{3, 3}, {{1, 256, 92, 92}}, 256, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 9986707456.}, + /* GFLOPS 1.595 x 16 = 25.524 */ {{3, 3}, {{1, 256, 26, 26}}, 512, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", false, 1595230208.}, + /* GFLOPS 4.566 x 5 = 22.828 */ {{7, 7}, {{1, 172, 46, 46}}, 128, 1, {1, 1}, {1, 1}, {3, 3}, {0, 0}, "", true, 4565684736.}, + /* GFLOPS 1.596 x 14 = 22.338 */ {{3, 3}, {{1, 128, 52, 52}}, 256, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", false, 1595576320.}, + /* GFLOPS 1.595 x 12 = 19.141 */ {{3, 3}, {{1, 512, 13, 13}}, 1024, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", false, 1595057152.}, + /* GFLOPS 6.814 x 2 = 13.629 */ {{3, 3}, {{1, 512, 38, 38}}, 512, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 6814386176.}, + /* GFLOPS 6.637 x 2 = 13.274 */ {{3, 3}, {{1, 256, 75, 75}}, 256, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 6636960000.}, + /* GFLOPS 11.797 x 1 = 11.797 */ {{5, 5}, {{1, 240, 64, 64}}, 240, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 11797463040.}, + /* GFLOPS 11.797 x 1 = 11.797 */ {{5, 5}, {{1, 480, 32, 32}}, 480, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 11796971520.}, + /* GFLOPS 10.701 x 1 = 10.701 */ {{3, 3}, {{1, 512, 38, 38}}, 804, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 10700715792.}, /* GFLOPS 10.087 x 1 = 10.087 */ {{3, 3}, {{1, 576, 38, 50}}, 512, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 10086963200.}, + /* GFLOPS 9.993 x 1 = 9.993 */ {{3, 3}, {{1, 64, 368, 368}}, 64, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 9993207808.}, + /* GFLOPS 9.989 x 1 = 9.989 */ {{3, 3}, {{1, 128, 184, 184}}, 128, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 9988874240.}, + /* GFLOPS 9.986 x 1 = 9.986 */ {{3, 3}, {{1, 512, 46, 46}}, 512, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 9985624064.}, /* GFLOPS 1.704 x 5 = 8.518 */ {{3, 3}, {{1, 512, 19, 19}}, 512, 512, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 1703596544.}, /* GFLOPS 1.704 x 5 = 8.518 */ {{3, 3}, {{1, 512, 19, 19}}, 512, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 1703596544.}, + /* GFLOPS 4.247 x 2 = 8.494 */ {{3, 3}, {{1, 480, 32, 32}}, 480, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 4247224320.}, + /* GFLOPS 8.025 x 1 = 8.025 */ {{3, 3}, {{1, 1024, 19, 19}}, 1206, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 8025101478.}, + /* GFLOPS 0.798 x 9 = 7.180 */ {{3, 3}, {{1, 128, 52, 52}}, 128, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", false, 797788160.}, + /* GFLOPS 0.798 x 9 = 7.179 */ {{3, 3}, {{1, 256, 26, 26}}, 256, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", false, 797615104.}, + /* GFLOPS 6.641 x 1 = 6.641 */ {{3, 3}, {{1, 64, 300, 300}}, 64, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 6641280000.}, /* GFLOPS 6.641 x 1 = 6.641 */ {{3, 3}, {{1, 64, 150, 200}}, 192, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 6641280000.}, + /* GFLOPS 6.638 x 1 = 6.638 */ {{3, 3}, {{1, 128, 150, 150}}, 128, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 6638400000.}, + /* GFLOPS 6.118 x 1 = 6.118 */ {{3, 3}, {{1, 144, 128, 128}}, 144, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 6117654528.}, + /* GFLOPS 6.116 x 1 = 6.116 */ {{3, 3}, {{1, 1152, 16, 16}}, 1152, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 6115590144.}, + /* GFLOPS 5.780 x 1 = 5.780 */ {{5, 5}, {{1, 672, 32, 32}}, 672, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 5780447232.}, + /* GFLOPS 1.704 x 3 = 5.111 */ {{3, 3}, {{1, 512, 19, 19}}, 512, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 1703596544.}, + /* GFLOPS 4.997 x 1 = 4.997 */ {{3, 3}, {{1, 64, 184, 184}}, 128, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 4996603904.}, + /* GFLOPS 4.994 x 1 = 4.994 */ {{3, 3}, {{1, 128, 92, 92}}, 256, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 4994437120.}, + /* GFLOPS 4.993 x 1 = 4.993 */ {{3, 3}, {{1, 256, 46, 46}}, 512, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 4993353728.}, + /* GFLOPS 4.993 x 1 = 4.993 */ {{3, 3}, {{1, 512, 46, 46}}, 256, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 4992812032.}, /* GFLOPS 1.659 x 3 = 4.977 */ {{3, 3}, {{1, 960, 10, 10}}, 960, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 1658976000.}, /* GFLOPS 2.156 x 2 = 4.312 */ {{3, 3}, {{1, 576, 19, 19}}, 576, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 2156088384.}, + /* GFLOPS 4.247 x 1 = 4.247 */ {{5, 5}, {{1, 144, 128, 128}}, 144, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 4247322624.}, + /* GFLOPS 0.798 x 5 = 3.988 */ {{3, 3}, {{1, 512, 13, 13}}, 512, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", false, 797528576.}, /* GFLOPS 0.958 x 4 = 3.833 */ {{3, 3}, {{1, 384, 19, 19}}, 384, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 958307712.}, + /* GFLOPS 0.624 x 6 = 3.746 */ {{3, 3}, {{1, 128, 46, 46}}, 128, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 624304640.}, + /* GFLOPS 3.408 x 1 = 3.408 */ {{3, 3}, {{1, 256, 38, 38}}, 512, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 3407562752.}, + /* GFLOPS 3.407 x 1 = 3.407 */ {{3, 3}, {{1, 512, 19, 19}}, 1024, 1, {1, 1}, {6, 6}, {6, 6}, {0, 0}, "", true, 3407193088.}, + /* GFLOPS 0.177 x 19 = 3.370 */ {{1, 1}, {{1, 512, 26, 26}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 177382400.}, + /* GFLOPS 0.302 x 11 = 3.325 */ {{3, 3}, {{1, 64, 64, 64}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 302252032.}, + /* GFLOPS 3.321 x 1 = 3.321 */ {{3, 3}, {{1, 64, 150, 150}}, 128, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 3320640000.}, /* GFLOPS 0.830 x 4 = 3.321 */ {{3, 3}, {{1, 64, 75, 100}}, 96, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 830160000.}, + /* GFLOPS 3.319 x 1 = 3.319 */ {{3, 3}, {{1, 128, 75, 75}}, 256, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 3319200000.}, + /* GFLOPS 1.598 x 2 = 3.195 */ {{3, 3}, {{1, 32, 416, 416}}, 64, 1, {2, 2}, {1, 1}, {1, 1}, {0, 0}, "", false, 1597652992.}, + /* GFLOPS 1.598 x 2 = 3.195 */ {{3, 3}, {{1, 32, 208, 208}}, 64, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", false, 1597652992.}, + /* GFLOPS 1.596 x 2 = 3.193 */ {{3, 3}, {{1, 64, 208, 208}}, 128, 1, {2, 2}, {1, 1}, {1, 1}, {0, 0}, "", false, 1596268544.}, + /* GFLOPS 1.596 x 2 = 3.193 */ {{3, 3}, {{1, 64, 104, 104}}, 128, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", false, 1596268544.}, + /* GFLOPS 1.596 x 2 = 3.191 */ {{3, 3}, {{1, 128, 104, 104}}, 256, 1, {2, 2}, {1, 1}, {1, 1}, {0, 0}, "", false, 1595576320.}, + /* GFLOPS 1.595 x 2 = 3.190 */ {{3, 3}, {{1, 256, 52, 52}}, 512, 1, {2, 2}, {1, 1}, {1, 1}, {0, 0}, "", false, 1595230208.}, + /* GFLOPS 1.595 x 2 = 3.190 */ {{3, 3}, {{1, 512, 26, 26}}, 1024, 1, {2, 2}, {1, 1}, {1, 1}, {0, 0}, "", false, 1595057152.}, + /* GFLOPS 0.178 x 16 = 2.841 */ {{1, 1}, {{1, 256, 52, 52}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 177555456.}, + /* GFLOPS 2.719 x 1 = 2.719 */ {{3, 3}, {{1, 96, 256, 256}}, 96, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 2719481856.}, + /* GFLOPS 0.177 x 15 = 2.659 */ {{1, 1}, {{1, 1024, 13, 13}}, 512, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 177295872.}, /* GFLOPS 1.245 x 2 = 2.490 */ {{3, 3}, {{1, 96, 75, 100}}, 96, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 1244880000.}, + /* GFLOPS 0.798 x 3 = 2.394 */ {{3, 3}, {{1, 64, 104, 104}}, 64, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", false, 798134272.}, + /* GFLOPS 0.472 x 5 = 2.360 */ {{3, 3}, {{1, 256, 20, 20}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 471961600.}, + /* GFLOPS 2.255 x 1 = 2.255 */ {{3, 3}, {{1, 128, 80, 100}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 2255285760.}, + /* GFLOPS 2.153 x 1 = 2.153 */ {{3, 3}, {{1, 128, 78, 98}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 2152611840.}, /* GFLOPS 2.100 x 1 = 2.100 */ {{3, 3}, {{1, 144, 75, 75}}, 144, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 2100330000.}, + /* GFLOPS 2.052 x 1 = 2.052 */ {{3, 3}, {{1, 128, 76, 96}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 2052298240.}, /* GFLOPS 1.022 x 2 = 2.044 */ {{3, 3}, {{1, 576, 19, 19}}, 273, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 1021896057.}, + /* GFLOPS 1.995 x 1 = 1.995 */ {{9, 9}, {{1, 3, 320, 400}}, 32, 1, {1, 1}, {1, 1}, {4, 4}, {0, 0}, "", true, 1994752000.}, + /* GFLOPS 1.954 x 1 = 1.954 */ {{3, 3}, {{1, 128, 74, 94}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 1954344960.}, /* GFLOPS 0.958 x 2 = 1.917 */ {{3, 3}, {{1, 192, 38, 38}}, 192, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 958446336.}, /* GFLOPS 1.888 x 1 = 1.888 */ {{3, 3}, {{1, 1024, 10, 10}}, 1024, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 1887539200.}, /* GFLOPS 1.888 x 1 = 1.888 */ {{3, 3}, {{1, 1024, 10, 10}}, 1024, 1024, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 1887539200.}, + /* GFLOPS 1.859 x 1 = 1.859 */ {{3, 3}, {{1, 128, 72, 92}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 1858752000.}, + /* GFLOPS 1.766 x 1 = 1.766 */ {{3, 3}, {{1, 128, 70, 90}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 1765519360.}, /* GFLOPS 1.704 x 1 = 1.704 */ {{3, 3}, {{1, 256, 38, 38}}, 256, 256, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 1703781376.}, /* GFLOPS 1.704 x 1 = 1.704 */ {{3, 3}, {{1, 256, 38, 38}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 1703781376.}, + /* GFLOPS 1.675 x 1 = 1.675 */ {{3, 3}, {{1, 128, 68, 88}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 1674647040.}, /* GFLOPS 1.660 x 1 = 1.660 */ {{3, 3}, {{1, 128, 75, 75}}, 128, 128, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 1659600000.}, /* GFLOPS 1.660 x 1 = 1.660 */ {{3, 3}, {{1, 128, 75, 75}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 1659600000.}, + /* GFLOPS 1.586 x 1 = 1.586 */ {{3, 3}, {{1, 128, 66, 86}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 1586135040.}, + /* GFLOPS 1.500 x 1 = 1.500 */ {{3, 3}, {{1, 128, 64, 84}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 1499983360.}, + /* GFLOPS 1.416 x 1 = 1.416 */ {{3, 3}, {{1, 128, 62, 82}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 1416192000.}, + /* GFLOPS 0.472 x 3 = 1.416 */ {{3, 3}, {{1, 128, 40, 40}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 472064000.}, + /* GFLOPS 0.472 x 3 = 1.416 */ {{3, 3}, {{1, 512, 10, 10}}, 512, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 471910400.}, /* GFLOPS 0.280 x 5 = 1.402 */ {{1, 1}, {{1, 576, 38, 50}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 280409600.}, /* GFLOPS 0.701 x 2 = 1.401 */ {{3, 3}, {{1, 128, 38, 50}}, 160, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 700720000.}, /* GFLOPS 0.231 x 6 = 1.388 */ {{3, 3}, {{1, 128, 56, 56}}, 32, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", false, 231311360.}, @@ -53,20 +118,39 @@ static const ConvParam_t testConvolutionConfigs[] = { /* GFLOPS 0.420 x 3 = 1.261 */ {{3, 3}, {{1, 96, 38, 50}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 420492800.}, /* GFLOPS 1.261 x 1 = 1.261 */ {{3, 3}, {{1, 192, 38, 50}}, 192, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 1261113600.}, /* GFLOPS 1.258 x 1 = 1.258 */ {{3, 3}, {{1, 1280, 10, 10}}, 546, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 1258038600.}, + /* GFLOPS 1.248 x 1 = 1.248 */ {{3, 3}, {{1, 256, 46, 46}}, 128, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 1248338432.}, /* GFLOPS 1.245 x 1 = 1.245 */ {{3, 3}, {{1, 64, 75, 75}}, 192, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 1245240000.}, + /* GFLOPS 1.210 x 1 = 1.210 */ {{3, 3}, {{1, 32, 256, 256}}, 32, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 1210056704.}, + /* GFLOPS 1.196 x 1 = 1.196 */ {{3, 3}, {{1, 384, 26, 26}}, 256, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", false, 1196336128.}, + /* GFLOPS 1.195 x 1 = 1.195 */ {{9, 9}, {{1, 32, 240, 320}}, 3, 1, {1, 1}, {1, 1}, {4, 4}, {0, 0}, "", true, 1194624000.}, + /* GFLOPS 1.182 x 1 = 1.182 */ {{3, 3}, {{1, 32, 320, 400}}, 64, 1, {2, 2}, {1, 1}, {1, 1}, {0, 0}, "", true, 1181696000.}, + /* GFLOPS 1.181 x 1 = 1.181 */ {{3, 3}, {{1, 64, 160, 200}}, 128, 1, {2, 2}, {1, 1}, {1, 1}, {0, 0}, "", true, 1180672000.}, /* GFLOPS 0.561 x 2 = 1.121 */ {{3, 3}, {{1, 128, 38, 50}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 560576000.}, + /* GFLOPS 1.112 x 1 = 1.112 */ {{3, 3}, {{1, 512, 10, 10}}, 1206, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 1111570200.}, + /* GFLOPS 0.357 x 3 = 1.072 */ {{1, 1}, {{1, 64, 208, 208}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 357187584.}, + /* GFLOPS 1.062 x 1 = 1.062 */ {{3, 3}, {{1, 240, 64, 64}}, 240, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 1061928960.}, + /* GFLOPS 0.076 x 14 = 1.058 */ {{3, 3}, {{1, 64, 32, 32}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 75563008.}, /* GFLOPS 1.051 x 1 = 1.051 */ {{3, 3}, {{1, 160, 38, 50}}, 192, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 1050988800.}, + /* GFLOPS 0.210 x 5 = 1.051 */ {{1, 1}, {{1, 256, 20, 20}}, 1024, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 210124800.}, + /* GFLOPS 0.210 x 5 = 1.049 */ {{1, 1}, {{1, 1024, 20, 20}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 209817600.}, /* GFLOPS 1.006 x 1 = 1.006 */ {{3, 3}, {{1, 1024, 10, 10}}, 546, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 1006441800.}, /* GFLOPS 0.246 x 4 = 0.985 */ {{1, 1}, {{1, 256, 75, 100}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 246240000.}, /* GFLOPS 0.189 x 5 = 0.947 */ {{1, 1}, {{1, 512, 19, 19}}, 512, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 189452800.}, /* GFLOPS 0.189 x 5 = 0.947 */ {{1, 1}, {{1, 512, 19, 19}}, 512, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 189452800.}, + /* GFLOPS 0.472 x 2 = 0.945 */ {{3, 3}, {{1, 64, 80, 80}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 472268800.}, /* GFLOPS 0.934 x 1 = 0.934 */ {{3, 3}, {{1, 96, 150, 150}}, 96, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 933660000.}, /* GFLOPS 0.231 x 4 = 0.925 */ {{3, 3}, {{1, 128, 28, 28}}, 128, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", false, 231311360.}, /* GFLOPS 0.896 x 1 = 0.896 */ {{5, 5}, {{1, 96, 27, 27}}, 256, 2, {1, 1}, {1, 1}, {2, 2}, {0, 0}, "", true, 895981824.}, + /* GFLOPS 0.089 x 10 = 0.890 */ {{1, 1}, {{1, 128, 52, 52}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 88950784.}, + /* GFLOPS 0.089 x 10 = 0.888 */ {{1, 1}, {{1, 256, 26, 26}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 88777728.}, /* GFLOPS 0.876 x 1 = 0.876 */ {{3, 3}, {{1, 160, 38, 50}}, 160, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 875824000.}, /* GFLOPS 0.850 x 1 = 0.850 */ {{7, 7}, {{1, 3, 600, 800}}, 24, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 849600000.}, /* GFLOPS 0.841 x 1 = 0.841 */ {{3, 3}, {{1, 128, 38, 50}}, 192, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 840864000.}, /* GFLOPS 0.415 x 2 = 0.831 */ {{3, 3}, {{1, 32, 150, 150}}, 32, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 415440000.}, + /* GFLOPS 0.757 x 1 = 0.757 */ {{1, 1}, {{1, 1024, 19, 19}}, 1024, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 757441536.}, + /* GFLOPS 0.712 x 1 = 0.712 */ {{1, 1}, {{1, 128, 208, 208}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 711606272.}, + /* GFLOPS 0.178 x 4 = 0.712 */ {{1, 1}, {{1, 128, 104, 104}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 177901568.}, + /* GFLOPS 0.354 x 2 = 0.707 */ {{1, 1}, {{1, 256, 52, 52}}, 255, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 353723760.}, /* GFLOPS 0.351 x 2 = 0.701 */ {{1, 1}, {{1, 576, 38, 50}}, 160, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 350512000.}, /* GFLOPS 0.701 x 1 = 0.701 */ {{3, 3}, {{1, 128, 75, 100}}, 160, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 700720000.}, /* GFLOPS 0.694 x 1 = 0.694 */ {{3, 3}, {{1, 64, 56, 56}}, 192, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 694235136.}, @@ -75,19 +159,31 @@ static const ConvParam_t testConvolutionConfigs[] = { /* GFLOPS 0.058 x 12 = 0.694 */ {{3, 3}, {{1, 128, 28, 28}}, 32, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", false, 57827840.}, /* GFLOPS 0.231 x 3 = 0.694 */ {{3, 3}, {{1, 512, 7, 7}}, 512, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", false, 231236096.}, /* GFLOPS 0.160 x 4 = 0.639 */ {{3, 3}, {{1, 64, 38, 38}}, 96, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 159833472.}, + /* GFLOPS 0.211 x 3 = 0.634 */ {{1, 1}, {{1, 64, 80, 80}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 211353600.}, + /* GFLOPS 0.211 x 3 = 0.632 */ {{1, 1}, {{1, 128, 40, 40}}, 512, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 210534400.}, + /* GFLOPS 0.210 x 3 = 0.630 */ {{1, 1}, {{1, 512, 40, 40}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 209920000.}, + /* GFLOPS 0.210 x 3 = 0.630 */ {{1, 1}, {{1, 512, 10, 10}}, 2048, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 209920000.}, /* GFLOPS 0.103 x 6 = 0.618 */ {{1, 1}, {{1, 256, 14, 14}}, 1024, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 102961152.}, /* GFLOPS 0.615 x 1 = 0.615 */ {{1, 1}, {{1, 320, 75, 100}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 615360000.}, + /* GFLOPS 0.305 x 2 = 0.609 */ {{3, 3}, {{1, 3, 416, 416}}, 32, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", false, 304578560.}, /* GFLOPS 0.597 x 1 = 0.597 */ {{3, 3}, {{1, 576, 19, 19}}, 576, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 597254400.}, + /* GFLOPS 0.278 x 2 = 0.557 */ {{1, 1}, {{1, 128, 46, 46}}, 512, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 278431744.}, /* GFLOPS 0.185 x 3 = 0.554 */ {{1, 1}, {{1, 192, 75, 100}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 184800000.}, /* GFLOPS 0.553 x 1 = 0.553 */ {{3, 3}, {{1, 64, 75, 100}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 553440000.}, /* GFLOPS 0.539 x 1 = 0.539 */ {{3, 3}, {{1, 144, 75, 75}}, 144, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 539178048.}, /* GFLOPS 0.103 x 5 = 0.514 */ {{1, 1}, {{1, 1024, 14, 14}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 102810624.}, /* GFLOPS 0.491 x 1 = 0.491 */ {{1, 1}, {{1, 576, 38, 50}}, 224, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 490716800.}, + /* GFLOPS 0.483 x 1 = 0.483 */ {{7, 7}, {{1, 3, 320, 320}}, 64, 1, {2, 2}, {1, 1}, {3, 3}, {0, 0}, "", false, 483328000.}, /* GFLOPS 0.240 x 2 = 0.479 */ {{3, 3}, {{1, 96, 38, 38}}, 96, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 239680896.}, + /* GFLOPS 0.477 x 1 = 0.477 */ {{3, 3}, {{1, 3, 368, 368}}, 64, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 476692480.}, /* GFLOPS 0.237 x 2 = 0.474 */ {{7, 7}, {{1, 3, 224, 224}}, 64, 1, {2, 2}, {1, 1}, {3, 3}, {0, 0}, "", true, 236830720.}, /* GFLOPS 0.472 x 1 = 0.472 */ {{3, 3}, {{1, 512, 19, 19}}, 512, 512, {2, 2}, {1, 1}, {1, 1}, {0, 0}, "", true, 471910400.}, /* GFLOPS 0.472 x 1 = 0.472 */ {{3, 3}, {{1, 512, 19, 19}}, 512, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 471910400.}, + /* GFLOPS 0.155 x 3 = 0.464 */ {{1, 1}, {{1, 112, 32, 32}}, 672, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 154828800.}, + /* GFLOPS 0.114 x 4 = 0.454 */ {{1, 1}, {{1, 192, 16, 16}}, 1152, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 113541120.}, /* GFLOPS 0.449 x 1 = 0.449 */ {{3, 3}, {{1, 384, 13, 13}}, 384, 2, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 448626048.}, + /* GFLOPS 0.089 x 5 = 0.443 */ {{1, 1}, {{1, 512, 13, 13}}, 512, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 88691200.}, + /* GFLOPS 0.428 x 1 = 0.428 */ {{1, 1}, {{1, 64, 64, 64}}, 810, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "VALID", true, 427991040.}, /* GFLOPS 0.426 x 1 = 0.426 */ {{3, 3}, {{1, 128, 75, 75}}, 128, 128, {2, 2}, {1, 1}, {1, 1}, {0, 0}, "", true, 426037760.}, /* GFLOPS 0.426 x 1 = 0.426 */ {{3, 3}, {{1, 128, 75, 75}}, 128, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 426037760.}, /* GFLOPS 0.426 x 1 = 0.426 */ {{3, 3}, {{1, 128, 38, 38}}, 128, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", false, 426037760.}, @@ -95,46 +191,81 @@ static const ConvParam_t testConvolutionConfigs[] = { /* GFLOPS 0.426 x 1 = 0.426 */ {{3, 3}, {{1, 256, 38, 38}}, 256, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 425945344.}, /* GFLOPS 0.426 x 1 = 0.426 */ {{3, 3}, {{1, 256, 19, 19}}, 256, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", false, 425945344.}, /* GFLOPS 0.421 x 1 = 0.421 */ {{1, 1}, {{1, 576, 38, 50}}, 192, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 420614400.}, + /* GFLOPS 0.420 x 1 = 0.420 */ {{1, 1}, {{1, 256, 40, 40}}, 512, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 420249600.}, + /* GFLOPS 0.210 x 2 = 0.420 */ {{1, 1}, {{1, 256, 80, 80}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 210124800.}, + /* GFLOPS 0.420 x 1 = 0.420 */ {{1, 1}, {{1, 512, 20, 20}}, 1024, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 419840000.}, + /* GFLOPS 0.420 x 1 = 0.420 */ {{1, 1}, {{1, 1024, 10, 10}}, 2048, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 419635200.}, + /* GFLOPS 0.210 x 2 = 0.420 */ {{1, 1}, {{1, 2048, 10, 10}}, 512, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 209766400.}, /* GFLOPS 0.415 x 1 = 0.415 */ {{3, 3}, {{1, 32, 150, 150}}, 32, 32, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 415440000.}, /* GFLOPS 0.415 x 1 = 0.415 */ {{3, 3}, {{1, 64, 150, 150}}, 64, 64, {2, 2}, {1, 1}, {1, 1}, {0, 0}, "", true, 415080000.}, /* GFLOPS 0.415 x 1 = 0.415 */ {{3, 3}, {{1, 64, 150, 150}}, 64, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 415080000.}, /* GFLOPS 0.104 x 4 = 0.414 */ {{1, 1}, {{1, 64, 56, 56}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 103563264.}, /* GFLOPS 0.103 x 4 = 0.413 */ {{1, 1}, {{1, 128, 28, 28}}, 512, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 103161856.}, + /* GFLOPS 0.399 x 1 = 0.399 */ {{3, 3}, {{1, 32, 208, 208}}, 64, 1, {2, 2}, {1, 1}, {1, 1}, {0, 0}, "", false, 399413248.}, + /* GFLOPS 0.200 x 2 = 0.399 */ {{3, 3}, {{1, 32, 104, 104}}, 32, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", false, 199706624.}, + /* GFLOPS 0.200 x 2 = 0.399 */ {{3, 3}, {{1, 64, 52, 52}}, 64, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", false, 199533568.}, + /* GFLOPS 0.399 x 1 = 0.399 */ {{3, 3}, {{1, 128, 52, 52}}, 256, 1, {2, 2}, {1, 1}, {1, 1}, {0, 0}, "", false, 398894080.}, + /* GFLOPS 0.199 x 2 = 0.399 */ {{3, 3}, {{1, 128, 26, 26}}, 128, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", false, 199447040.}, + /* GFLOPS 0.399 x 1 = 0.399 */ {{3, 3}, {{1, 256, 26, 26}}, 512, 1, {2, 2}, {1, 1}, {1, 1}, {0, 0}, "", false, 398807552.}, + /* GFLOPS 0.399 x 1 = 0.399 */ {{3, 3}, {{1, 256, 13, 13}}, 512, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", false, 398807552.}, /* GFLOPS 0.376 x 1 = 0.376 */ {{1, 1}, {{1, 24, 300, 400}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "VALID", true, 376320000.}, + /* GFLOPS 0.179 x 2 = 0.357 */ {{1, 1}, {{1, 64, 208, 208}}, 32, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 178593792.}, + /* GFLOPS 0.089 x 4 = 0.357 */ {{1, 1}, {{1, 64, 104, 104}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 89296896.}, + /* GFLOPS 0.356 x 1 = 0.356 */ {{1, 1}, {{1, 128, 104, 104}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 355803136.}, + /* GFLOPS 0.355 x 1 = 0.355 */ {{1, 1}, {{1, 256, 52, 52}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 355110912.}, + /* GFLOPS 0.355 x 1 = 0.355 */ {{1, 1}, {{1, 512, 26, 26}}, 512, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 354764800.}, + /* GFLOPS 0.355 x 1 = 0.355 */ {{1, 1}, {{1, 1024, 13, 13}}, 1024, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 354591744.}, + /* GFLOPS 0.355 x 1 = 0.355 */ {{1, 1}, {{1, 2048, 13, 13}}, 512, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 354505216.}, + /* GFLOPS 0.177 x 2 = 0.353 */ {{1, 1}, {{1, 512, 26, 26}}, 255, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 176689500.}, + /* GFLOPS 0.070 x 5 = 0.348 */ {{1, 1}, {{1, 128, 46, 46}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 69607936.}, /* GFLOPS 0.347 x 1 = 0.347 */ {{3, 3}, {{1, 128, 28, 28}}, 192, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 346967040.}, /* GFLOPS 0.347 x 1 = 0.347 */ {{3, 3}, {{1, 128, 28, 28}}, 192, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 346967040.}, /* GFLOPS 0.014 x 24 = 0.347 */ {{3, 3}, {{1, 128, 14, 14}}, 32, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", false, 14456960.}, + /* GFLOPS 0.113 x 3 = 0.340 */ {{1, 1}, {{1, 1152, 16, 16}}, 192, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 113295360.}, /* GFLOPS 0.053 x 6 = 0.320 */ {{1, 1}, {{1, 576, 19, 19}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 53277824.}, /* GFLOPS 0.319 x 1 = 0.319 */ {{3, 3}, {{1, 192, 19, 19}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 319482112.}, + /* GFLOPS 0.317 x 1 = 0.317 */ {{3, 3}, {{1, 3, 300, 300}}, 64, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 316800000.}, /* GFLOPS 0.315 x 1 = 0.315 */ {{3, 3}, {{1, 96, 75, 100}}, 96, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 315369600.}, /* GFLOPS 0.103 x 3 = 0.309 */ {{1, 1}, {{1, 512, 7, 7}}, 2048, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 102860800.}, /* GFLOPS 0.103 x 3 = 0.309 */ {{1, 1}, {{1, 512, 28, 28}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 102860800.}, + /* GFLOPS 0.154 x 2 = 0.309 */ {{1, 1}, {{1, 672, 32, 32}}, 112, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 154255360.}, /* GFLOPS 0.308 x 1 = 0.308 */ {{1, 1}, {{1, 320, 75, 100}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 307680000.}, + /* GFLOPS 0.034 x 9 = 0.304 */ {{1, 1}, {{1, 64, 64, 64}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "VALID", true, 33816576.}, /* GFLOPS 0.299 x 1 = 0.299 */ {{3, 3}, {{1, 256, 13, 13}}, 384, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 299105664.}, /* GFLOPS 0.299 x 1 = 0.299 */ {{3, 3}, {{1, 384, 13, 13}}, 256, 2, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 299084032.}, /* GFLOPS 0.017 x 17 = 0.290 */ {{1, 1}, {{1, 32, 32, 64}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 17039360.}, /* GFLOPS 0.017 x 16 = 0.269 */ {{1, 1}, {{1, 128, 32, 64}}, 32, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 16842752.}, /* GFLOPS 0.133 x 2 = 0.266 */ {{3, 3}, {{1, 128, 19, 19}}, 160, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 133136800.}, + /* GFLOPS 0.266 x 1 = 0.266 */ {{1, 1}, {{1, 384, 52, 52}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 266160128.}, + /* GFLOPS 0.266 x 1 = 0.266 */ {{1, 1}, {{1, 768, 26, 26}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 265987072.}, /* GFLOPS 0.038 x 7 = 0.265 */ {{3, 3}, {{1, 16, 64, 128}}, 16, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 37879808.}, + /* GFLOPS 0.019 x 14 = 0.264 */ {{3, 3}, {{1, 64, 16, 16}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 18890752.}, + /* GFLOPS 0.262 x 1 = 0.262 */ {{1, 1}, {{1, 2560, 20, 20}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 262195200.}, /* GFLOPS 0.126 x 2 = 0.252 */ {{3, 3}, {{1, 512, 5, 5}}, 546, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 125812050.}, /* GFLOPS 0.248 x 1 = 0.248 */ {{1, 1}, {{1, 64, 150, 200}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 247680000.}, /* GFLOPS 0.040 x 6 = 0.240 */ {{1, 1}, {{1, 576, 19, 19}}, 96, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 39958368.}, /* GFLOPS 0.080 x 3 = 0.240 */ {{3, 3}, {{1, 96, 19, 19}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 79893632.}, /* GFLOPS 0.240 x 1 = 0.240 */ {{3, 3}, {{1, 192, 38, 38}}, 192, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 239611584.}, /* GFLOPS 0.240 x 1 = 0.240 */ {{3, 3}, {{1, 192, 19, 19}}, 192, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 239611584.}, + /* GFLOPS 0.079 x 3 = 0.237 */ {{1, 1}, {{1, 80, 32, 32}}, 480, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 79134720.}, /* GFLOPS 0.237 x 1 = 0.237 */ {{7, 7}, {{1, 3, 224, 224}}, 64, 1, {2, 2}, {1, 1}, {3, 3}, {0, 0}, "", false, 236830720.}, /* GFLOPS 0.237 x 1 = 0.237 */ {{7, 7}, {{1, 3, 224, 224}}, 64, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 236830720.}, + /* GFLOPS 0.118 x 2 = 0.236 */ {{3, 3}, {{1, 32, 80, 80}}, 32, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 118169600.}, + /* GFLOPS 0.236 x 1 = 0.236 */ {{3, 3}, {{1, 256, 19, 19}}, 512, 1, {2, 2}, {1, 1}, {1, 1}, {0, 0}, "", true, 235980800.}, + /* GFLOPS 0.116 x 2 = 0.231 */ {{1, 1}, {{1, 24, 128, 128}}, 144, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 115605504.}, /* GFLOPS 0.111 x 2 = 0.221 */ {{3, 3}, {{1, 192, 10, 10}}, 320, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 110624000.}, /* GFLOPS 0.213 x 1 = 0.213 */ {{3, 3}, {{1, 128, 38, 38}}, 256, 1, {2, 2}, {1, 1}, {1, 1}, {0, 0}, "", false, 213018880.}, /* GFLOPS 0.213 x 1 = 0.213 */ {{3, 3}, {{1, 128, 19, 19}}, 256, 1, {1, 1}, {2, 2}, {2, 2}, {0, 0}, "", false, 213018880.}, /* GFLOPS 0.107 x 2 = 0.213 */ {{3, 3}, {{1, 128, 19, 19}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 106509440.}, /* GFLOPS 0.213 x 1 = 0.213 */ {{3, 3}, {{1, 256, 19, 19}}, 128, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", false, 212972672.}, + /* GFLOPS 0.213 x 1 = 0.213 */ {{3, 3}, {{1, 512, 38, 38}}, 16, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 212949568.}, /* GFLOPS 0.212 x 1 = 0.212 */ {{7, 7}, {{1, 3, 300, 300}}, 32, 1, {2, 2}, {1, 1}, {3, 3}, {0, 0}, "", true, 212400000.}, /* GFLOPS 0.211 x 1 = 0.211 */ {{11, 11}, {{1, 3, 227, 227}}, 96, 1, {4, 4}, {1, 1}, {0, 0}, {0, 0}, "", true, 211120800.}, /* GFLOPS 0.210 x 1 = 0.210 */ {{3, 3}, {{1, 64, 38, 50}}, 96, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 210307200.}, /* GFLOPS 0.210 x 1 = 0.210 */ {{1, 1}, {{1, 1024, 10, 10}}, 1024, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 209817600.}, /* GFLOPS 0.210 x 1 = 0.210 */ {{1, 1}, {{1, 1024, 10, 10}}, 1024, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 209817600.}, /* GFLOPS 0.104 x 2 = 0.208 */ {{3, 3}, {{1, 32, 75, 75}}, 32, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", false, 103860000.}, + /* GFLOPS 0.208 x 1 = 0.208 */ {{1, 1}, {{1, 16, 256, 256}}, 96, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 207618048.}, /* GFLOPS 0.206 x 1 = 0.206 */ {{1, 1}, {{1, 256, 56, 56}}, 512, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "", false, 205922304.}, /* GFLOPS 0.206 x 1 = 0.206 */ {{1, 1}, {{1, 256, 56, 56}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 205922304.}, /* GFLOPS 0.103 x 2 = 0.206 */ {{1, 1}, {{1, 256, 56, 56}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 102961152.}, @@ -148,27 +279,35 @@ static const ConvParam_t testConvolutionConfigs[] = { /* GFLOPS 0.190 x 1 = 0.190 */ {{1, 1}, {{1, 256, 38, 38}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 189637632.}, /* GFLOPS 0.190 x 1 = 0.190 */ {{1, 1}, {{1, 256, 38, 38}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 189637632.}, /* GFLOPS 0.047 x 4 = 0.190 */ {{1, 1}, {{1, 256, 38, 38}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 47409408.}, + /* GFLOPS 0.189 x 1 = 0.189 */ {{1, 1}, {{1, 1024, 19, 19}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 189360384.}, /* GFLOPS 0.038 x 5 = 0.189 */ {{3, 3}, {{1, 32, 32, 64}}, 32, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 37814272.}, + /* GFLOPS 0.189 x 1 = 0.189 */ {{1, 1}, {{1, 1152, 16, 16}}, 320, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 188825600.}, /* GFLOPS 0.185 x 1 = 0.185 */ {{1, 1}, {{1, 128, 75, 75}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 185040000.}, /* GFLOPS 0.185 x 1 = 0.185 */ {{1, 1}, {{1, 128, 75, 75}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 185040000.}, /* GFLOPS 0.181 x 1 = 0.181 */ {{3, 3}, {{1, 160, 14, 14}}, 320, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 180696320.}, /* GFLOPS 0.181 x 1 = 0.181 */ {{3, 3}, {{1, 160, 14, 14}}, 320, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 180696320.}, /* GFLOPS 0.090 x 2 = 0.181 */ {{3, 3}, {{1, 224, 10, 10}}, 224, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 90339200.}, /* GFLOPS 0.180 x 1 = 0.180 */ {{1, 1}, {{1, 224, 56, 56}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 180232192.}, + /* GFLOPS 0.088 x 2 = 0.177 */ {{1, 1}, {{1, 1024, 13, 13}}, 255, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 88301655.}, /* GFLOPS 0.174 x 1 = 0.174 */ {{3, 3}, {{1, 96, 28, 28}}, 128, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 173508608.}, /* GFLOPS 0.174 x 1 = 0.174 */ {{3, 3}, {{1, 96, 28, 28}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 173508608.}, /* GFLOPS 0.166 x 1 = 0.166 */ {{3, 3}, {{1, 160, 19, 19}}, 160, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 166406560.}, /* GFLOPS 0.080 x 2 = 0.160 */ {{1, 1}, {{1, 576, 19, 19}}, 192, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 79916736.}, /* GFLOPS 0.160 x 1 = 0.160 */ {{3, 3}, {{1, 128, 19, 19}}, 192, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 159764160.}, + /* GFLOPS 0.160 x 1 = 0.160 */ {{3, 3}, {{1, 1024, 19, 19}}, 24, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 159703512.}, /* GFLOPS 0.159 x 1 = 0.159 */ {{7, 7}, {{1, 3, 300, 300}}, 24, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 159300000.}, + /* GFLOPS 0.080 x 2 = 0.159 */ {{1, 1}, {{1, 40, 64, 64}}, 240, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 79626240.}, + /* GFLOPS 0.079 x 2 = 0.157 */ {{1, 1}, {{1, 480, 32, 32}}, 80, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 78725120.}, /* GFLOPS 0.155 x 1 = 0.155 */ {{1, 1}, {{1, 192, 56, 56}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 154542080.}, /* GFLOPS 0.146 x 1 = 0.146 */ {{3, 3}, {{1, 144, 14, 14}}, 288, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 146369664.}, /* GFLOPS 0.146 x 1 = 0.146 */ {{3, 3}, {{1, 144, 14, 14}}, 288, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 146369664.}, /* GFLOPS 0.072 x 2 = 0.144 */ {{1, 1}, {{1, 1024, 10, 10}}, 352, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 72124800.}, /* GFLOPS 0.140 x 1 = 0.140 */ {{1, 1}, {{1, 576, 38, 50}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 140204800.}, + /* GFLOPS 0.139 x 1 = 0.139 */ {{3, 3}, {{1, 256, 5, 5}}, 1206, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 138961350.}, /* GFLOPS 0.017 x 8 = 0.138 */ {{1, 1}, {{1, 16, 64, 128}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 17301504.}, /* GFLOPS 0.067 x 2 = 0.133 */ {{1, 1}, {{1, 576, 19, 19}}, 160, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 66597280.}, /* GFLOPS 0.133 x 1 = 0.133 */ {{3, 3}, {{1, 128, 38, 38}}, 160, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 133136800.}, + /* GFLOPS 0.044 x 3 = 0.133 */ {{1, 1}, {{1, 512, 13, 13}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 44345600.}, /* GFLOPS 0.129 x 1 = 0.129 */ {{1, 1}, {{1, 160, 56, 56}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 128851968.}, /* GFLOPS 0.128 x 1 = 0.128 */ {{3, 3}, {{1, 64, 24, 24}}, 192, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 127512576.}, /* GFLOPS 0.120 x 1 = 0.120 */ {{5, 5}, {{1, 32, 28, 28}}, 96, 1, {1, 1}, {1, 1}, {2, 2}, {0, 0}, "", true, 120497664.}, @@ -176,22 +315,35 @@ static const ConvParam_t testConvolutionConfigs[] = { /* GFLOPS 0.040 x 3 = 0.120 */ {{1, 1}, {{1, 96, 19, 19}}, 576, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 40131648.}, /* GFLOPS 0.118 x 1 = 0.118 */ {{1, 1}, {{1, 320, 38, 38}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 118477312.}, /* GFLOPS 0.017 x 7 = 0.118 */ {{1, 1}, {{1, 64, 64, 128}}, 16, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 16908288.}, + /* GFLOPS 0.118 x 1 = 0.118 */ {{3, 3}, {{1, 64, 80, 80}}, 64, 1, {2, 2}, {1, 1}, {1, 1}, {0, 0}, "", false, 118067200.}, + /* GFLOPS 0.118 x 1 = 0.118 */ {{3, 3}, {{1, 64, 40, 40}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 118067200.}, /* GFLOPS 0.039 x 3 = 0.118 */ {{1, 1}, {{1, 1024, 10, 10}}, 192, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 39340800.}, + /* GFLOPS 0.118 x 1 = 0.118 */ {{3, 3}, {{1, 128, 40, 40}}, 128, 1, {2, 2}, {1, 1}, {1, 1}, {0, 0}, "", false, 118016000.}, + /* GFLOPS 0.118 x 1 = 0.118 */ {{3, 3}, {{1, 128, 20, 20}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 118016000.}, + /* GFLOPS 0.118 x 1 = 0.118 */ {{3, 3}, {{1, 256, 20, 20}}, 256, 1, {2, 2}, {1, 1}, {1, 1}, {0, 0}, "", false, 117990400.}, /* GFLOPS 0.118 x 1 = 0.118 */ {{3, 3}, {{1, 256, 19, 19}}, 256, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 117990400.}, /* GFLOPS 0.058 x 2 = 0.116 */ {{3, 3}, {{1, 16, 56, 56}}, 64, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 58003456.}, /* GFLOPS 0.058 x 2 = 0.116 */ {{3, 3}, {{1, 32, 28, 28}}, 128, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 57903104.}, /* GFLOPS 0.058 x 2 = 0.116 */ {{3, 3}, {{1, 64, 14, 14}}, 256, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 57852928.}, /* GFLOPS 0.116 x 1 = 0.116 */ {{3, 3}, {{1, 128, 14, 14}}, 256, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 115655680.}, /* GFLOPS 0.116 x 1 = 0.116 */ {{3, 3}, {{1, 128, 14, 14}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 115655680.}, + /* GFLOPS 0.115 x 1 = 0.115 */ {{3, 3}, {{1, 3, 512, 512}}, 32, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 115343360.}, + /* GFLOPS 0.114 x 1 = 0.114 */ {{1, 1}, {{1, 144, 128, 128}}, 24, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 113639424.}, /* GFLOPS 0.112 x 1 = 0.112 */ {{1, 1}, {{1, 1024, 10, 10}}, 546, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 111875400.}, + /* GFLOPS 0.110 x 1 = 0.110 */ {{1, 1}, {{1, 480, 32, 32}}, 112, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 110215168.}, + /* GFLOPS 0.107 x 1 = 0.107 */ {{1, 1}, {{1, 64, 32, 32}}, 810, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "VALID", true, 106997760.}, /* GFLOPS 0.036 x 3 = 0.107 */ {{1, 1}, {{1, 192, 38, 38}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 35580160.}, /* GFLOPS 0.107 x 1 = 0.107 */ {{3, 3}, {{1, 32, 75, 75}}, 128, 1, {2, 2}, {1, 1}, {1, 1}, {0, 0}, "", false, 106648064.}, /* GFLOPS 0.107 x 1 = 0.107 */ {{3, 3}, {{1, 64, 38, 38}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 106555648.}, + /* GFLOPS 0.105 x 1 = 0.105 */ {{1, 1}, {{1, 256, 40, 40}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 105062400.}, + /* GFLOPS 0.105 x 1 = 0.105 */ {{1, 1}, {{1, 512, 20, 20}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 104960000.}, /* GFLOPS 0.105 x 1 = 0.105 */ {{1, 1}, {{1, 512, 10, 10}}, 1024, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 104960000.}, /* GFLOPS 0.105 x 1 = 0.105 */ {{1, 1}, {{1, 512, 10, 10}}, 1024, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 104960000.}, + /* GFLOPS 0.105 x 1 = 0.105 */ {{1, 1}, {{1, 1024, 10, 10}}, 512, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 104908800.}, /* GFLOPS 0.103 x 1 = 0.103 */ {{1, 1}, {{1, 128, 56, 56}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 103161856.}, /* GFLOPS 0.051 x 2 = 0.103 */ {{1, 1}, {{1, 256, 28, 28}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 51480576.}, /* GFLOPS 0.051 x 2 = 0.103 */ {{1, 1}, {{1, 256, 28, 28}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 51480576.}, + /* GFLOPS 0.008 x 12 = 0.101 */ {{1, 1}, {{1, 64, 32, 32}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "VALID", true, 8454144.}, /* GFLOPS 0.101 x 1 = 0.101 */ {{1, 1}, {{1, 512, 19, 19}}, 273, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 101016825.}, /* GFLOPS 0.096 x 1 = 0.096 */ {{1, 1}, {{1, 480, 28, 28}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 96438272.}, /* GFLOPS 0.095 x 1 = 0.095 */ {{1, 1}, {{1, 128, 38, 38}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 95003648.}, @@ -208,8 +360,10 @@ static const ConvParam_t testConvolutionConfigs[] = { /* GFLOPS 0.092 x 1 = 0.092 */ {{1, 1}, {{1, 192, 75, 100}}, 32, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 92400000.}, /* GFLOPS 0.090 x 1 = 0.090 */ {{1, 1}, {{1, 448, 28, 28}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 90015744.}, /* GFLOPS 0.045 x 2 = 0.090 */ {{3, 3}, {{1, 576, 19, 19}}, 12, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 44918508.}, + /* GFLOPS 0.044 x 2 = 0.089 */ {{1, 1}, {{1, 256, 26, 26}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 44388864.}, /* GFLOPS 0.089 x 1 = 0.089 */ {{3, 3}, {{1, 112, 14, 14}}, 224, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 88554368.}, /* GFLOPS 0.089 x 1 = 0.089 */ {{3, 3}, {{1, 112, 14, 14}}, 224, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 88554368.}, + /* GFLOPS 0.088 x 1 = 0.088 */ {{1, 1}, {{1, 256, 26, 26}}, 255, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 88430940.}, /* GFLOPS 0.021 x 4 = 0.084 */ {{5, 1}, {{1, 32, 32, 64}}, 32, 1, {1, 1}, {1, 1}, {2, 0}, {0, 0}, "", false, 21037056.}, /* GFLOPS 0.021 x 4 = 0.084 */ {{1, 5}, {{1, 32, 32, 64}}, 32, 1, {1, 1}, {1, 1}, {0, 2}, {0, 0}, "", true, 21037056.}, /* GFLOPS 0.084 x 1 = 0.084 */ {{1, 1}, {{1, 416, 28, 28}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 83593216.}, @@ -217,9 +371,13 @@ static const ConvParam_t testConvolutionConfigs[] = { /* GFLOPS 0.040 x 2 = 0.080 */ {{1, 1}, {{1, 576, 19, 19}}, 96, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 39958368.}, /* GFLOPS 0.040 x 2 = 0.079 */ {{1, 1}, {{1, 24, 75, 75}}, 144, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 39690000.}, /* GFLOPS 0.040 x 2 = 0.079 */ {{3, 3}, {{1, 3, 300, 300}}, 32, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 39600000.}, + /* GFLOPS 0.079 x 1 = 0.079 */ {{1, 1}, {{1, 240, 64, 64}}, 40, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 78807040.}, + /* GFLOPS 0.079 x 1 = 0.079 */ {{1, 1}, {{1, 384, 40, 40}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 78745600.}, /* GFLOPS 0.077 x 1 = 0.077 */ {{1, 1}, {{1, 96, 56, 56}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 77471744.}, /* GFLOPS 0.077 x 1 = 0.077 */ {{3, 3}, {{1, 192, 10, 10}}, 224, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 77436800.}, /* GFLOPS 0.077 x 1 = 0.077 */ {{1, 1}, {{1, 384, 28, 28}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 77170688.}, + /* GFLOPS 0.076 x 1 = 0.076 */ {{3, 3}, {{1, 3, 416, 416}}, 32, 1, {2, 2}, {1, 1}, {1, 1}, {0, 0}, "", false, 76144640.}, + /* GFLOPS 0.076 x 1 = 0.076 */ {{1, 1}, {{1, 96, 128, 128}}, 24, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 75890688.}, /* GFLOPS 0.038 x 2 = 0.076 */ {{3, 3}, {{1, 32, 32, 64}}, 32, 1, {1, 1}, {8, 8}, {8, 8}, {0, 0}, "", true, 37814272.}, /* GFLOPS 0.038 x 2 = 0.076 */ {{3, 3}, {{1, 32, 32, 64}}, 32, 1, {1, 1}, {4, 4}, {4, 4}, {0, 0}, "", true, 37814272.}, /* GFLOPS 0.038 x 2 = 0.076 */ {{3, 3}, {{1, 32, 32, 64}}, 32, 1, {1, 1}, {2, 2}, {2, 2}, {0, 0}, "", true, 37814272.}, @@ -230,6 +388,9 @@ static const ConvParam_t testConvolutionConfigs[] = { /* GFLOPS 0.071 x 1 = 0.071 */ {{1, 1}, {{1, 24, 150, 150}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "VALID", true, 70560000.}, /* GFLOPS 0.070 x 1 = 0.070 */ {{3, 3}, {{1, 96, 14, 14}}, 208, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 70487872.}, /* GFLOPS 0.069 x 1 = 0.069 */ {{3, 3}, {{1, 96, 14, 14}}, 204, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 69132336.}, + /* GFLOPS 0.068 x 1 = 0.068 */ {{1, 1}, {{1, 32, 256, 256}}, 16, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 68157440.}, + /* GFLOPS 0.005 x 14 = 0.066 */ {{3, 3}, {{1, 64, 8, 8}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 4722688.}, + /* GFLOPS 0.066 x 1 = 0.066 */ {{1, 1}, {{1, 672, 16, 16}}, 192, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 66109440.}, /* GFLOPS 0.066 x 1 = 0.066 */ {{1, 1}, {{1, 1280, 10, 10}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 65561600.}, /* GFLOPS 0.033 x 2 = 0.065 */ {{3, 3}, {{1, 48, 14, 14}}, 192, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 32551680.}, /* GFLOPS 0.065 x 1 = 0.065 */ {{3, 3}, {{1, 192, 7, 7}}, 384, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 65046912.}, @@ -239,6 +400,7 @@ static const ConvParam_t testConvolutionConfigs[] = { /* GFLOPS 0.032 x 2 = 0.064 */ {{3, 3}, {{1, 96, 12, 12}}, 128, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 31868928.}, /* GFLOPS 0.061 x 1 = 0.061 */ {{1, 1}, {{1, 960, 10, 10}}, 320, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 61472000.}, /* GFLOPS 0.031 x 2 = 0.061 */ {{1, 1}, {{1, 960, 10, 10}}, 160, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 30736000.}, + /* GFLOPS 0.061 x 1 = 0.061 */ {{1, 1}, {{1, 512, 46, 46}}, 28, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 60729200.}, /* GFLOPS 0.060 x 1 = 0.060 */ {{3, 3}, {{1, 96, 38, 38}}, 96, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 59920224.}, /* GFLOPS 0.059 x 1 = 0.059 */ {{1, 1}, {{1, 320, 38, 38}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 59238656.}, /* GFLOPS 0.059 x 1 = 0.059 */ {{3, 3}, {{1, 128, 19, 19}}, 256, 1, {2, 2}, {1, 1}, {1, 1}, {0, 0}, "", true, 59008000.}, @@ -253,6 +415,11 @@ static const ConvParam_t testConvolutionConfigs[] = { /* GFLOPS 0.053 x 1 = 0.053 */ {{3, 3}, {{1, 128, 38, 38}}, 16, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 53254720.}, /* GFLOPS 0.053 x 1 = 0.053 */ {{1, 1}, {{1, 528, 14, 14}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 53036032.}, /* GFLOPS 0.053 x 1 = 0.053 */ {{1, 1}, {{1, 528, 14, 14}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 53036032.}, + /* GFLOPS 0.053 x 1 = 0.053 */ {{1, 1}, {{1, 64, 80, 80}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 52838400.}, + /* GFLOPS 0.053 x 1 = 0.053 */ {{1, 1}, {{1, 64, 40, 40}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 52838400.}, + /* GFLOPS 0.053 x 1 = 0.053 */ {{1, 1}, {{1, 128, 80, 80}}, 32, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 52633600.}, + /* GFLOPS 0.053 x 1 = 0.053 */ {{1, 1}, {{1, 128, 20, 20}}, 512, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 52633600.}, + /* GFLOPS 0.053 x 1 = 0.053 */ {{1, 1}, {{1, 256, 10, 10}}, 1024, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 52531200.}, /* GFLOPS 0.052 x 1 = 0.052 */ {{1, 1}, {{1, 1024, 10, 10}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 52454400.}, /* GFLOPS 0.052 x 1 = 0.052 */ {{1, 1}, {{1, 1024, 10, 10}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 52454400.}, /* GFLOPS 0.052 x 1 = 0.052 */ {{1, 1}, {{1, 1024, 10, 10}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 52454400.}, @@ -268,6 +435,7 @@ static const ConvParam_t testConvolutionConfigs[] = { /* GFLOPS 0.050 x 1 = 0.050 */ {{1, 1}, {{1, 992, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 49799680.}, /* GFLOPS 0.048 x 1 = 0.048 */ {{1, 1}, {{1, 960, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 48194048.}, /* GFLOPS 0.047 x 1 = 0.047 */ {{1, 1}, {{1, 256, 19, 19}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 47409408.}, + /* GFLOPS 0.047 x 1 = 0.047 */ {{1, 1}, {{1, 144, 64, 64}}, 40, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 47349760.}, /* GFLOPS 0.047 x 1 = 0.047 */ {{1, 1}, {{1, 512, 38, 50}}, 24, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 46740000.}, /* GFLOPS 0.047 x 1 = 0.047 */ {{1, 1}, {{1, 928, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 46588416.}, /* GFLOPS 0.046 x 1 = 0.046 */ {{1, 1}, {{1, 64, 75, 75}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 46440000.}, @@ -280,6 +448,7 @@ static const ConvParam_t testConvolutionConfigs[] = { /* GFLOPS 0.045 x 1 = 0.045 */ {{3, 3}, {{1, 3, 227, 227}}, 64, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "", true, 44946880.}, /* GFLOPS 0.044 x 1 = 0.044 */ {{3, 3}, {{1, 128, 19, 19}}, 192, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 44256000.}, /* GFLOPS 0.044 x 1 = 0.044 */ {{3, 3}, {{1, 1024, 10, 10}}, 24, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 44239200.}, + /* GFLOPS 0.044 x 1 = 0.044 */ {{1, 1}, {{1, 512, 13, 13}}, 255, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 44172375.}, /* GFLOPS 0.043 x 1 = 0.043 */ {{7, 7}, {{1, 3, 96, 96}}, 64, 1, {2, 2}, {1, 1}, {3, 3}, {0, 0}, "", true, 43499520.}, /* GFLOPS 0.043 x 1 = 0.043 */ {{1, 1}, {{1, 864, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 43377152.}, /* GFLOPS 0.042 x 1 = 0.042 */ {{1, 1}, {{1, 832, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 41771520.}, @@ -289,6 +458,7 @@ static const ConvParam_t testConvolutionConfigs[] = { /* GFLOPS 0.040 x 1 = 0.040 */ {{3, 3}, {{1, 64, 19, 19}}, 96, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 39958368.}, /* GFLOPS 0.040 x 1 = 0.040 */ {{3, 3}, {{1, 256, 19, 19}}, 24, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 39932376.}, /* GFLOPS 0.040 x 1 = 0.040 */ {{3, 3}, {{1, 3, 300, 300}}, 32, 1, {2, 2}, {1, 1}, {1, 1}, {0, 0}, "", true, 39600000.}, + /* GFLOPS 0.039 x 1 = 0.039 */ {{1, 1}, {{1, 240, 32, 32}}, 80, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 39403520.}, /* GFLOPS 0.039 x 1 = 0.039 */ {{1, 1}, {{1, 144, 75, 75}}, 24, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 39015000.}, /* GFLOPS 0.039 x 1 = 0.039 */ {{1, 1}, {{1, 192, 28, 28}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 38635520.}, /* GFLOPS 0.039 x 1 = 0.039 */ {{1, 1}, {{1, 768, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 38560256.}, @@ -297,9 +467,11 @@ static const ConvParam_t testConvolutionConfigs[] = { /* GFLOPS 0.036 x 1 = 0.036 */ {{1, 1}, {{1, 480, 14, 14}}, 192, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 36164352.}, /* GFLOPS 0.018 x 2 = 0.036 */ {{1, 1}, {{1, 192, 38, 38}}, 32, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 17790080.}, /* GFLOPS 0.035 x 1 = 0.035 */ {{1, 1}, {{1, 704, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 35348992.}, + /* GFLOPS 0.035 x 1 = 0.035 */ {{1, 1}, {{1, 512, 46, 46}}, 16, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 34702400.}, /* GFLOPS 0.034 x 1 = 0.034 */ {{1, 1}, {{1, 672, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 33743360.}, /* GFLOPS 0.034 x 1 = 0.034 */ {{1, 1}, {{1, 128, 32, 64}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 33685504.}, /* GFLOPS 0.034 x 1 = 0.034 */ {{2, 2}, {{1, 64, 64, 128}}, 32, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "", false, 33619968.}, + /* GFLOPS 0.033 x 1 = 0.033 */ {{3, 3}, {{1, 256, 3, 3}}, 804, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 33350724.}, /* GFLOPS 0.033 x 1 = 0.033 */ {{1, 1}, {{1, 528, 14, 14}}, 160, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 33147520.}, /* GFLOPS 0.033 x 1 = 0.033 */ {{1, 1}, {{1, 528, 14, 14}}, 160, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 33147520.}, /* GFLOPS 0.033 x 1 = 0.033 */ {{1, 1}, {{1, 1024, 10, 10}}, 160, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 32784000.}, @@ -307,24 +479,29 @@ static const ConvParam_t testConvolutionConfigs[] = { /* GFLOPS 0.032 x 1 = 0.032 */ {{1, 1}, {{1, 512, 14, 14}}, 160, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 32144000.}, /* GFLOPS 0.032 x 1 = 0.032 */ {{1, 1}, {{1, 640, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 32137728.}, /* GFLOPS 0.032 x 1 = 0.032 */ {{1, 1}, {{1, 508, 14, 14}}, 160, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 31893120.}, + /* GFLOPS 0.011 x 3 = 0.032 */ {{1, 1}, {{1, 320, 16, 16}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 10502144.}, /* GFLOPS 0.031 x 1 = 0.031 */ {{1, 1}, {{1, 832, 7, 7}}, 384, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 31328640.}, /* GFLOPS 0.031 x 1 = 0.031 */ {{1, 1}, {{1, 832, 7, 7}}, 384, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 31328640.}, /* GFLOPS 0.031 x 1 = 0.031 */ {{1, 1}, {{1, 608, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 30532096.}, + /* GFLOPS 0.015 x 2 = 0.030 */ {{1, 1}, {{1, 128, 46, 46}}, 28, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 15226736.}, /* GFLOPS 0.015 x 2 = 0.030 */ {{5, 5}, {{1, 24, 14, 14}}, 64, 1, {1, 1}, {1, 1}, {2, 2}, {0, 0}, "", true, 15065344.}, /* GFLOPS 0.015 x 2 = 0.030 */ {{5, 5}, {{1, 24, 14, 14}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 15065344.}, /* GFLOPS 0.015 x 2 = 0.030 */ {{5, 5}, {{1, 48, 7, 7}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 15059072.}, /* GFLOPS 0.029 x 1 = 0.029 */ {{3, 3}, {{1, 256, 10, 10}}, 256, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 29497600.}, + /* GFLOPS 0.015 x 2 = 0.029 */ {{1, 1}, {{1, 112, 32, 32}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 14745600.}, /* GFLOPS 0.029 x 1 = 0.029 */ {{1, 1}, {{1, 192, 28, 28}}, 96, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 28976640.}, /* GFLOPS 0.029 x 1 = 0.029 */ {{1, 1}, {{1, 192, 28, 28}}, 96, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 28976640.}, /* GFLOPS 0.029 x 1 = 0.029 */ {{1, 1}, {{1, 512, 14, 14}}, 144, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 28929600.}, /* GFLOPS 0.029 x 1 = 0.029 */ {{1, 1}, {{1, 512, 14, 14}}, 144, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 28929600.}, /* GFLOPS 0.029 x 1 = 0.029 */ {{1, 1}, {{1, 576, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 28926464.}, /* GFLOPS 0.027 x 1 = 0.027 */ {{1, 1}, {{1, 544, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 27320832.}, + /* GFLOPS 0.027 x 1 = 0.027 */ {{1, 1}, {{1, 64, 16, 16}}, 810, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "VALID", true, 26749440.}, /* GFLOPS 0.027 x 1 = 0.027 */ {{1, 1}, {{1, 384, 19, 19}}, 96, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 26650464.}, /* GFLOPS 0.027 x 1 = 0.027 */ {{1, 1}, {{1, 576, 19, 19}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 26638912.}, /* GFLOPS 0.027 x 1 = 0.027 */ {{3, 3}, {{1, 128, 38, 38}}, 8, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 26627360.}, /* GFLOPS 0.027 x 1 = 0.027 */ {{1, 1}, {{1, 528, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 26518016.}, /* GFLOPS 0.027 x 1 = 0.027 */ {{1, 1}, {{1, 528, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 26518016.}, + /* GFLOPS 0.009 x 3 = 0.026 */ {{1, 1}, {{1, 128, 46, 46}}, 16, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 8700992.}, /* GFLOPS 0.026 x 1 = 0.026 */ {{1, 1}, {{1, 96, 75, 75}}, 24, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 26055000.}, /* GFLOPS 0.026 x 1 = 0.026 */ {{1, 1}, {{1, 64, 56, 56}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "VALID", true, 25890816.}, /* GFLOPS 0.026 x 1 = 0.026 */ {{1, 1}, {{1, 64, 56, 56}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 25890816.}, @@ -336,6 +513,7 @@ static const ConvParam_t testConvolutionConfigs[] = { /* GFLOPS 0.013 x 2 = 0.026 */ {{1, 1}, {{1, 256, 28, 28}}, 32, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 12870144.}, /* GFLOPS 0.026 x 1 = 0.026 */ {{1, 1}, {{1, 512, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 25715200.}, /* GFLOPS 0.013 x 2 = 0.026 */ {{1, 1}, {{1, 512, 14, 14}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 12857600.}, + /* GFLOPS 0.002 x 12 = 0.025 */ {{1, 1}, {{1, 64, 16, 16}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "VALID", true, 2113536.}, /* GFLOPS 0.024 x 1 = 0.024 */ {{1, 1}, {{1, 480, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 24109568.}, /* GFLOPS 0.024 x 1 = 0.024 */ {{1, 1}, {{1, 128, 38, 38}}, 256, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "", false, 23750912.}, /* GFLOPS 0.024 x 1 = 0.024 */ {{1, 1}, {{1, 256, 19, 19}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 23704704.}, @@ -345,7 +523,9 @@ static const ConvParam_t testConvolutionConfigs[] = { /* GFLOPS 0.023 x 1 = 0.023 */ {{1, 1}, {{1, 448, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 22503936.}, /* GFLOPS 0.023 x 1 = 0.023 */ {{1, 1}, {{1, 512, 14, 14}}, 112, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 22500800.}, /* GFLOPS 0.022 x 1 = 0.022 */ {{1, 1}, {{1, 508, 14, 14}}, 112, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 22325184.}, + /* GFLOPS 0.022 x 1 = 0.022 */ {{3, 3}, {{1, 512, 10, 10}}, 24, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 22120800.}, /* GFLOPS 0.021 x 1 = 0.021 */ {{3, 3}, {{1, 128, 12, 12}}, 256, 1, {2, 2}, {1, 1}, {1, 1}, {0, 0}, "", true, 21242880.}, + /* GFLOPS 0.021 x 1 = 0.021 */ {{1, 1}, {{1, 40, 64, 64}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 21233664.}, /* GFLOPS 0.021 x 1 = 0.021 */ {{1, 1}, {{1, 416, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 20898304.}, /* GFLOPS 0.021 x 1 = 0.021 */ {{1, 1}, {{1, 832, 7, 7}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 20885760.}, /* GFLOPS 0.021 x 1 = 0.021 */ {{1, 1}, {{1, 832, 7, 7}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 20885760.}, @@ -360,6 +540,7 @@ static const ConvParam_t testConvolutionConfigs[] = { /* GFLOPS 0.019 x 1 = 0.019 */ {{1, 1}, {{1, 192, 28, 28}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 19317760.}, /* GFLOPS 0.019 x 1 = 0.019 */ {{1, 1}, {{1, 192, 28, 28}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 19317760.}, /* GFLOPS 0.019 x 1 = 0.019 */ {{1, 1}, {{1, 384, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 19292672.}, + /* GFLOPS 0.019 x 1 = 0.019 */ {{1, 1}, {{1, 64, 64, 64}}, 36, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "VALID", true, 19021824.}, /* GFLOPS 0.018 x 1 = 0.018 */ {{1, 1}, {{1, 576, 10, 10}}, 160, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 18448000.}, /* GFLOPS 0.018 x 1 = 0.018 */ {{1, 1}, {{1, 480, 14, 14}}, 96, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 18082176.}, /* GFLOPS 0.018 x 1 = 0.018 */ {{1, 1}, {{1, 480, 14, 14}}, 96, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 18082176.}, @@ -371,13 +552,16 @@ static const ConvParam_t testConvolutionConfigs[] = { /* GFLOPS 0.016 x 1 = 0.016 */ {{1, 1}, {{1, 832, 7, 7}}, 192, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 15664320.}, /* GFLOPS 0.015 x 1 = 0.015 */ {{5, 5}, {{1, 48, 7, 7}}, 128, 1, {1, 1}, {1, 1}, {2, 2}, {0, 0}, "", true, 15059072.}, /* GFLOPS 0.015 x 1 = 0.015 */ {{5, 5}, {{1, 32, 12, 12}}, 64, 1, {1, 1}, {1, 1}, {2, 2}, {0, 0}, "", true, 14754816.}, + /* GFLOPS 0.015 x 1 = 0.015 */ {{3, 3}, {{1, 128, 10, 10}}, 256, 1, {2, 2}, {1, 1}, {1, 1}, {0, 0}, "", true, 14752000.}, /* GFLOPS 0.014 x 1 = 0.014 */ {{1, 1}, {{1, 288, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 14475776.}, /* GFLOPS 0.014 x 1 = 0.014 */ {{1, 1}, {{1, 512, 5, 5}}, 546, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 13991250.}, /* GFLOPS 0.013 x 1 = 0.013 */ {{1, 1}, {{1, 144, 38, 38}}, 32, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 13354112.}, /* GFLOPS 0.007 x 2 = 0.013 */ {{1, 1}, {{1, 16, 56, 56}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 6623232.}, + /* GFLOPS 0.013 x 1 = 0.013 */ {{1, 1}, {{1, 512, 10, 10}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 13120000.}, /* GFLOPS 0.013 x 1 = 0.013 */ {{1, 1}, {{1, 832, 7, 7}}, 160, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 13053600.}, /* GFLOPS 0.013 x 1 = 0.013 */ {{1, 1}, {{1, 832, 7, 7}}, 160, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 13053600.}, /* GFLOPS 0.007 x 2 = 0.013 */ {{1, 1}, {{1, 32, 28, 28}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 6522880.}, + /* GFLOPS 0.001 x 11 = 0.013 */ {{3, 3}, {{1, 64, 4, 4}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 1180672.}, /* GFLOPS 0.006 x 2 = 0.013 */ {{1, 1}, {{1, 64, 14, 14}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 6472704.}, /* GFLOPS 0.013 x 1 = 0.013 */ {{1, 1}, {{1, 128, 56, 56}}, 16, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 12895232.}, /* GFLOPS 0.013 x 1 = 0.013 */ {{1, 1}, {{1, 256, 28, 28}}, 32, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 12870144.}, @@ -394,6 +578,7 @@ static const ConvParam_t testConvolutionConfigs[] = { /* GFLOPS 0.012 x 1 = 0.012 */ {{1, 1}, {{1, 640, 6, 6}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 11805696.}, /* GFLOPS 0.012 x 1 = 0.012 */ {{1, 1}, {{1, 928, 7, 7}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 11647104.}, /* GFLOPS 0.011 x 1 = 0.011 */ {{1, 1}, {{1, 896, 7, 7}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 11245696.}, + /* GFLOPS 0.011 x 1 = 0.011 */ {{1, 1}, {{1, 256, 13, 13}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 11097216.}, /* GFLOPS 0.011 x 1 = 0.011 */ {{3, 3}, {{1, 256, 10, 10}}, 24, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 11061600.}, /* GFLOPS 0.006 x 2 = 0.011 */ {{3, 3}, {{1, 512, 5, 5}}, 24, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 5530200.}, /* GFLOPS 0.011 x 1 = 0.011 */ {{1, 1}, {{1, 864, 7, 7}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 10844288.}, @@ -417,13 +602,13 @@ static const ConvParam_t testConvolutionConfigs[] = { /* GFLOPS 0.008 x 1 = 0.008 */ {{1, 1}, {{1, 608, 7, 7}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 7633024.}, /* GFLOPS 0.008 x 1 = 0.008 */ {{5, 5}, {{1, 16, 14, 14}}, 48, 1, {1, 1}, {1, 1}, {2, 2}, {0, 0}, "", true, 7535808.}, /* GFLOPS 0.008 x 1 = 0.008 */ {{5, 5}, {{1, 16, 14, 14}}, 48, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 7535808.}, - /* GFLOPS 0.004 x 2 = 0.007 */ {{3, 3}, {{1, 64, 5, 5}}, 128, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 3689600.}, /* GFLOPS 0.007 x 1 = 0.007 */ {{1, 1}, {{1, 640, 6, 6}}, 160, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 7378560.}, /* GFLOPS 0.004 x 2 = 0.007 */ {{1, 1}, {{1, 48, 14, 14}}, 192, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 3650304.}, /* GFLOPS 0.007 x 1 = 0.007 */ {{1, 1}, {{1, 384, 14, 14}}, 48, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 7234752.}, /* GFLOPS 0.007 x 1 = 0.007 */ {{1, 1}, {{1, 576, 7, 7}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 7231616.}, /* GFLOPS 0.007 x 1 = 0.007 */ {{1, 1}, {{1, 256, 12, 12}}, 96, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 7091712.}, /* GFLOPS 0.007 x 1 = 0.007 */ {{1, 1}, {{1, 544, 7, 7}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 6830208.}, + /* GFLOPS 0.007 x 1 = 0.007 */ {{1, 1}, {{1, 64, 8, 8}}, 810, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "VALID", true, 6687360.}, /* GFLOPS 0.007 x 1 = 0.007 */ {{3, 3}, {{1, 160, 6, 6}}, 256, 1, {2, 2}, {1, 1}, {1, 1}, {0, 0}, "", true, 6637824.}, /* GFLOPS 0.007 x 1 = 0.007 */ {{1, 1}, {{1, 528, 14, 14}}, 32, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 6629504.}, /* GFLOPS 0.007 x 1 = 0.007 */ {{1, 1}, {{1, 528, 14, 14}}, 32, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 6629504.}, @@ -434,11 +619,13 @@ static const ConvParam_t testConvolutionConfigs[] = { /* GFLOPS 0.006 x 1 = 0.006 */ {{1, 1}, {{1, 512, 7, 7}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 6428800.}, /* GFLOPS 0.006 x 1 = 0.006 */ {{1, 1}, {{1, 512, 14, 14}}, 32, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 6428800.}, /* GFLOPS 0.006 x 1 = 0.006 */ {{1, 1}, {{1, 512, 14, 14}}, 32, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 6428800.}, + /* GFLOPS 0.001 x 12 = 0.006 */ {{1, 1}, {{1, 64, 8, 8}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "VALID", true, 528384.}, /* GFLOPS 0.006 x 1 = 0.006 */ {{3, 3}, {{1, 256, 10, 10}}, 12, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 5530800.}, /* GFLOPS 0.005 x 1 = 0.005 */ {{1, 1}, {{1, 192, 12, 12}}, 96, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 5322240.}, /* GFLOPS 0.005 x 1 = 0.005 */ {{3, 3}, {{1, 128, 5, 5}}, 256, 1, {2, 2}, {1, 1}, {1, 1}, {0, 0}, "", true, 5310720.}, /* GFLOPS 0.005 x 1 = 0.005 */ {{3, 3}, {{1, 128, 5, 5}}, 256, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 5310720.}, /* GFLOPS 0.005 x 1 = 0.005 */ {{3, 3}, {{1, 128, 5, 5}}, 256, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 5310720.}, + /* GFLOPS 0.005 x 1 = 0.005 */ {{3, 3}, {{1, 128, 5, 5}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 5310720.}, /* GFLOPS 0.005 x 1 = 0.005 */ {{1, 1}, {{1, 1024, 10, 10}}, 24, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 4917600.}, /* GFLOPS 0.005 x 1 = 0.005 */ {{1, 1}, {{1, 1024, 10, 10}}, 24, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 4917600.}, /* GFLOPS 0.005 x 1 = 0.005 */ {{1, 1}, {{1, 192, 28, 28}}, 16, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 4829440.}, @@ -446,6 +633,7 @@ static const ConvParam_t testConvolutionConfigs[] = { /* GFLOPS 0.005 x 1 = 0.005 */ {{1, 1}, {{1, 256, 14, 14}}, 48, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 4826304.}, /* GFLOPS 0.005 x 1 = 0.005 */ {{1, 1}, {{1, 512, 14, 14}}, 24, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 4821600.}, /* GFLOPS 0.005 x 1 = 0.005 */ {{1, 1}, {{1, 508, 14, 14}}, 24, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 4783968.}, + /* GFLOPS 0.005 x 1 = 0.005 */ {{1, 1}, {{1, 64, 32, 32}}, 36, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "VALID", true, 4755456.}, /* GFLOPS 0.005 x 1 = 0.005 */ {{1, 1}, {{1, 64, 24, 24}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 4755456.}, /* GFLOPS 0.005 x 1 = 0.005 */ {{1, 1}, {{1, 256, 12, 12}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 4727808.}, /* GFLOPS 0.005 x 1 = 0.005 */ {{1, 1}, {{1, 1024, 3, 3}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 4720896.}, @@ -455,6 +643,7 @@ static const ConvParam_t testConvolutionConfigs[] = { /* GFLOPS 0.004 x 1 = 0.004 */ {{1, 1}, {{1, 16, 128, 256}}, 4, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 4325376.}, /* GFLOPS 0.004 x 1 = 0.004 */ {{1, 1}, {{1, 64, 64, 128}}, 4, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 4227072.}, /* GFLOPS 0.004 x 1 = 0.004 */ {{1, 1}, {{1, 832, 7, 7}}, 48, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 3916080.}, + /* GFLOPS 0.004 x 1 = 0.004 */ {{3, 3}, {{1, 256, 1, 1}}, 804, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 3705636.}, /* GFLOPS 0.004 x 1 = 0.004 */ {{5, 5}, {{1, 16, 12, 12}}, 32, 1, {1, 1}, {1, 1}, {2, 2}, {0, 0}, "", true, 3691008.}, /* GFLOPS 0.004 x 1 = 0.004 */ {{3, 3}, {{1, 64, 10, 10}}, 128, 1, {2, 2}, {1, 1}, {1, 1}, {0, 0}, "", true, 3689600.}, /* GFLOPS 0.004 x 1 = 0.004 */ {{5, 5}, {{1, 32, 6, 6}}, 64, 1, {1, 1}, {1, 1}, {2, 2}, {0, 0}, "", true, 3688704.}, @@ -470,6 +659,7 @@ static const ConvParam_t testConvolutionConfigs[] = { /* GFLOPS 0.003 x 1 = 0.003 */ {{1, 1}, {{1, 480, 14, 14}}, 16, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 3013696.}, /* GFLOPS 0.003 x 1 = 0.003 */ {{1, 1}, {{1, 320, 12, 12}}, 32, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 2953728.}, /* GFLOPS 0.003 x 1 = 0.003 */ {{1, 1}, {{1, 640, 6, 6}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 2951424.}, + /* GFLOPS 0.003 x 1 = 0.003 */ {{3, 3}, {{1, 256, 5, 5}}, 24, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 2765400.}, /* GFLOPS 0.003 x 1 = 0.003 */ {{3, 3}, {{1, 128, 5, 5}}, 128, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 2655360.}, /* GFLOPS 0.003 x 1 = 0.003 */ {{1, 1}, {{1, 832, 7, 7}}, 32, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 2610720.}, /* GFLOPS 0.003 x 1 = 0.003 */ {{1, 1}, {{1, 256, 3, 3}}, 546, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 2520882.}, @@ -482,32 +672,46 @@ static const ConvParam_t testConvolutionConfigs[] = { /* GFLOPS 0.002 x 1 = 0.002 */ {{1, 1}, {{1, 508, 4, 4}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 2082816.}, /* GFLOPS 0.002 x 1 = 0.002 */ {{1, 1}, {{1, 1024, 1, 1}}, 1000, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 2049000.}, /* GFLOPS 0.001 x 2 = 0.002 */ {{3, 3}, {{1, 256, 3, 3}}, 24, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 995544.}, - /* GFLOPS 0.001 x 2 = 0.002 */ {{3, 3}, {{1, 128, 5, 5}}, 16, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 922000.}, /* GFLOPS 0.002 x 1 = 0.002 */ {{1, 1}, {{1, 1024, 3, 3}}, 96, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 1770336.}, + /* GFLOPS 0.002 x 1 = 0.002 */ {{1, 1}, {{1, 64, 4, 4}}, 810, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "VALID", true, 1671840.}, + /* GFLOPS 0.002 x 1 = 0.002 */ {{1, 1}, {{1, 32, 80, 80}}, 4, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 1664000.}, + /* GFLOPS 0.002 x 1 = 0.002 */ {{1, 1}, {{1, 256, 5, 5}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 1641600.}, /* GFLOPS 0.001 x 1 = 0.001 */ {{1, 1}, {{1, 640, 6, 6}}, 32, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 1475712.}, /* GFLOPS 0.001 x 1 = 0.001 */ {{3, 3}, {{1, 128, 5, 5}}, 24, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 1383000.}, + /* GFLOPS 0.001 x 1 = 0.001 */ {{3, 3}, {{1, 64, 5, 5}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 1328256.}, /* GFLOPS 0.001 x 1 = 0.001 */ {{1, 1}, {{1, 736, 3, 3}}, 96, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 1272672.}, + /* GFLOPS 0.001 x 1 = 0.001 */ {{1, 1}, {{1, 64, 16, 16}}, 36, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "VALID", true, 1188864.}, + /* GFLOPS 0.000 x 9 = 0.001 */ {{1, 1}, {{1, 64, 4, 4}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "VALID", true, 132096.}, + /* GFLOPS 0.001 x 2 = 0.001 */ {{1, 1}, {{1, 256, 3, 3}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 590976.}, /* GFLOPS 0.001 x 2 = 0.001 */ {{1, 1}, {{1, 256, 3, 3}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 590976.}, /* GFLOPS 0.001 x 1 = 0.001 */ {{3, 3}, {{1, 128, 3, 3}}, 128, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 1180160.}, /* GFLOPS 0.001 x 1 = 0.001 */ {{1, 1}, {{1, 256, 2, 2}}, 546, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 1120392.}, - /* GFLOPS 0.000 x 2 = 0.001 */ {{3, 3}, {{1, 128, 5, 5}}, 8, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 461000.}, /* GFLOPS 0.001 x 1 = 0.001 */ {{1, 1}, {{1, 192, 12, 12}}, 16, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 887040.}, /* GFLOPS 0.000 x 2 = 0.001 */ {{3, 3}, {{1, 256, 2, 2}}, 24, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 442464.}, - /* GFLOPS 0.000 x 2 = 0.001 */ {{1, 1}, {{1, 128, 5, 5}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 411200.}, + /* GFLOPS 0.000 x 2 = 0.001 */ {{1, 1}, {{1, 32, 80, 80}}, 1, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 416000.}, /* GFLOPS 0.001 x 1 = 0.001 */ {{3, 3}, {{1, 128, 5, 5}}, 12, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 691500.}, + /* GFLOPS 0.001 x 1 = 0.001 */ {{3, 3}, {{1, 256, 3, 3}}, 16, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 663696.}, /* GFLOPS 0.001 x 1 = 0.001 */ {{1, 1}, {{1, 640, 2, 2}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 655872.}, /* GFLOPS 0.001 x 1 = 0.001 */ {{1, 1}, {{1, 512, 5, 5}}, 24, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 615000.}, /* GFLOPS 0.001 x 1 = 0.001 */ {{1, 1}, {{1, 512, 5, 5}}, 24, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 615000.}, /* GFLOPS 0.001 x 1 = 0.001 */ {{1, 1}, {{1, 128, 3, 3}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 592128.}, - /* GFLOPS 0.001 x 1 = 0.001 */ {{1, 1}, {{1, 256, 3, 3}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 590976.}, /* GFLOPS 0.001 x 1 = 0.001 */ {{1, 1}, {{1, 256, 3, 3}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 590976.}, + /* GFLOPS 0.001 x 1 = 0.001 */ {{3, 3}, {{1, 128, 3, 3}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 590080.}, /* GFLOPS 0.001 x 1 = 0.001 */ {{1, 1}, {{1, 256, 3, 3}}, 126, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 581742.}, /* GFLOPS 0.001 x 1 = 0.001 */ {{1, 1}, {{1, 256, 4, 4}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 525312.}, + /* GFLOPS 0.000 x 4 = 0.000 */ {{1, 1}, {{1, 48, 1, 1}}, 1152, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 111744.}, + /* GFLOPS 0.000 x 4 = 0.000 */ {{1, 1}, {{1, 1152, 1, 1}}, 48, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 110640.}, + /* GFLOPS 0.000 x 1 = 0.000 */ {{1, 1}, {{1, 128, 5, 5}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 411200.}, + /* GFLOPS 0.000 x 1 = 0.000 */ {{3, 3}, {{1, 128, 3, 3}}, 16, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 331920.}, /* GFLOPS 0.000 x 1 = 0.000 */ {{1, 1}, {{1, 192, 5, 5}}, 32, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 308000.}, + /* GFLOPS 0.000 x 1 = 0.000 */ {{1, 1}, {{1, 64, 8, 8}}, 36, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "VALID", true, 297216.}, /* GFLOPS 0.000 x 1 = 0.000 */ {{1, 1}, {{1, 128, 2, 2}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 263168.}, /* GFLOPS 0.000 x 2 = 0.000 */ {{1, 1}, {{1, 256, 2, 2}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 131328.}, /* GFLOPS 0.000 x 1 = 0.000 */ {{1, 1}, {{1, 256, 2, 2}}, 126, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 258552.}, /* GFLOPS 0.000 x 1 = 0.000 */ {{1, 1}, {{1, 1024, 1, 1}}, 96, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 196704.}, + /* GFLOPS 0.000 x 1 = 0.000 */ {{3, 3}, {{1, 128, 3, 3}}, 8, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 165960.}, + /* GFLOPS 0.000 x 1 = 0.000 */ {{1, 1}, {{1, 128, 3, 3}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 148032.}, + /* GFLOPS 0.000 x 1 = 0.000 */ {{3, 3}, {{1, 64, 3, 3}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 147584.}, /* GFLOPS 0.000 x 1 = 0.000 */ {{3, 3}, {{1, 64, 2, 2}}, 128, 1, {2, 2}, {1, 1}, {1, 1}, {0, 0}, "", true, 147584.}, /* GFLOPS 0.000 x 1 = 0.000 */ {{3, 3}, {{1, 64, 2, 2}}, 128, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 147584.}, /* GFLOPS 0.000 x 1 = 0.000 */ {{3, 3}, {{1, 64, 2, 2}}, 128, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 147584.}, @@ -515,16 +719,32 @@ static const ConvParam_t testConvolutionConfigs[] = { /* GFLOPS 0.000 x 1 = 0.000 */ {{1, 1}, {{1, 128, 1, 1}}, 546, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 140322.}, /* GFLOPS 0.000 x 1 = 0.000 */ {{1, 1}, {{1, 256, 2, 2}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 131328.}, /* GFLOPS 0.000 x 1 = 0.000 */ {{1, 1}, {{1, 256, 2, 2}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 131328.}, + /* GFLOPS 0.000 x 3 = 0.000 */ {{1, 1}, {{1, 28, 1, 1}}, 672, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 38304.}, + /* GFLOPS 0.000 x 3 = 0.000 */ {{1, 1}, {{1, 672, 1, 1}}, 28, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 37660.}, /* GFLOPS 0.000 x 1 = 0.000 */ {{1, 1}, {{1, 256, 3, 3}}, 24, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 110808.}, /* GFLOPS 0.000 x 1 = 0.000 */ {{1, 1}, {{1, 256, 3, 3}}, 24, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 110808.}, /* GFLOPS 0.000 x 2 = 0.000 */ {{3, 3}, {{1, 128, 1, 1}}, 24, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 55320.}, + /* GFLOPS 0.000 x 1 = 0.000 */ {{1, 1}, {{1, 64, 4, 4}}, 36, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "VALID", true, 74304.}, /* GFLOPS 0.000 x 1 = 0.000 */ {{3, 3}, {{1, 64, 2, 2}}, 64, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 73792.}, + /* GFLOPS 0.000 x 1 = 0.000 */ {{3, 3}, {{1, 256, 1, 1}}, 16, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 73744.}, + /* GFLOPS 0.000 x 3 = 0.000 */ {{1, 1}, {{1, 20, 1, 1}}, 480, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 19680.}, + /* GFLOPS 0.000 x 3 = 0.000 */ {{1, 1}, {{1, 480, 1, 1}}, 20, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 19220.}, /* GFLOPS 0.000 x 1 = 0.000 */ {{1, 1}, {{1, 256, 2, 2}}, 24, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 49248.}, /* GFLOPS 0.000 x 1 = 0.000 */ {{1, 1}, {{1, 256, 2, 2}}, 24, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 49248.}, + /* GFLOPS 0.000 x 1 = 0.000 */ {{3, 3}, {{1, 128, 1, 1}}, 16, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 36880.}, /* GFLOPS 0.000 x 1 = 0.000 */ {{1, 1}, {{1, 128, 1, 1}}, 126, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 32382.}, + /* GFLOPS 0.000 x 1 = 0.000 */ {{3, 3}, {{1, 128, 1, 1}}, 8, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 18440.}, /* GFLOPS 0.000 x 1 = 0.000 */ {{1, 1}, {{1, 64, 1, 1}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 16512.}, + /* GFLOPS 0.000 x 2 = 0.000 */ {{1, 1}, {{1, 10, 1, 1}}, 240, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 5040.}, + /* GFLOPS 0.000 x 2 = 0.000 */ {{1, 1}, {{1, 240, 1, 1}}, 10, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 4810.}, /* GFLOPS 0.000 x 1 = 0.000 */ {{1, 1}, {{1, 128, 1, 1}}, 24, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 6168.}, - /* GFLOPS 0.000 x 1 = 0.000 */ {{1, 1}, {{1, 128, 1, 1}}, 24, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 6168.} + /* GFLOPS 0.000 x 1 = 0.000 */ {{1, 1}, {{1, 128, 1, 1}}, 24, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 6168.}, + /* GFLOPS 0.000 x 2 = 0.000 */ {{1, 1}, {{1, 6, 1, 1}}, 144, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 1872.}, + /* GFLOPS 0.000 x 2 = 0.000 */ {{1, 1}, {{1, 144, 1, 1}}, 6, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 1734.}, + /* GFLOPS 0.000 x 1 = 0.000 */ {{1, 1}, {{1, 4, 1, 1}}, 96, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 864.}, + /* GFLOPS 0.000 x 1 = 0.000 */ {{1, 1}, {{1, 96, 1, 1}}, 4, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 772.}, + /* GFLOPS 0.000 x 1 = 0.000 */ {{1, 1}, {{1, 8, 1, 1}}, 32, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 544.}, + /* GFLOPS 0.000 x 1 = 0.000 */ {{1, 1}, {{1, 32, 1, 1}}, 8, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 520.} }; struct ConvParamID { From 9d1e8b1e1ddef6b591d9cce41b625a0c8fb992a5 Mon Sep 17 00:00:00 2001 From: Suleyman TURKMEN Date: Sat, 11 Sep 2021 22:58:45 +0300 Subject: [PATCH 02/12] Update convexhull.cpp --- samples/cpp/convexhull.cpp | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/samples/cpp/convexhull.cpp b/samples/cpp/convexhull.cpp index bb34be2469..d839b8061f 100644 --- a/samples/cpp/convexhull.cpp +++ b/samples/cpp/convexhull.cpp @@ -38,23 +38,14 @@ int main( int argc, char** argv ) points.push_back(pt); } - vector hull; - convexHull(Mat(points), hull, true); + vector hull; + convexHull(points, hull, true); img = Scalar::all(0); for( i = 0; i < count; i++ ) circle(img, points[i], 3, Scalar(0, 0, 255), FILLED, LINE_AA); - int hullcount = (int)hull.size(); - Point pt0 = points[hull[hullcount-1]]; - - for( i = 0; i < hullcount; i++ ) - { - Point pt = points[hull[i]]; - line(img, pt0, pt, Scalar(0, 255, 0), 1,LINE_AA); - pt0 = pt; - } - + polylines(img, hull, true, Scalar(0, 255, 0), 1, LINE_AA); imshow("hull", img); char key = (char)waitKey(); From 6e66a9222a071892ea9f1c9c7951fdcc85e08f7b Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Sat, 11 Sep 2021 22:26:52 +0000 Subject: [PATCH 03/12] dnn(onnx): fix format specifier --- modules/dnn/src/onnx/onnx_importer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/dnn/src/onnx/onnx_importer.cpp b/modules/dnn/src/onnx/onnx_importer.cpp index 955c79c0fa..2cd36dc94d 100644 --- a/modules/dnn/src/onnx/onnx_importer.cpp +++ b/modules/dnn/src/onnx/onnx_importer.cpp @@ -443,7 +443,7 @@ void ONNXImporter::expandMid(const std::string& prefix, opencv_onnx::NodeProto& for (size_t j = 0; j < n; j++) { LayerParams copyLP; - copyLP.name = format("%s/copy_%d", prefix.c_str(), j); + copyLP.name = format("%s/copy_%d", prefix.c_str(), (int)j); copyLP.type = "Identity"; CV_Assert((layer_id.find(copyLP.name) == layer_id.end()) && "Couldn't copy the node: generated name already exists in the graph."); From 1618c963e4c89a816dfa6bfebb3e94764b357d61 Mon Sep 17 00:00:00 2001 From: Alexander Panov Date: Mon, 13 Sep 2021 19:27:00 +0300 Subject: [PATCH 04/12] Merge pull request #20676 from AleksandrPanov:delete_createConvexHull_convertTo * deleted dublicated createConvexHull and convertTo * replaced checkVector(2) with points.empty() --- .../imgproc/src/min_enclosing_triangle.cpp | 22 ++----------------- 1 file changed, 2 insertions(+), 20 deletions(-) diff --git a/modules/imgproc/src/min_enclosing_triangle.cpp b/modules/imgproc/src/min_enclosing_triangle.cpp index 4853a755d9..2651871412 100644 --- a/modules/imgproc/src/min_enclosing_triangle.cpp +++ b/modules/imgproc/src/min_enclosing_triangle.cpp @@ -129,7 +129,6 @@ static bool areOnTheSameSideOfLine(const cv::Point2f &p1, const cv::Point2f &p2, static double areaOfTriangle(const cv::Point2f &a, const cv::Point2f &b, const cv::Point2f &c); -static void createConvexHull(cv::InputArray points, std::vector &polygon); static double distanceBtwPoints(const cv::Point2f &a, const cv::Point2f &b); @@ -319,29 +318,12 @@ namespace minEnclosingTriangle { static void findMinEnclosingTriangle(cv::InputArray points, CV_OUT cv::OutputArray triangle, CV_OUT double &area) { std::vector resultingTriangle, polygon; - - createConvexHull(points, polygon); + CV_Assert(!points.empty()); + convexHull(points, polygon, true, true); findMinEnclosingTriangle(polygon, resultingTriangle, area); cv::Mat(resultingTriangle).copyTo(triangle); } -//! Create the convex hull of the given set of points -/*! -* @param points The provided set of points -* @param polygon The polygon representing the convex hull of the points -*/ -static void createConvexHull(cv::InputArray points, std::vector &polygon) { - cv::Mat pointsMat = points.getMat(); - std::vector pointsVector; - - CV_Assert((pointsMat.checkVector(2) > 0) && - ((pointsMat.depth() == CV_32F) || (pointsMat.depth() == CV_32S))); - - pointsMat.convertTo(pointsVector, CV_32F); - - convexHull(pointsVector, polygon, true, true); -} - //! Find the minimum enclosing triangle and its area /*! * The overall complexity of the algorithm is theta(n) where "n" represents the number From 3385d386481962fb4b0619780c87fbf1c78d5c81 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Tue, 14 Sep 2021 04:15:35 +0000 Subject: [PATCH 05/12] cmake: fix handling of INF_ENGINE_RELEASE - default value should be handled earlier --- cmake/OpenCVDetectInferenceEngine.cmake | 27 +++++++++++++------------ 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/cmake/OpenCVDetectInferenceEngine.cmake b/cmake/OpenCVDetectInferenceEngine.cmake index 41951b710a..35640fa719 100644 --- a/cmake/OpenCVDetectInferenceEngine.cmake +++ b/cmake/OpenCVDetectInferenceEngine.cmake @@ -105,6 +105,20 @@ if(InferenceEngine_FOUND) message(STATUS "Detected InferenceEngine: cmake package (${InferenceEngine_VERSION})") endif() +if(DEFINED InferenceEngine_VERSION) + message(STATUS "InferenceEngine: ${InferenceEngine_VERSION}") + if(NOT INF_ENGINE_RELEASE AND NOT (InferenceEngine_VERSION VERSION_LESS "2021.4")) + math(EXPR INF_ENGINE_RELEASE_INIT "${InferenceEngine_VERSION_MAJOR} * 1000000 + ${InferenceEngine_VERSION_MINOR} * 10000 + ${InferenceEngine_VERSION_PATCH} * 100") + endif() +endif() +if(NOT INF_ENGINE_RELEASE AND NOT INF_ENGINE_RELEASE_INIT) + message(STATUS "WARNING: InferenceEngine version has not been set, 2021.4.1 will be used by default. Set INF_ENGINE_RELEASE variable if you experience build errors.") + set(INF_ENGINE_RELEASE_INIT "2021040100") +elseif(DEFINED INF_ENGINE_RELEASE) + set(INF_ENGINE_RELEASE_INIT "${INF_ENGINE_RELEASE}") +endif() +set(INF_ENGINE_RELEASE "${INF_ENGINE_RELEASE_INIT}" CACHE STRING "Force IE version, should be in form YYYYAABBCC (e.g. 2020.1.0.2 -> 2020010002)") + if(NOT INF_ENGINE_TARGET AND INF_ENGINE_LIB_DIRS AND INF_ENGINE_INCLUDE_DIRS) find_path(ie_custom_inc "inference_engine.hpp" PATHS "${INF_ENGINE_INCLUDE_DIRS}" NO_DEFAULT_PATH) if(CMAKE_BUILD_TYPE STREQUAL "Debug") @@ -140,19 +154,6 @@ endif() # Add more features to the target if(INF_ENGINE_TARGET) - if(DEFINED InferenceEngine_VERSION) - message(STATUS "InferenceEngine: ${InferenceEngine_VERSION}") - if(NOT INF_ENGINE_RELEASE AND NOT (InferenceEngine_VERSION VERSION_LESS "2021.4")) - math(EXPR INF_ENGINE_RELEASE_INIT "${InferenceEngine_VERSION_MAJOR} * 1000000 + ${InferenceEngine_VERSION_MINOR} * 10000 + ${InferenceEngine_VERSION_PATCH} * 100") - endif() - endif() - if(NOT INF_ENGINE_RELEASE AND NOT INF_ENGINE_RELEASE_INIT) - message(WARNING "InferenceEngine version has not been set, 2021.4.1 will be used by default. Set INF_ENGINE_RELEASE variable if you experience build errors.") - set(INF_ENGINE_RELEASE_INIT "2021040100") - elseif(DEFINED INF_ENGINE_RELEASE) - set(INF_ENGINE_RELEASE_INIT "${INF_ENGINE_RELEASE}") - endif() - set(INF_ENGINE_RELEASE "${INF_ENGINE_RELEASE_INIT}" CACHE STRING "Force IE version, should be in form YYYYAABBCC (e.g. 2020.1.0.2 -> 2020010002)") set_target_properties(${INF_ENGINE_TARGET} PROPERTIES INTERFACE_COMPILE_DEFINITIONS "HAVE_INF_ENGINE=1;INF_ENGINE_RELEASE=${INF_ENGINE_RELEASE}" ) From c410d7a97d03f2749d737b9e9232cd738fe09ed2 Mon Sep 17 00:00:00 2001 From: rogday Date: Tue, 14 Sep 2021 20:49:49 +0300 Subject: [PATCH 06/12] Merge pull request #20671 from rogday:yolov4x-mish Add support for YOLOv4x-mish * backport to 3.4 for supporting yolov4x-mish * add YOLOv4x-mish test * address review comments Co-authored-by: Guo Xu --- modules/dnn/src/darknet/darknet_io.cpp | 6 +- modules/dnn/src/layers/region_layer.cpp | 88 +++++++++++++++------- modules/dnn/test/test_darknet_importer.cpp | 72 ++++++++++++++++++ 3 files changed, 135 insertions(+), 31 deletions(-) diff --git a/modules/dnn/src/darknet/darknet_io.cpp b/modules/dnn/src/darknet/darknet_io.cpp index 4915538ff7..99715df829 100644 --- a/modules/dnn/src/darknet/darknet_io.cpp +++ b/modules/dnn/src/darknet/darknet_io.cpp @@ -470,7 +470,7 @@ namespace cv { fused_layer_names.push_back(last_layer); } - void setYolo(int classes, const std::vector& mask, const std::vector& anchors, float thresh, float nms_threshold, float scale_x_y) + void setYolo(int classes, const std::vector& mask, const std::vector& anchors, float thresh, float nms_threshold, float scale_x_y, int new_coords) { cv::dnn::LayerParams region_param; region_param.name = "Region-name"; @@ -484,6 +484,7 @@ namespace cv { region_param.set("thresh", thresh); region_param.set("nms_threshold", nms_threshold); region_param.set("scale_x_y", scale_x_y); + region_param.set("new_coords", new_coords); std::vector usedAnchors(numAnchors * 2); for (int i = 0; i < numAnchors; ++i) @@ -882,6 +883,7 @@ namespace cv { float thresh = getParam(layer_params, "thresh", 0.2); float nms_threshold = getParam(layer_params, "nms_threshold", 0.0); float scale_x_y = getParam(layer_params, "scale_x_y", 1.0); + int new_coords = getParam(layer_params, "new_coords", 0); std::string anchors_values = getParam(layer_params, "anchors", std::string()); CV_Assert(!anchors_values.empty()); @@ -894,7 +896,7 @@ namespace cv { CV_Assert(classes > 0 && num_of_anchors > 0 && (num_of_anchors * 2) == anchors_vec.size()); setParams.setPermute(false); - setParams.setYolo(classes, mask_vec, anchors_vec, thresh, nms_threshold, scale_x_y); + setParams.setYolo(classes, mask_vec, anchors_vec, thresh, nms_threshold, scale_x_y, new_coords); } else { CV_Error(cv::Error::StsParseError, "Unknown layer type: " + layer_type); diff --git a/modules/dnn/src/layers/region_layer.cpp b/modules/dnn/src/layers/region_layer.cpp index 4a8cb724d6..578b0d7dec 100644 --- a/modules/dnn/src/layers/region_layer.cpp +++ b/modules/dnn/src/layers/region_layer.cpp @@ -64,6 +64,7 @@ class RegionLayerImpl CV_FINAL : public RegionLayer public: int coords, classes, anchors, classfix; float thresh, nmsThreshold, scale_x_y; + int new_coords; bool useSoftmax, useLogistic; #ifdef HAVE_OPENCL UMat blob_umat; @@ -83,6 +84,7 @@ public: useLogistic = params.get("logistic", false); nmsThreshold = params.get("nms_threshold", 0.4); scale_x_y = params.get("scale_x_y", 1.0); // Yolov4 + new_coords = params.get("new_coords", 0); // Yolov4x-mish CV_Assert(nmsThreshold >= 0.); CV_Assert(coords == 4); @@ -113,7 +115,7 @@ public: { #ifdef HAVE_DNN_NGRAPH if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) - return INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2020_2) && preferableTarget != DNN_TARGET_MYRIAD; + return INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2020_2) && preferableTarget != DNN_TARGET_MYRIAD && new_coords == 0; #endif return backendId == DNN_BACKEND_OPENCV; } @@ -259,26 +261,28 @@ public: const float *srcData = inpBlob.ptr(); float *dstData = outBlob.ptr(); - // logistic activation for t0, for each grid cell (X x Y x Anchor-index) - for (int i = 0; i < batch_size*rows*cols*anchors; ++i) { - int index = cell_size*i; - float x = srcData[index + 4]; - dstData[index + 4] = logistic_activate(x); // logistic activation - } - - if (useSoftmax) { // Yolo v2 + if (new_coords == 0) { + // logistic activation for t0, for each grid cell (X x Y x Anchor-index) for (int i = 0; i < batch_size*rows*cols*anchors; ++i) { int index = cell_size*i; - softmax_activate(srcData + index + 5, classes, 1, dstData + index + 5); + float x = srcData[index + 4]; + dstData[index + 4] = logistic_activate(x); // logistic activation } - } - else if (useLogistic) { // Yolo v3 - for (int i = 0; i < batch_size*rows*cols*anchors; ++i){ - int index = cell_size*i; - const float* input = srcData + index + 5; - float* output = dstData + index + 5; - for (int c = 0; c < classes; ++c) - output[c] = logistic_activate(input[c]); + + if (useSoftmax) { // Yolo v2 + for (int i = 0; i < batch_size*rows*cols*anchors; ++i) { + int index = cell_size*i; + softmax_activate(srcData + index + 5, classes, 1, dstData + index + 5); + } + } + else if (useLogistic) { // Yolo v3 + for (int i = 0; i < batch_size*rows*cols*anchors; ++i){ + int index = cell_size*i; + const float* input = srcData + index + 5; + float* output = dstData + index + 5; + for (int c = 0; c < classes; ++c) + output[c] = logistic_activate(input[c]); + } } } for (int b = 0; b < batch_size; ++b) @@ -290,20 +294,46 @@ public: int index = (y*cols + x)*anchors + a; // index for each grid-cell & anchor int p_index = index_sample_offset + index * cell_size + 4; float scale = dstData[p_index]; - if (classfix == -1 && scale < .5) scale = 0; // if(t0 < 0.5) t0 = 0; + if (classfix == -1 && scale < .5) + { + scale = 0; // if(t0 < 0.5) t0 = 0; + } int box_index = index_sample_offset + index * cell_size; - float x_tmp = (logistic_activate(srcData[box_index + 0]) - 0.5f) * scale_x_y + 0.5f; - float y_tmp = (logistic_activate(srcData[box_index + 1]) - 0.5f) * scale_x_y + 0.5f; - dstData[box_index + 0] = (x + x_tmp) / cols; - dstData[box_index + 1] = (y + y_tmp) / rows; - dstData[box_index + 2] = exp(srcData[box_index + 2]) * biasData[2 * a] / wNorm; - dstData[box_index + 3] = exp(srcData[box_index + 3]) * biasData[2 * a + 1] / hNorm; + if (new_coords == 1) { + float x_tmp = (srcData[box_index + 0] - 0.5f) * scale_x_y + 0.5f; + float y_tmp = (srcData[box_index + 1] - 0.5f) * scale_x_y + 0.5f; + dstData[box_index + 0] = (x + x_tmp) / cols; + dstData[box_index + 1] = (y + y_tmp) / rows; + dstData[box_index + 2] = (srcData[box_index + 2]) * (srcData[box_index + 2]) * 4 * biasData[2 * a] / wNorm; + dstData[box_index + 3] = (srcData[box_index + 3]) * (srcData[box_index + 3]) * 4 * biasData[2 * a + 1] / hNorm; - int class_index = index_sample_offset + index * cell_size + 5; - for (int j = 0; j < classes; ++j) { - float prob = scale*dstData[class_index + j]; // prob = IoU(box, object) = t0 * class-probability - dstData[class_index + j] = (prob > thresh) ? prob : 0; // if (IoU < threshold) IoU = 0; + scale = srcData[p_index]; + if (classfix == -1 && scale < thresh) + { + scale = 0; // if(t0 < 0.5) t0 = 0; + } + + int class_index = index_sample_offset + index * cell_size + 5; + for (int j = 0; j < classes; ++j) { + float prob = scale*srcData[class_index + j]; // prob = IoU(box, object) = t0 * class-probability + dstData[class_index + j] = (prob > thresh) ? prob : 0; // if (IoU < threshold) IoU = 0; + } + } + else + { + float x_tmp = (logistic_activate(srcData[box_index + 0]) - 0.5f) * scale_x_y + 0.5f; + float y_tmp = (logistic_activate(srcData[box_index + 1]) - 0.5f) * scale_x_y + 0.5f; + dstData[box_index + 0] = (x + x_tmp) / cols; + dstData[box_index + 1] = (y + y_tmp) / rows; + dstData[box_index + 2] = exp(srcData[box_index + 2]) * biasData[2 * a] / wNorm; + dstData[box_index + 3] = exp(srcData[box_index + 3]) * biasData[2 * a + 1] / hNorm; + + int class_index = index_sample_offset + index * cell_size + 5; + for (int j = 0; j < classes; ++j) { + float prob = scale*dstData[class_index + j]; // prob = IoU(box, object) = t0 * class-probability + dstData[class_index + j] = (prob > thresh) ? prob : 0; // if (IoU < threshold) IoU = 0; + } } } if (nmsThreshold > 0) { diff --git a/modules/dnn/test/test_darknet_importer.cpp b/modules/dnn/test/test_darknet_importer.cpp index 9983d99ef5..ea700573e6 100644 --- a/modules/dnn/test/test_darknet_importer.cpp +++ b/modules/dnn/test/test_darknet_importer.cpp @@ -681,6 +681,78 @@ TEST_P(Test_Darknet_nets, YOLOv4_tiny) #endif } +TEST_P(Test_Darknet_nets, YOLOv4x_mish) +{ + applyTestTag(CV_TEST_TAG_LONG, (target == DNN_TARGET_CPU ? CV_TEST_TAG_MEMORY_1GB : CV_TEST_TAG_MEMORY_2GB)); + +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2020040000) // nGraph compilation failure + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#endif +#if defined(INF_ENGINE_RELEASE) + if (target == DNN_TARGET_MYRIAD) // NC_OUT_OF_MEMORY + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#endif + + // batchId, classId, confidence, left, top, right, bottom + const int N0 = 3; + const int N1 = 5; + static const float ref_[/* (N0 + N1) * 7 */] = { +0, 16, 0.925536f, 0.17188f, 0.386832f, 0.406138f, 0.941696f, +0, 1, 0.912028f, 0.162125f, 0.208863f, 0.741316f, 0.729332f, +0, 7, 0.841018f, 0.608953f, 0.128653f, 0.900692f, 0.295657f, + +1, 2, 0.925697f, 0.650438f, 0.458118f, 0.813927f, 0.661775f, +1, 0, 0.882156f, 0.203644f, 0.365763f, 0.265473f, 0.632195f, +1, 2, 0.848857f, 0.451044f, 0.462997f, 0.496629f, 0.522719f, +1, 9, 0.736015f, 0.374503f, 0.316029f, 0.399358f, 0.392883f, +1, 9, 0.727129f, 0.662469f, 0.373687f, 0.687877f, 0.441335f, + }; + Mat ref(N0 + N1, 7, CV_32FC1, (void*)ref_); + + double scoreDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.006 : 8e-5; + double iouDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.042 : 3e-4; + + std::string config_file = "yolov4x-mish.cfg"; + std::string weights_file = "yolov4x-mish.weights"; + +#if defined(INF_ENGINE_RELEASE) + if ((backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || + backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && target == DNN_TARGET_MYRIAD && + getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X) + { + scoreDiff = 0.04; + iouDiff = 0.2; + } +#endif + + { + SCOPED_TRACE("batch size 1"); + testDarknetModel(config_file, weights_file, ref.rowRange(0, N0), scoreDiff, iouDiff); + } + + { + SCOPED_TRACE("batch size 2"); + +#if defined(INF_ENGINE_RELEASE) + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) + { + if (target == DNN_TARGET_OPENCL) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + else if (target == DNN_TARGET_OPENCL_FP16 && INF_ENGINE_VER_MAJOR_LE(202010000)) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + else if (target == DNN_TARGET_MYRIAD && + getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X); + } +#endif + + testDarknetModel(config_file, weights_file, ref, scoreDiff, iouDiff); + } +} + INSTANTIATE_TEST_CASE_P(/**/, Test_Darknet_nets, dnnBackendsAndTargets()); From 9c5d7716e273284d4973eefdef079436c7caaa9d Mon Sep 17 00:00:00 2001 From: SamFC10 Date: Fri, 17 Sep 2021 17:40:57 +0530 Subject: [PATCH 07/12] fix for unsqueeze opset version 13 --- modules/dnn/src/onnx/onnx_importer.cpp | 12 ++++++++++-- modules/dnn/test/test_onnx_importer.cpp | 1 + 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/modules/dnn/src/onnx/onnx_importer.cpp b/modules/dnn/src/onnx/onnx_importer.cpp index 2cd36dc94d..5343c05361 100644 --- a/modules/dnn/src/onnx/onnx_importer.cpp +++ b/modules/dnn/src/onnx/onnx_importer.cpp @@ -1693,8 +1693,16 @@ void ONNXImporter::parseFlatten(LayerParams& layerParams, const opencv_onnx::Nod void ONNXImporter::parseUnsqueeze(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) { - CV_Assert(node_proto.input_size() == 1); - DictValue axes = layerParams.get("axes"); + CV_Assert(node_proto.input_size() == 1 || node_proto.input_size() == 2); + DictValue axes; + if (node_proto.input_size() == 2) + { + Mat blob = getBlob(node_proto, 1); + axes = DictValue::arrayInt(blob.ptr(), blob.total()); + } + else + axes = layerParams.get("axes"); + if (constBlobs.find(node_proto.input(0)) != constBlobs.end()) { // Constant input. diff --git a/modules/dnn/test/test_onnx_importer.cpp b/modules/dnn/test/test_onnx_importer.cpp index f55510ec7b..b4ecd4601c 100644 --- a/modules/dnn/test/test_onnx_importer.cpp +++ b/modules/dnn/test/test_onnx_importer.cpp @@ -605,6 +605,7 @@ TEST_P(Test_ONNX_layers, DynamicReshape) TEST_P(Test_ONNX_layers, Reshape) { testONNXModels("unsqueeze"); + testONNXModels("unsqueeze_opset_13"); } TEST_P(Test_ONNX_layers, Squeeze) From f7b4b750d8930b5bb6696cea6d609dc70a0597db Mon Sep 17 00:00:00 2001 From: mikael Date: Tue, 21 Sep 2021 19:46:33 +0200 Subject: [PATCH 08/12] Detect FP16 on FreeBSD aarch64 --- modules/core/src/system.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/core/src/system.cpp b/modules/core/src/system.cpp index d8b8f67559..df9e8a0ce7 100644 --- a/modules/core/src/system.cpp +++ b/modules/core/src/system.cpp @@ -533,7 +533,7 @@ struct HWFeatures } #endif // CV_CPUID_X86 - #if defined __ANDROID__ || defined __linux__ + #if defined __ANDROID__ || defined __linux__ || defined __FreeBSD__ #ifdef __aarch64__ have[CV_CPU_NEON] = true; have[CV_CPU_FP16] = true; @@ -559,7 +559,7 @@ struct HWFeatures CV_LOG_INFO(NULL, "- FP16 instructions is NOT enabled via build flags"); #endif #endif - #elif defined __arm__ + #elif defined __arm__ && !defined __FreeBSD__ int cpufile = open("/proc/self/auxv", O_RDONLY); if (cpufile >= 0) From 86a51015b1b5e1d2c555aff6abdf0f50d4ed3f5a Mon Sep 17 00:00:00 2001 From: Amir Tulegenov Date: Wed, 22 Sep 2021 20:42:32 +0600 Subject: [PATCH 09/12] Merge pull request #19554 from amirtu:OCV-215_cvtColorTwoPlane_wrong_output_when_Y_Plane_Mat_has_step different paddings in cvtColorTwoPlane() for biplane YUV420 * Different paddings support in cvtColorTwoPlane() for biplane YUV420 * Build fix for dispatch case. * Resoted old behaviour for y.step==uv.step to exclude perf regressions. Co-authored-by: amir.tulegenov Co-authored-by: Alexander Smorkalov --- 3rdparty/carotene/hal/tegra_hal.hpp | 39 ++++++++------- 3rdparty/openvx/hal/openvx_hal.cpp | 14 ++++-- 3rdparty/openvx/hal/openvx_hal.hpp | 3 ++ .../include/opencv2/imgproc/hal/hal.hpp | 5 ++ modules/imgproc/src/color_yuv.dispatch.cpp | 40 +++++++++++---- modules/imgproc/src/color_yuv.simd.hpp | 49 ++++++++----------- modules/imgproc/src/hal_replacement.hpp | 35 +++++++++++++ modules/imgproc/test/test_color.cpp | 34 +++++++++---- 8 files changed, 149 insertions(+), 70 deletions(-) diff --git a/3rdparty/carotene/hal/tegra_hal.hpp b/3rdparty/carotene/hal/tegra_hal.hpp index 2dfa30b619..7fcca975fe 100644 --- a/3rdparty/carotene/hal/tegra_hal.hpp +++ b/3rdparty/carotene/hal/tegra_hal.hpp @@ -1778,30 +1778,30 @@ TegraCvtColor_Invoker(bgrx2hsvf, bgrx2hsv, src_data + static_cast(range. : CV_HAL_ERROR_NOT_IMPLEMENTED \ ) -#define TEGRA_CVT2PYUVTOBGR(src_data, src_step, dst_data, dst_step, dst_width, dst_height, dcn, swapBlue, uIdx) \ +#define TEGRA_CVT2PYUVTOBGR_EX(y_data, y_step, uv_data, uv_step, dst_data, dst_step, dst_width, dst_height, dcn, swapBlue, uIdx) \ ( \ CAROTENE_NS::isSupportedConfiguration() ? \ dcn == 3 ? \ uIdx == 0 ? \ (swapBlue ? \ CAROTENE_NS::yuv420i2rgb(CAROTENE_NS::Size2D(dst_width, dst_height), \ - src_data, src_step, \ - src_data + src_step * dst_height, src_step, \ + y_data, y_step, \ + uv_data, uv_step, \ dst_data, dst_step) : \ CAROTENE_NS::yuv420i2bgr(CAROTENE_NS::Size2D(dst_width, dst_height), \ - src_data, src_step, \ - src_data + src_step * dst_height, src_step, \ + y_data, y_step, \ + uv_data, uv_step, \ dst_data, dst_step)), \ CV_HAL_ERROR_OK : \ uIdx == 1 ? \ (swapBlue ? \ CAROTENE_NS::yuv420sp2rgb(CAROTENE_NS::Size2D(dst_width, dst_height), \ - src_data, src_step, \ - src_data + src_step * dst_height, src_step, \ + y_data, y_step, \ + uv_data, uv_step, \ dst_data, dst_step) : \ CAROTENE_NS::yuv420sp2bgr(CAROTENE_NS::Size2D(dst_width, dst_height), \ - src_data, src_step, \ - src_data + src_step * dst_height, src_step, \ + y_data, y_step, \ + uv_data, uv_step, \ dst_data, dst_step)), \ CV_HAL_ERROR_OK : \ CV_HAL_ERROR_NOT_IMPLEMENTED : \ @@ -1809,29 +1809,32 @@ TegraCvtColor_Invoker(bgrx2hsvf, bgrx2hsv, src_data + static_cast(range. uIdx == 0 ? \ (swapBlue ? \ CAROTENE_NS::yuv420i2rgbx(CAROTENE_NS::Size2D(dst_width, dst_height), \ - src_data, src_step, \ - src_data + src_step * dst_height, src_step, \ + y_data, y_step, \ + uv_data, uv_step, \ dst_data, dst_step) : \ CAROTENE_NS::yuv420i2bgrx(CAROTENE_NS::Size2D(dst_width, dst_height), \ - src_data, src_step, \ - src_data + src_step * dst_height, src_step, \ + y_data, y_step, \ + uv_data, uv_step, \ dst_data, dst_step)), \ CV_HAL_ERROR_OK : \ uIdx == 1 ? \ (swapBlue ? \ CAROTENE_NS::yuv420sp2rgbx(CAROTENE_NS::Size2D(dst_width, dst_height), \ - src_data, src_step, \ - src_data + src_step * dst_height, src_step, \ + y_data, y_step, \ + uv_data, uv_step, \ dst_data, dst_step) : \ CAROTENE_NS::yuv420sp2bgrx(CAROTENE_NS::Size2D(dst_width, dst_height), \ - src_data, src_step, \ - src_data + src_step * dst_height, src_step, \ + y_data, y_step, \ + uv_data, uv_step, \ dst_data, dst_step)), \ CV_HAL_ERROR_OK : \ CV_HAL_ERROR_NOT_IMPLEMENTED : \ CV_HAL_ERROR_NOT_IMPLEMENTED \ : CV_HAL_ERROR_NOT_IMPLEMENTED \ ) +#define TEGRA_CVT2PYUVTOBGR(src_data, src_step, dst_data, dst_step, dst_width, dst_height, dcn, swapBlue, uIdx) \ + TEGRA_CVT2PYUVTOBGR_EX(src_data, src_step, src_data + src_step * dst_height, src_step, dst_data, dst_step, \ + dst_width, dst_height, dcn, swapBlue, uIdx); #undef cv_hal_cvtBGRtoBGR #define cv_hal_cvtBGRtoBGR TEGRA_CVTBGRTOBGR @@ -1847,6 +1850,8 @@ TegraCvtColor_Invoker(bgrx2hsvf, bgrx2hsv, src_data + static_cast(range. #define cv_hal_cvtBGRtoHSV TEGRA_CVTBGRTOHSV #undef cv_hal_cvtTwoPlaneYUVtoBGR #define cv_hal_cvtTwoPlaneYUVtoBGR TEGRA_CVT2PYUVTOBGR +#undef cv_hal_cvtTwoPlaneYUVtoBGREx +#define cv_hal_cvtTwoPlaneYUVtoBGREx TEGRA_CVT2PYUVTOBGR_EX #endif // OPENCV_IMGPROC_HAL_INTERFACE_H diff --git a/3rdparty/openvx/hal/openvx_hal.cpp b/3rdparty/openvx/hal/openvx_hal.cpp index 53a2711d52..4987482392 100644 --- a/3rdparty/openvx/hal/openvx_hal.cpp +++ b/3rdparty/openvx/hal/openvx_hal.cpp @@ -923,6 +923,11 @@ int ovx_hal_cvtGraytoBGR(const uchar * a, size_t astep, uchar * b, size_t bstep, } int ovx_hal_cvtTwoPlaneYUVtoBGR(const uchar * a, size_t astep, uchar * b, size_t bstep, int w, int h, int bcn, bool swapBlue, int uIdx) +{ + return ovx_hal_cvtTwoPlaneYUVtoBGREx(a, astep, a + h * astep, astep, b, bstep, w, h, bcn, swapBlue, uIdx); +} + +int ovx_hal_cvtTwoPlaneYUVtoBGREx(const uchar * a, size_t astep, const uchar * b, size_t bstep, uchar * c, size_t cstep, int w, int h, int bcn, bool swapBlue, int uIdx) { if (skipSmallImages(w, h)) return CV_HAL_ERROR_NOT_IMPLEMENTED; @@ -933,8 +938,7 @@ int ovx_hal_cvtTwoPlaneYUVtoBGR(const uchar * a, size_t astep, uchar * b, size_t if (w & 1 || h & 1) // It's not described in spec but sample implementation unable to convert odd sized images return CV_HAL_ERROR_NOT_IMPLEMENTED; - refineStep(w, h, uIdx ? VX_DF_IMAGE_NV21 : VX_DF_IMAGE_NV12, astep); - refineStep(w, h, bcn == 3 ? VX_DF_IMAGE_RGB : VX_DF_IMAGE_RGBX, bstep); + try { ivx::Context ctx = getOpenVXHALContext(); @@ -943,8 +947,8 @@ int ovx_hal_cvtTwoPlaneYUVtoBGR(const uchar * a, size_t astep, uchar * b, size_t std::vector ptrs; addr.push_back(ivx::Image::createAddressing(w, h, 1, (vx_int32)astep)); ptrs.push_back((void*)a); - addr.push_back(ivx::Image::createAddressing(w / 2, h / 2, 2, (vx_int32)astep)); - ptrs.push_back((void*)(a + h * astep)); + addr.push_back(ivx::Image::createAddressing(w / 2, h / 2, 2, (vx_int32)bstep)); + ptrs.push_back((void*)b); vxImage ia = ivx::Image::createFromHandle(ctx, uIdx ? VX_DF_IMAGE_NV21 : VX_DF_IMAGE_NV12, addr, ptrs); @@ -952,7 +956,7 @@ int ovx_hal_cvtTwoPlaneYUVtoBGR(const uchar * a, size_t astep, uchar * b, size_t return CV_HAL_ERROR_NOT_IMPLEMENTED; // OpenCV store NV12/NV21 as RANGE_RESTRICTED while OpenVX expect RANGE_FULL vxImage ib = ivx::Image::createFromHandle(ctx, bcn == 3 ? VX_DF_IMAGE_RGB : VX_DF_IMAGE_RGBX, - ivx::Image::createAddressing(w, h, bcn, (vx_int32)bstep), b); + ivx::Image::createAddressing(w, h, bcn, (vx_int32)cstep), c); ivx::IVX_CHECK_STATUS(vxuColorConvert(ctx, ia, ib)); } catch (ivx::RuntimeError & e) diff --git a/3rdparty/openvx/hal/openvx_hal.hpp b/3rdparty/openvx/hal/openvx_hal.hpp index c94cde3158..8b5a22f7de 100644 --- a/3rdparty/openvx/hal/openvx_hal.hpp +++ b/3rdparty/openvx/hal/openvx_hal.hpp @@ -49,6 +49,7 @@ int ovx_hal_morph(cvhalFilter2D *filter_context, uchar *a, size_t astep, uchar * int ovx_hal_cvtBGRtoBGR(const uchar * a, size_t astep, uchar * b, size_t bstep, int w, int h, int depth, int acn, int bcn, bool swapBlue); int ovx_hal_cvtGraytoBGR(const uchar * a, size_t astep, uchar * b, size_t bstep, int w, int h, int depth, int bcn); int ovx_hal_cvtTwoPlaneYUVtoBGR(const uchar * a, size_t astep, uchar * b, size_t bstep, int w, int h, int bcn, bool swapBlue, int uIdx); +int ovx_hal_cvtTwoPlaneYUVtoBGREx(const uchar * a, size_t astep, const uchar * b, size_t bstep, uchar * c, size_t cstep, int w, int h, int bcn, bool swapBlue, int uIdx); int ovx_hal_cvtThreePlaneYUVtoBGR(const uchar * a, size_t astep, uchar * b, size_t bstep, int w, int h, int bcn, bool swapBlue, int uIdx); int ovx_hal_cvtBGRtoThreePlaneYUV(const uchar * a, size_t astep, uchar * b, size_t bstep, int w, int h, int acn, bool swapBlue, int uIdx); int ovx_hal_cvtOnePlaneYUVtoBGR(const uchar * a, size_t astep, uchar * b, size_t bstep, int w, int h, int bcn, bool swapBlue, int uIdx, int ycn); @@ -130,6 +131,8 @@ int ovx_hal_integral(int depth, int sdepth, int, const uchar * a, size_t astep, #define cv_hal_cvtGraytoBGR ovx_hal_cvtGraytoBGR #undef cv_hal_cvtTwoPlaneYUVtoBGR #define cv_hal_cvtTwoPlaneYUVtoBGR ovx_hal_cvtTwoPlaneYUVtoBGR +#undef cv_hal_cvtTwoPlaneYUVtoBGREx +#define cv_hal_cvtTwoPlaneYUVtoBGREx ovx_hal_cvtTwoPlaneYUVtoBGREx #undef cv_hal_cvtThreePlaneYUVtoBGR #define cv_hal_cvtThreePlaneYUVtoBGR ovx_hal_cvtThreePlaneYUVtoBGR #undef cv_hal_cvtBGRtoThreePlaneYUV diff --git a/modules/imgproc/include/opencv2/imgproc/hal/hal.hpp b/modules/imgproc/include/opencv2/imgproc/hal/hal.hpp index a435fd6b85..9e5728d0c6 100644 --- a/modules/imgproc/include/opencv2/imgproc/hal/hal.hpp +++ b/modules/imgproc/include/opencv2/imgproc/hal/hal.hpp @@ -198,6 +198,11 @@ CV_EXPORTS void cvtTwoPlaneYUVtoBGR(const uchar * y_data, const uchar * uv_data, int dst_width, int dst_height, int dcn, bool swapBlue, int uIdx); +CV_EXPORTS void cvtTwoPlaneYUVtoBGR(const uchar * y_data, size_t y_step, const uchar * uv_data, size_t uv_step, + uchar * dst_data, size_t dst_step, + int dst_width, int dst_height, + int dcn, bool swapBlue, int uIdx); + CV_EXPORTS void cvtThreePlaneYUVtoBGR(const uchar * src_data, size_t src_step, uchar * dst_data, size_t dst_step, int dst_width, int dst_height, diff --git a/modules/imgproc/src/color_yuv.dispatch.cpp b/modules/imgproc/src/color_yuv.dispatch.cpp index 18b2096680..cac4fa1b41 100644 --- a/modules/imgproc/src/color_yuv.dispatch.cpp +++ b/modules/imgproc/src/color_yuv.dispatch.cpp @@ -124,8 +124,9 @@ void cvtTwoPlaneYUVtoBGR(const uchar * src_data, size_t src_step, CALL_HAL(cvtTwoPlaneYUVtoBGR, cv_hal_cvtTwoPlaneYUVtoBGR, src_data, src_step, dst_data, dst_step, dst_width, dst_height, dcn, swapBlue, uIdx); - CV_CPU_DISPATCH(cvtTwoPlaneYUVtoBGR, (src_data, src_step, dst_data, dst_step, dst_width, dst_height, dcn, swapBlue, uIdx), - CV_CPU_DISPATCH_MODES_ALL); + cvtTwoPlaneYUVtoBGR( + src_data, src_step, src_data + src_step * dst_height, src_step, dst_data, dst_step, + dst_width, dst_height, dcn, swapBlue, uIdx); } void cvtTwoPlaneYUVtoBGR(const uchar * y_data, const uchar * uv_data, size_t src_step, @@ -135,7 +136,20 @@ void cvtTwoPlaneYUVtoBGR(const uchar * y_data, const uchar * uv_data, size_t src { CV_INSTRUMENT_REGION(); - CV_CPU_DISPATCH(cvtTwoPlaneYUVtoBGR, (y_data, uv_data, src_step, dst_data, dst_step, dst_width, dst_height, dcn, swapBlue, uIdx), + cvtTwoPlaneYUVtoBGR(y_data, src_step, uv_data, src_step, dst_data, dst_step, dst_width, dst_height, dcn, swapBlue, uIdx); +} + +void cvtTwoPlaneYUVtoBGR(const uchar * y_data, size_t y_step, const uchar * uv_data, size_t uv_step, + uchar * dst_data, size_t dst_step, + int dst_width, int dst_height, + int dcn, bool swapBlue, int uIdx) +{ + CV_INSTRUMENT_REGION(); + + CALL_HAL(cvtTwoPlaneYUVtoBGREx, cv_hal_cvtTwoPlaneYUVtoBGREx, + y_data, y_step, uv_data, uv_step, dst_data, dst_step, dst_width, dst_height, dcn, swapBlue, uIdx); + + CV_CPU_DISPATCH(cvtTwoPlaneYUVtoBGR, (y_data, y_step, uv_data, uv_step, dst_data, dst_step, dst_width, dst_height, dcn, swapBlue, uIdx), CV_CPU_DISPATCH_MODES_ALL); } @@ -172,7 +186,8 @@ void cvtBGRtoTwoPlaneYUV(const uchar * src_data, size_t src_step, { CV_INSTRUMENT_REGION(); - // TODO: add hal replacement method + CALL_HAL(cvtBGRtoTwoPlaneYUV, cv_hal_cvtBGRtoTwoPlaneYUV, + src_data, src_step, y_data, dst_step, uv_data, dst_step, width, height, scn, swapBlue, uIdx); CV_CPU_DISPATCH(cvtBGRtoTwoPlaneYUV, (src_data, src_step, y_data, uv_data, dst_step, width, height, scn, swapBlue, uIdx), CV_CPU_DISPATCH_MODES_ALL); @@ -406,14 +421,21 @@ void cvtColorTwoPlaneYUV2BGRpair( InputArray _ysrc, InputArray _uvsrc, OutputArr Mat ysrc = _ysrc.getMat(), uvsrc = _uvsrc.getMat(); - CV_CheckEQ(ysrc.step, uvsrc.step, ""); - _dst.create( ysz, CV_MAKETYPE(depth, dcn)); Mat dst = _dst.getMat(); - hal::cvtTwoPlaneYUVtoBGR(ysrc.data, uvsrc.data, ysrc.step, - dst.data, dst.step, dst.cols, dst.rows, - dcn, swapb, uidx); + if(ysrc.step == uvsrc.step) + { + hal::cvtTwoPlaneYUVtoBGR(ysrc.data, uvsrc.data, ysrc.step, + dst.data, dst.step, dst.cols, dst.rows, + dcn, swapb, uidx); + } + else + { + hal::cvtTwoPlaneYUVtoBGR(ysrc.data, ysrc.step, uvsrc.data, uvsrc.step, + dst.data, dst.step, dst.cols, dst.rows, + dcn, swapb, uidx); + } } } // namespace cv diff --git a/modules/imgproc/src/color_yuv.simd.hpp b/modules/imgproc/src/color_yuv.simd.hpp index 076d1a4bd5..196a03b995 100644 --- a/modules/imgproc/src/color_yuv.simd.hpp +++ b/modules/imgproc/src/color_yuv.simd.hpp @@ -17,11 +17,7 @@ void cvtYUVtoBGR(const uchar * src_data, size_t src_step, uchar * dst_data, size_t dst_step, int width, int height, int depth, int dcn, bool swapBlue, bool isCbCr); -void cvtTwoPlaneYUVtoBGR(const uchar * src_data, size_t src_step, - uchar * dst_data, size_t dst_step, - int dst_width, int dst_height, - int dcn, bool swapBlue, int uIdx); -void cvtTwoPlaneYUVtoBGR(const uchar * y_data, const uchar * uv_data, size_t src_step, +void cvtTwoPlaneYUVtoBGR(const uchar * y_data, size_t y_step, const uchar * uv_data, size_t uv_step, uchar * dst_data, size_t dst_step, int dst_width, int dst_height, int dcn, bool swapBlue, int uIdx); @@ -1177,24 +1173,28 @@ struct YUV420sp2RGB8Invoker : ParallelLoopBody uchar * dst_data; size_t dst_step; int width; - const uchar* my1, *muv; - size_t stride; + const uchar* my1; + size_t my1_step; + const uchar* muv; + size_t muv_step; - YUV420sp2RGB8Invoker(uchar * _dst_data, size_t _dst_step, int _dst_width, size_t _stride, const uchar* _y1, const uchar* _uv) - : dst_data(_dst_data), dst_step(_dst_step), width(_dst_width), my1(_y1), muv(_uv), stride(_stride) {} + YUV420sp2RGB8Invoker(uchar * _dst_data, size_t _dst_step, int _dst_width, + const uchar* _y1, size_t _y1_step, const uchar* _uv, size_t _uv_step) : + dst_data(_dst_data), dst_step(_dst_step), width(_dst_width), + my1(_y1), my1_step(_y1_step), muv(_uv), muv_step(_uv_step) {} void operator()(const Range& range) const CV_OVERRIDE { const int rangeBegin = range.start * 2; const int rangeEnd = range.end * 2; - const uchar* y1 = my1 + rangeBegin * stride, *uv = muv + rangeBegin * stride / 2; + const uchar* y1 = my1 + rangeBegin * my1_step, *uv = muv + rangeBegin * muv_step / 2; - for (int j = rangeBegin; j < rangeEnd; j += 2, y1 += stride * 2, uv += stride) + for (int j = rangeBegin; j < rangeEnd; j += 2, y1 += my1_step * 2, uv += muv_step) { uchar* row1 = dst_data + dst_step * j; uchar* row2 = dst_data + dst_step * (j + 1); - const uchar* y2 = y1 + stride; + const uchar* y2 = y1 + my1_step; int i = 0; #if CV_SIMD @@ -1395,9 +1395,10 @@ struct YUV420p2RGB8Invoker : ParallelLoopBody #define MIN_SIZE_FOR_PARALLEL_YUV420_CONVERSION (320*240) template -inline void cvtYUV420sp2RGB(uchar * dst_data, size_t dst_step, int dst_width, int dst_height, size_t _stride, const uchar* _y1, const uchar* _uv) +inline void cvtYUV420sp2RGB(uchar * dst_data, size_t dst_step, int dst_width, int dst_height, + const uchar* _y1, size_t _y1_step, const uchar* _uv, size_t _uv_step) { - YUV420sp2RGB8Invoker converter(dst_data, dst_step, dst_width, _stride, _y1, _uv); + YUV420sp2RGB8Invoker converter(dst_data, dst_step, dst_width, _y1, _y1_step, _uv, _uv_step); if (dst_width * dst_height >= MIN_SIZE_FOR_PARALLEL_YUV420_CONVERSION) parallel_for_(Range(0, dst_height/2), converter); else @@ -1817,26 +1818,16 @@ void cvtYUVtoBGR(const uchar * src_data, size_t src_step, CvtColorLoop(src_data, src_step, dst_data, dst_step, width, height, YCrCb2RGB_f(dcn, blueIdx, isCbCr)); } -void cvtTwoPlaneYUVtoBGR(const uchar * src_data, size_t src_step, - uchar * dst_data, size_t dst_step, - int dst_width, int dst_height, - int dcn, bool swapBlue, int uIdx) -{ - CV_INSTRUMENT_REGION(); - - const uchar* uv = src_data + src_step * static_cast(dst_height); - cvtTwoPlaneYUVtoBGR(src_data, uv, src_step, dst_data, dst_step, dst_width, dst_height, dcn, swapBlue, uIdx); -} - typedef void (*cvt_2plane_yuv_ptr_t)(uchar * /* dst_data*/, size_t /* dst_step */, int /* dst_width */, int /* dst_height */, - size_t /* _stride */, const uchar* /* _y1 */, - const uchar* /* _uv */); + size_t /* _y1_step */, + const uchar* /* _uv */, + size_t /* _uv_step */); -void cvtTwoPlaneYUVtoBGR(const uchar * y_data, const uchar * uv_data, size_t src_step, +void cvtTwoPlaneYUVtoBGR(const uchar * y_data, size_t y_step, const uchar * uv_data, size_t uv_step, uchar * dst_data, size_t dst_step, int dst_width, int dst_height, int dcn, bool swapBlue, int uIdx) @@ -1859,7 +1850,7 @@ void cvtTwoPlaneYUVtoBGR(const uchar * y_data, const uchar * uv_data, size_t src default: CV_Error( CV_StsBadFlag, "Unknown/unsupported color conversion code" ); break; }; - cvtPtr(dst_data, dst_step, dst_width, dst_height, src_step, y_data, uv_data); + cvtPtr(dst_data, dst_step, dst_width, dst_height, y_data, y_step, uv_data, uv_step); } typedef void (*cvt_3plane_yuv_ptr_t)(uchar * /* dst_data */, diff --git a/modules/imgproc/src/hal_replacement.hpp b/modules/imgproc/src/hal_replacement.hpp index d6b1d78273..3368093c56 100644 --- a/modules/imgproc/src/hal_replacement.hpp +++ b/modules/imgproc/src/hal_replacement.hpp @@ -498,6 +498,39 @@ inline int hal_ni_cvtLabtoBGR(const uchar * src_data, size_t src_step, uchar * d */ inline int hal_ni_cvtTwoPlaneYUVtoBGR(const uchar * src_data, size_t src_step, uchar * dst_data, size_t dst_step, int dst_width, int dst_height, int dcn, bool swapBlue, int uIdx) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +/** + @brief Extended version of hal_cvtTwoPlaneYUVtoBGR. + @param y_data,y_step source image data and step (Y-plane) + @param uv_data,uv_step source image data and step (UV-plane) + @param dst_data,dst_step destination image data and step + @param dst_width,dst_height destination image size + @param dcn destination image channels (3 or 4) + @param swapBlue if set to true B and R destination channels will be swapped (write RGB) + @param uIdx U-channel index in the interleaved U/V plane (0 or 1) + Convert from YUV (YUV420sp (or NV12/NV21) - Y plane followed by interleaved U/V plane) to BGR, RGB, BGRA or RGBA. + Only for CV_8U. + */ +inline int hal_ni_cvtTwoPlaneYUVtoBGREx(const uchar * y_data, size_t y_step, const uchar * uv_data, size_t uv_step, + uchar * dst_data, size_t dst_step, int dst_width, int dst_height, + int dcn, bool swapBlue, int uIdx) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } + +/** + @brief hal_cvtBGRtoTwoPlaneYUV + @param src_data,src_step source image data and step + @param y_data,y_step destination image data and step (Y-plane) + @param uv_data,uv_step destination image data and step (UV-plane) + @param width,height image size + @param scn source image channels (3 or 4) + @param swapBlue if set to true B and R source channels will be swapped (treat as RGB) + @param uIdx U-channel plane index (0 or 1) + Convert from BGR, RGB, BGRA or RGBA to YUV (YUV420sp (or NV12/NV21) - Y plane followed by interleaved U/V plane). + Only for CV_8U. + */ +inline int hal_ni_cvtBGRtoTwoPlaneYUV(const uchar * src_data, size_t src_step, + uchar * y_data, size_t y_step, uchar * uv_data, size_t uv_step, + int width, int height, + int scn, bool swapBlue, int uIdx) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } + /** @brief hal_cvtThreePlaneYUVtoBGR @param src_data,src_step source image data and step @@ -576,6 +609,8 @@ inline int hal_ni_cvtMultipliedRGBAtoRGBA(const uchar * src_data, size_t src_ste #define cv_hal_cvtBGRtoLab hal_ni_cvtBGRtoLab #define cv_hal_cvtLabtoBGR hal_ni_cvtLabtoBGR #define cv_hal_cvtTwoPlaneYUVtoBGR hal_ni_cvtTwoPlaneYUVtoBGR +#define cv_hal_cvtTwoPlaneYUVtoBGREx hal_ni_cvtTwoPlaneYUVtoBGREx +#define cv_hal_cvtBGRtoTwoPlaneYUV hal_ni_cvtBGRtoTwoPlaneYUV #define cv_hal_cvtThreePlaneYUVtoBGR hal_ni_cvtThreePlaneYUVtoBGR #define cv_hal_cvtBGRtoThreePlaneYUV hal_ni_cvtBGRtoThreePlaneYUV #define cv_hal_cvtOnePlaneYUVtoBGR hal_ni_cvtOnePlaneYUVtoBGR diff --git a/modules/imgproc/test/test_color.cpp b/modules/imgproc/test/test_color.cpp index 450cf57923..204203d053 100644 --- a/modules/imgproc/test/test_color.cpp +++ b/modules/imgproc/test/test_color.cpp @@ -3072,20 +3072,34 @@ TEST(ImgProc_RGB2YUV, regression_13668) EXPECT_EQ(res, ref); } -TEST(ImgProc_cvtColorTwoPlane, missing_check_17036) // test can be removed if required feature is implemented +TEST(ImgProc_cvtColorTwoPlane, y_plane_padding_differs_from_uv_plane_padding_17036) { - std::vector y_data(700 * 480); - std::vector uv_data(640 * 240); + RNG &rng = theRNG(); - Mat y_plane_padding(480, 640, CV_8UC1, y_data.data(), 700); // with stride - Mat uv_plane(240, 320, CV_8UC2, uv_data.data()); + std::vector y_reference(640 * 480); + std::vector uv_reference(640 * 240); + std::vector y_padded(700 * 480); + std::vector uv_padded(700 * 240); - Mat result; + Mat y_reference_mat(480, 640, CV_8UC1, y_reference.data()); + Mat uv_reference_mat(240, 320, CV_8UC2, uv_reference.data()); + Mat y_padded_mat(480, 640, CV_8UC1, y_padded.data(), 700); + Mat uv_padded_mat(240, 320, CV_8UC2, uv_padded.data(), 700); - EXPECT_THROW( - cvtColorTwoPlane(y_plane_padding, uv_plane, result, COLOR_YUV2RGB_NV21); - , cv::Exception - ); + rng.fill(y_reference_mat, RNG::UNIFORM, 16, 235 + 1); + rng.fill(uv_reference_mat, RNG::UNIFORM, 16, 240 + 1); + + y_reference_mat.copyTo(y_padded_mat(Rect(0, 0, y_reference_mat.cols, y_reference_mat.rows))); + uv_reference_mat.copyTo(uv_padded_mat(Rect(0, 0, uv_reference_mat.cols, uv_reference_mat.rows))); + + Mat rgb_reference_mat, rgb_y_padded_mat, rgb_uv_padded_mat; + + cvtColorTwoPlane(y_reference_mat, uv_reference_mat, rgb_reference_mat, COLOR_YUV2RGB_NV21); + cvtColorTwoPlane(y_padded_mat, uv_reference_mat, rgb_y_padded_mat, COLOR_YUV2RGB_NV21); + cvtColorTwoPlane(y_reference_mat, uv_padded_mat, rgb_uv_padded_mat, COLOR_YUV2RGB_NV21); + + EXPECT_DOUBLE_EQ(cvtest::norm(rgb_reference_mat, rgb_y_padded_mat, NORM_INF), .0); + EXPECT_DOUBLE_EQ(cvtest::norm(rgb_reference_mat, rgb_uv_padded_mat, NORM_INF), .0); } From f9bd83c8547054357b0ecc55dadda787e5422dad Mon Sep 17 00:00:00 2001 From: Tsukasa Sugiura Date: Fri, 24 Sep 2021 15:35:42 +0900 Subject: [PATCH 10/12] fix cast in text detection sample --- samples/dnn/text_detection.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/dnn/text_detection.py b/samples/dnn/text_detection.py index 7014a80148..22c4f6b5ec 100644 --- a/samples/dnn/text_detection.py +++ b/samples/dnn/text_detection.py @@ -214,8 +214,8 @@ def main(): 0.5, (255, 0, 0)) for j in range(4): - p1 = (vertices[j][0], vertices[j][1]) - p2 = (vertices[(j + 1) % 4][0], vertices[(j + 1) % 4][1]) + p1 = (int(vertices[j][0]), int(vertices[j][1])) + p2 = (int(vertices[(j + 1) % 4][0]), int(vertices[(j + 1) % 4][1])) cv.line(frame, p1, p2, (0, 255, 0), 1) # Put efficiency information From 91ff45fbde176ff749a7d4c1b265c73370e2ea7e Mon Sep 17 00:00:00 2001 From: easonycwang Date: Fri, 24 Sep 2021 17:44:58 +0800 Subject: [PATCH 11/12] Tile: This submission is used to improve the performance of the inpaint algorithm for 3 channels images(RGB or BGR). Reason: The original algorithm implementation did not consider the cache hits. The loop of channels is outside the core loop, so the perfmance is not very good. Moving the channel loop inside the core loop can significantly improve cache hits, thereby improving performance. Performance: 360P, about >= 30% improvement iphone8P: 5.52ms -> 3.75ms iphone6s: 14.04ms -> 9.15ms --- modules/photo/src/inpaint.cpp | 90 ++++++++++++++++++++--------------- 1 file changed, 51 insertions(+), 39 deletions(-) diff --git a/modules/photo/src/inpaint.cpp b/modules/photo/src/inpaint.cpp index 14c178e248..74e006588b 100644 --- a/modules/photo/src/inpaint.cpp +++ b/modules/photo/src/inpaint.cpp @@ -308,50 +308,58 @@ icvTeleaInpaintFMM(const CvMat *f, CvMat *t, CvMat *out, int range, CvPriorityQu FastMarching_solve(i+1,j,i,j+1,f,t)); CV_MAT_ELEM(*t,float,i,j) = dist; + cv::Point2f gradT[3]; for (color=0; color<=2; color++) { - cv::Point2f gradI,gradT,r; - float Ia=0,Jx=0,Jy=0,s=1.0e-20f,w,dst,lev,dir,sat; - if (CV_MAT_ELEM(*f,uchar,i,j+1)!=INSIDE) { if (CV_MAT_ELEM(*f,uchar,i,j-1)!=INSIDE) { - gradT.x=(float)((CV_MAT_ELEM(*t,float,i,j+1)-CV_MAT_ELEM(*t,float,i,j-1)))*0.5f; + gradT[color].x=(float)((CV_MAT_ELEM(*t,float,i,j+1)-CV_MAT_ELEM(*t,float,i,j-1)))*0.5f; } else { - gradT.x=(float)((CV_MAT_ELEM(*t,float,i,j+1)-CV_MAT_ELEM(*t,float,i,j))); + gradT[color].x=(float)((CV_MAT_ELEM(*t,float,i,j+1)-CV_MAT_ELEM(*t,float,i,j))); } } else { if (CV_MAT_ELEM(*f,uchar,i,j-1)!=INSIDE) { - gradT.x=(float)((CV_MAT_ELEM(*t,float,i,j)-CV_MAT_ELEM(*t,float,i,j-1))); + gradT[color].x=(float)((CV_MAT_ELEM(*t,float,i,j)-CV_MAT_ELEM(*t,float,i,j-1))); } else { - gradT.x=0; + gradT[color].x=0; } } if (CV_MAT_ELEM(*f,uchar,i+1,j)!=INSIDE) { if (CV_MAT_ELEM(*f,uchar,i-1,j)!=INSIDE) { - gradT.y=(float)((CV_MAT_ELEM(*t,float,i+1,j)-CV_MAT_ELEM(*t,float,i-1,j)))*0.5f; + gradT[color].y=(float)((CV_MAT_ELEM(*t,float,i+1,j)-CV_MAT_ELEM(*t,float,i-1,j)))*0.5f; } else { - gradT.y=(float)((CV_MAT_ELEM(*t,float,i+1,j)-CV_MAT_ELEM(*t,float,i,j))); + gradT[color].y=(float)((CV_MAT_ELEM(*t,float,i+1,j)-CV_MAT_ELEM(*t,float,i,j))); } } else { if (CV_MAT_ELEM(*f,uchar,i-1,j)!=INSIDE) { - gradT.y=(float)((CV_MAT_ELEM(*t,float,i,j)-CV_MAT_ELEM(*t,float,i-1,j))); + gradT[color].y=(float)((CV_MAT_ELEM(*t,float,i,j)-CV_MAT_ELEM(*t,float,i-1,j))); } else { - gradT.y=0; + gradT[color].y=0; } } - for (k=i-range; k<=i+range; k++) { - int km=k-1+(k==1),kp=k-1-(k==t->rows-2); - for (l=j-range; l<=j+range; l++) { - int lm=l-1+(l==1),lp=l-1-(l==t->cols-2); - if (k>0&&l>0&&krows-1&&lcols-1) { - if ((CV_MAT_ELEM(*f,uchar,k,l)!=INSIDE)&& - ((l-j)*(l-j)+(k-i)*(k-i)<=range*range)) { + } + + cv::Point2f gradI,r; + float Jx[3] = {0,0,0}; + float Jy[3] = {0,0,0}; + float Ia[3] = {0,0,0}; + float s[3] = {1.0e-20f,1.0e-20f,1.0e-20f}; + float w,dst,lev,dir,sat; + + for (k=i-range; k<=i+range; k++) { + int km=k-1+(k==1),kp=k-1-(k==t->rows-2); + for (l=j-range; l<=j+range; l++) { + int lm=l-1+(l==1),lp=l-1-(l==t->cols-2); + if (k>0&&l>0&&krows-1&&lcols-1) { + if ((CV_MAT_ELEM(*f,uchar,k,l)!=INSIDE)&& + ((l-j)*(l-j)+(k-i)*(k-i)<=range*range)) { + for (color=0; color<=2; color++) { r.y = (float)(i-k); r.x = (float)(j-l); dst = (float)(1./(VectorLength(r)*sqrt((double)VectorLength(r)))); lev = (float)(1./(1+fabs(CV_MAT_ELEM(*t,float,k,l)-CV_MAT_ELEM(*t,float,i,j)))); - dir=VectorScalMult(r,gradT); + dir=VectorScalMult(r,gradT[color]); if (fabs(dir)<=0.01) dir=0.000001f; w = (float)fabs(dst*lev*dir); @@ -381,18 +389,18 @@ icvTeleaInpaintFMM(const CvMat *f, CvMat *t, CvMat *out, int range, CvPriorityQu gradI.y=0; } } - Ia += (float)w * (float)(CV_MAT_3COLOR_ELEM(*out,uchar,km,lm,color)); - Jx -= (float)w * (float)(gradI.x*r.x); - Jy -= (float)w * (float)(gradI.y*r.y); - s += w; + Ia[color] += (float)w * (float)(CV_MAT_3COLOR_ELEM(*out,uchar,km,lm,color)); + Jx[color] -= (float)w * (float)(gradI.x*r.x); + Jy[color] -= (float)w * (float)(gradI.y*r.y); + s[color] += w; } } } } - sat = (float)((Ia/s+(Jx+Jy)/(sqrt(Jx*Jx+Jy*Jy)+1.0e-20f)+0.5f)); - { + } + for (color=0; color<=2; color++) { + sat = (float)((Ia[color]/s[color]+(Jx[color]+Jy[color])/(sqrt(Jx[color]*Jx[color]+Jy[color]*Jy[color])+1.0e-20f)+0.5f)); CV_MAT_3COLOR_ELEM(*out,uchar,i-1,j-1,color) = cv::saturate_cast(sat); - } } CV_MAT_ELEM(*f,uchar,i,j) = BAND; @@ -540,17 +548,19 @@ icvNSInpaintFMM(const CvMat *f, CvMat *t, CvMat *out, int range, CvPriorityQueue FastMarching_solve(i+1,j,i,j+1,f,t)); CV_MAT_ELEM(*t,float,i,j) = dist; - for (color=0; color<=2; color++) { - cv::Point2f gradI,r; - float Ia=0,s=1.0e-20f,w,dst,dir; + cv::Point2f gradI,r; + float Ia[3]={0,0,0}; + float s[3]={1.0e-20f,1.0e-20f,1.0e-20f}; + float w,dst,dir; - for (k=i-range; k<=i+range; k++) { - int km=k-1+(k==1),kp=k-1-(k==f->rows-2); - for (l=j-range; l<=j+range; l++) { - int lm=l-1+(l==1),lp=l-1-(l==f->cols-2); - if (k>0&&l>0&&krows-1&&lcols-1) { - if ((CV_MAT_ELEM(*f,uchar,k,l)!=INSIDE)&& - ((l-j)*(l-j)+(k-i)*(k-i)<=range*range)) { + for (k=i-range; k<=i+range; k++) { + int km=k-1+(k==1),kp=k-1-(k==f->rows-2); + for (l=j-range; l<=j+range; l++) { + int lm=l-1+(l==1),lp=l-1-(l==f->cols-2); + if (k>0&&l>0&&krows-1&&lcols-1) { + if ((CV_MAT_ELEM(*f,uchar,k,l)!=INSIDE)&& + ((l-j)*(l-j)+(k-i)*(k-i)<=range*range)) { + for (color=0; color<=2; color++) { r.y=(float)(k-i); r.x=(float)(l-j); @@ -594,13 +604,15 @@ icvNSInpaintFMM(const CvMat *f, CvMat *t, CvMat *out, int range, CvPriorityQueue dir = (float)fabs(VectorScalMult(r,gradI)/sqrt(VectorLength(r)*VectorLength(gradI))); } w = dst*dir; - Ia += (float)w * (float)(CV_MAT_3COLOR_ELEM(*out,uchar,km,lm,color)); - s += w; + Ia[color] += (float)w * (float)(CV_MAT_3COLOR_ELEM(*out,uchar,km,lm,color)); + s[color] += w; } } } } - CV_MAT_3COLOR_ELEM(*out,uchar,i-1,j-1,color) = cv::saturate_cast((double)Ia/s); + } + for (color=0; color<=2; color++) { + CV_MAT_3COLOR_ELEM(*out,uchar,i-1,j-1,color) = cv::saturate_cast((double)Ia[color]/s[color]); } CV_MAT_ELEM(*f,uchar,i,j) = BAND; From 236c64a17d4c9e1eac3fb25044bdfeceba1d7463 Mon Sep 17 00:00:00 2001 From: Nicholas Ho <88894303+Nicholas-Ho-arm@users.noreply.github.com> Date: Sat, 25 Sep 2021 18:43:33 +0100 Subject: [PATCH 12/12] Merge pull request #20712 from Nicholas-Ho-arm:3.4_RowVec_8u32f * Add RowVec_8u32f * Fix build errors in Linux x64 Debug and armeabi-v7a * Reformat code to make it more clean and conventional * Optimise with vx_load_expand_q() --- modules/imgproc/src/filter.simd.hpp | 47 ++++++++++++++++++++++++++++- 1 file changed, 46 insertions(+), 1 deletion(-) diff --git a/modules/imgproc/src/filter.simd.hpp b/modules/imgproc/src/filter.simd.hpp index bed6f83071..94dbce060c 100644 --- a/modules/imgproc/src/filter.simd.hpp +++ b/modules/imgproc/src/filter.simd.hpp @@ -465,6 +465,49 @@ struct RowVec_8u32s bool smallValues; }; +struct RowVec_8u32f +{ + RowVec_8u32f() {} + RowVec_8u32f( const Mat& _kernel ) : kernel(_kernel) {} + + int operator()(const uchar* _src, uchar* _dst, int width, int cn) const + { + CV_INSTRUMENT_REGION(); + + int i = 0, k, _ksize = kernel.rows + kernel.cols - 1; + float* dst = (float*)_dst; + const float* _kx = kernel.ptr(); + width *= cn; + for( ; i <= width - v_uint8::nlanes; i += v_uint8::nlanes ) + { + v_float32 s0 = vx_setzero_f32(); + v_float32 s1 = vx_setzero_f32(); + v_float32 s2 = vx_setzero_f32(); + v_float32 s3 = vx_setzero_f32(); + k = 0; + for( ; k < _ksize ; k++ ) + { + v_float32 f = vx_setall_f32(_kx[k]); + const uchar* src = (const uchar*)_src + i + k * cn; + v_float32 vs_ll = v_cvt_f32(v_reinterpret_as_s32(vx_load_expand_q(src))); + v_float32 vs_lh = v_cvt_f32(v_reinterpret_as_s32(vx_load_expand_q(src + v_float32::nlanes))); + v_float32 vs_hl = v_cvt_f32(v_reinterpret_as_s32(vx_load_expand_q(src + 2*v_float32::nlanes))); + v_float32 vs_hh = v_cvt_f32(v_reinterpret_as_s32(vx_load_expand_q(src + 3*v_float32::nlanes))); + s0 = v_muladd(vs_ll, f, s0); + s1 = v_muladd(vs_lh, f, s1); + s2 = v_muladd(vs_hl, f, s2); + s3 = v_muladd(vs_hh, f, s3); + } + v_store(dst + i, s0); + v_store(dst + i + v_float32::nlanes, s1); + v_store(dst + i + 2*v_float32::nlanes, s2); + v_store(dst + i + 3*v_float32::nlanes, s3); + } + return i; + } + + Mat kernel; +}; struct SymmRowSmallVec_8u32s { @@ -2292,6 +2335,7 @@ struct FilterVec_32f #else typedef RowNoVec RowVec_8u32s; +typedef RowNoVec RowVec_8u32f; typedef RowNoVec RowVec_16s32f; typedef RowNoVec RowVec_32f; typedef SymmRowSmallNoVec SymmRowSmallVec_8u32s; @@ -2899,7 +2943,8 @@ Ptr getLinearRowFilter( return makePtr > (kernel, anchor, RowVec_8u32s(kernel)); if( sdepth == CV_8U && ddepth == CV_32F ) - return makePtr >(kernel, anchor); + return makePtr > + (kernel, anchor, RowVec_8u32f(kernel)); if( sdepth == CV_8U && ddepth == CV_64F ) return makePtr >(kernel, anchor); if( sdepth == CV_16U && ddepth == CV_32F )