Merge pull request #26348 from fengyuentau:imgproc/remap_opt

imgproc: add new remap kernels that align with the new warpAffine and warpPerspective kernels #26348

## Performance

M2:

```
Geometric mean (ms)

                                      Name of Test                                        base  patch   patch   
                                                                                                          vs    
                                                                                                         base   
                                                                                                      (x-factor)
WarpAffine::TestWarpAffine::(640x480, INTER_LINEAR, BORDER_CONSTANT, 8UC4)                0.213 0.185    1.15   
WarpAffine::TestWarpAffine::(640x480, INTER_LINEAR, BORDER_REPLICATE, 8UC4)               0.213 0.187    1.14   
WarpAffine::TestWarpAffine::(1280x720, INTER_LINEAR, BORDER_CONSTANT, 8UC4)               0.417 0.355    1.18   
WarpAffine::TestWarpAffine::(1280x720, INTER_LINEAR, BORDER_REPLICATE, 8UC4)              0.973 0.908    1.07   
WarpAffine::TestWarpAffine::(1920x1080, INTER_LINEAR, BORDER_CONSTANT, 8UC4)              0.563 0.507    1.11   
WarpAffine::TestWarpAffine::(1920x1080, INTER_LINEAR, BORDER_REPLICATE, 8UC4)             3.208 3.165    1.01   
WarpPerspective::TestWarpPerspective::(640x480, INTER_LINEAR, BORDER_CONSTANT, 8UC4)      0.244 0.195    1.26   
WarpPerspective::TestWarpPerspective::(640x480, INTER_LINEAR, BORDER_REPLICATE, 8UC4)     0.270 0.245    1.10   
WarpPerspective::TestWarpPerspective::(1280x720, INTER_LINEAR, BORDER_CONSTANT, 8UC4)     0.361 0.328    1.10   
WarpPerspective::TestWarpPerspective::(1280x720, INTER_LINEAR, BORDER_REPLICATE, 8UC4)    1.365 1.273    1.07   
WarpPerspective::TestWarpPerspective::(1920x1080, INTER_LINEAR, BORDER_CONSTANT, 8UC4)    0.532 0.508    1.05   
WarpPerspective::TestWarpPerspective::(1920x1080, INTER_LINEAR, BORDER_REPLICATE, 8UC4)   3.651 3.545    1.03   
map1_32fc1::TestRemap::(640x480, INTER_LINEAR, BORDER_CONSTANT, 8UC1)                     0.272 0.097    2.80   
map1_32fc1::TestRemap::(640x480, INTER_LINEAR, BORDER_CONSTANT, 16UC1)                    0.304 0.148    2.06   
map1_32fc1::TestRemap::(640x480, INTER_LINEAR, BORDER_CONSTANT, 32FC1)                    0.271 0.125    2.16   
map1_32fc1::TestRemap::(640x480, INTER_LINEAR, BORDER_CONSTANT, 8UC3)                     0.406 0.178    2.28   
map1_32fc1::TestRemap::(640x480, INTER_LINEAR, BORDER_CONSTANT, 16UC3)                    0.476 0.275    1.73   
map1_32fc1::TestRemap::(640x480, INTER_LINEAR, BORDER_CONSTANT, 32FC3)                    0.354 0.256    1.38   
map1_32fc1::TestRemap::(640x480, INTER_LINEAR, BORDER_CONSTANT, 8UC4)                     0.382 0.168    2.28   
map1_32fc1::TestRemap::(640x480, INTER_LINEAR, BORDER_CONSTANT, 16UC4)                    0.555 0.338    1.64   
map1_32fc1::TestRemap::(640x480, INTER_LINEAR, BORDER_CONSTANT, 32FC4)                    0.385 0.307    1.25   
map1_32fc1::TestRemap::(640x480, INTER_LINEAR, BORDER_REPLICATE, 8UC1)                    0.271 0.099    2.75   
map1_32fc1::TestRemap::(640x480, INTER_LINEAR, BORDER_REPLICATE, 16UC1)                   0.301 0.145    2.07   
map1_32fc1::TestRemap::(640x480, INTER_LINEAR, BORDER_REPLICATE, 32FC1)                   0.270 0.120    2.24   
map1_32fc1::TestRemap::(640x480, INTER_LINEAR, BORDER_REPLICATE, 8UC3)                    0.408 0.180    2.27   
map1_32fc1::TestRemap::(640x480, INTER_LINEAR, BORDER_REPLICATE, 16UC3)                   0.474 0.277    1.71   
map1_32fc1::TestRemap::(640x480, INTER_LINEAR, BORDER_REPLICATE, 32FC3)                   0.352 0.261    1.35   
map1_32fc1::TestRemap::(640x480, INTER_LINEAR, BORDER_REPLICATE, 8UC4)                    0.382 0.166    2.29   
map1_32fc1::TestRemap::(640x480, INTER_LINEAR, BORDER_REPLICATE, 16UC4)                   0.552 0.339    1.63   
map1_32fc1::TestRemap::(640x480, INTER_LINEAR, BORDER_REPLICATE, 32FC4)                   0.380 0.308    1.24   
map1_32fc1::TestRemap::(1920x1080, INTER_LINEAR, BORDER_CONSTANT, 8UC1)                   1.013 0.474    2.14   
map1_32fc1::TestRemap::(1920x1080, INTER_LINEAR, BORDER_CONSTANT, 16UC1)                  1.155 0.705    1.64   
map1_32fc1::TestRemap::(1920x1080, INTER_LINEAR, BORDER_CONSTANT, 32FC1)                  1.200 0.674    1.78   
map1_32fc1::TestRemap::(1920x1080, INTER_LINEAR, BORDER_CONSTANT, 8UC3)                   1.614 0.986    1.64   
map1_32fc1::TestRemap::(1920x1080, INTER_LINEAR, BORDER_CONSTANT, 16UC3)                  2.042 1.605    1.27   
map1_32fc1::TestRemap::(1920x1080, INTER_LINEAR, BORDER_CONSTANT, 32FC3)                  2.275 1.647    1.38   
map1_32fc1::TestRemap::(1920x1080, INTER_LINEAR, BORDER_CONSTANT, 8UC4)                   1.558 0.847    1.84   
map1_32fc1::TestRemap::(1920x1080, INTER_LINEAR, BORDER_CONSTANT, 16UC4)                  2.394 2.036    1.18   
map1_32fc1::TestRemap::(1920x1080, INTER_LINEAR, BORDER_CONSTANT, 32FC4)                  2.693 2.112    1.27   
map1_32fc1::TestRemap::(1920x1080, INTER_LINEAR, BORDER_REPLICATE, 8UC1)                  0.999 0.463    2.16   
map1_32fc1::TestRemap::(1920x1080, INTER_LINEAR, BORDER_REPLICATE, 16UC1)                 1.194 0.699    1.71   
map1_32fc1::TestRemap::(1920x1080, INTER_LINEAR, BORDER_REPLICATE, 32FC1)                 1.211 0.677    1.79   
map1_32fc1::TestRemap::(1920x1080, INTER_LINEAR, BORDER_REPLICATE, 8UC3)                  1.619 1.045    1.55   
map1_32fc1::TestRemap::(1920x1080, INTER_LINEAR, BORDER_REPLICATE, 16UC3)                 2.039 1.604    1.27   
map1_32fc1::TestRemap::(1920x1080, INTER_LINEAR, BORDER_REPLICATE, 32FC3)                 2.257 1.657    1.36   
map1_32fc1::TestRemap::(1920x1080, INTER_LINEAR, BORDER_REPLICATE, 8UC4)                  1.578 0.845    1.87   
map1_32fc1::TestRemap::(1920x1080, INTER_LINEAR, BORDER_REPLICATE, 16UC4)                 2.405 2.032    1.18   
map1_32fc1::TestRemap::(1920x1080, INTER_LINEAR, BORDER_REPLICATE, 32FC4)                 2.669 2.107    1.27   
map1_32fc2::TestRemap::(640x480, INTER_LINEAR, BORDER_CONSTANT, 8UC1)                     0.277 0.104    2.66   
map1_32fc2::TestRemap::(640x480, INTER_LINEAR, BORDER_CONSTANT, 16UC1)                    0.310 0.149    2.08   
map1_32fc2::TestRemap::(640x480, INTER_LINEAR, BORDER_CONSTANT, 32FC1)                    0.275 0.122    2.26   
map1_32fc2::TestRemap::(640x480, INTER_LINEAR, BORDER_CONSTANT, 8UC3)                     0.412 0.177    2.33   
map1_32fc2::TestRemap::(640x480, INTER_LINEAR, BORDER_CONSTANT, 16UC3)                    0.479 0.277    1.73   
map1_32fc2::TestRemap::(640x480, INTER_LINEAR, BORDER_CONSTANT, 32FC3)                    0.360 0.253    1.43   
map1_32fc2::TestRemap::(640x480, INTER_LINEAR, BORDER_CONSTANT, 8UC4)                     0.388 0.173    2.24   
map1_32fc2::TestRemap::(640x480, INTER_LINEAR, BORDER_CONSTANT, 16UC4)                    0.575 0.337    1.71   
map1_32fc2::TestRemap::(640x480, INTER_LINEAR, BORDER_CONSTANT, 32FC4)                    0.387 0.307    1.26   
map1_32fc2::TestRemap::(640x480, INTER_LINEAR, BORDER_REPLICATE, 8UC1)                    0.274 0.100    2.73   
map1_32fc2::TestRemap::(640x480, INTER_LINEAR, BORDER_REPLICATE, 16UC1)                   0.312 0.144    2.16   
map1_32fc2::TestRemap::(640x480, INTER_LINEAR, BORDER_REPLICATE, 32FC1)                   0.278 0.128    2.18   
map1_32fc2::TestRemap::(640x480, INTER_LINEAR, BORDER_REPLICATE, 8UC3)                    0.407 0.178    2.29   
map1_32fc2::TestRemap::(640x480, INTER_LINEAR, BORDER_REPLICATE, 16UC3)                   0.483 0.275    1.75   
map1_32fc2::TestRemap::(640x480, INTER_LINEAR, BORDER_REPLICATE, 32FC3)                   0.358 0.250    1.43   
map1_32fc2::TestRemap::(640x480, INTER_LINEAR, BORDER_REPLICATE, 8UC4)                    0.389 0.168    2.31   
map1_32fc2::TestRemap::(640x480, INTER_LINEAR, BORDER_REPLICATE, 16UC4)                   0.563 0.338    1.66   
map1_32fc2::TestRemap::(640x480, INTER_LINEAR, BORDER_REPLICATE, 32FC4)                   0.390 0.312    1.25   
map1_32fc2::TestRemap::(1920x1080, INTER_LINEAR, BORDER_CONSTANT, 8UC1)                   1.024 0.483    2.12   
map1_32fc2::TestRemap::(1920x1080, INTER_LINEAR, BORDER_CONSTANT, 16UC1)                  1.224 0.770    1.59   
map1_32fc2::TestRemap::(1920x1080, INTER_LINEAR, BORDER_CONSTANT, 32FC1)                  1.185 0.674    1.76   
map1_32fc2::TestRemap::(1920x1080, INTER_LINEAR, BORDER_CONSTANT, 8UC3)                   1.633 0.922    1.77   
map1_32fc2::TestRemap::(1920x1080, INTER_LINEAR, BORDER_CONSTANT, 16UC3)                  2.042 1.607    1.27   
map1_32fc2::TestRemap::(1920x1080, INTER_LINEAR, BORDER_CONSTANT, 32FC3)                  2.244 1.647    1.36   
map1_32fc2::TestRemap::(1920x1080, INTER_LINEAR, BORDER_CONSTANT, 8UC4)                   1.592 0.872    1.83   
map1_32fc2::TestRemap::(1920x1080, INTER_LINEAR, BORDER_CONSTANT, 16UC4)                  2.473 2.014    1.23   
map1_32fc2::TestRemap::(1920x1080, INTER_LINEAR, BORDER_CONSTANT, 32FC4)                  2.604 2.127    1.22   
map1_32fc2::TestRemap::(1920x1080, INTER_LINEAR, BORDER_REPLICATE, 8UC1)                  1.020 0.490    2.08   
map1_32fc2::TestRemap::(1920x1080, INTER_LINEAR, BORDER_REPLICATE, 16UC1)                 1.193 0.733    1.63   
map1_32fc2::TestRemap::(1920x1080, INTER_LINEAR, BORDER_REPLICATE, 32FC1)                 1.203 0.694    1.73   
map1_32fc2::TestRemap::(1920x1080, INTER_LINEAR, BORDER_REPLICATE, 8UC3)                  1.642 0.923    1.78   
map1_32fc2::TestRemap::(1920x1080, INTER_LINEAR, BORDER_REPLICATE, 16UC3)                 2.055 1.619    1.27   
map1_32fc2::TestRemap::(1920x1080, INTER_LINEAR, BORDER_REPLICATE, 32FC3)                 2.210 1.658    1.33   
map1_32fc2::TestRemap::(1920x1080, INTER_LINEAR, BORDER_REPLICATE, 8UC4)                  1.642 0.883    1.86   
map1_32fc2::TestRemap::(1920x1080, INTER_LINEAR, BORDER_REPLICATE, 16UC4)                 2.463 2.077    1.19   
map1_32fc2::TestRemap::(1920x1080, INTER_LINEAR, BORDER_REPLICATE, 32FC4)                 2.610 2.152    1.21   
```


Intel i7-12700K:

```
Geometric mean (ms)

                                      Name of Test                                        base  patch   patch   
                                                                                                          vs    
                                                                                                         base   
                                                                                                      (x-factor)
WarpAffine::TestWarpAffine::(640x480, INTER_LINEAR, BORDER_CONSTANT, 8UC4)                0.146 0.055    2.66   
WarpAffine::TestWarpAffine::(640x480, INTER_LINEAR, BORDER_REPLICATE, 8UC4)               0.146 0.055    2.65   
WarpAffine::TestWarpAffine::(1280x720, INTER_LINEAR, BORDER_CONSTANT, 8UC4)               0.301 0.138    2.18   
WarpAffine::TestWarpAffine::(1280x720, INTER_LINEAR, BORDER_REPLICATE, 8UC4)              0.490 0.329    1.49   
WarpAffine::TestWarpAffine::(1920x1080, INTER_LINEAR, BORDER_CONSTANT, 8UC4)              0.390 0.194    2.01   
WarpAffine::TestWarpAffine::(1920x1080, INTER_LINEAR, BORDER_REPLICATE, 8UC4)             1.286 1.190    1.08   
WarpPerspective::TestWarpPerspective::(640x480, INTER_LINEAR, BORDER_CONSTANT, 8UC4)      0.140 0.058    2.40   
WarpPerspective::TestWarpPerspective::(640x480, INTER_LINEAR, BORDER_REPLICATE, 8UC4)     0.157 0.078    2.02   
WarpPerspective::TestWarpPerspective::(1280x720, INTER_LINEAR, BORDER_CONSTANT, 8UC4)     0.234 0.117    2.01   
WarpPerspective::TestWarpPerspective::(1280x720, INTER_LINEAR, BORDER_REPLICATE, 8UC4)    0.550 0.472    1.16   
WarpPerspective::TestWarpPerspective::(1920x1080, INTER_LINEAR, BORDER_CONSTANT, 8UC4)    0.334 0.199    1.68   
WarpPerspective::TestWarpPerspective::(1920x1080, INTER_LINEAR, BORDER_REPLICATE, 8UC4)   1.361 1.347    1.01   

map1_32fc1::TestRemap::(640x480, INTER_LINEAR, BORDER_CONSTANT, 8UC1)                     0.146 0.046    3.18   
map1_32fc1::TestRemap::(640x480, INTER_LINEAR, BORDER_CONSTANT, 16UC1)                    0.174 0.045    3.88   
map1_32fc1::TestRemap::(640x480, INTER_LINEAR, BORDER_CONSTANT, 32FC1)                    0.150 0.036    4.21   
map1_32fc1::TestRemap::(640x480, INTER_LINEAR, BORDER_CONSTANT, 8UC3)                     0.195 0.120    1.63   
map1_32fc1::TestRemap::(640x480, INTER_LINEAR, BORDER_CONSTANT, 16UC3)                    0.365 0.111    3.29   
map1_32fc1::TestRemap::(640x480, INTER_LINEAR, BORDER_CONSTANT, 32FC3)                    0.217 0.106    2.05   
map1_32fc1::TestRemap::(640x480, INTER_LINEAR, BORDER_CONSTANT, 8UC4)                     0.177 0.054    3.30   
map1_32fc1::TestRemap::(640x480, INTER_LINEAR, BORDER_CONSTANT, 16UC4)                    0.451 0.143    3.15   
map1_32fc1::TestRemap::(640x480, INTER_LINEAR, BORDER_CONSTANT, 32FC4)                    0.276 0.139    1.98   
map1_32fc1::TestRemap::(640x480, INTER_LINEAR, BORDER_REPLICATE, 8UC1)                    0.142 0.046    3.06   
map1_32fc1::TestRemap::(640x480, INTER_LINEAR, BORDER_REPLICATE, 16UC1)                   0.182 0.045    4.00   
map1_32fc1::TestRemap::(640x480, INTER_LINEAR, BORDER_REPLICATE, 32FC1)                   0.154 0.036    4.31   
map1_32fc1::TestRemap::(640x480, INTER_LINEAR, BORDER_REPLICATE, 8UC3)                    0.196 0.120    1.63   
map1_32fc1::TestRemap::(640x480, INTER_LINEAR, BORDER_REPLICATE, 16UC3)                   0.364 0.111    3.29   
map1_32fc1::TestRemap::(640x480, INTER_LINEAR, BORDER_REPLICATE, 32FC3)                   0.221 0.107    2.07   
map1_32fc1::TestRemap::(640x480, INTER_LINEAR, BORDER_REPLICATE, 8UC4)                    0.177 0.054    3.31   
map1_32fc1::TestRemap::(640x480, INTER_LINEAR, BORDER_REPLICATE, 16UC4)                   0.488 0.143    3.42   
map1_32fc1::TestRemap::(640x480, INTER_LINEAR, BORDER_REPLICATE, 32FC4)                   0.280 0.139    2.01   
map1_32fc1::TestRemap::(1920x1080, INTER_LINEAR, BORDER_CONSTANT, 8UC1)                   0.480 0.290    1.66   
map1_32fc1::TestRemap::(1920x1080, INTER_LINEAR, BORDER_CONSTANT, 16UC1)                  0.698 0.288    2.43   
map1_32fc1::TestRemap::(1920x1080, INTER_LINEAR, BORDER_CONSTANT, 32FC1)                  0.613 0.322    1.90   
map1_32fc1::TestRemap::(1920x1080, INTER_LINEAR, BORDER_CONSTANT, 8UC3)                   0.665 0.808    0.82   
map1_32fc1::TestRemap::(1920x1080, INTER_LINEAR, BORDER_CONSTANT, 16UC3)                  1.522 0.942    1.62   
map1_32fc1::TestRemap::(1920x1080, INTER_LINEAR, BORDER_CONSTANT, 32FC3)                  2.504 2.204    1.14   
map1_32fc1::TestRemap::(1920x1080, INTER_LINEAR, BORDER_CONSTANT, 8UC4)                   0.619 0.376    1.64   
map1_32fc1::TestRemap::(1920x1080, INTER_LINEAR, BORDER_CONSTANT, 16UC4)                  2.018 1.397    1.44   
map1_32fc1::TestRemap::(1920x1080, INTER_LINEAR, BORDER_CONSTANT, 32FC4)                  3.582 3.157    1.13   
map1_32fc1::TestRemap::(1920x1080, INTER_LINEAR, BORDER_REPLICATE, 8UC1)                  0.481 0.293    1.64   
map1_32fc1::TestRemap::(1920x1080, INTER_LINEAR, BORDER_REPLICATE, 16UC1)                 0.698 0.288    2.42   
map1_32fc1::TestRemap::(1920x1080, INTER_LINEAR, BORDER_REPLICATE, 32FC1)                 0.606 0.321    1.88   
map1_32fc1::TestRemap::(1920x1080, INTER_LINEAR, BORDER_REPLICATE, 8UC3)                  0.669 0.806    0.83   
map1_32fc1::TestRemap::(1920x1080, INTER_LINEAR, BORDER_REPLICATE, 16UC3)                 1.514 0.935    1.62   
map1_32fc1::TestRemap::(1920x1080, INTER_LINEAR, BORDER_REPLICATE, 32FC3)                 2.472 2.203    1.12   
map1_32fc1::TestRemap::(1920x1080, INTER_LINEAR, BORDER_REPLICATE, 8UC4)                  0.618 0.378    1.63   
map1_32fc1::TestRemap::(1920x1080, INTER_LINEAR, BORDER_REPLICATE, 16UC4)                 1.998 1.404    1.42   
map1_32fc1::TestRemap::(1920x1080, INTER_LINEAR, BORDER_REPLICATE, 32FC4)                 3.583 3.160    1.13   
map1_32fc2::TestRemap::(640x480, INTER_LINEAR, BORDER_CONSTANT, 8UC1)                     0.153 0.050    3.08   
map1_32fc2::TestRemap::(640x480, INTER_LINEAR, BORDER_CONSTANT, 16UC1)                    0.189 0.048    3.90   
map1_32fc2::TestRemap::(640x480, INTER_LINEAR, BORDER_CONSTANT, 32FC1)                    0.162 0.041    3.91   
map1_32fc2::TestRemap::(640x480, INTER_LINEAR, BORDER_CONSTANT, 8UC3)                     0.211 0.124    1.70   
map1_32fc2::TestRemap::(640x480, INTER_LINEAR, BORDER_CONSTANT, 16UC3)                    0.384 0.113    3.39   
map1_32fc2::TestRemap::(640x480, INTER_LINEAR, BORDER_CONSTANT, 32FC3)                    0.221 0.107    2.07   
map1_32fc2::TestRemap::(640x480, INTER_LINEAR, BORDER_CONSTANT, 8UC4)                     0.186 0.059    3.17   
map1_32fc2::TestRemap::(640x480, INTER_LINEAR, BORDER_CONSTANT, 16UC4)                    0.465 0.147    3.16   
map1_32fc2::TestRemap::(640x480, INTER_LINEAR, BORDER_CONSTANT, 32FC4)                    0.312 0.140    2.22   
map1_32fc2::TestRemap::(640x480, INTER_LINEAR, BORDER_REPLICATE, 8UC1)                    0.148 0.052    2.88   
map1_32fc2::TestRemap::(640x480, INTER_LINEAR, BORDER_REPLICATE, 16UC1)                   0.189 0.049    3.82   
map1_32fc2::TestRemap::(640x480, INTER_LINEAR, BORDER_REPLICATE, 32FC1)                   0.167 0.041    4.06   
map1_32fc2::TestRemap::(640x480, INTER_LINEAR, BORDER_REPLICATE, 8UC3)                    0.202 0.124    1.63   
map1_32fc2::TestRemap::(640x480, INTER_LINEAR, BORDER_REPLICATE, 16UC3)                   0.383 0.113    3.39   
map1_32fc2::TestRemap::(640x480, INTER_LINEAR, BORDER_REPLICATE, 32FC3)                   0.228 0.106    2.14   
map1_32fc2::TestRemap::(640x480, INTER_LINEAR, BORDER_REPLICATE, 8UC4)                    0.188 0.058    3.26   
map1_32fc2::TestRemap::(640x480, INTER_LINEAR, BORDER_REPLICATE, 16UC4)                   0.467 0.147    3.17   
map1_32fc2::TestRemap::(640x480, INTER_LINEAR, BORDER_REPLICATE, 32FC4)                   0.286 0.140    2.05   
map1_32fc2::TestRemap::(1920x1080, INTER_LINEAR, BORDER_CONSTANT, 8UC1)                   0.519 0.311    1.67   
map1_32fc2::TestRemap::(1920x1080, INTER_LINEAR, BORDER_CONSTANT, 16UC1)                  0.743 0.307    2.42   
map1_32fc2::TestRemap::(1920x1080, INTER_LINEAR, BORDER_CONSTANT, 32FC1)                  0.646 0.329    1.96   
map1_32fc2::TestRemap::(1920x1080, INTER_LINEAR, BORDER_CONSTANT, 8UC3)                   0.714 0.826    0.86   
map1_32fc2::TestRemap::(1920x1080, INTER_LINEAR, BORDER_CONSTANT, 16UC3)                  1.567 0.939    1.67   
map1_32fc2::TestRemap::(1920x1080, INTER_LINEAR, BORDER_CONSTANT, 32FC3)                  2.501 2.183    1.15   
map1_32fc2::TestRemap::(1920x1080, INTER_LINEAR, BORDER_CONSTANT, 8UC4)                   0.670 0.389    1.72   
map1_32fc2::TestRemap::(1920x1080, INTER_LINEAR, BORDER_CONSTANT, 16UC4)                  2.060 1.384    1.49   
map1_32fc2::TestRemap::(1920x1080, INTER_LINEAR, BORDER_CONSTANT, 32FC4)                  3.556 3.151    1.13   
map1_32fc2::TestRemap::(1920x1080, INTER_LINEAR, BORDER_REPLICATE, 8UC1)                  0.517 0.312    1.66   
map1_32fc2::TestRemap::(1920x1080, INTER_LINEAR, BORDER_REPLICATE, 16UC1)                 0.745 0.306    2.44   
map1_32fc2::TestRemap::(1920x1080, INTER_LINEAR, BORDER_REPLICATE, 32FC1)                 0.651 0.332    1.96   
map1_32fc2::TestRemap::(1920x1080, INTER_LINEAR, BORDER_REPLICATE, 8UC3)                  0.731 0.831    0.88   
map1_32fc2::TestRemap::(1920x1080, INTER_LINEAR, BORDER_REPLICATE, 16UC3)                 1.574 0.934    1.68   
map1_32fc2::TestRemap::(1920x1080, INTER_LINEAR, BORDER_REPLICATE, 32FC3)                 2.442 2.181    1.12   
map1_32fc2::TestRemap::(1920x1080, INTER_LINEAR, BORDER_REPLICATE, 8UC4)                  0.666 0.390    1.71   
map1_32fc2::TestRemap::(1920x1080, INTER_LINEAR, BORDER_REPLICATE, 16UC4)                 2.045 1.391    1.47   
map1_32fc2::TestRemap::(1920x1080, INTER_LINEAR, BORDER_REPLICATE, 32FC4)                 3.557 3.154    1.13   
```

A311D:

```
Geometric mean (ms)

                                      Name of Test                                         base  patch    patch
                                                                                                            vs
                                                                                                           base
                                                                                                        (x-factor)
WarpAffine::TestWarpAffine::(640x480, INTER_LINEAR, BORDER_CONSTANT, 8UC4)                1.335  0.936     1.43
WarpAffine::TestWarpAffine::(640x480, INTER_LINEAR, BORDER_REPLICATE, 8UC4)               1.331  0.940     1.42
WarpAffine::TestWarpAffine::(1280x720, INTER_LINEAR, BORDER_CONSTANT, 8UC4)               2.950  2.199     1.34
WarpAffine::TestWarpAffine::(1280x720, INTER_LINEAR, BORDER_REPLICATE, 8UC4)              6.011  5.177     1.16
WarpAffine::TestWarpAffine::(1920x1080, INTER_LINEAR, BORDER_CONSTANT, 8UC4)              4.415  3.533     1.25
WarpAffine::TestWarpAffine::(1920x1080, INTER_LINEAR, BORDER_REPLICATE, 8UC4)             26.619 17.665    1.51
WarpPerspective::TestWarpPerspective::(640x480, INTER_LINEAR, BORDER_CONSTANT, 8UC4)      1.465  1.119     1.31
WarpPerspective::TestWarpPerspective::(640x480, INTER_LINEAR, BORDER_REPLICATE, 8UC4)     1.776  1.416     1.25
WarpPerspective::TestWarpPerspective::(1280x720, INTER_LINEAR, BORDER_CONSTANT, 8UC4)     4.106  2.307     1.78
WarpPerspective::TestWarpPerspective::(1280x720, INTER_LINEAR, BORDER_REPLICATE, 8UC4)    12.015 7.427     1.62
WarpPerspective::TestWarpPerspective::(1920x1080, INTER_LINEAR, BORDER_CONSTANT, 8UC4)    7.196  4.044     1.78
WarpPerspective::TestWarpPerspective::(1920x1080, INTER_LINEAR, BORDER_REPLICATE, 8UC4)   32.182 29.642    1.09

map1_32fc1::TestRemap::(640x480, INTER_LINEAR, BORDER_CONSTANT, 8UC1)                     2.358  0.751     3.14
map1_32fc1::TestRemap::(640x480, INTER_LINEAR, BORDER_CONSTANT, 16UC1)                    3.342  0.847     3.94
map1_32fc1::TestRemap::(640x480, INTER_LINEAR, BORDER_CONSTANT, 32FC1)                    2.863  0.941     3.04
map1_32fc1::TestRemap::(640x480, INTER_LINEAR, BORDER_CONSTANT, 8UC3)                     4.062  1.474     2.75
map1_32fc1::TestRemap::(640x480, INTER_LINEAR, BORDER_CONSTANT, 16UC3)                    4.937  1.681     2.94
map1_32fc1::TestRemap::(640x480, INTER_LINEAR, BORDER_CONSTANT, 32FC3)                    3.796  2.152     1.76
map1_32fc1::TestRemap::(640x480, INTER_LINEAR, BORDER_CONSTANT, 8UC4)                     3.838  1.341     2.86
map1_32fc1::TestRemap::(640x480, INTER_LINEAR, BORDER_CONSTANT, 16UC4)                    5.682  2.288     2.48
map1_32fc1::TestRemap::(640x480, INTER_LINEAR, BORDER_CONSTANT, 32FC4)                    3.943  3.154     1.25
map1_32fc1::TestRemap::(640x480, INTER_LINEAR, BORDER_REPLICATE, 8UC1)                    2.346  0.754     3.11
map1_32fc1::TestRemap::(640x480, INTER_LINEAR, BORDER_REPLICATE, 16UC1)                   3.370  0.849     3.97
map1_32fc1::TestRemap::(640x480, INTER_LINEAR, BORDER_REPLICATE, 32FC1)                   2.841  0.934     3.04
map1_32fc1::TestRemap::(640x480, INTER_LINEAR, BORDER_REPLICATE, 8UC3)                    4.244  1.466     2.90
map1_32fc1::TestRemap::(640x480, INTER_LINEAR, BORDER_REPLICATE, 16UC3)                   4.882  1.680     2.91
map1_32fc1::TestRemap::(640x480, INTER_LINEAR, BORDER_REPLICATE, 32FC3)                   3.672  2.163     1.70
map1_32fc1::TestRemap::(640x480, INTER_LINEAR, BORDER_REPLICATE, 8UC4)                    3.822  1.349     2.83
map1_32fc1::TestRemap::(640x480, INTER_LINEAR, BORDER_REPLICATE, 16UC4)                   5.614  2.291     2.45
map1_32fc1::TestRemap::(640x480, INTER_LINEAR, BORDER_REPLICATE, 32FC4)                   3.987  3.174     1.26
map1_32fc1::TestRemap::(1920x1080, INTER_LINEAR, BORDER_CONSTANT, 8UC1)                   10.358 4.713     2.20
map1_32fc1::TestRemap::(1920x1080, INTER_LINEAR, BORDER_CONSTANT, 16UC1)                  14.165 4.903     2.89
map1_32fc1::TestRemap::(1920x1080, INTER_LINEAR, BORDER_CONSTANT, 32FC1)                  11.751 5.648     2.08
map1_32fc1::TestRemap::(1920x1080, INTER_LINEAR, BORDER_CONSTANT, 8UC3)                   13.912 6.793     2.05
map1_32fc1::TestRemap::(1920x1080, INTER_LINEAR, BORDER_CONSTANT, 16UC3)                  22.706 8.440     2.69
map1_32fc1::TestRemap::(1920x1080, INTER_LINEAR, BORDER_CONSTANT, 32FC3)                  16.738 13.517    1.24
map1_32fc1::TestRemap::(1920x1080, INTER_LINEAR, BORDER_CONSTANT, 8UC4)                   18.715 9.065     2.06
map1_32fc1::TestRemap::(1920x1080, INTER_LINEAR, BORDER_CONSTANT, 16UC4)                  28.190 15.483    1.82
map1_32fc1::TestRemap::(1920x1080, INTER_LINEAR, BORDER_CONSTANT, 32FC4)                  17.441 20.976    0.83
map1_32fc1::TestRemap::(1920x1080, INTER_LINEAR, BORDER_REPLICATE, 8UC1)                  10.506 4.770     2.20
map1_32fc1::TestRemap::(1920x1080, INTER_LINEAR, BORDER_REPLICATE, 16UC1)                 14.298 4.952     2.89
map1_32fc1::TestRemap::(1920x1080, INTER_LINEAR, BORDER_REPLICATE, 32FC1)                 11.534 5.669     2.03
map1_32fc1::TestRemap::(1920x1080, INTER_LINEAR, BORDER_REPLICATE, 8UC3)                  19.890 9.588     2.07
map1_32fc1::TestRemap::(1920x1080, INTER_LINEAR, BORDER_REPLICATE, 16UC3)                 23.599 11.543    2.04
map1_32fc1::TestRemap::(1920x1080, INTER_LINEAR, BORDER_REPLICATE, 32FC3)                 16.827 14.255    1.18
map1_32fc1::TestRemap::(1920x1080, INTER_LINEAR, BORDER_REPLICATE, 8UC4)                  18.878 9.185     2.06
map1_32fc1::TestRemap::(1920x1080, INTER_LINEAR, BORDER_REPLICATE, 16UC4)                 28.377 15.766    1.80
map1_32fc1::TestRemap::(1920x1080, INTER_LINEAR, BORDER_REPLICATE, 32FC4)                 17.337 21.134    0.82
map1_32fc2::TestRemap::(640x480, INTER_LINEAR, BORDER_CONSTANT, 8UC1)                     2.170  0.763     2.84
map1_32fc2::TestRemap::(640x480, INTER_LINEAR, BORDER_CONSTANT, 16UC1)                    3.035  0.959     3.17
map1_32fc2::TestRemap::(640x480, INTER_LINEAR, BORDER_CONSTANT, 32FC1)                    2.759  0.937     2.94
map1_32fc2::TestRemap::(640x480, INTER_LINEAR, BORDER_CONSTANT, 8UC3)                     4.074  1.484     2.74
map1_32fc2::TestRemap::(640x480, INTER_LINEAR, BORDER_CONSTANT, 16UC3)                    4.757  1.689     2.82
map1_32fc2::TestRemap::(640x480, INTER_LINEAR, BORDER_CONSTANT, 32FC3)                    3.766  2.165     1.74
map1_32fc2::TestRemap::(640x480, INTER_LINEAR, BORDER_CONSTANT, 8UC4)                     3.730  1.353     2.76
map1_32fc2::TestRemap::(640x480, INTER_LINEAR, BORDER_CONSTANT, 16UC4)                    5.623  2.301     2.44
map1_32fc2::TestRemap::(640x480, INTER_LINEAR, BORDER_CONSTANT, 32FC4)                    3.935  3.115     1.26
map1_32fc2::TestRemap::(640x480, INTER_LINEAR, BORDER_REPLICATE, 8UC1)                    2.236  0.761     2.94
map1_32fc2::TestRemap::(640x480, INTER_LINEAR, BORDER_REPLICATE, 16UC1)                   3.010  0.946     3.18
map1_32fc2::TestRemap::(640x480, INTER_LINEAR, BORDER_REPLICATE, 32FC1)                   2.750  0.933     2.95
map1_32fc2::TestRemap::(640x480, INTER_LINEAR, BORDER_REPLICATE, 8UC3)                    4.045  1.484     2.73
map1_32fc2::TestRemap::(640x480, INTER_LINEAR, BORDER_REPLICATE, 16UC3)                   4.785  1.694     2.83
map1_32fc2::TestRemap::(640x480, INTER_LINEAR, BORDER_REPLICATE, 32FC3)                   3.642  2.146     1.70
map1_32fc2::TestRemap::(640x480, INTER_LINEAR, BORDER_REPLICATE, 8UC4)                    3.710  1.357     2.73
map1_32fc2::TestRemap::(640x480, INTER_LINEAR, BORDER_REPLICATE, 16UC4)                   5.594  2.310     2.42
map1_32fc2::TestRemap::(640x480, INTER_LINEAR, BORDER_REPLICATE, 32FC4)                   3.845  3.120     1.23
map1_32fc2::TestRemap::(1920x1080, INTER_LINEAR, BORDER_CONSTANT, 8UC1)                   10.092 4.846     2.08
map1_32fc2::TestRemap::(1920x1080, INTER_LINEAR, BORDER_CONSTANT, 16UC1)                  14.501 5.724     2.53
map1_32fc2::TestRemap::(1920x1080, INTER_LINEAR, BORDER_CONSTANT, 32FC1)                  11.698 5.709     2.05
map1_32fc2::TestRemap::(1920x1080, INTER_LINEAR, BORDER_CONSTANT, 8UC3)                   19.480 9.290     2.10
map1_32fc2::TestRemap::(1920x1080, INTER_LINEAR, BORDER_CONSTANT, 16UC3)                  23.830 11.636    2.05
map1_32fc2::TestRemap::(1920x1080, INTER_LINEAR, BORDER_CONSTANT, 32FC3)                  16.725 13.922    1.20
map1_32fc2::TestRemap::(1920x1080, INTER_LINEAR, BORDER_CONSTANT, 8UC4)                   18.756 8.839     2.12
map1_32fc2::TestRemap::(1920x1080, INTER_LINEAR, BORDER_CONSTANT, 16UC4)                  29.698 15.668    1.90
map1_32fc2::TestRemap::(1920x1080, INTER_LINEAR, BORDER_CONSTANT, 32FC4)                  17.641 20.145    0.88
map1_32fc2::TestRemap::(1920x1080, INTER_LINEAR, BORDER_REPLICATE, 8UC1)                  10.128 4.883     2.07
map1_32fc2::TestRemap::(1920x1080, INTER_LINEAR, BORDER_REPLICATE, 16UC1)                 14.438 5.685     2.54
map1_32fc2::TestRemap::(1920x1080, INTER_LINEAR, BORDER_REPLICATE, 32FC1)                 11.440 5.674     2.02
map1_32fc2::TestRemap::(1920x1080, INTER_LINEAR, BORDER_REPLICATE, 8UC3)                  19.681 10.117    1.95
map1_32fc2::TestRemap::(1920x1080, INTER_LINEAR, BORDER_REPLICATE, 16UC3)                 23.757 11.623    2.04
map1_32fc2::TestRemap::(1920x1080, INTER_LINEAR, BORDER_REPLICATE, 32FC3)                 16.891 13.690    1.23
map1_32fc2::TestRemap::(1920x1080, INTER_LINEAR, BORDER_REPLICATE, 8UC4)                  18.887 8.756     2.16
map1_32fc2::TestRemap::(1920x1080, INTER_LINEAR, BORDER_REPLICATE, 16UC4)                 29.654 15.890    1.87
map1_32fc2::TestRemap::(1920x1080, INTER_LINEAR, BORDER_REPLICATE, 32FC4)                 17.412 20.535    0.85
```

### Pull Request Readiness Checklist

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [x] I agree to contribute to the project under Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [x] The PR is proposed to the proper branch
- [x] There is a reference to the original bug report and related work
- [x] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
      Patch to opencv_extra has the same branch name.
- [x] The feature is well documented and sample code can be built with the project CMake
This commit is contained in:
Yuantao Feng 2024-11-12 02:44:01 +08:00 committed by GitHub
parent a4ab68f9f4
commit c445a000c9
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 2327 additions and 596 deletions

View File

@ -1626,6 +1626,24 @@ inline v_int32 v_load_expand_q(const schar* ptr)
return __riscv_vwcvt_x(__riscv_vwcvt_x(__riscv_vle8_v_i8mf2(ptr, VTraits<v_int32>::vlanes()), VTraits<v_int32>::vlanes()), VTraits<v_int32>::vlanes());
}
// Partial-load form of v_load_expand_q for uchar input.
// Copies the first `n` bytes found at `ptr` into a local staging buffer that
// has first been overwritten with a zero vector, then calls the pointer-only
// v_load_expand_q on that buffer and returns its result.
// `n` defaults to the template argument N and is not range-checked here.
// NOTE(review): callers presumably keep n within the staging buffer size
// (VTraits<v_uint8>::max_nlanes) - confirm at the call sites.
template <int N = VTraits<v_uint32>::max_nlanes>
inline v_uint32 v_load_expand_q(const uchar* ptr, int n = N)
{
    uchar staging[VTraits<v_uint8>::max_nlanes];
    v_store(staging, v_setzero_u8());

    // Only the requested prefix of the source is read.
    int lane = 0;
    while (lane < n)
    {
        staging[lane] = ptr[lane];
        ++lane;
    }

    return v_load_expand_q(staging);
}
// Explicit specialization for N == 4: exactly four bytes are read from `ptr`
// with straight-line assignments instead of a counted loop. The run-time `n`
// argument is not consulted in this specialization.
template <> inline v_uint32 v_load_expand_q<4>(const uchar* ptr, int n)
{
    uchar staging[VTraits<v_uint8>::max_nlanes];
    v_store(staging, v_setzero_u8());

    staging[0] = ptr[0];
    staging[1] = ptr[1];
    staging[2] = ptr[2];
    staging[3] = ptr[3];

    return v_load_expand_q(staging);
}
#define OPENCV_HAL_IMPL_RVV_PACK(_Tpvec, _Tp, _wTpvec, hwidth, hsuffix, suffix, rshr, shr) \
inline _Tpvec v_pack(const _wTpvec& a, const _wTpvec& b) \
{ \
@ -1696,6 +1714,23 @@ void v_rshr_pack_u_store(_Tp* ptr, const _wTpvec& a, int n = N) \
OPENCV_HAL_IMPL_RVV_PACK_U(v_uint8, uchar, v_int16, short, 8, 16, u8, i16, __riscv_vreinterpret_v_i16m4_u16m4, VTraits<v_int16>::vlanes(), VTraits<v_uint8>::vlanes())
OPENCV_HAL_IMPL_RVV_PACK_U(v_uint16, ushort, v_int32, int, 16, 32, u16, i32, __riscv_vreinterpret_v_i32m4_u32m4, VTraits<v_int32>::vlanes(), VTraits<v_uint16>::vlanes())
// Partial pack-and-store for int16 -> uchar.
// Packs `a` into a local buffer with the two-argument v_pack_u_store(), then
// copies only the first `n` packed bytes to `ptr`. `n` defaults to the
// template argument N.
template <int N = VTraits<v_int16>::max_nlanes>
inline void v_pack_u_store(uchar* ptr, const v_int16& a, int n = N)
{
    uchar packed[VTraits<v_uint8>::max_nlanes];
    v_pack_u_store(packed, a);
    const uchar* from = packed;
    uchar* to = ptr;
    for (int left = n; left > 0; --left)
        *to++ = *from++;
}
// Specialization for N == 8: packs `a` into a local buffer and always writes
// exactly eight bytes to `ptr`.
// NOTE(review): `n` is not consulted here, so an explicit n != 8 is ignored.
template <> inline void v_pack_u_store<8>(uchar* ptr, const v_int16& a, int n)
{
    uchar packed[VTraits<v_uint8>::max_nlanes];
    v_pack_u_store(packed, a);
    for (int k = 0; k < 8; ++k)
        ptr[k] = packed[k];
}
/* void v_zip(const _Tpvec& a0, const _Tpvec& a1, _Tpvec& b0, _Tpvec& b1)
a0 = {A1 A2 A3 A4}

View File

@ -2474,7 +2474,8 @@ flag #WARP_INVERSE_MAP that means that M is the inverse transformation (
borderMode=#BORDER_TRANSPARENT, it means that the pixels in the destination image corresponding to
the "outliers" in the source image are not modified by the function.
@param borderValue value used in case of a constant border; by default, it is 0.
@param hint Implementation modfication flags. See #AlgorithmHint
@param hint Implementation modification flags. Set #ALGO_HINT_APPROX to use FP16 precision (if available)
in the linear calculation for faster speed. See #AlgorithmHint.
@sa warpPerspective, resize, remap, getRectSubPix, transform
*/
@ -2508,7 +2509,8 @@ optional flag #WARP_INVERSE_MAP, that sets M as the inverse transformation (
\f$\texttt{dst}\rightarrow\texttt{src}\f$ ).
@param borderMode pixel extrapolation method (#BORDER_CONSTANT or #BORDER_REPLICATE).
@param borderValue value used in case of a constant border; by default, it equals 0.
@param hint Implementation modfication flags. See #AlgorithmHint
@param hint Implementation modification flags. Set #ALGO_HINT_APPROX to use FP16 precision (if available)
in the linear calculation for faster speed. See #AlgorithmHint.
@sa warpAffine, resize, remap, getRectSubPix, perspectiveTransform
*/
@ -2554,13 +2556,16 @@ The extra flag WARP_RELATIVE_MAP can be ORed to the interpolation method
borderMode=#BORDER_TRANSPARENT, it means that the pixels in the destination image that
corresponds to the "outliers" in the source image are not modified by the function.
@param borderValue Value used in case of a constant border. By default, it is 0.
@param hint Implementation modification flags. Set #ALGO_HINT_APPROX to use FP16 precision (if available)
in the linear calculation for faster speed. See #AlgorithmHint.
@note
Due to current implementation limitations, the size of the input and output images should be less than 32767x32767.
*/
CV_EXPORTS_W void remap( InputArray src, OutputArray dst,
InputArray map1, InputArray map2,
int interpolation, int borderMode = BORDER_CONSTANT,
const Scalar& borderValue = Scalar());
const Scalar& borderValue = Scalar(),
AlgorithmHint hint = cv::ALGO_HINT_DEFAULT);
/** @brief Converts image transformation maps from one representation to another.

View File

@ -5,19 +5,16 @@
namespace opencv_test {
enum{HALF_SIZE=0, UPSIDE_DOWN, REFLECTION_X, REFLECTION_BOTH};
CV_ENUM(BorderMode, BORDER_CONSTANT, BORDER_REPLICATE)
CV_ENUM(InterType, INTER_NEAREST, INTER_LINEAR)
CV_ENUM(InterTypeExtended, INTER_NEAREST, INTER_LINEAR, WARP_RELATIVE_MAP)
CV_ENUM(RemapMode, HALF_SIZE, UPSIDE_DOWN, REFLECTION_X, REFLECTION_BOTH)
typedef TestBaseWithParam< tuple<Size, InterType, BorderMode, MatType> > TestWarpAffine;
typedef TestBaseWithParam< tuple<Size, InterType, BorderMode, MatType> > TestWarpPerspective;
typedef TestBaseWithParam< tuple<Size, InterType, BorderMode, MatType> > TestWarpPerspectiveNear_t;
typedef TestBaseWithParam< tuple<MatType, Size, InterTypeExtended, BorderMode, RemapMode> > TestRemap;
typedef TestBaseWithParam< tuple<Size, InterTypeExtended, BorderMode, MatType> > TestRemap;
void update_map(const Mat& src, Mat& map_x, Mat& map_y, const int remapMode, bool relative = false );
void update_map(const Mat& src, Mat& map_x, Mat& map_y, bool relative = false );
PERF_TEST_P( TestWarpAffine, WarpAffine,
Combine(
@ -156,21 +153,19 @@ PERF_TEST_P( TestWarpPerspectiveNear_t, WarpPerspectiveNear,
SANITY_CHECK(dst, 1);
}
PERF_TEST_P( TestRemap, remap,
PERF_TEST_P( TestRemap, map1_32fc1,
Combine(
Values( CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1 ),
Values( szVGA, sz1080p ),
InterTypeExtended::all(),
BorderMode::all(),
RemapMode::all()
Values(CV_8UC3, CV_16UC3, CV_32FC3, CV_8UC1, CV_16UC1, CV_32FC1, CV_8UC4, CV_16UC4, CV_32FC4)
)
)
{
int type = get<0>(GetParam());
Size size = get<1>(GetParam());
int interpolationType = get<2>(GetParam());
int borderMode = get<3>(GetParam());
int remapMode = get<4>(GetParam());
Size size = get<0>(GetParam());
int interpolationType = get<1>(GetParam());
int borderMode = get<2>(GetParam());
int type = get<3>(GetParam());
unsigned int height = size.height;
unsigned int width = size.width;
Mat source(height, width, type);
@ -180,7 +175,7 @@ PERF_TEST_P( TestRemap, remap,
declare.in(source, WARMUP_RNG);
update_map(source, map_x, map_y, remapMode, ((interpolationType & WARP_RELATIVE_MAP) != 0));
update_map(source, map_x, map_y, ((interpolationType & WARP_RELATIVE_MAP) != 0));
TEST_CYCLE()
{
@ -190,15 +185,68 @@ PERF_TEST_P( TestRemap, remap,
SANITY_CHECK_NOTHING();
}
void update_map(const Mat& src, Mat& map_x, Mat& map_y, const int remapMode, bool relative )
PERF_TEST_P( TestRemap, map1_32fc2,
Combine(
Values( szVGA, sz1080p ),
InterTypeExtended::all(),
BorderMode::all(),
Values(CV_8UC3, CV_16UC3, CV_32FC3, CV_8UC1, CV_16UC1, CV_32FC1, CV_8UC4, CV_16UC4, CV_32FC4)
)
)
{
for( int j = 0; j < src.rows; j++ )
Size size = get<0>(GetParam());
int interpolationType = get<1>(GetParam());
int borderMode = get<2>(GetParam());
int type = get<3>(GetParam());
unsigned int height = size.height;
unsigned int width = size.width;
Mat source(height, width, type);
Mat destination;
Mat map_x(height, width, CV_32FC2);
Mat map_y;
declare.in(source, WARMUP_RNG);
update_map(source, map_x, map_y, ((interpolationType & WARP_RELATIVE_MAP) != 0));
TEST_CYCLE()
{
for( int i = 0; i < src.cols; i++ )
remap(source, destination, map_x, map_y, interpolationType, borderMode);
}
SANITY_CHECK_NOTHING();
}
void update_map(const Mat& src, Mat& map_x, Mat& map_y, bool relative )
{
if (map_y.empty()) {
float *ptr_x = map_x.ptr<float>();
for (int j = 0; j < src.rows; j++) {
for (int i = 0; i < src.cols; i++) {
size_t offset = 2 * j * src.cols + 2 * i;
if( i > src.cols*0.25 && i < src.cols*0.75 && j > src.rows*0.25 && j < src.rows*0.75 )
{
ptr_x[offset] = 2*( i - src.cols*0.25f ) + 0.5f ;
ptr_x[offset+1] = 2*( j - src.rows*0.25f ) + 0.5f ;
}
else
{
ptr_x[offset] = 0 ;
ptr_x[offset+1] = 0 ;
}
if( relative )
{
ptr_x[offset] -= static_cast<float>(i) ;
ptr_x[offset+1] -= static_cast<float>(j) ;
}
}
}
} else {
for( int j = 0; j < src.rows; j++ )
{
switch( remapMode )
for( int i = 0; i < src.cols; i++ )
{
case HALF_SIZE:
if( i > src.cols*0.25 && i < src.cols*0.75 && j > src.rows*0.25 && j < src.rows*0.75 )
{
map_x.at<float>(j,i) = 2*( i - src.cols*0.25f ) + 0.5f ;
@ -209,25 +257,12 @@ void update_map(const Mat& src, Mat& map_x, Mat& map_y, const int remapMode, boo
map_x.at<float>(j,i) = 0 ;
map_y.at<float>(j,i) = 0 ;
}
break;
case UPSIDE_DOWN:
map_x.at<float>(j,i) = static_cast<float>(i) ;
map_y.at<float>(j,i) = static_cast<float>(src.rows - j) ;
break;
case REFLECTION_X:
map_x.at<float>(j,i) = static_cast<float>(src.cols - i) ;
map_y.at<float>(j,i) = static_cast<float>(j) ;
break;
case REFLECTION_BOTH:
map_x.at<float>(j,i) = static_cast<float>(src.cols - i) ;
map_y.at<float>(j,i) = static_cast<float>(src.rows - j) ;
break;
} // end of switch
if( relative )
{
map_x.at<float>(j,i) -= static_cast<float>(i);
map_y.at<float>(j,i) -= static_cast<float>(j);
if( relative )
{
map_x.at<float>(j,i) -= static_cast<float>(i);
map_y.at<float>(j,i) -= static_cast<float>(j);
}
}
}
}

View File

@ -1634,77 +1634,13 @@ private:
void cv::remap( InputArray _src, OutputArray _dst,
InputArray _map1, InputArray _map2,
int interpolation, int borderType, const Scalar& borderValue )
int interpolation, int borderType, const Scalar& borderValue,
AlgorithmHint hint )
{
CV_INSTRUMENT_REGION();
const bool hasRelativeFlag = ((interpolation & cv::WARP_RELATIVE_MAP) != 0);
static RemapNNFunc nn_tab[2][CV_DEPTH_MAX] =
{
{
remapNearest<uchar, false>, remapNearest<schar, false>, remapNearest<ushort, false>, remapNearest<short, false>,
remapNearest<int, false>, remapNearest<float, false>, remapNearest<double, false>, 0
},
{
remapNearest<uchar, true>, remapNearest<schar, true>, remapNearest<ushort, true>, remapNearest<short, true>,
remapNearest<int, true>, remapNearest<float, true>, remapNearest<double, true>, 0
}
};
static RemapFunc linear_tab[2][CV_DEPTH_MAX] =
{
{
remapBilinear<FixedPtCast<int, uchar, INTER_REMAP_COEF_BITS>, RemapVec_8u<false>, short, false>, 0,
remapBilinear<Cast<float, ushort>, RemapNoVec<false>, float, false>,
remapBilinear<Cast<float, short>, RemapNoVec<false>, float, false>, 0,
remapBilinear<Cast<float, float>, RemapNoVec<false>, float, false>,
remapBilinear<Cast<double, double>, RemapNoVec<false>, float, false>, 0
},
{
remapBilinear<FixedPtCast<int, uchar, INTER_REMAP_COEF_BITS>, RemapVec_8u<true>, short, true>, 0,
remapBilinear<Cast<float, ushort>, RemapNoVec<true>, float, true>,
remapBilinear<Cast<float, short>, RemapNoVec<true>, float, true>, 0,
remapBilinear<Cast<float, float>, RemapNoVec<true>, float, true>,
remapBilinear<Cast<double, double>, RemapNoVec<true>, float, true>, 0
}
};
static RemapFunc cubic_tab[2][CV_DEPTH_MAX] =
{
{
remapBicubic<FixedPtCast<int, uchar, INTER_REMAP_COEF_BITS>, short, INTER_REMAP_COEF_SCALE, false>, 0,
remapBicubic<Cast<float, ushort>, float, 1, false>,
remapBicubic<Cast<float, short>, float, 1, false>, 0,
remapBicubic<Cast<float, float>, float, 1, false>,
remapBicubic<Cast<double, double>, float, 1, false>, 0
},
{
remapBicubic<FixedPtCast<int, uchar, INTER_REMAP_COEF_BITS>, short, INTER_REMAP_COEF_SCALE, true>, 0,
remapBicubic<Cast<float, ushort>, float, 1, true>,
remapBicubic<Cast<float, short>, float, 1, true>, 0,
remapBicubic<Cast<float, float>, float, 1, true>,
remapBicubic<Cast<double, double>, float, 1, true>, 0
}
};
static RemapFunc lanczos4_tab[2][8] =
{
{
remapLanczos4<FixedPtCast<int, uchar, INTER_REMAP_COEF_BITS>, short, INTER_REMAP_COEF_SCALE, false>, 0,
remapLanczos4<Cast<float, ushort>, float, 1, false>,
remapLanczos4<Cast<float, short>, float, 1, false>, 0,
remapLanczos4<Cast<float, float>, float, 1, false>,
remapLanczos4<Cast<double, double>, float, 1, false>, 0
},
{
remapLanczos4<FixedPtCast<int, uchar, INTER_REMAP_COEF_BITS>, short, INTER_REMAP_COEF_SCALE, true>, 0,
remapLanczos4<Cast<float, ushort>, float, 1, true>,
remapLanczos4<Cast<float, short>, float, 1, true>, 0,
remapLanczos4<Cast<float, float>, float, 1, true>,
remapLanczos4<Cast<double, double>, float, 1, true>, 0
}
};
if (hint == cv::ALGO_HINT_DEFAULT)
hint = cv::getDefaultAlgorithmHint();
CV_Assert( !_map1.empty() );
CV_Assert( _map2.empty() || (_map2.size() == _map1.size()));
@ -1728,12 +1664,78 @@ void cv::remap( InputArray _src, OutputArray _dst,
map1.ptr<float>(), map1.step, map2.ptr<float>(), map2.step, interpolation, borderType, borderValue.val);
}
const bool hasRelativeFlag = ((interpolation & cv::WARP_RELATIVE_MAP) != 0);
interpolation &= ~cv::WARP_RELATIVE_MAP;
if( interpolation == INTER_AREA )
interpolation = INTER_LINEAR;
int type = src.type(), depth = CV_MAT_DEPTH(type);
if (interpolation == INTER_LINEAR) {
if (map1.depth() == CV_32F) {
const auto *src_data = src.ptr<const uint8_t>();
auto *dst_data = dst.ptr<uint8_t>();
size_t src_step = src.step, dst_step = dst.step,
map1_step = map1.step, map2_step = map2.step;
int src_rows = src.rows, src_cols = src.cols;
int dst_rows = dst.rows, dst_cols = dst.cols;
const float *map1_data = map1.ptr<const float>();
const float *map2_data = map2.ptr<const float>();
switch (src.type()) {
case CV_8UC1: {
if (hint == cv::ALGO_HINT_APPROX) {
CV_CPU_DISPATCH(remapLinearApproxInvoker_8UC1, (src_data, src_step, src_rows, src_cols, dst_data, dst_step, dst_rows, dst_cols, borderType, borderValue.val, map1_data, map1_step, map2_data, map2_step, hasRelativeFlag), CV_CPU_DISPATCH_MODES_ALL);
} else {
CV_CPU_DISPATCH(remapLinearInvoker_8UC1, (src_data, src_step, src_rows, src_cols, dst_data, dst_step, dst_rows, dst_cols, borderType, borderValue.val, map1_data, map1_step, map2_data, map2_step, hasRelativeFlag), CV_CPU_DISPATCH_MODES_ALL);
}
break;
}
case CV_8UC3: {
if (hint == cv::ALGO_HINT_APPROX) {
CV_CPU_DISPATCH(remapLinearApproxInvoker_8UC3, (src_data, src_step, src_rows, src_cols, dst_data, dst_step, dst_rows, dst_cols, borderType, borderValue.val, map1_data, map1_step, map2_data, map2_step, hasRelativeFlag), CV_CPU_DISPATCH_MODES_ALL);
} else {
CV_CPU_DISPATCH(remapLinearInvoker_8UC3, (src_data, src_step, src_rows, src_cols, dst_data, dst_step, dst_rows, dst_cols, borderType, borderValue.val, map1_data, map1_step, map2_data, map2_step, hasRelativeFlag), CV_CPU_DISPATCH_MODES_ALL);
}
break;
}
case CV_8UC4: {
if (hint == cv::ALGO_HINT_APPROX) {
CV_CPU_DISPATCH(remapLinearApproxInvoker_8UC4, (src_data, src_step, src_rows, src_cols, dst_data, dst_step, dst_rows, dst_cols, borderType, borderValue.val, map1_data, map1_step, map2_data, map2_step, hasRelativeFlag), CV_CPU_DISPATCH_MODES_ALL);
} else {
CV_CPU_DISPATCH(remapLinearInvoker_8UC4, (src_data, src_step, src_rows, src_cols, dst_data, dst_step, dst_rows, dst_cols, borderType, borderValue.val, map1_data, map1_step, map2_data, map2_step, hasRelativeFlag), CV_CPU_DISPATCH_MODES_ALL);
}
break;
}
case CV_16UC1: {
CV_CPU_DISPATCH(remapLinearInvoker_16UC1, ((uint16_t*)src_data, src_step, src_rows, src_cols, (uint16_t*)dst_data, dst_step, dst_rows, dst_cols, borderType, borderValue.val, map1_data, map1_step, map2_data, map2_step, hasRelativeFlag), CV_CPU_DISPATCH_MODES_ALL);
break;
}
case CV_16UC3: {
CV_CPU_DISPATCH(remapLinearInvoker_16UC3, ((uint16_t*)src_data, src_step, src_rows, src_cols, (uint16_t*)dst_data, dst_step, dst_rows, dst_cols, borderType, borderValue.val, map1_data, map1_step, map2_data, map2_step, hasRelativeFlag), CV_CPU_DISPATCH_MODES_ALL);
break;
}
case CV_16UC4: {
CV_CPU_DISPATCH(remapLinearInvoker_16UC4, ((uint16_t*)src_data, src_step, src_rows, src_cols, (uint16_t*)dst_data, dst_step, dst_rows, dst_cols, borderType, borderValue.val, map1_data, map1_step, map2_data, map2_step, hasRelativeFlag), CV_CPU_DISPATCH_MODES_ALL);
break;
}
case CV_32FC1: {
CV_CPU_DISPATCH(remapLinearInvoker_32FC1, ((float*)src_data, src_step, src_rows, src_cols, (float*)dst_data, dst_step, dst_rows, dst_cols, borderType, borderValue.val, map1_data, map1_step, map2_data, map2_step, hasRelativeFlag), CV_CPU_DISPATCH_MODES_ALL);
break;
}
case CV_32FC3: {
CV_CPU_DISPATCH(remapLinearInvoker_32FC3, ((float*)src_data, src_step, src_rows, src_cols, (float*)dst_data, dst_step, dst_rows, dst_cols, borderType, borderValue.val, map1_data, map1_step, map2_data, map2_step, hasRelativeFlag), CV_CPU_DISPATCH_MODES_ALL);
break;
}
case CV_32FC4: {
CV_CPU_DISPATCH(remapLinearInvoker_32FC4, ((float*)src_data, src_step, src_rows, src_cols, (float*)dst_data, dst_step, dst_rows, dst_cols, borderType, borderValue.val, map1_data, map1_step, map2_data, map2_step, hasRelativeFlag), CV_CPU_DISPATCH_MODES_ALL);
break;
}
// no default
}
}
}
#if defined HAVE_IPP && !IPP_DISABLE_REMAP
CV_IPP_CHECK()
{
@ -1781,6 +1783,72 @@ void cv::remap( InputArray _src, OutputArray _dst,
bool fixpt = depth == CV_8U;
bool planar_input = false;
static RemapNNFunc nn_tab[2][CV_DEPTH_MAX] =
{
{
remapNearest<uchar, false>, remapNearest<schar, false>, remapNearest<ushort, false>, remapNearest<short, false>,
remapNearest<int, false>, remapNearest<float, false>, remapNearest<double, false>, 0
},
{
remapNearest<uchar, true>, remapNearest<schar, true>, remapNearest<ushort, true>, remapNearest<short, true>,
remapNearest<int, true>, remapNearest<float, true>, remapNearest<double, true>, 0
}
};
static RemapFunc linear_tab[2][CV_DEPTH_MAX] =
{
{
remapBilinear<FixedPtCast<int, uchar, INTER_REMAP_COEF_BITS>, RemapVec_8u<false>, short, false>, 0,
remapBilinear<Cast<float, ushort>, RemapNoVec<false>, float, false>,
remapBilinear<Cast<float, short>, RemapNoVec<false>, float, false>, 0,
remapBilinear<Cast<float, float>, RemapNoVec<false>, float, false>,
remapBilinear<Cast<double, double>, RemapNoVec<false>, float, false>, 0
},
{
remapBilinear<FixedPtCast<int, uchar, INTER_REMAP_COEF_BITS>, RemapVec_8u<true>, short, true>, 0,
remapBilinear<Cast<float, ushort>, RemapNoVec<true>, float, true>,
remapBilinear<Cast<float, short>, RemapNoVec<true>, float, true>, 0,
remapBilinear<Cast<float, float>, RemapNoVec<true>, float, true>,
remapBilinear<Cast<double, double>, RemapNoVec<true>, float, true>, 0
}
};
static RemapFunc cubic_tab[2][CV_DEPTH_MAX] =
{
{
remapBicubic<FixedPtCast<int, uchar, INTER_REMAP_COEF_BITS>, short, INTER_REMAP_COEF_SCALE, false>, 0,
remapBicubic<Cast<float, ushort>, float, 1, false>,
remapBicubic<Cast<float, short>, float, 1, false>, 0,
remapBicubic<Cast<float, float>, float, 1, false>,
remapBicubic<Cast<double, double>, float, 1, false>, 0
},
{
remapBicubic<FixedPtCast<int, uchar, INTER_REMAP_COEF_BITS>, short, INTER_REMAP_COEF_SCALE, true>, 0,
remapBicubic<Cast<float, ushort>, float, 1, true>,
remapBicubic<Cast<float, short>, float, 1, true>, 0,
remapBicubic<Cast<float, float>, float, 1, true>,
remapBicubic<Cast<double, double>, float, 1, true>, 0
}
};
static RemapFunc lanczos4_tab[2][8] =
{
{
remapLanczos4<FixedPtCast<int, uchar, INTER_REMAP_COEF_BITS>, short, INTER_REMAP_COEF_SCALE, false>, 0,
remapLanczos4<Cast<float, ushort>, float, 1, false>,
remapLanczos4<Cast<float, short>, float, 1, false>, 0,
remapLanczos4<Cast<float, float>, float, 1, false>,
remapLanczos4<Cast<double, double>, float, 1, false>, 0
},
{
remapLanczos4<FixedPtCast<int, uchar, INTER_REMAP_COEF_BITS>, short, INTER_REMAP_COEF_SCALE, true>, 0,
remapLanczos4<Cast<float, ushort>, float, 1, true>,
remapLanczos4<Cast<float, short>, float, 1, true>, 0,
remapLanczos4<Cast<float, float>, float, 1, true>,
remapLanczos4<Cast<double, double>, float, 1, true>, 0
}
};
const int relativeOptionIndex = (hasRelativeFlag ? 1 : 0);
if( interpolation == INTER_NEAREST )
{

View File

@ -334,14 +334,6 @@ __kernel void remap_16SC2_16UC1(__global const uchar * srcptr, int src_step, int
#elif defined INTER_LINEAR
__constant float coeffs[64] =
{ 1.000000f, 0.000000f, 0.968750f, 0.031250f, 0.937500f, 0.062500f, 0.906250f, 0.093750f, 0.875000f, 0.125000f, 0.843750f, 0.156250f,
0.812500f, 0.187500f, 0.781250f, 0.218750f, 0.750000f, 0.250000f, 0.718750f, 0.281250f, 0.687500f, 0.312500f, 0.656250f, 0.343750f,
0.625000f, 0.375000f, 0.593750f, 0.406250f, 0.562500f, 0.437500f, 0.531250f, 0.468750f, 0.500000f, 0.500000f, 0.468750f, 0.531250f,
0.437500f, 0.562500f, 0.406250f, 0.593750f, 0.375000f, 0.625000f, 0.343750f, 0.656250f, 0.312500f, 0.687500f, 0.281250f, 0.718750f,
0.250000f, 0.750000f, 0.218750f, 0.781250f, 0.187500f, 0.812500f, 0.156250f, 0.843750f, 0.125000f, 0.875000f, 0.093750f, 0.906250f,
0.062500f, 0.937500f, 0.031250f, 0.968750f };
__kernel void remap_16SC2_16UC1(__global const uchar * srcptr, int src_step, int src_offset, int src_rows, int src_cols,
__global uchar * dstptr, int dst_step, int dst_offset, int dst_rows, int dst_cols,
__global const uchar * map1ptr, int map1_step, int map1_offset,
@ -422,109 +414,62 @@ __kernel void remap_2_32FC1(__global const uchar * srcptr, int src_step, int src
if (x < dst_cols)
{
WT scalar = CONVERT_TO_WT(convertScalar(nVal));
int dst_index = mad24(y, dst_step, mad24(x, TSIZE, dst_offset));
int map1_index = mad24(y, map1_step, mad24(x, (int)sizeof(float), map1_offset));
int map2_index = mad24(y, map2_step, mad24(x, (int)sizeof(float), map2_offset));
#pragma unroll
for (int i = 0; i < ROWS_PER_WI; ++i, ++y,
map1_index += map1_step, map2_index += map2_step, dst_index += dst_step)
if (y < dst_rows)
{
__global const float * map1 = (__global const float *)(map1ptr + map1_index);
__global const float * map2 = (__global const float *)(map2ptr + map2_index);
__global T * dst = (__global T *)(dstptr + dst_index);
for (int dy = y, dy1 = min(dst_rows, y + ROWS_PER_WI); dy < dy1; ++dy, map1_index += map1_step, map2_index += map2_step)
{
__global const float * map1 = (__global const float *)(map1ptr + map1_index);
__global const float * map2 = (__global const float *)(map2ptr + map2_index);
#if defined BORDER_CONSTANT
float xf = map1[0], yf = map2[0];
int sx = (convert_int_sat_rtz(mad(xf, (float)INTER_TAB_SIZE, 0.5f)) >> INTER_BITS);
int sy = (convert_int_sat_rtz(mad(yf, (float)INTER_TAB_SIZE, 0.5f)) >> INTER_BITS);
#if WARP_RELATIVE
sx += x;
sy += y;
#endif
float X0 = map1[0];
float Y0 = map2[0];
#if WARP_RELATIVE
X0 += x;
Y0 += dy;
#endif
__constant float * coeffs_x = coeffs + ((convert_int_rte(xf * INTER_TAB_SIZE) & (INTER_TAB_SIZE - 1)) << 1);
__constant float * coeffs_y = coeffs + ((convert_int_rte(yf * INTER_TAB_SIZE) & (INTER_TAB_SIZE - 1)) << 1);
int sx = convert_int_rtn(X0);
int sy = convert_int_rtn(Y0);
WT sum = (WT)(0), xsum;
int src_index = mad24(sy, src_step, mad24(sx, TSIZE, src_offset));
float ax = X0 - (float) sx;
float ay = Y0 - (float) sy;
#pragma unroll
for (int yp = 0; yp < 2; ++yp, src_index += src_step)
{
if (sy + yp >= 0 && sy + yp < src_rows)
{
xsum = (WT)(0);
if (sx >= 0 && sx + 2 < src_cols)
{
#if SRC_DEPTH == 0 && CN == 1
uchar2 value = vload2(0, srcptr + src_index);
xsum = dot(convert_float2(value), (float2)(coeffs_x[0], coeffs_x[1]));
#else
#pragma unroll
for (int xp = 0; xp < 2; ++xp)
xsum = fma(CONVERT_TO_WT(loadpix(srcptr + mad24(xp, TSIZE, src_index))), coeffs_x[xp], xsum);
#endif
}
else
{
#pragma unroll
for (int xp = 0; xp < 2; ++xp)
xsum = fma(sx + xp >= 0 && sx + xp < src_cols ?
CONVERT_TO_WT(loadpix(srcptr + mad24(xp, TSIZE, src_index))) : scalar, coeffs_x[xp], xsum);
}
sum = fma(xsum, coeffs_y[yp], sum);
}
else
sum = fma(scalar, coeffs_y[yp], sum);
}
int2 map_data0 = (int2)(sx, sy);
int2 map_data1 = (int2)(sx+1, sy);
int2 map_data2 = (int2)(sx, sy+1);
int2 map_data3 = (int2)(sx+1, sy+1);
storepix(CONVERT_TO_T(sum), dst);
#else
float2 map_data = (float2)(map1[0], map2[0]);
#if WARP_RELATIVE
map_data.x += x;
map_data.y += y;
#endif
int2 map_dataA = convert_int2_sat_rtn(map_data);
int2 map_dataB = (int2)(map_dataA.x + 1, map_dataA.y);
int2 map_dataC = (int2)(map_dataA.x, map_dataA.y + 1);
int2 map_dataD = (int2)(map_dataA.x + 1, map_dataA.y + 1);
float2 _u = map_data - convert_float2(map_dataA);
WT2 u = CONVERT_TO_WT2(convert_int2_rte(CONVERT_TO_WT2(_u) * (WT2)INTER_TAB_SIZE)) / (WT2)INTER_TAB_SIZE;
WT scalar = CONVERT_TO_WT(convertScalar(nVal));
WT a = scalar, b = scalar, c = scalar, d = scalar;
if (!NEED_EXTRAPOLATION(map_dataA.x, map_dataA.y))
a = CONVERT_TO_WT(loadpix((__global const T *)(srcptr + mad24(map_dataA.y, src_step, map_dataA.x * TSIZE + src_offset))));
else
EXTRAPOLATE(map_dataA, a);
if (!NEED_EXTRAPOLATION(map_dataB.x, map_dataB.y))
b = CONVERT_TO_WT(loadpix((__global const T *)(srcptr + mad24(map_dataB.y, src_step, map_dataB.x * TSIZE + src_offset))));
else
EXTRAPOLATE(map_dataB, b);
if (!NEED_EXTRAPOLATION(map_dataC.x, map_dataC.y))
c = CONVERT_TO_WT(loadpix((__global const T *)(srcptr + mad24(map_dataC.y, src_step, map_dataC.x * TSIZE + src_offset))));
else
EXTRAPOLATE(map_dataC, c);
if (!NEED_EXTRAPOLATION(map_dataD.x, map_dataD.y))
d = CONVERT_TO_WT(loadpix((__global const T *)(srcptr + mad24(map_dataD.y, src_step, map_dataD.x * TSIZE + src_offset))));
else
EXTRAPOLATE(map_dataD, d);
WT dst_data = a * (1 - u.x) * (1 - u.y) +
b * (u.x) * (1 - u.y) +
c * (1 - u.x) * (u.y) +
d * (u.x) * (u.y);
storepix(CONVERT_TO_T(dst_data), dst);
#endif
WT v0 = scalar, v1 = scalar, v2 = scalar, v3 = scalar;
if (sx >= 0 && sx < src_cols && sy >= 0 && sy < src_rows) {
v0 = CONVERT_TO_WT(loadpix((__global const T *)(srcptr + mad24(sy, src_step, mad24(sx, TSIZE, src_offset)))));
} else {
EXTRAPOLATE(map_data0, v0);
}
if (sx >= 0 && sx < src_cols && sy+1 >= 0 && sy+1 < src_rows) {
v2 = CONVERT_TO_WT(loadpix((__global const T *)(srcptr + mad24(sy+1, src_step, mad24(sx, TSIZE, src_offset)))));
} else {
EXTRAPOLATE(map_data2, v2);
}
if (sx+1 >= 0 && sx+1 < src_cols && sy >= 0 && sy < src_rows) {
v1 = CONVERT_TO_WT(loadpix((__global const T *)(srcptr + mad24(sy, src_step, mad24(sx+1, TSIZE, src_offset)))));
} else {
EXTRAPOLATE(map_data1, v1);
}
if (sx+1 >= 0 && sx+1 < src_cols && sy+1 >= 0 && sy+1 < src_rows) {
v3 = CONVERT_TO_WT(loadpix((__global const T *)(srcptr + mad24(sy+1, src_step, mad24(sx+1, TSIZE, src_offset)))));
} else {
EXTRAPOLATE(map_data3, v3);
}
int dst_index = mad24(dy, dst_step, mad24(x, TSIZE, dst_offset));
v0 = fma(v1 - v0, ax, v0);
v2 = fma(v3 - v2, ax, v2);
v0 = fma(v2 - v0, ay, v0);
storepix(CONVERT_TO_T(v0), dstptr + dst_index);
}
}
}
@ -539,57 +484,61 @@ __kernel void remap_32FC2(__global const uchar * srcptr, int src_step, int src_o
if (x < dst_cols)
{
WT scalar = CONVERT_TO_WT(convertScalar(nVal));
int dst_index = mad24(y, dst_step, mad24(x, TSIZE, dst_offset));
int map_index = mad24(y, map_step, mad24(x, (int)sizeof(float2), map_offset));
#pragma unroll
for (int i = 0; i < ROWS_PER_WI; ++i, ++y,
map_index += map_step, dst_index += dst_step)
if (y < dst_rows)
{
__global const float2 * map = (__global const float2 *)(mapptr + map_index);
__global T * dst = (__global T *)(dstptr + dst_index);
for (int dy = y, dy1 = min(dst_rows, y + ROWS_PER_WI); dy < dy1; ++dy, map_index += map_step)
{
__global const float2 * map = (__global const float2 *)(mapptr + map_index);
float2 map_data = map[0];
float2 map_data = map[0];
#if WARP_RELATIVE
map_data.x += x;
map_data.y += y;
#endif
int2 map_dataA = convert_int2_sat_rtn(map_data);
int2 map_dataB = (int2)(map_dataA.x + 1, map_dataA.y);
int2 map_dataC = (int2)(map_dataA.x, map_dataA.y + 1);
int2 map_dataD = (int2)(map_dataA.x + 1, map_dataA.y + 1);
float X0 = map_data.x;
float Y0 = map_data.y;
#if WARP_RELATIVE
X0 += x;
Y0 += dy;
#endif
float2 _u = map_data - convert_float2(map_dataA);
WT2 u = CONVERT_TO_WT2(convert_int2_rte(CONVERT_TO_WT2(_u) * (WT2)INTER_TAB_SIZE)) / (WT2)INTER_TAB_SIZE;
WT a = scalar, b = scalar, c = scalar, d = scalar;
int sx = convert_int_rtn(X0);
int sy = convert_int_rtn(Y0);
if (!NEED_EXTRAPOLATION(map_dataA.x, map_dataA.y))
a = CONVERT_TO_WT(loadpix((__global const T *)(srcptr + mad24(map_dataA.y, src_step, map_dataA.x * TSIZE + src_offset))));
else
EXTRAPOLATE(map_dataA, a);
float ax = X0 - (float) sx;
float ay = Y0 - (float) sy;
if (!NEED_EXTRAPOLATION(map_dataB.x, map_dataB.y))
b = CONVERT_TO_WT(loadpix((__global const T *)(srcptr + mad24(map_dataB.y, src_step, map_dataB.x * TSIZE + src_offset))));
else
EXTRAPOLATE(map_dataB, b);
int2 map_data0 = (int2)(sx, sy);
int2 map_data1 = (int2)(sx+1, sy);
int2 map_data2 = (int2)(sx, sy+1);
int2 map_data3 = (int2)(sx+1, sy+1);
if (!NEED_EXTRAPOLATION(map_dataC.x, map_dataC.y))
c = CONVERT_TO_WT(loadpix((__global const T *)(srcptr + mad24(map_dataC.y, src_step, map_dataC.x * TSIZE + src_offset))));
else
EXTRAPOLATE(map_dataC, c);
if (!NEED_EXTRAPOLATION(map_dataD.x, map_dataD.y))
d = CONVERT_TO_WT(loadpix((__global const T *)(srcptr + mad24(map_dataD.y, src_step, map_dataD.x * TSIZE + src_offset))));
else
EXTRAPOLATE(map_dataD, d);
WT dst_data = a * (1 - u.x) * (1 - u.y) +
b * (u.x) * (1 - u.y) +
c * (1 - u.x) * (u.y) +
d * (u.x) * (u.y);
storepix(CONVERT_TO_T(dst_data), dst);
WT v0 = scalar, v1 = scalar, v2 = scalar, v3 = scalar;
if (sx >= 0 && sx < src_cols && sy >= 0 && sy < src_rows) {
v0 = CONVERT_TO_WT(loadpix((__global const T *)(srcptr + mad24(sy, src_step, mad24(sx, TSIZE, src_offset)))));
} else {
EXTRAPOLATE(map_data0, v0);
}
if (sx >= 0 && sx < src_cols && sy+1 >= 0 && sy+1 < src_rows) {
v2 = CONVERT_TO_WT(loadpix((__global const T *)(srcptr + mad24(sy+1, src_step, mad24(sx, TSIZE, src_offset)))));
} else {
EXTRAPOLATE(map_data2, v2);
}
if (sx+1 >= 0 && sx+1 < src_cols && sy >= 0 && sy < src_rows) {
v1 = CONVERT_TO_WT(loadpix((__global const T *)(srcptr + mad24(sy, src_step, mad24(sx+1, TSIZE, src_offset)))));
} else {
EXTRAPOLATE(map_data1, v1);
}
if (sx+1 >= 0 && sx+1 < src_cols && sy+1 >= 0 && sy+1 < src_rows) {
v3 = CONVERT_TO_WT(loadpix((__global const T *)(srcptr + mad24(sy+1, src_step, mad24(sx+1, TSIZE, src_offset)))));
} else {
EXTRAPOLATE(map_data3, v3);
}
int dst_index = mad24(dy, dst_step, mad24(x, TSIZE, dst_offset));
v0 = fma(v1 - v0, ax, v0);
v2 = fma(v3 - v2, ax, v2);
v0 = fma(v2 - v0, ay, v0);
storepix(CONVERT_TO_T(v0), dstptr + dst_index);
}
}
}

View File

@ -3,6 +3,29 @@
// of this distribution and at http://opencv.org/license.html.
// Shuffle
// Declares the four neighbour variables p00/p01/p10/p11 for one channel;
// `cn` is pasted on as the channel suffix and `dtype_reg` is their type.
#define CV_WARP_LINEAR_SCALAR_SHUFFLE_DEF(cn, dtype_reg) \
dtype_reg p00##cn, p01##cn, p10##cn, p11##cn;
// Single-channel variant: declares the `g` neighbour set and `srcptr`, which
// points at element (iy, ix) of `src`. Expects `src`, `srcstep`, `ix` and `iy`
// to be visible at the expansion site.
#define CV_WARP_LINEAR_SCALAR_SHUFFLE_DEF_C1(dtype_reg, dtype_ptr) \
CV_WARP_LINEAR_SCALAR_SHUFFLE_DEF(g, dtype_reg) \
const dtype_ptr *srcptr = src + srcstep * iy + ix;
// Three-channel variant: declares the r/g/b neighbour sets; `srcptr` uses a
// column offset of ix*3.
#define CV_WARP_LINEAR_SCALAR_SHUFFLE_DEF_C3(dtype_reg, dtype_ptr) \
CV_WARP_LINEAR_SCALAR_SHUFFLE_DEF(r, dtype_reg) \
CV_WARP_LINEAR_SCALAR_SHUFFLE_DEF(g, dtype_reg) \
CV_WARP_LINEAR_SCALAR_SHUFFLE_DEF(b, dtype_reg) \
const dtype_ptr *srcptr = src + srcstep * iy + ix*3;
// Four-channel variant: declares the r/g/b/a neighbour sets; `srcptr` uses a
// column offset of ix*4.
#define CV_WARP_LINEAR_SCALAR_SHUFFLE_DEF_C4(dtype_reg, dtype_ptr) \
CV_WARP_LINEAR_SCALAR_SHUFFLE_DEF(r, dtype_reg) \
CV_WARP_LINEAR_SCALAR_SHUFFLE_DEF(g, dtype_reg) \
CV_WARP_LINEAR_SCALAR_SHUFFLE_DEF(b, dtype_reg) \
CV_WARP_LINEAR_SCALAR_SHUFFLE_DEF(a, dtype_reg) \
const dtype_ptr *srcptr = src + srcstep * iy + ix*4;
// Depth selectors: `CN` is one of C1/C3/C4 and picks the variant above.
// 8U: int neighbour variables, uint8_t source pointer.
#define CV_WARP_LINEAR_SCALAR_SHUFFLE_DEF_8U(CN) \
CV_WARP_LINEAR_SCALAR_SHUFFLE_DEF_##CN(int, uint8_t)
// 16U: int neighbour variables, uint16_t source pointer.
#define CV_WARP_LINEAR_SCALAR_SHUFFLE_DEF_16U(CN) \
CV_WARP_LINEAR_SCALAR_SHUFFLE_DEF_##CN(int, uint16_t)
// 32F: float neighbour variables, float source pointer.
#define CV_WARP_LINEAR_SCALAR_SHUFFLE_DEF_32F(CN) \
CV_WARP_LINEAR_SCALAR_SHUFFLE_DEF_##CN(float, float)
// Loads the 2x2 neighbourhood of one channel from `srcptr`.
// `CN` is the variable suffix pasted onto p00/p01/p10/p11, `cn` is the element
// distance to the next pixel in the row, and `i` is the channel offset.
// The second row is read at +srcstep.
#define CV_WARP_LINEAR_SCALAR_SHUFFLE_LOAD(CN, cn, i) \
p00##CN = srcptr[i]; p01##CN = srcptr[i + cn]; \
p10##CN = srcptr[srcstep + i]; p11##CN = srcptr[srcstep + cn + i];
@ -93,7 +116,10 @@
pxy##a = src[glob_ofs+3]; \
}
#define CV_WARP_LINEAR_SCALAR_SHUFFLE(CN) \
#define CV_WARP_LINEAR_SCALAR_SHUFFLE(CN, DEPTH) \
int ix = cvFloor(sx), iy = cvFloor(sy); \
sx -= ix; sy -= iy; \
CV_WARP_LINEAR_SCALAR_SHUFFLE_DEF_##DEPTH(CN); \
if ((((unsigned)ix < (unsigned)(srccols-1)) & \
((unsigned)iy < (unsigned)(srcrows-1))) != 0) { \
CV_WARP_LINEAR_SCALAR_SHUFFLE_LOAD_##CN() \

View File

@ -555,3 +555,83 @@
vst4_u8(dstptr + x*4, result);
#define CV_WARP_LINEAR_VECTOR_INTER_STORE_F16U8(CN) \
CV_WARP_LINEAR_VECTOR_INTER_STORE_F16U8_##CN()
// Special case for C4 load, shuffle and bilinear interpolation
// Bilinear interpolation of one 8UC4 pixel, selected by `ofs` relative to `i`.
// Loads four groups with v_load_expand_q from src + addr[i+ofs] at byte
// offsets 0, +4, +srcstep and +srcstep+4, converts them to float, and blends:
//   pix0 += alpha * (pix1 - pix0)
//   pix2 += alpha * (pix3 - pix2)
//   pix0 += beta  * (pix2 - pix0)
// with alpha = valpha[i+ofs] and beta = vbeta[i+ofs] broadcast to all lanes.
// The result is left in i<ofs>_pix0. Expects `src`, `addr`, `srcstep`,
// `valpha`, `vbeta` and `i` to be visible at the expansion site.
#define CV_WARP_SIMD128_LOAD_SHUFFLE_INTER_8UC4_I(ofs) \
const uint8_t *srcptr##ofs = src + addr[i+ofs]; \
v_float32 i##ofs##_pix0 = v_cvt_f32(v_reinterpret_as_s32(v_load_expand_q(srcptr##ofs))); \
v_float32 i##ofs##_pix1 = v_cvt_f32(v_reinterpret_as_s32(v_load_expand_q(srcptr##ofs+4))); \
v_float32 i##ofs##_pix2 = v_cvt_f32(v_reinterpret_as_s32(v_load_expand_q(srcptr##ofs+srcstep))); \
v_float32 i##ofs##_pix3 = v_cvt_f32(v_reinterpret_as_s32(v_load_expand_q(srcptr##ofs+srcstep+4))); \
v_float32 i##ofs##_alpha = vx_setall_f32(valpha[i+ofs]), \
i##ofs##_beta = vx_setall_f32(vbeta[i+ofs]); \
i##ofs##_pix0 = v_fma(i##ofs##_alpha, v_sub(i##ofs##_pix1, i##ofs##_pix0), i##ofs##_pix0); \
i##ofs##_pix2 = v_fma(i##ofs##_alpha, v_sub(i##ofs##_pix3, i##ofs##_pix2), i##ofs##_pix2); \
i##ofs##_pix0 = v_fma(i##ofs##_beta, v_sub(i##ofs##_pix2, i##ofs##_pix0), i##ofs##_pix0);
// Processes `uf` pixels in steps of `vlanes_32`. Each iteration interpolates
// the four pixels i+0..i+3, rounds and packs them pairwise with v_pack_u, and
// stores the pairs at dstptr + 4*(x+i) and dstptr + 4*(x+i+2).
// NOTE(review): the body always handles four pixels per iteration, so this
// presumably relies on vlanes_32 == 4 - confirm at the call site.
#define CV_WARP_SIMD128_LOAD_SHUFFLE_INTER_8UC4() \
for (int i = 0; i < uf; i+=vlanes_32) { \
CV_WARP_SIMD128_LOAD_SHUFFLE_INTER_8UC4_I(0); \
CV_WARP_SIMD128_LOAD_SHUFFLE_INTER_8UC4_I(1); \
CV_WARP_SIMD128_LOAD_SHUFFLE_INTER_8UC4_I(2); \
CV_WARP_SIMD128_LOAD_SHUFFLE_INTER_8UC4_I(3); \
auto i01_pix = v_pack_u(v_round(i0_pix0), v_round(i1_pix0)), \
i23_pix = v_pack_u(v_round(i2_pix0), v_round(i3_pix0)); \
v_pack_store(dstptr + 4*(x+i), i01_pix); \
v_pack_store(dstptr + 4*(x+i+2), i23_pix); \
}
// Pair helper of the 256-bit C4 path: interpolates pixel slots `ofs0` and `ofs1`
// together.
//   1. For each slot, one v256_load_expand_q at srcptr and one at
//      srcptr+srcstep fetch the two rows (named *_pix01 and *_pix23), which are
//      then converted to float.
//   2. v_recombine regroups the two slots' registers into
//      i<ofs0><ofs1>_fpix00 / _fpix11 (first row) and _fpix22 / _fpix33
//      (second row).
//   3. The per-slot alpha/beta broadcasts are merged with v_combine_low so each
//      half of the register carries its own slot's weight.
//   4. Three v_fma steps blend: fpix00 += alpha*(fpix11 - fpix00),
//      fpix22 += alpha*(fpix33 - fpix22), fpix00 += beta*(fpix22 - fpix00).
// The result for both slots is left in i<ofs0><ofs1>_fpix00. Expects src, addr,
// srcstep, valpha, vbeta and i to be visible at the expansion site.
#define CV_WARP_SIMD256_LOAD_SHUFFLE_INTER_8UC4_I(ofs0, ofs1) \
const uint8_t *srcptr##ofs0 = src + addr[i+ofs0]; \
const uint8_t *srcptr##ofs1 = src + addr[i+ofs1]; \
v_int32 i##ofs0##_pix01 = v_reinterpret_as_s32(v256_load_expand_q(srcptr##ofs0)), \
i##ofs0##_pix23 = v_reinterpret_as_s32(v256_load_expand_q(srcptr##ofs0+srcstep)); \
v_int32 i##ofs1##_pix01 = v_reinterpret_as_s32(v256_load_expand_q(srcptr##ofs1)), \
i##ofs1##_pix23 = v_reinterpret_as_s32(v256_load_expand_q(srcptr##ofs1+srcstep)); \
v_float32 i##ofs0##_fpix01 = v_cvt_f32(i##ofs0##_pix01), i##ofs0##_fpix23 = v_cvt_f32(i##ofs0##_pix23); \
v_float32 i##ofs1##_fpix01 = v_cvt_f32(i##ofs1##_pix01), i##ofs1##_fpix23 = v_cvt_f32(i##ofs1##_pix23); \
v_float32 i##ofs0##ofs1##_fpix00, i##ofs0##ofs1##_fpix11, \
i##ofs0##ofs1##_fpix22, i##ofs0##ofs1##_fpix33; \
v_recombine(i##ofs0##_fpix01, i##ofs1##_fpix01, i##ofs0##ofs1##_fpix00, i##ofs0##ofs1##_fpix11); \
v_recombine(i##ofs0##_fpix23, i##ofs1##_fpix23, i##ofs0##ofs1##_fpix22, i##ofs0##ofs1##_fpix33); \
v_float32 i##ofs0##_alpha = vx_setall_f32(valpha[i+ofs0]), \
i##ofs1##_alpha = vx_setall_f32(valpha[i+ofs1]), \
i##ofs0##_beta = vx_setall_f32(vbeta[i+ofs0]), \
i##ofs1##_beta = vx_setall_f32(vbeta[i+ofs1]); \
v_float32 i##ofs0##ofs1##_alpha = v_combine_low(i##ofs0##_alpha, i##ofs1##_alpha), \
i##ofs0##ofs1##_beta = v_combine_low(i##ofs0##_beta, i##ofs1##_beta); \
i##ofs0##ofs1##_fpix00 = v_fma(i##ofs0##ofs1##_alpha, v_sub(i##ofs0##ofs1##_fpix11, i##ofs0##ofs1##_fpix00), i##ofs0##ofs1##_fpix00); \
i##ofs0##ofs1##_fpix22 = v_fma(i##ofs0##ofs1##_alpha, v_sub(i##ofs0##ofs1##_fpix33, i##ofs0##ofs1##_fpix22), i##ofs0##ofs1##_fpix22); \
i##ofs0##ofs1##_fpix00 = v_fma(i##ofs0##ofs1##_beta, v_sub(i##ofs0##ofs1##_fpix22, i##ofs0##ofs1##_fpix00), i##ofs0##ofs1##_fpix00);
// Driver of the 256-bit C4 path: per iteration (i = 0 .. uf, step vlanes_32) it
// interpolates slot pairs (0,1), (2,3), (4,5) and (6,7) with the pair helper.
// After each group of two pairs the float results are rounded, packed with
// v_pack_u and written with v_pack_store - first to dstptr + 4*(x+i), then to
// dstptr + 4*(x+i+4).
// NOTE(review): eight slots are handled per iteration, so this appears to assume
// vlanes_32 == 8 - confirm against the code that selects this macro.
// Expects uf, vlanes_32, x and dstptr (plus the helper's inputs) to be in scope.
#define CV_WARP_SIMD256_LOAD_SHUFFLE_INTER_8UC4() \
for (int i = 0; i < uf; i+=vlanes_32) { \
CV_WARP_SIMD256_LOAD_SHUFFLE_INTER_8UC4_I(0, 1); \
CV_WARP_SIMD256_LOAD_SHUFFLE_INTER_8UC4_I(2, 3); \
auto i01_pix = v_round(i01_fpix00), i23_pix = v_round(i23_fpix00); \
v_pack_store(dstptr + 4*(x+i), v_pack_u(i01_pix, i23_pix)); \
CV_WARP_SIMD256_LOAD_SHUFFLE_INTER_8UC4_I(4, 5); \
CV_WARP_SIMD256_LOAD_SHUFFLE_INTER_8UC4_I(6, 7); \
auto i45_pix = v_round(i45_fpix00), i67_pix = v_round(i67_fpix00); \
v_pack_store(dstptr + 4*(x+i+4), v_pack_u(i45_pix, i67_pix)); \
}
// Per-pixel helper of the "SIMDX" C4 path. Same data flow as the 128-bit helper
// but written with the length-templated load v_load_expand_q<4>:
//   - srcptr<ofs> = src + addr[i+ofs];
//   - four loads at byte offsets 0, +4, +srcstep, +srcstep+4, converted to float
//     (fpix0..fpix3);
//   - blend with the broadcast weights valpha[i+ofs] / vbeta[i+ofs]:
//       fpix0 += alpha*(fpix1 - fpix0); fpix2 += alpha*(fpix3 - fpix2);
//       fpix0 += beta*(fpix2 - fpix0).
// The result is left in i<ofs>_fpix0. Expects src, addr, srcstep, valpha, vbeta
// and i to be visible at the expansion site.
// NOTE(review): presumably intended for variable-length (scalable) vector
// back-ends - confirm against the code that selects this macro.
#define CV_WARP_SIMDX_LOAD_SHUFFLE_INTER_8UC4_I(ofs) \
const uint8_t *srcptr##ofs = src + addr[i+ofs]; \
v_float32 i##ofs##_fpix0 = v_cvt_f32(v_reinterpret_as_s32(v_load_expand_q<4>(srcptr##ofs))), \
i##ofs##_fpix1 = v_cvt_f32(v_reinterpret_as_s32(v_load_expand_q<4>(srcptr##ofs+4))), \
i##ofs##_fpix2 = v_cvt_f32(v_reinterpret_as_s32(v_load_expand_q<4>(srcptr##ofs+srcstep))), \
i##ofs##_fpix3 = v_cvt_f32(v_reinterpret_as_s32(v_load_expand_q<4>(srcptr##ofs+srcstep+4))); \
v_float32 i##ofs##_alpha = vx_setall_f32(valpha[i+ofs]), \
i##ofs##_beta = vx_setall_f32(vbeta[i+ofs]); \
i##ofs##_fpix0 = v_fma(i##ofs##_alpha, v_sub(i##ofs##_fpix1, i##ofs##_fpix0), i##ofs##_fpix0); \
i##ofs##_fpix2 = v_fma(i##ofs##_alpha, v_sub(i##ofs##_fpix3, i##ofs##_fpix2), i##ofs##_fpix2); \
i##ofs##_fpix0 = v_fma(i##ofs##_beta, v_sub(i##ofs##_fpix2, i##ofs##_fpix0), i##ofs##_fpix0);
// Driver of the "SIMDX" C4 path: steps through the unrolled block four pixels at
// a time (i += 4, independent of vlanes_32), interpolates slots 0..3 with the
// _I helper, rounds, packs slots (0,1) and (2,3) with v_pack, and writes each
// pair with v_pack_u_store<8> to dstptr + 4*(x+i) and dstptr + 4*(x+i+2).
// Expects uf, x and dstptr (plus the helper's inputs) to be in scope.
#define CV_WARP_SIMDX_LOAD_SHUFFLE_INTER_8UC4() \
for (int i = 0; i < uf; i+=4) { \
CV_WARP_SIMDX_LOAD_SHUFFLE_INTER_8UC4_I(0); \
CV_WARP_SIMDX_LOAD_SHUFFLE_INTER_8UC4_I(1); \
CV_WARP_SIMDX_LOAD_SHUFFLE_INTER_8UC4_I(2); \
CV_WARP_SIMDX_LOAD_SHUFFLE_INTER_8UC4_I(3); \
auto i01_pix = v_pack(v_round(i0_fpix0), v_round(i1_fpix0)), \
i23_pix = v_pack(v_round(i2_fpix0), v_round(i3_fpix0)); \
v_pack_u_store<8>(dstptr + 4*(x+i), i01_pix); \
v_pack_u_store<8>(dstptr + 4*(x+i+2), i23_pix); \
}

File diff suppressed because it is too large Load Diff

View File

@ -703,6 +703,16 @@ protected:
virtual void run_func();
virtual void run_reference_func();
// Scalar bilinear reference samplers for 1-, 3- and 4-channel pixels.
// Each call produces destination pixel `x` of the row starting at `dstptr` by
// sampling the source image (base pointer `srcptr_`, `srccols` x `srcrows`,
// row stride `srcstep` in elements of T) at the floating-point position
// (sx, sy). `bval` supplies the constant border value; `borderType_x` /
// `borderType_y` are the per-axis border modes passed to borderInterpolate().
template<typename T>
void new_linear_c1(int x, float sx, float sy, const T *srcptr_, T *dstptr, int srccols, int srcrows, size_t srcstep,
const T *bval, int borderType_x, int borderType_y);
template<typename T>
void new_linear_c3(int x, float sx, float sy, const T *srcptr_, T *dstptr, int srccols, int srcrows, size_t srcstep,
const T *bval, int borderType_x, int borderType_y);
template<typename T>
void new_linear_c4(int x, float sx, float sy, const T *srcptr_, T *dstptr, int srccols, int srcrows, size_t srcstep,
const T *bval, int borderType_x, int borderType_y);
Mat mapx, mapy;
int borderType;
Scalar borderValue;
@ -710,6 +720,7 @@ protected:
remap_func funcs[2];
private:
// Bilinear reference remap for element type T: reads the first Mat, writes the second.
template <typename T> void new_remap(const Mat&, Mat&);
void remap_nearest(const Mat&, Mat&);
void remap_generic(const Mat&, Mat&);
@ -865,15 +876,189 @@ void CV_Remap_Test::prepare_test_data_for_reference_func()
// Produces reference_dst for the current test case.
// INTER_AREA is treated as INTER_LINEAR. Linear interpolation with 32F maps and
// an 8U / 16U / 32F source of 1, 3 or 4 channels goes through new_remap<T>();
// everything else is dispatched through the funcs[] table.
void CV_Remap_Test::run_reference_func()
{
    prepare_test_data_for_reference_func();

    if (interpolation == INTER_AREA)
        interpolation = INTER_LINEAR;

    if (interpolation == INTER_LINEAR && mapx.depth() == CV_32F) {
        const int src_depth = src.depth();
        const int src_channels = src.channels();
        Mat tmp = Mat::zeros(dst.size(), dst.type());

        // new_remap only supports these channel counts.
        bool handled = (src_channels == 1 || src_channels == 3 || src_channels == 4);
        if (handled) {
            switch (src_depth) {
            case CV_8U:
                new_remap<uint8_t>(src, tmp);
                break;
            case CV_16U:
                new_remap<uint16_t>(src, tmp);
                break;
            case CV_32F:
                new_remap<float>(src, tmp);
                break;
            default:
                handled = false;
                break;
            }
        }

        if (handled) {
            tmp.convertTo(reference_dst, reference_dst.depth());
            return;
        }
    }

    // NOTE(review): prepare_test_data_for_reference_func() has already been
    // called at the top of this function - confirm that running it twice on the
    // fallback path is intended.
    prepare_test_data_for_reference_func();

    const int index = (interpolation == INTER_NEAREST) ? 0 : 1;
    (this->*funcs[index])(src, reference_dst);
}
// Fetches the tap at (ix + dx, iy + dy) of a cn-channel pixel into
// pxy[(2*dy + dx)*cn .. (2*dy + dx)*cn + cn - 1]:
//   - tap inside the source image: read relative to srcptr (the pointer to
//     pixel (ix, iy)) at offset dy*srcstep + dx*cn;
//   - BORDER_CONSTANT: copy bval;
//   - BORDER_TRANSPARENT: copy the current destination pixel dstptr[x*cn ..];
//   - any other border mode: map the coordinates with borderInterpolate()
//     (borderType_x for columns, borderType_y for rows) and read from the image
//     base pointer srcptr_.
// The unsigned comparisons reject negative coordinates as well as too-large ones.
// Expands to a bare if/else chain (not a single statement) and relies on ix, iy,
// x, srccols, srcrows, srcstep, srcptr, srcptr_, dstptr, bval, pxy, borderType,
// borderType_x and borderType_y being visible at the expansion site.
#define FETCH_PIXEL_SCALAR(cn, dy, dx) \
if ((((unsigned)(ix + dx) < (unsigned)srccols) & ((unsigned)(iy + dy) < (unsigned)srcrows)) != 0) { \
size_t ofs = dy*srcstep + dx*cn; \
for (int ci = 0; ci < cn; ci++) { pxy[2*dy*cn+dx*cn+ci] = srcptr[ofs+ci];} \
} else if (borderType == BORDER_CONSTANT) { \
for (int ci = 0; ci < cn; ci++) { pxy[2*dy*cn+dx*cn+ci] = bval[ci];} \
} else if (borderType == BORDER_TRANSPARENT) { \
for (int ci = 0; ci < cn; ci++) { pxy[2*dy*cn+dx*cn+ci] = dstptr[x*cn+ci];} \
} else { \
int ix_ = borderInterpolate(ix + dx, srccols, borderType_x); \
int iy_ = borderInterpolate(iy + dy, srcrows, borderType_y); \
size_t glob_ofs = iy_*srcstep + ix_*cn; \
for (int ci = 0; ci < cn; ci++) { pxy[2*dy*cn+dx*cn+ci] = srcptr_[glob_ofs+ci];} \
}
// Gathers the 2x2 neighbourhood needed for bilinear interpolation into pxy,
// laid out as four groups of cn values: p00, p01, p10, p11.
//   - Fast path: 0 <= ix < srccols-1 and 0 <= iy < srcrows-1 (unsigned compare),
//     so all four taps are inside the image and are read directly via srcptr.
//   - Fully outside: with BORDER_CONSTANT or BORDER_TRANSPARENT, when both tap
//     columns or both tap rows lie outside the image (ix < -1, ix >= srccols,
//     iy < -1 or iy >= srcrows), no interpolation is done. BORDER_CONSTANT
//     writes bval to dstptr[x*cn ..]; BORDER_TRANSPARENT leaves the destination
//     untouched. In both cases the macro executes `return`, leaving the
//     ENCLOSING function - it may therefore only be used in a void function.
//   - Otherwise each tap is fetched individually with FETCH_PIXEL_SCALAR.
// Relies on the same names at the expansion site as FETCH_PIXEL_SCALAR.
#define WARPAFFINE_SHUFFLE(cn) \
if ((((unsigned)ix < (unsigned)(srccols-1)) & \
((unsigned)iy < (unsigned)(srcrows-1))) != 0) { \
for (int ci = 0; ci < cn; ci++) { \
pxy[ci] = srcptr[ci]; \
pxy[ci+cn] = srcptr[ci+cn]; \
pxy[ci+cn*2] = srcptr[srcstep+ci]; \
pxy[ci+cn*3] = srcptr[srcstep+ci+cn]; \
} \
} else { \
if ((borderType == BORDER_CONSTANT || borderType == BORDER_TRANSPARENT) && \
(((unsigned)(ix+1) >= (unsigned)(srccols+1))| \
((unsigned)(iy+1) >= (unsigned)(srcrows+1))) != 0) { \
if (borderType == BORDER_CONSTANT) { \
for (int ci = 0; ci < cn; ci++) { dstptr[x*cn+ci] = bval[ci]; } \
} \
return; \
} \
FETCH_PIXEL_SCALAR(cn, 0, 0); \
FETCH_PIXEL_SCALAR(cn, 0, 1); \
FETCH_PIXEL_SCALAR(cn, 1, 0); \
FETCH_PIXEL_SCALAR(cn, 1, 1); \
}
// Bilinear blend of a gathered 2x2 neighbourhood.
//   cn   - number of channels
//   pxy  - 4*cn taps laid out as [p00 | p01 | p10 | p11], cn values each
//   dst  - receives cn interpolated values, converted with saturate_cast<T>
//   sx   - horizontal weight (applied between p00/p01 and between p10/p11)
//   sy   - vertical weight (applied between the two horizontal results)
template<typename T>
static inline void warpaffine_linear_calc(int cn, const T *pxy, T *dst, float sx, float sy)
{
    const T *top_left = pxy;
    const T *top_right = pxy + cn;
    const T *bottom_left = pxy + cn*2;
    const T *bottom_right = pxy + cn*3;

    for (int c = 0; c < cn; ++c) {
        const float tl = top_left[c];
        const float tr = top_right[c];
        const float bl = bottom_left[c];
        const float br = bottom_right[c];

        const float top = tl + sx*(tr - tl);
        const float bottom = bl + sx*(br - bl);
        const float blended = top + sy*(bottom - top);

        dst[c] = saturate_cast<T>(blended);
    }
}
// Single-channel bilinear reference sample.
// Writes destination pixel `x` of the row at `dstptr`, sampled from the source
// image (base `srcptr_`, srccols x srcrows, row stride `srcstep` in elements)
// at position (sx, sy). WARPAFFINE_SHUFFLE may return early when the sample
// falls entirely outside the image for constant/transparent borders.
template<typename T>
void CV_Remap_Test::new_linear_c1(int x, float sx, float sy, const T *srcptr_, T *dstptr,
                                  int srccols, int srcrows, size_t srcstep,
                                  const T *bval, int borderType_x, int borderType_y)
{
    constexpr int kChannels = 1;

    // Split the coordinate into the integer cell and the fractional weights.
    int ix = static_cast<int>(floorf(sx));
    int iy = static_cast<int>(floorf(sy));
    sx -= ix;
    sy -= iy;

    T pxy[4 * kChannels];
    const T *srcptr = srcptr_ + srcstep*iy + ix*kChannels;

    WARPAFFINE_SHUFFLE(kChannels);
    warpaffine_linear_calc(kChannels, pxy, dstptr + x*kChannels, sx, sy);
}
// Three-channel bilinear reference sample.
// Writes destination pixel `x` of the row at `dstptr`, sampled from the source
// image (base `srcptr_`, srccols x srcrows, row stride `srcstep` in elements)
// at position (sx, sy). WARPAFFINE_SHUFFLE may return early when the sample
// falls entirely outside the image for constant/transparent borders.
template<typename T>
void CV_Remap_Test::new_linear_c3(int x, float sx, float sy, const T *srcptr_, T *dstptr,
                                  int srccols, int srcrows, size_t srcstep,
                                  const T *bval, int borderType_x, int borderType_y)
{
    constexpr int kChannels = 3;

    // Split the coordinate into the integer cell and the fractional weights.
    int ix = static_cast<int>(floorf(sx));
    int iy = static_cast<int>(floorf(sy));
    sx -= ix;
    sy -= iy;

    T pxy[4 * kChannels];
    const T *srcptr = srcptr_ + srcstep*iy + ix*kChannels;

    WARPAFFINE_SHUFFLE(kChannels);
    warpaffine_linear_calc(kChannels, pxy, dstptr + x*kChannels, sx, sy);
}
// Four-channel bilinear reference sample.
// Writes destination pixel `x` of the row at `dstptr`, sampled from the source
// image (base `srcptr_`, srccols x srcrows, row stride `srcstep` in elements)
// at position (sx, sy). WARPAFFINE_SHUFFLE may return early when the sample
// falls entirely outside the image for constant/transparent borders.
template<typename T>
void CV_Remap_Test::new_linear_c4(int x, float sx, float sy, const T *srcptr_, T *dstptr,
                                  int srccols, int srcrows, size_t srcstep,
                                  const T *bval, int borderType_x, int borderType_y)
{
    constexpr int kChannels = 4;

    // Split the coordinate into the integer cell and the fractional weights.
    int ix = static_cast<int>(floorf(sx));
    int iy = static_cast<int>(floorf(sy));
    sx -= ix;
    sy -= iy;

    T pxy[4 * kChannels];
    const T *srcptr = srcptr_ + srcstep*iy + ix*kChannels;

    WARPAFFINE_SHUFFLE(kChannels);
    warpaffine_linear_calc(kChannels, pxy, dstptr + x*kChannels, sx, sy);
}
template <typename T>
void CV_Remap_Test::new_remap(const Mat &_src, Mat &_dst) {
int src_channels = _src.channels();
CV_CheckTrue(_src.channels() == 1 || _src.channels() == 3 || _src.channels() == 4, "");
CV_CheckTrue(mapx.depth() == CV_32F, "");
CV_CheckTrue(mapx.channels() == 1 || mapx.channels() == 2, "");
auto *srcptr_ = _src.ptr<const T>();
auto *dstptr_ = _dst.ptr<T>();
size_t srcstep = _src.step/sizeof(T), dststep = _dst.step/sizeof(T);
int srccols = _src.cols, srcrows = _src.rows;
int dstcols = _dst.cols, dstrows = _dst.rows;
T bval[] = {
saturate_cast<T>(borderValue[0]),
saturate_cast<T>(borderValue[1]),
saturate_cast<T>(borderValue[2]),
saturate_cast<T>(borderValue[3]),
};
int borderType_x = borderType != BORDER_CONSTANT &&
borderType != BORDER_TRANSPARENT &&
srccols <= 1 ? BORDER_REPLICATE : borderType;
int borderType_y = borderType != BORDER_CONSTANT &&
borderType != BORDER_TRANSPARENT &&
srcrows <= 1 ? BORDER_REPLICATE : borderType;
const float *mapx_data = mapx.ptr<const float>(),
*mapy_data = mapy.ptr<const float>();
int mapx_channels = mapx.channels();
for (int y = 0; y < dstrows; y++) {
T* dstptr = dstptr_ + y*dststep;
for (int x = 0; x < dstcols; x++) {
float sx, sy;
size_t offset = y * dstcols + x;
if (mapx_channels == 1) {
sx = mapx_data[offset];
sy = mapy_data[offset];
} else { // mapx_channels == 2
sx = mapx_data[2*offset];
sy = mapx_data[2*offset+1];
}
if (src_channels == 3) {
new_linear_c3(x, sx, sy, srcptr_, dstptr, srccols, srcrows, srcstep, bval, borderType_x, borderType_y);
} else if (src_channels == 4) {
new_linear_c4(x, sx, sy, srcptr_, dstptr, srccols, srcrows, srcstep, bval, borderType_x, borderType_y);
} else {
new_linear_c1(x, sx, sy, srcptr_, dstptr, srccols, srcrows, srcstep, bval, borderType_x, borderType_y);
}
}
}
}
void CV_Remap_Test::remap_nearest(const Mat& _src, Mat& _dst)
{
CV_Assert(_src.depth() == CV_32F && _dst.type() == _src.type());
@ -1042,10 +1227,6 @@ protected:
virtual void run_func();
virtual void run_reference_func();
// Scalar bilinear reference sampler parameterised on the channel count.
// Produces destination pixel `x` of the row at `dstptr` from the source image
// (base `srcptr_`, srccols x srcrows, row stride `srcstep`) at position (sx, sy);
// `bval` is the constant border value, `borderType_x` / `borderType_y` the
// per-axis border modes.
template<int channels, typename T>
void newLinear(int x, float sx, float sy, const T *srcptr_, T *dstptr, int srccols, int srcrows, size_t srcstep,
const T *bval, int borderType_x, int borderType_y);
Mat M;
private:
void warpAffine(const Mat&, Mat&);
@ -1105,105 +1286,6 @@ void CV_WarpAffine_Test::run_reference_func()
tmp.convertTo(reference_dst, reference_dst.depth());
}
// Fetches the tap at (ix + dx, iy + dy) of a cn-channel pixel into
// pxy[(2*dy + dx)*cn .. (2*dy + dx)*cn + cn - 1]:
//   - tap inside the source image: read relative to srcptr (the pointer to
//     pixel (ix, iy)) at offset dy*srcstep + dx*cn;
//   - BORDER_CONSTANT: copy bval;
//   - BORDER_TRANSPARENT: copy the current destination pixel dstptr[x*cn ..];
//   - any other border mode: map the coordinates with borderInterpolate()
//     (borderType_x for columns, borderType_y for rows) and read from the image
//     base pointer srcptr_.
// The unsigned comparisons reject negative coordinates as well as too-large ones.
// Expands to a bare if/else chain (not a single statement) and relies on ix, iy,
// x, srccols, srcrows, srcstep, srcptr, srcptr_, dstptr, bval, pxy, borderType,
// borderType_x and borderType_y being visible at the expansion site.
#define FETCH_PIXEL_SCALAR(cn, dy, dx) \
if ((((unsigned)(ix + dx) < (unsigned)srccols) & ((unsigned)(iy + dy) < (unsigned)srcrows)) != 0) { \
size_t ofs = dy*srcstep + dx*cn; \
for (int ci = 0; ci < cn; ci++) { pxy[2*dy*cn+dx*cn+ci] = srcptr[ofs+ci];} \
} else if (borderType == BORDER_CONSTANT) { \
for (int ci = 0; ci < cn; ci++) { pxy[2*dy*cn+dx*cn+ci] = bval[ci];} \
} else if (borderType == BORDER_TRANSPARENT) { \
for (int ci = 0; ci < cn; ci++) { pxy[2*dy*cn+dx*cn+ci] = dstptr[x*cn+ci];} \
} else { \
int ix_ = borderInterpolate(ix + dx, srccols, borderType_x); \
int iy_ = borderInterpolate(iy + dy, srcrows, borderType_y); \
size_t glob_ofs = iy_*srcstep + ix_*cn; \
for (int ci = 0; ci < cn; ci++) { pxy[2*dy*cn+dx*cn+ci] = srcptr_[glob_ofs+ci];} \
}
// Gathers the 2x2 neighbourhood needed for bilinear interpolation into pxy,
// laid out as four groups of cn values: p00, p01, p10, p11.
//   - Fast path: 0 <= ix < srccols-1 and 0 <= iy < srcrows-1 (unsigned compare),
//     so all four taps are inside the image and are read directly via srcptr.
//   - Fully outside: with BORDER_CONSTANT or BORDER_TRANSPARENT, when both tap
//     columns or both tap rows lie outside the image (ix < -1, ix >= srccols,
//     iy < -1 or iy >= srcrows), no interpolation is done. BORDER_CONSTANT
//     writes bval to dstptr[x*cn ..]; BORDER_TRANSPARENT leaves the destination
//     untouched. In both cases the macro executes `return`, leaving the
//     ENCLOSING function - it may therefore only be used in a void function.
//   - Otherwise each tap is fetched individually with FETCH_PIXEL_SCALAR.
// Relies on the same names at the expansion site as FETCH_PIXEL_SCALAR.
#define WARPAFFINE_SHUFFLE(cn) \
if ((((unsigned)ix < (unsigned)(srccols-1)) & \
((unsigned)iy < (unsigned)(srcrows-1))) != 0) { \
for (int ci = 0; ci < cn; ci++) { \
pxy[ci] = srcptr[ci]; \
pxy[ci+cn] = srcptr[ci+cn]; \
pxy[ci+cn*2] = srcptr[srcstep+ci]; \
pxy[ci+cn*3] = srcptr[srcstep+ci+cn]; \
} \
} else { \
if ((borderType == BORDER_CONSTANT || borderType == BORDER_TRANSPARENT) && \
(((unsigned)(ix+1) >= (unsigned)(srccols+1))| \
((unsigned)(iy+1) >= (unsigned)(srcrows+1))) != 0) { \
if (borderType == BORDER_CONSTANT) { \
for (int ci = 0; ci < cn; ci++) { dstptr[x*cn+ci] = bval[ci]; } \
} \
return; \
} \
FETCH_PIXEL_SCALAR(cn, 0, 0); \
FETCH_PIXEL_SCALAR(cn, 0, 1); \
FETCH_PIXEL_SCALAR(cn, 1, 0); \
FETCH_PIXEL_SCALAR(cn, 1, 1); \
}
// Bilinear blend of a gathered 2x2 neighbourhood.
//   cn   - number of channels
//   pxy  - 4*cn taps laid out as [p00 | p01 | p10 | p11], cn values each
//   dst  - receives cn interpolated values, converted with saturate_cast<T>
//   sx   - horizontal weight (applied between p00/p01 and between p10/p11)
//   sy   - vertical weight (applied between the two horizontal results)
template<typename T>
static inline void warpaffine_linear_calc(int cn, const T *pxy, T *dst, float sx, float sy)
{
    const T *top_left = pxy;
    const T *top_right = pxy + cn;
    const T *bottom_left = pxy + cn*2;
    const T *bottom_right = pxy + cn*3;

    for (int c = 0; c < cn; ++c) {
        const float tl = top_left[c];
        const float tr = top_right[c];
        const float bl = bottom_left[c];
        const float br = bottom_right[c];

        const float top = tl + sx*(tr - tl);
        const float bottom = bl + sx*(br - bl);
        const float blended = top + sy*(bottom - top);

        dst[c] = saturate_cast<T>(blended);
    }
}
// float specialisation of the bilinear blend: identical arithmetic, but the
// result is stored directly without a saturating conversion.
//   pxy - 4*cn taps laid out as [p00 | p01 | p10 | p11], cn values each
//   dst - receives cn interpolated values
template<>
inline void warpaffine_linear_calc<float>(int cn, const float *pxy, float *dst, float sx, float sy)
{
    const float *top_left = pxy;
    const float *top_right = pxy + cn;
    const float *bottom_left = pxy + cn*2;
    const float *bottom_right = pxy + cn*3;

    for (int c = 0; c < cn; ++c) {
        const float tl = top_left[c];
        const float tr = top_right[c];
        const float bl = bottom_left[c];
        const float br = bottom_right[c];

        const float top = tl + sx*(tr - tl);
        const float bottom = bl + sx*(br - bl);

        dst[c] = top + sy*(bottom - top);
    }
}
// Bilinear reference sample for a `channels`-channel pixel.
// Writes destination pixel `x` of the row at `dstptr`, sampled from the source
// image (base `srcptr_`, srccols x srcrows, row stride `srcstep` in elements)
// at position (sx, sy). WARPAFFINE_SHUFFLE may return early when the sample
// falls entirely outside the image for constant/transparent borders.
template<int channels, typename T>
void CV_WarpAffine_Test::newLinear(int x, float sx, float sy, const T *srcptr_, T *dstptr,
                                   int srccols, int srcrows, size_t srcstep,
                                   const T *bval, int borderType_x, int borderType_y)
{
    // Split the coordinate into the integer cell and the fractional weights.
    int ix = static_cast<int>(floorf(sx));
    int iy = static_cast<int>(floorf(sy));
    sx -= ix;
    sy -= iy;

    T pxy[4 * channels];
    const T *srcptr = srcptr_ + srcstep*iy + ix*channels;

    WARPAFFINE_SHUFFLE(channels);
    warpaffine_linear_calc(channels, pxy, dstptr + x*channels, sx, sy);
}
// Explicit specialisation of newLinear for three-channel float images; performs
// the same steps as the generic version with the channel count fixed to 3.
template<>
void CV_WarpAffine_Test::newLinear<3, float>(int x, float sx, float sy, const float *srcptr_, float *dstptr,
                                             int srccols, int srcrows, size_t srcstep,
                                             const float *bval, int borderType_x, int borderType_y)
{
    constexpr int kChannels = 3;

    // Split the coordinate into the integer cell and the fractional weights.
    int ix = static_cast<int>(floorf(sx));
    int iy = static_cast<int>(floorf(sy));
    sx -= ix;
    sy -= iy;

    float pxy[4 * kChannels];
    const float *srcptr = srcptr_ + srcstep*iy + ix*kChannels;

    WARPAFFINE_SHUFFLE(kChannels);
    warpaffine_linear_calc(kChannels, pxy, dstptr + x*kChannels, sx, sy);
}
template<typename T>
void CV_WarpAffine_Test::newWarpAffine(const Mat &_src, Mat &_dst, const Mat &tM)
{
@ -1241,11 +1323,11 @@ void CV_WarpAffine_Test::newWarpAffine(const Mat &_src, Mat &_dst, const Mat &tM
float sy = x*_M[3] + y*_M[4] + _M[5];
if (num_channels == 3) {
newLinear<3>(x, sx, sy, srcptr_, dstptr, srccols, srcrows, srcstep, bval, borderType_x, borderType_y);
new_linear_c3(x, sx, sy, srcptr_, dstptr, srccols, srcrows, srcstep, bval, borderType_x, borderType_y);
} else if (num_channels == 4) {
newLinear<4>(x, sx, sy, srcptr_, dstptr, srccols, srcrows, srcstep, bval, borderType_x, borderType_y);
new_linear_c4(x, sx, sy, srcptr_, dstptr, srccols, srcrows, srcstep, bval, borderType_x, borderType_y);
} else {
newLinear<1>(x, sx, sy, srcptr_, dstptr, srccols, srcrows, srcstep, bval, borderType_x, borderType_y);
new_linear_c1(x, sx, sy, srcptr_, dstptr, srccols, srcrows, srcstep, bval, borderType_x, borderType_y);
}
}
}
@ -1372,8 +1454,7 @@ void CV_WarpPerspective_Test::generate_test_data()
// Runs the function under test: warps `src` into `dst` with the perspective
// matrix M using the test's interpolation and border settings.
// The transform is applied exactly once; a preceding call whose output was
// immediately overwritten by this one has been dropped.
void CV_WarpPerspective_Test::run_func()
{
    cv::warpPerspective(src, dst, M, dst.size(), interpolation, borderType, borderValue);
}
float CV_WarpPerspective_Test::get_success_error_level(int _interpolation, int _depth) const
@ -1426,11 +1507,11 @@ void CV_WarpPerspective_Test::newWarpPerspective(const Mat &_src, Mat &_dst, con
float sy = (x*_M[3] + y*_M[4] + _M[5]) / w;
if (num_channels == 3) {
newLinear<3>(x, sx, sy, srcptr_, dstptr, srccols, srcrows, srcstep, bval, borderType_x, borderType_y);
new_linear_c3(x, sx, sy, srcptr_, dstptr, srccols, srcrows, srcstep, bval, borderType_x, borderType_y);
} else if (num_channels == 4) {
newLinear<4>(x, sx, sy, srcptr_, dstptr, srccols, srcrows, srcstep, bval, borderType_x, borderType_y);
new_linear_c4(x, sx, sy, srcptr_, dstptr, srccols, srcrows, srcstep, bval, borderType_x, borderType_y);
} else {
newLinear<1>(x, sx, sy, srcptr_, dstptr, srccols, srcrows, srcstep, bval, borderType_x, borderType_y);
new_linear_c1(x, sx, sy, srcptr_, dstptr, srccols, srcrows, srcstep, bval, borderType_x, borderType_y);
}
}
}

View File

@ -97,7 +97,7 @@ OCL_TEST_F(SphericalWarperTest, Mat)
OCL_OFF(warper->warp(src, K, R, INTER_LINEAR, BORDER_REPLICATE, dst));
OCL_ON(warper->warp(usrc, K, R, INTER_LINEAR, BORDER_REPLICATE, udst));
Near(1e-4);
Near(9.31e-4);
}
}
@ -118,7 +118,7 @@ OCL_TEST_F(CylindricalWarperTest, Mat)
OCL_OFF(warper->warp(src, K, R, INTER_LINEAR, BORDER_REPLICATE, dst));
OCL_ON(warper->warp(usrc, K, R, INTER_LINEAR, BORDER_REPLICATE, udst));
Near(1e-4);
Near(6.5e-4);
}
}
@ -139,7 +139,7 @@ OCL_TEST_F(PlaneWarperTest, Mat)
OCL_OFF(warper->warp(src, K, R, INTER_LINEAR, BORDER_REPLICATE, dst));
OCL_ON(warper->warp(usrc, K, R, INTER_LINEAR, BORDER_REPLICATE, udst));
Near(1.5e-4);
Near(6.6e-4);
}
}
@ -160,7 +160,7 @@ OCL_TEST_F(AffineWarperTest, Mat)
OCL_OFF(warper->warp(src, K, R, INTER_LINEAR, BORDER_REPLICATE, dst));
OCL_ON(warper->warp(usrc, K, R, INTER_LINEAR, BORDER_REPLICATE, udst));
Near(1.5e-4);
Near(1.3e-3);
}
}