diff --git a/CMakeLists.txt b/CMakeLists.txt index 80e1e085ad..9a56a15281 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -252,8 +252,8 @@ OCV_OPTION(WITH_CUBLAS "Include NVidia Cuda Basic Linear Algebra Subprograms (BL OCV_OPTION(WITH_NVCUVID "Include NVidia Video Decoding library support" WITH_CUDA VISIBLE_IF WITH_CUDA VERIFY HAVE_NVCUVID) -OCV_OPTION(WITH_EIGEN "Include Eigen2/Eigen3 support" (NOT CV_DISABLE_OPTIMIZATION) - VISIBLE_IF NOT WINRT AND NOT CMAKE_CROSSCOMPILING +OCV_OPTION(WITH_EIGEN "Include Eigen2/Eigen3 support" (NOT CV_DISABLE_OPTIMIZATION AND NOT CMAKE_CROSSCOMPILING) + VISIBLE_IF NOT WINRT VERIFY HAVE_EIGEN) OCV_OPTION(WITH_FFMPEG "Include FFMPEG support" (NOT ANDROID) VISIBLE_IF NOT IOS AND NOT WINRT diff --git a/apps/traincascade/haarfeatures.cpp b/apps/traincascade/haarfeatures.cpp index f2d18229e8..c151ee7963 100644 --- a/apps/traincascade/haarfeatures.cpp +++ b/apps/traincascade/haarfeatures.cpp @@ -153,14 +153,14 @@ void CvHaarEvaluator::generateFeatures() { features.push_back( Feature( offset, false, x, y, dx*3, dy, -1, - x+dx, y, dx , dy, +3 ) ); + x+dx, y, dx , dy, +2 ) ); } // haar_y3 if ( (x+dx <= winSize.width) && (y+dy*3 <= winSize.height) ) { features.push_back( Feature( offset, false, x, y, dx, dy*3, -1, - x, y+dy, dx, dy, +3 ) ); + x, y+dy, dx, dy, +2 ) ); } if( mode != CvHaarFeatureParams::BASIC ) { diff --git a/cmake/OpenCVFindLAPACK.cmake b/cmake/OpenCVFindLAPACK.cmake index 684818027e..342bebc723 100644 --- a/cmake/OpenCVFindLAPACK.cmake +++ b/cmake/OpenCVFindLAPACK.cmake @@ -31,27 +31,33 @@ macro(ocv_lapack_check) else() # adding proxy opencv_lapack.h header set(CBLAS_H_PROXY_PATH ${CMAKE_BINARY_DIR}/opencv_lapack.h) - if((APPLE OR OPENCV_SKIP_LAPACK_EXTERN_C) AND NOT OPENCV_FORCE_LAPACK_EXTERN_C) - set(_lapack_include_str_extern_C "") - set(_lapack_include_str_extern_C_end "") - else() - set(_lapack_include_str_extern_C "extern \"C\" {\n") - set(_lapack_include_str_extern_C_end "}\n") + + set(_lapack_add_extern_c NOT 
(APPLE OR OPENCV_SKIP_LAPACK_EXTERN_C) OR OPENCV_FORCE_LAPACK_EXTERN_C) + + set(_lapack_content "// This file is auto-generated\n") + if(${_lapack_add_extern_c}) + list(APPEND _lapack_content "extern \"C\" {") endif() - set(_lapack_include_str "${_lapack_include_str_extern_C}\#include \"${OPENCV_CBLAS_H_PATH_${_lapack_impl}}\"") + if(NOT OPENCV_SKIP_LAPACK_MSVC_FIX) + list(APPEND _lapack_content " +#ifdef _MSC_VER +#include <complex.h> +#define lapack_complex_float _Fcomplex +#define lapack_complex_double _Dcomplex +#endif +") + endif() + list(APPEND _lapack_content "#include \"${OPENCV_CBLAS_H_PATH_${_lapack_impl}}\"") if(NOT "${OPENCV_CBLAS_H_PATH_${_lapack_impl}}" STREQUAL "${OPENCV_LAPACKE_H_PATH_${_lapack_impl}}") - set(_lapack_include_str "${_lapack_include_str}\n#include \"${OPENCV_LAPACKE_H_PATH_${_lapack_impl}}\"") + list(APPEND _lapack_content "#include \"${OPENCV_LAPACKE_H_PATH_${_lapack_impl}}\"") endif() - set(_lapack_include_str "${_lapack_include_str}\n${_lapack_include_str_extern_C_end}") - # update file contents (if required) - set(__content_str "") - if(EXISTS "${CBLAS_H_PROXY_PATH}") - file(READ "${CBLAS_H_PROXY_PATH}" __content_str) - endif() - if(NOT " ${__content_str}" STREQUAL " ${_lapack_include_str}") - file(WRITE "${CBLAS_H_PROXY_PATH}" "${_lapack_include_str}") + if(${_lapack_add_extern_c}) + list(APPEND _lapack_content "}") endif() + string(REPLACE ";" "\n" _lapack_content "${_lapack_content}") + ocv_update_file("${CBLAS_H_PROXY_PATH}" "${_lapack_content}") + try_compile(__VALID_LAPACK "${OpenCV_BINARY_DIR}" "${OpenCV_SOURCE_DIR}/cmake/checks/lapack_check.cpp" diff --git a/cmake/OpenCVFindLibsGUI.cmake b/cmake/OpenCVFindLibsGUI.cmake index 27b5d77e92..e3593d4dc9 100644 --- a/cmake/OpenCVFindLibsGUI.cmake +++ b/cmake/OpenCVFindLibsGUI.cmake @@ -64,7 +64,7 @@ if(WITH_GTK AND NOT HAVE_QT) if(WITH_OPENGL AND NOT HAVE_GTK3) ocv_check_modules(GTKGLEXT gtkglext-1.0) if(HAVE_GTKGLEXT) - ocv_append_build_options(GTKGLEXT GTHREAD) +
ocv_append_build_options(HIGHGUI GTKGLEXT) endif() endif() endif() diff --git a/cmake/OpenCVFindLibsPerf.cmake b/cmake/OpenCVFindLibsPerf.cmake index 67978f9210..a658bf6bdc 100644 --- a/cmake/OpenCVFindLibsPerf.cmake +++ b/cmake/OpenCVFindLibsPerf.cmake @@ -40,19 +40,67 @@ To eliminate this warning remove WITH_CUDA=ON CMake configuration option. endif(WITH_CUDA) # --- Eigen --- -if(WITH_EIGEN) - find_path(EIGEN_INCLUDE_PATH "Eigen/Core" - PATHS /usr/local /opt /usr $ENV{EIGEN_ROOT}/include ENV ProgramFiles ENV ProgramW6432 - PATH_SUFFIXES include/eigen3 include/eigen2 Eigen/include/eigen3 Eigen/include/eigen2 - DOC "The path to Eigen3/Eigen2 headers" - CMAKE_FIND_ROOT_PATH_BOTH) +if(WITH_EIGEN AND NOT HAVE_EIGEN) + find_package(Eigen3 QUIET) - if(EIGEN_INCLUDE_PATH) - ocv_include_directories(${EIGEN_INCLUDE_PATH}) - ocv_parse_header("${EIGEN_INCLUDE_PATH}/Eigen/src/Core/util/Macros.h" EIGEN_VERSION_LINES EIGEN_WORLD_VERSION EIGEN_MAJOR_VERSION EIGEN_MINOR_VERSION) - set(HAVE_EIGEN 1) + if(Eigen3_FOUND) + if(TARGET Eigen3::Eigen) + # Use Eigen3 imported target if possible + list(APPEND OPENCV_LINKER_LIBS Eigen3::Eigen) + set(HAVE_EIGEN 1) + else() + if(DEFINED EIGEN3_INCLUDE_DIRS) + set(EIGEN_INCLUDE_PATH ${EIGEN3_INCLUDE_DIRS}) + set(HAVE_EIGEN 1) + elseif(DEFINED EIGEN3_INCLUDE_DIR) + set(EIGEN_INCLUDE_PATH ${EIGEN3_INCLUDE_DIR}) + set(HAVE_EIGEN 1) + endif() + endif() + if(HAVE_EIGEN) + if(DEFINED EIGEN3_WORLD_VERSION) # CMake module + set(EIGEN_WORLD_VERSION ${EIGEN3_WORLD_VERSION}) + set(EIGEN_MAJOR_VERSION ${EIGEN3_MAJOR_VERSION}) + set(EIGEN_MINOR_VERSION ${EIGEN3_MINOR_VERSION}) + else() # Eigen config file + set(EIGEN_WORLD_VERSION ${EIGEN3_VERSION_MAJOR}) + set(EIGEN_MAJOR_VERSION ${EIGEN3_VERSION_MINOR}) + set(EIGEN_MINOR_VERSION ${EIGEN3_VERSION_PATCH}) + endif() + endif() endif() -endif(WITH_EIGEN) + + if(NOT HAVE_EIGEN) + if(NOT EIGEN_INCLUDE_PATH OR NOT EXISTS "${EIGEN_INCLUDE_PATH}") + set(__find_paths "") + set(__find_path_extra_options "") + if(NOT 
CMAKE_CROSSCOMPILING) + list(APPEND __find_paths /opt) + endif() + if(DEFINED ENV{EIGEN_ROOT}) + set(__find_paths "$ENV{EIGEN_ROOT}/include") + list(APPEND __find_path_extra_options NO_DEFAULT_PATH) + else() + list(APPEND __find_paths ENV ProgramFiles ENV ProgramW6432) # extend, do not overwrite: keeps the /opt entry appended above + endif() + find_path(EIGEN_INCLUDE_PATH "Eigen/Core" + PATHS ${__find_paths} + PATH_SUFFIXES include/eigen3 include/eigen2 Eigen/include/eigen3 Eigen/include/eigen2 + DOC "The path to Eigen3/Eigen2 headers" + ${__find_path_extra_options} + ) + endif() + if(EIGEN_INCLUDE_PATH AND EXISTS "${EIGEN_INCLUDE_PATH}") + ocv_parse_header("${EIGEN_INCLUDE_PATH}/Eigen/src/Core/util/Macros.h" EIGEN_VERSION_LINES EIGEN_WORLD_VERSION EIGEN_MAJOR_VERSION EIGEN_MINOR_VERSION) + set(HAVE_EIGEN 1) + endif() + endif() +endif() +if(HAVE_EIGEN) + if(EIGEN_INCLUDE_PATH AND EXISTS "${EIGEN_INCLUDE_PATH}") + ocv_include_directories(SYSTEM ${EIGEN_INCLUDE_PATH}) + endif() +endif() # --- Clp --- # Ubuntu: sudo apt-get install coinor-libclp-dev coinor-libcoinutils-dev diff --git a/doc/opencv.bib b/doc/opencv.bib index fd1b60dfd1..e861e5b756 100644 --- a/doc/opencv.bib +++ b/doc/opencv.bib @@ -209,7 +209,21 @@ hal_id = {inria-00350283}, hal_version = {v1}, } - +@article{Collins14, + year = {2014}, + issn = {0920-5691}, + journal = {International Journal of Computer Vision}, + volume = {109}, + number = {3}, + doi = {10.1007/s11263-014-0725-5}, + title = {Infinitesimal Plane-Based Pose Estimation}, + url = {http://dx.doi.org/10.1007/s11263-014-0725-5}, + publisher = {Springer US}, + keywords = {Plane; Pose; SfM; PnP; Homography}, + author = {Collins, Toby and Bartoli, Adrien}, + pages = {252-286}, + language = {English} +} @article{Daniilidis98, author = {Konstantinos Daniilidis}, title = {Hand-Eye Calibration Using Dual Quaternions}, diff --git a/doc/py_tutorials/py_ml/py_knn/py_knn_opencv/py_knn_opencv.markdown b/doc/py_tutorials/py_ml/py_knn/py_knn_opencv/py_knn_opencv.markdown index 5fbbff27a3..1ef8443306 100644 ---
a/doc/py_tutorials/py_ml/py_knn/py_knn_opencv/py_knn_opencv.markdown +++ b/doc/py_tutorials/py_ml/py_knn/py_knn_opencv/py_knn_opencv.markdown @@ -21,7 +21,6 @@ train_data, and next 250 samples as test_data. So let's prepare them first. @code{.py} import numpy as np import cv2 as cv -from matplotlib import pyplot as plt img = cv.imread('digits.png') gray = cv.cvtColor(img,cv.COLOR_BGR2GRAY) @@ -89,7 +88,6 @@ alphabets directly. @code{.py} import cv2 as cv import numpy as np -import matplotlib.pyplot as plt # Load the data, converters convert the letter to a number data= np.loadtxt('letter-recognition.data', dtype= 'float32', delimiter = ',', diff --git a/modules/calib3d/include/opencv2/calib3d.hpp b/modules/calib3d/include/opencv2/calib3d.hpp index b812d5928e..a70b968e47 100644 --- a/modules/calib3d/include/opencv2/calib3d.hpp +++ b/modules/calib3d/include/opencv2/calib3d.hpp @@ -231,13 +231,25 @@ enum { LMEDS = 4, //!< least-median of squares algorithm RHO = 16 //!< RHO algorithm }; -enum { SOLVEPNP_ITERATIVE = 0, - SOLVEPNP_EPNP = 1, //!< EPnP: Efficient Perspective-n-Point Camera Pose Estimation @cite lepetit2009epnp - SOLVEPNP_P3P = 2, //!< Complete Solution Classification for the Perspective-Three-Point Problem @cite gao2003complete - SOLVEPNP_DLS = 3, //!< A Direct Least-Squares (DLS) Method for PnP @cite hesch2011direct - SOLVEPNP_UPNP = 4, //!< Exhaustive Linearization for Robust Camera Pose and Focal Length Estimation @cite penate2013exhaustive - SOLVEPNP_AP3P = 5, //!< An Efficient Algebraic Solution to the Perspective-Three-Point Problem @cite Ke17 - SOLVEPNP_MAX_COUNT //!< Used for count +enum SolvePnPMethod { + SOLVEPNP_ITERATIVE = 0, + SOLVEPNP_EPNP = 1, //!< EPnP: Efficient Perspective-n-Point Camera Pose Estimation @cite lepetit2009epnp + SOLVEPNP_P3P = 2, //!< Complete Solution Classification for the Perspective-Three-Point Problem @cite gao2003complete + SOLVEPNP_DLS = 3, //!< A Direct Least-Squares (DLS) Method for PnP @cite hesch2011direct + 
SOLVEPNP_UPNP = 4, //!< Exhaustive Linearization for Robust Camera Pose and Focal Length Estimation @cite penate2013exhaustive + SOLVEPNP_AP3P = 5, //!< An Efficient Algebraic Solution to the Perspective-Three-Point Problem @cite Ke17 + SOLVEPNP_IPPE = 6, //!< Infinitesimal Plane-Based Pose Estimation @cite Collins14 \n + //!< Object points must be coplanar. + SOLVEPNP_IPPE_SQUARE = 7, //!< Infinitesimal Plane-Based Pose Estimation @cite Collins14 \n + //!< This is a special case suitable for marker pose estimation.\n + //!< 4 coplanar object points must be defined in the following order: + //!< - point 0: [-squareLength / 2, squareLength / 2, 0] + //!< - point 1: [ squareLength / 2, squareLength / 2, 0] + //!< - point 2: [ squareLength / 2, -squareLength / 2, 0] + //!< - point 3: [-squareLength / 2, -squareLength / 2, 0] +#ifndef CV_DOXYGEN + SOLVEPNP_MAX_COUNT //!< Used for count +#endif }; enum { CALIB_CB_ADAPTIVE_THRESH = 1, @@ -610,6 +622,17 @@ Check @ref tutorial_homography "the corresponding tutorial" for more details */ /** @brief Finds an object pose from 3D-2D point correspondences. +This function returns the rotation and the translation vectors that transform a 3D point expressed in the object +coordinate frame to the camera coordinate frame, using different methods: +- P3P methods (@ref SOLVEPNP_P3P, @ref SOLVEPNP_AP3P): need 4 input points to return a unique solution. +- @ref SOLVEPNP_IPPE Input points must be >= 4 and object points must be coplanar. +- @ref SOLVEPNP_IPPE_SQUARE Special case suitable for marker pose estimation. +Number of input points must be 4. Object points must be defined in the following order: + - point 0: [-squareLength / 2, squareLength / 2, 0] + - point 1: [ squareLength / 2, squareLength / 2, 0] + - point 2: [ squareLength / 2, -squareLength / 2, 0] + - point 3: [-squareLength / 2, -squareLength / 2, 0] +- for all the other flags, number of input points must be >= 4 and object points can be in any configuration. 
@param objectPoints Array of object points in the object coordinate space, Nx3 1-channel or 1xN/Nx1 3-channel, where N is the number of points. vector\ can be also passed here. @@ -620,14 +643,14 @@ where N is the number of points. vector\ can be also passed here. \f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6 [, s_1, s_2, s_3, s_4[, \tau_x, \tau_y]]]])\f$ of 4, 5, 8, 12 or 14 elements. If the vector is NULL/empty, the zero distortion coefficients are assumed. -@param rvec Output rotation vector (see @ref Rodrigues ) that, together with tvec , brings points from +@param rvec Output rotation vector (see @ref Rodrigues ) that, together with tvec, brings points from the model coordinate system to the camera coordinate system. @param tvec Output translation vector. @param useExtrinsicGuess Parameter used for #SOLVEPNP_ITERATIVE. If true (1), the function uses the provided rvec and tvec values as initial approximations of the rotation and translation vectors, respectively, and further optimizes them. @param flags Method for solving a PnP problem: -- **SOLVEPNP_ITERATIVE** Iterative method is based on Levenberg-Marquardt optimization. In +- **SOLVEPNP_ITERATIVE** Iterative method is based on a Levenberg-Marquardt optimization. In this case the function finds such a pose that minimizes reprojection error, that is the sum of squared distances between the observed projections imagePoints and the projected (using projectPoints ) objectPoints . @@ -637,18 +660,24 @@ In this case the function requires exactly four object and image points. - **SOLVEPNP_AP3P** Method is based on the paper of T. Ke, S. Roumeliotis "An Efficient Algebraic Solution to the Perspective-Three-Point Problem" (@cite Ke17). In this case the function requires exactly four object and image points. -- **SOLVEPNP_EPNP** Method has been introduced by F.Moreno-Noguer, V.Lepetit and P.Fua in the +- **SOLVEPNP_EPNP** Method has been introduced by F. Moreno-Noguer, V. Lepetit and P. 
Fua in the paper "EPnP: Efficient Perspective-n-Point Camera Pose Estimation" (@cite lepetit2009epnp). -- **SOLVEPNP_DLS** Method is based on the paper of Joel A. Hesch and Stergios I. Roumeliotis. +- **SOLVEPNP_DLS** Method is based on the paper of J. Hesch and S. Roumeliotis. "A Direct Least-Squares (DLS) Method for PnP" (@cite hesch2011direct). -- **SOLVEPNP_UPNP** Method is based on the paper of A.Penate-Sanchez, J.Andrade-Cetto, -F.Moreno-Noguer. "Exhaustive Linearization for Robust Camera Pose and Focal Length +- **SOLVEPNP_UPNP** Method is based on the paper of A. Penate-Sanchez, J. Andrade-Cetto, +F. Moreno-Noguer. "Exhaustive Linearization for Robust Camera Pose and Focal Length Estimation" (@cite penate2013exhaustive). In this case the function also estimates the parameters \f$f_x\f$ and \f$f_y\f$ assuming that both have the same value. Then the cameraMatrix is updated with the estimated focal length. -- **SOLVEPNP_AP3P** Method is based on the paper of Tong Ke and Stergios I. Roumeliotis. -"An Efficient Algebraic Solution to the Perspective-Three-Point Problem" (@cite Ke17). In this case the -function requires exactly four object and image points. +- **SOLVEPNP_IPPE** Method is based on the paper of T. Collins and A. Bartoli. +"Infinitesimal Plane-Based Pose Estimation" (@cite Collins14). This method requires coplanar object points. +- **SOLVEPNP_IPPE_SQUARE** Method is based on the paper of Toby Collins and Adrien Bartoli. +"Infinitesimal Plane-Based Pose Estimation" (@cite Collins14). This method is suitable for marker pose estimation. 
+It requires 4 coplanar object points defined in the following order: + - point 0: [-squareLength / 2, squareLength / 2, 0] + - point 1: [ squareLength / 2, squareLength / 2, 0] + - point 2: [ squareLength / 2, -squareLength / 2, 0] + - point 3: [-squareLength / 2, -squareLength / 2, 0] The function estimates the object pose given a set of object points, their corresponding image projections, as well as the camera matrix and the distortion coefficients, see the figure below @@ -704,7 +733,7 @@ using the perspective projection model \f$ \Pi \f$ and the camera intrinsic para \end{align*} \f] -The estimated pose is thus the rotation (`rvec`) and the translation (`tvec`) vectors that allow to transform +The estimated pose is thus the rotation (`rvec`) and the translation (`tvec`) vectors that allow transforming a 3D point expressed in the world frame into the camera frame: \f[ @@ -765,6 +794,13 @@ a 3D point expressed in the world frame into the camera frame: - With **SOLVEPNP_ITERATIVE** method and `useExtrinsicGuess=true`, the minimum number of points is 3 (3 points are sufficient to compute a pose but there are up to 4 solutions). The initial solution should be close to the global solution to converge. + - With **SOLVEPNP_IPPE** input points must be >= 4 and object points must be coplanar. + - With **SOLVEPNP_IPPE_SQUARE** this is a special case suitable for marker pose estimation. + Number of input points must be 4. Object points must be defined in the following order: + - point 0: [-squareLength / 2, squareLength / 2, 0] + - point 1: [ squareLength / 2, squareLength / 2, 0] + - point 2: [ squareLength / 2, -squareLength / 2, 0] + - point 3: [-squareLength / 2, -squareLength / 2, 0] */ CV_EXPORTS_W bool solvePnP( InputArray objectPoints, InputArray imagePoints, InputArray cameraMatrix, InputArray distCoeffs, @@ -782,10 +818,10 @@ where N is the number of points. vector\ can be also passed here. 
\f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6 [, s_1, s_2, s_3, s_4[, \tau_x, \tau_y]]]])\f$ of 4, 5, 8, 12 or 14 elements. If the vector is NULL/empty, the zero distortion coefficients are assumed. -@param rvec Output rotation vector (see Rodrigues ) that, together with tvec , brings points from +@param rvec Output rotation vector (see @ref Rodrigues ) that, together with tvec, brings points from the model coordinate system to the camera coordinate system. @param tvec Output translation vector. -@param useExtrinsicGuess Parameter used for SOLVEPNP_ITERATIVE. If true (1), the function uses +@param useExtrinsicGuess Parameter used for @ref SOLVEPNP_ITERATIVE. If true (1), the function uses the provided rvec and tvec values as initial approximations of the rotation and translation vectors, respectively, and further optimizes them. @param iterationsCount Number of iterations. @@ -794,12 +830,12 @@ is the maximum allowed distance between the observed and computed point projecti an inlier. @param confidence The probability that the algorithm produces a useful result. @param inliers Output vector that contains indices of inliers in objectPoints and imagePoints . -@param flags Method for solving a PnP problem (see solvePnP ). +@param flags Method for solving a PnP problem (see @ref solvePnP ). The function estimates an object pose given a set of object points, their corresponding image projections, as well as the camera matrix and the distortion coefficients. This function finds such a pose that minimizes reprojection error, that is, the sum of squared distances between the observed -projections imagePoints and the projected (using projectPoints ) objectPoints. The use of RANSAC +projections imagePoints and the projected (using @ref projectPoints ) objectPoints. The use of RANSAC makes the function resistant to outliers. 
@note @@ -819,6 +855,7 @@ CV_EXPORTS_W bool solvePnPRansac( InputArray objectPoints, InputArray imagePoint bool useExtrinsicGuess = false, int iterationsCount = 100, float reprojectionError = 8.0, double confidence = 0.99, OutputArray inliers = noArray(), int flags = SOLVEPNP_ITERATIVE ); + /** @brief Finds an object pose from 3 3D-2D point correspondences. @param objectPoints Array of object points in the object coordinate space, 3x3 1-channel or @@ -830,17 +867,20 @@ CV_EXPORTS_W bool solvePnPRansac( InputArray objectPoints, InputArray imagePoint \f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6 [, s_1, s_2, s_3, s_4[, \tau_x, \tau_y]]]])\f$ of 4, 5, 8, 12 or 14 elements. If the vector is NULL/empty, the zero distortion coefficients are assumed. -@param rvecs Output rotation vectors (see Rodrigues ) that, together with tvecs , brings points from +@param rvecs Output rotation vectors (see @ref Rodrigues ) that, together with tvecs, brings points from the model coordinate system to the camera coordinate system. A P3P problem has up to 4 solutions. @param tvecs Output translation vectors. @param flags Method for solving a P3P problem: - **SOLVEPNP_P3P** Method is based on the paper of X.S. Gao, X.-R. Hou, J. Tang, H.-F. Chang "Complete Solution Classification for the Perspective-Three-Point Problem" (@cite gao2003complete). -- **SOLVEPNP_AP3P** Method is based on the paper of Tong Ke and Stergios I. Roumeliotis. +- **SOLVEPNP_AP3P** Method is based on the paper of T. Ke and S. Roumeliotis. "An Efficient Algebraic Solution to the Perspective-Three-Point Problem" (@cite Ke17). The function estimates the object pose given 3 object points, their corresponding image projections, as well as the camera matrix and the distortion coefficients. + +@note +The solutions are sorted by reprojection errors (lowest to highest). 
*/ CV_EXPORTS_W int solveP3P( InputArray objectPoints, InputArray imagePoints, InputArray cameraMatrix, InputArray distCoeffs, @@ -859,7 +899,7 @@ where N is the number of points. vector\ can also be passed here. \f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6 [, s_1, s_2, s_3, s_4[, \tau_x, \tau_y]]]])\f$ of 4, 5, 8, 12 or 14 elements. If the vector is NULL/empty, the zero distortion coefficients are assumed. -@param rvec Input/Output rotation vector (see @ref Rodrigues ) that, together with tvec , brings points from +@param rvec Input/Output rotation vector (see @ref Rodrigues ) that, together with tvec, brings points from the model coordinate system to the camera coordinate system. Input values are used as an initial solution. @param tvec Input/Output translation vector. Input values are used as an initial solution. @param criteria Criteria when to stop the Levenberg-Marquard iterative algorithm. @@ -887,12 +927,12 @@ where N is the number of points. vector\ can also be passed here. \f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6 [, s_1, s_2, s_3, s_4[, \tau_x, \tau_y]]]])\f$ of 4, 5, 8, 12 or 14 elements. If the vector is NULL/empty, the zero distortion coefficients are assumed. -@param rvec Input/Output rotation vector (see @ref Rodrigues ) that, together with tvec , brings points from +@param rvec Input/Output rotation vector (see @ref Rodrigues ) that, together with tvec, brings points from the model coordinate system to the camera coordinate system. Input values are used as an initial solution. @param tvec Input/Output translation vector. Input values are used as an initial solution. @param criteria Criteria when to stop the Levenberg-Marquard iterative algorithm. @param VVSlambda Gain for the virtual visual servoing control law, equivalent to the \f$\alpha\f$ -gain in the Gauss-Newton formulation. +gain in the Damped Gauss-Newton formulation. 
The function refines the object pose given at least 3 object points, their corresponding image projections, an initial solution for the rotation and translation vector, @@ -906,6 +946,202 @@ CV_EXPORTS_W void solvePnPRefineVVS( InputArray objectPoints, InputArray imagePo TermCriteria criteria = TermCriteria(TermCriteria::EPS + TermCriteria::COUNT, 20, FLT_EPSILON), double VVSlambda = 1); +/** @brief Finds an object pose from 3D-2D point correspondences. +This function returns a list of all the possible solutions (a solution is a <rotation vector, translation vector> +couple), depending on the number of input points and the chosen method: +- P3P methods (@ref SOLVEPNP_P3P, @ref SOLVEPNP_AP3P): 3 or 4 input points. Number of returned solutions can be between 0 and 4 with 3 input points. +- @ref SOLVEPNP_IPPE Input points must be >= 4 and object points must be coplanar. Returns 2 solutions. +- @ref SOLVEPNP_IPPE_SQUARE Special case suitable for marker pose estimation. +Number of input points must be 4 and 2 solutions are returned. Object points must be defined in the following order: + - point 0: [-squareLength / 2, squareLength / 2, 0] + - point 1: [ squareLength / 2, squareLength / 2, 0] + - point 2: [ squareLength / 2, -squareLength / 2, 0] + - point 3: [-squareLength / 2, -squareLength / 2, 0] +- for all the other flags, number of input points must be >= 4 and object points can be in any configuration. +Only 1 solution is returned. + +@param objectPoints Array of object points in the object coordinate space, Nx3 1-channel or +1xN/Nx1 3-channel, where N is the number of points. vector\ can be also passed here. +@param imagePoints Array of corresponding image points, Nx2 1-channel or 1xN/Nx1 2-channel, +where N is the number of points. vector\ can be also passed here. +@param cameraMatrix Input camera matrix \f$A = \vecthreethree{fx}{0}{cx}{0}{fy}{cy}{0}{0}{1}\f$ .
+@param distCoeffs Input vector of distortion coefficients +\f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6 [, s_1, s_2, s_3, s_4[, \tau_x, \tau_y]]]])\f$ of +4, 5, 8, 12 or 14 elements. If the vector is NULL/empty, the zero distortion coefficients are +assumed. +@param rvecs Vector of output rotation vectors (see @ref Rodrigues ) that, together with tvecs, brings points from +the model coordinate system to the camera coordinate system. +@param tvecs Vector of output translation vectors. +@param useExtrinsicGuess Parameter used for #SOLVEPNP_ITERATIVE. If true (1), the function uses +the provided rvec and tvec values as initial approximations of the rotation and translation +vectors, respectively, and further optimizes them. +@param flags Method for solving a PnP problem: +- **SOLVEPNP_ITERATIVE** Iterative method is based on a Levenberg-Marquardt optimization. In +this case the function finds such a pose that minimizes reprojection error, that is the sum +of squared distances between the observed projections imagePoints and the projected (using +projectPoints ) objectPoints . +- **SOLVEPNP_P3P** Method is based on the paper of X.S. Gao, X.-R. Hou, J. Tang, H.-F. Chang +"Complete Solution Classification for the Perspective-Three-Point Problem" (@cite gao2003complete). +In this case the function requires exactly four object and image points. +- **SOLVEPNP_AP3P** Method is based on the paper of T. Ke, S. Roumeliotis +"An Efficient Algebraic Solution to the Perspective-Three-Point Problem" (@cite Ke17). +In this case the function requires exactly four object and image points. +- **SOLVEPNP_EPNP** Method has been introduced by F.Moreno-Noguer, V.Lepetit and P.Fua in the +paper "EPnP: Efficient Perspective-n-Point Camera Pose Estimation" (@cite lepetit2009epnp). +- **SOLVEPNP_DLS** Method is based on the paper of Joel A. Hesch and Stergios I. Roumeliotis. +"A Direct Least-Squares (DLS) Method for PnP" (@cite hesch2011direct). 
+- **SOLVEPNP_UPNP** Method is based on the paper of A.Penate-Sanchez, J.Andrade-Cetto, +F.Moreno-Noguer. "Exhaustive Linearization for Robust Camera Pose and Focal Length +Estimation" (@cite penate2013exhaustive). In this case the function also estimates the parameters \f$f_x\f$ and \f$f_y\f$ +assuming that both have the same value. Then the cameraMatrix is updated with the estimated +focal length. +- **SOLVEPNP_IPPE** Method is based on the paper of T. Collins and A. Bartoli. +"Infinitesimal Plane-Based Pose Estimation" (@cite Collins14). This method requires coplanar object points. +- **SOLVEPNP_IPPE_SQUARE** Method is based on the paper of Toby Collins and Adrien Bartoli. +"Infinitesimal Plane-Based Pose Estimation" (@cite Collins14). This method is suitable for marker pose estimation. +It requires 4 coplanar object points defined in the following order: + - point 0: [-squareLength / 2, squareLength / 2, 0] + - point 1: [ squareLength / 2, squareLength / 2, 0] + - point 2: [ squareLength / 2, -squareLength / 2, 0] + - point 3: [-squareLength / 2, -squareLength / 2, 0] +@param rvec Rotation vector used to initialize an iterative PnP refinement algorithm, when flag is SOLVEPNP_ITERATIVE +and useExtrinsicGuess is set to true. +@param tvec Translation vector used to initialize an iterative PnP refinement algorithm, when flag is SOLVEPNP_ITERATIVE +and useExtrinsicGuess is set to true. +@param reprojectionError Optional vector of reprojection error, that is the RMS error +(\f$ \text{RMSE} = \sqrt{\frac{\sum_{i}^{N} \left ( \hat{y_i} - y_i \right )^2}{N}} \f$) between the input image points +and the 3D object points projected with the estimated pose. + +The function estimates the object pose given a set of object points, their corresponding image +projections, as well as the camera matrix and the distortion coefficients, see the figure below +(more precisely, the X-axis of the camera frame is pointing to the right, the Y-axis downward +and the Z-axis forward). 
+ +![](pnp.jpg) + +Points expressed in the world frame \f$ \bf{X}_w \f$ are projected into the image plane \f$ \left[ u, v \right] \f$ +using the perspective projection model \f$ \Pi \f$ and the camera intrinsic parameters matrix \f$ \bf{A} \f$: + +\f[ + \begin{align*} + \begin{bmatrix} + u \\ + v \\ + 1 + \end{bmatrix} &= + \bf{A} \hspace{0.1em} \Pi \hspace{0.2em} ^{c}\bf{M}_w + \begin{bmatrix} + X_{w} \\ + Y_{w} \\ + Z_{w} \\ + 1 + \end{bmatrix} \\ + \begin{bmatrix} + u \\ + v \\ + 1 + \end{bmatrix} &= + \begin{bmatrix} + f_x & 0 & c_x \\ + 0 & f_y & c_y \\ + 0 & 0 & 1 + \end{bmatrix} + \begin{bmatrix} + 1 & 0 & 0 & 0 \\ + 0 & 1 & 0 & 0 \\ + 0 & 0 & 1 & 0 + \end{bmatrix} + \begin{bmatrix} + r_{11} & r_{12} & r_{13} & t_x \\ + r_{21} & r_{22} & r_{23} & t_y \\ + r_{31} & r_{32} & r_{33} & t_z \\ + 0 & 0 & 0 & 1 + \end{bmatrix} + \begin{bmatrix} + X_{w} \\ + Y_{w} \\ + Z_{w} \\ + 1 + \end{bmatrix} + \end{align*} +\f] + +The estimated pose is thus the rotation (`rvec`) and the translation (`tvec`) vectors that allow transforming +a 3D point expressed in the world frame into the camera frame: + +\f[ + \begin{align*} + \begin{bmatrix} + X_c \\ + Y_c \\ + Z_c \\ + 1 + \end{bmatrix} &= + \hspace{0.2em} ^{c}\bf{M}_w + \begin{bmatrix} + X_{w} \\ + Y_{w} \\ + Z_{w} \\ + 1 + \end{bmatrix} \\ + \begin{bmatrix} + X_c \\ + Y_c \\ + Z_c \\ + 1 + \end{bmatrix} &= + \begin{bmatrix} + r_{11} & r_{12} & r_{13} & t_x \\ + r_{21} & r_{22} & r_{23} & t_y \\ + r_{31} & r_{32} & r_{33} & t_z \\ + 0 & 0 & 0 & 1 + \end{bmatrix} + \begin{bmatrix} + X_{w} \\ + Y_{w} \\ + Z_{w} \\ + 1 + \end{bmatrix} + \end{align*} +\f] + +@note + - An example of how to use solvePnP for planar augmented reality can be found at + opencv_source_code/samples/python/plane_ar.py + - If you are using Python: + - Numpy array slices won't work as input because solvePnP requires contiguous + arrays (enforced by the assertion using cv::Mat::checkVector() around line 55 of + modules/calib3d/src/solvepnp.cpp version 
2.4.9) + - The P3P algorithm requires image points to be in an array of shape (N,1,2) due + to its calling of cv::undistortPoints (around line 75 of modules/calib3d/src/solvepnp.cpp version 2.4.9) + which requires 2-channel information. + - Thus, given some data D = np.array(...) where D.shape = (N,M), in order to use a subset of + it as, e.g., imagePoints, one must effectively copy it into a new array: imagePoints = + np.ascontiguousarray(D[:,:2]).reshape((N,1,2)) + - The methods **SOLVEPNP_DLS** and **SOLVEPNP_UPNP** cannot be used as the current implementations are + unstable and sometimes give completely wrong results. If you pass one of these two + flags, **SOLVEPNP_EPNP** method will be used instead. + - The minimum number of points is 4 in the general case. In the case of **SOLVEPNP_P3P** and **SOLVEPNP_AP3P** + methods, it is required to use exactly 4 points (the first 3 points are used to estimate all the solutions + of the P3P problem, the last one is used to retain the best solution that minimizes the reprojection error). + - With **SOLVEPNP_ITERATIVE** method and `useExtrinsicGuess=true`, the minimum number of points is 3 (3 points + are sufficient to compute a pose but there are up to 4 solutions). The initial solution should be close to the + global solution to converge. + - With **SOLVEPNP_IPPE** input points must be >= 4 and object points must be coplanar. + - With **SOLVEPNP_IPPE_SQUARE** this is a special case suitable for marker pose estimation. + Number of input points must be 4. 
Object points must be defined in the following order: + - point 0: [-squareLength / 2, squareLength / 2, 0] + - point 1: [ squareLength / 2, squareLength / 2, 0] + - point 2: [ squareLength / 2, -squareLength / 2, 0] + - point 3: [-squareLength / 2, -squareLength / 2, 0] + */ +CV_EXPORTS_W int solvePnPGeneric( InputArray objectPoints, InputArray imagePoints, + InputArray cameraMatrix, InputArray distCoeffs, + OutputArrayOfArrays rvecs, OutputArrayOfArrays tvecs, + bool useExtrinsicGuess = false, SolvePnPMethod flags = SOLVEPNP_ITERATIVE, + InputArray rvec = noArray(), InputArray tvec = noArray(), + OutputArray reprojectionError = noArray() ); + /** @brief Finds an initial camera matrix from 3D-2D point correspondences. @param objectPoints Vector of vectors of the calibration pattern points in the calibration pattern @@ -1041,7 +1277,7 @@ CV_EXPORTS_W void drawChessboardCorners( InputOutputArray image, Size patternSiz @param distCoeffs Input vector of distortion coefficients \f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6 [, s_1, s_2, s_3, s_4[, \tau_x, \tau_y]]]])\f$ of 4, 5, 8, 12 or 14 elements. If the vector is empty, the zero distortion coefficients are assumed. -@param rvec Rotation vector (see @ref Rodrigues ) that, together with tvec , brings points from +@param rvec Rotation vector (see @ref Rodrigues ) that, together with tvec, brings points from the model coordinate system to the camera coordinate system. @param tvec Translation vector. @param length Length of the painted axes in the same unit than tvec (usually in meters). 
diff --git a/modules/calib3d/src/ap3p.cpp b/modules/calib3d/src/ap3p.cpp index 7b86834db8..11171f81a6 100644 --- a/modules/calib3d/src/ap3p.cpp +++ b/modules/calib3d/src/ap3p.cpp @@ -1,3 +1,4 @@ +#include "precomp.hpp" #include "ap3p.h" #include @@ -154,10 +155,11 @@ ap3p::ap3p(double _fx, double _fy, double _cx, double _cy) { // worldPoints: The positions of the 3 feature points stored as column vectors // solutionsR: 4 possible solutions of rotation matrix of the world w.r.t the camera frame // solutionsT: 4 possible solutions of translation of the world origin w.r.t the camera frame -int ap3p::computePoses(const double featureVectors[3][3], - const double worldPoints[3][3], +int ap3p::computePoses(const double featureVectors[3][4], + const double worldPoints[3][4], double solutionsR[4][3][3], - double solutionsT[4][3]) { + double solutionsT[4][3], + bool p4p) { //world point vectors double w1[3] = {worldPoints[0][0], worldPoints[1][0], worldPoints[2][0]}; @@ -246,6 +248,13 @@ int ap3p::computePoses(const double featureVectors[3][3], double b3p[3]; vect_scale((delta / k3b3), b3, b3p); + double X3 = worldPoints[0][3]; + double Y3 = worldPoints[1][3]; + double Z3 = worldPoints[2][3]; + double mu3 = featureVectors[0][3]; + double mv3 = featureVectors[1][3]; + double reproj_errors[4]; + int nb_solutions = 0; for (int i = 0; i < 4; ++i) { double ctheta1p = s[i]; @@ -290,9 +299,29 @@ int ap3p::computePoses(const double featureVectors[3][3], solutionsR[nb_solutions][1][2] = R[2][1]; solutionsR[nb_solutions][2][2] = R[2][2]; + if (p4p) { + double X3p = solutionsR[nb_solutions][0][0] * X3 + solutionsR[nb_solutions][0][1] * Y3 + solutionsR[nb_solutions][0][2] * Z3 + solutionsT[nb_solutions][0]; + double Y3p = solutionsR[nb_solutions][1][0] * X3 + solutionsR[nb_solutions][1][1] * Y3 + solutionsR[nb_solutions][1][2] * Z3 + solutionsT[nb_solutions][1]; + double Z3p = solutionsR[nb_solutions][2][0] * X3 + solutionsR[nb_solutions][2][1] * Y3 + solutionsR[nb_solutions][2][2] * 
Z3 + solutionsT[nb_solutions][2]; + double mu3p = X3p / Z3p; + double mv3p = Y3p / Z3p; + reproj_errors[nb_solutions] = (mu3p - mu3) * (mu3p - mu3) + (mv3p - mv3) * (mv3p - mv3); + } + nb_solutions++; } + //sort the solutions + if (p4p) { + for (int i = 1; i < nb_solutions; i++) { + for (int j = i; j > 0 && reproj_errors[j-1] > reproj_errors[j]; j--) { + std::swap(reproj_errors[j], reproj_errors[j-1]); + std::swap(solutionsR[j], solutionsR[j-1]); + std::swap(solutionsT[j], solutionsT[j-1]); + } + } + } + return nb_solutions; } @@ -311,9 +340,10 @@ bool ap3p::solve(cv::Mat &R, cv::Mat &tvec, const cv::Mat &opoints, const cv::Ma else extract_points(opoints, ipoints, points); - bool result = solve(rotation_matrix, translation, points[0], points[1], points[2], points[3], points[4], points[5], - points[6], points[7], points[8], points[9], points[10], points[11], points[12], points[13], - points[14], + bool result = solve(rotation_matrix, translation, + points[0], points[1], points[2], points[3], points[4], + points[5], points[6], points[7], points[8], points[9], + points[10], points[11], points[12], points[13],points[14], points[15], points[16], points[17], points[18], points[19]); cv::Mat(3, 1, CV_64F, translation).copyTo(tvec); cv::Mat(3, 3, CV_64F, rotation_matrix).copyTo(R); @@ -335,10 +365,13 @@ int ap3p::solve(std::vector &Rs, std::vector &tvecs, const cv: else extract_points(opoints, ipoints, points); + const bool p4p = std::max(opoints.checkVector(3, CV_32F), opoints.checkVector(3, CV_64F)) == 4; int solutions = solve(rotation_matrix, translation, points[0], points[1], points[2], points[3], points[4], points[5], points[6], points[7], points[8], points[9], - points[10], points[11], points[12], points[13], points[14]); + points[10], points[11], points[12], points[13], points[14], + points[15], points[16], points[17], points[18], points[19], + p4p); for (int i = 0; i < solutions; i++) { cv::Mat R, tvec; @@ -353,42 +386,33 @@ int ap3p::solve(std::vector &Rs, 
std::vector &tvecs, const cv: } bool -ap3p::solve(double R[3][3], double t[3], double mu0, double mv0, double X0, double Y0, double Z0, double mu1, - double mv1, - double X1, double Y1, double Z1, double mu2, double mv2, double X2, double Y2, double Z2, double mu3, - double mv3, double X3, double Y3, double Z3) { +ap3p::solve(double R[3][3], double t[3], + double mu0, double mv0, double X0, double Y0, double Z0, + double mu1, double mv1, double X1, double Y1, double Z1, + double mu2, double mv2, double X2, double Y2, double Z2, + double mu3, double mv3, double X3, double Y3, double Z3) { double Rs[4][3][3], ts[4][3]; - int n = solve(Rs, ts, mu0, mv0, X0, Y0, Z0, mu1, mv1, X1, Y1, Z1, mu2, mv2, X2, Y2, Z2); + const bool p4p = true; + int n = solve(Rs, ts, mu0, mv0, X0, Y0, Z0, mu1, mv1, X1, Y1, Z1, mu2, mv2, X2, Y2, Z2, mu3, mv3, X3, Y3, Z3, p4p); if (n == 0) return false; - int ns = 0; - double min_reproj = 0; - for (int i = 0; i < n; i++) { - double X3p = Rs[i][0][0] * X3 + Rs[i][0][1] * Y3 + Rs[i][0][2] * Z3 + ts[i][0]; - double Y3p = Rs[i][1][0] * X3 + Rs[i][1][1] * Y3 + Rs[i][1][2] * Z3 + ts[i][1]; - double Z3p = Rs[i][2][0] * X3 + Rs[i][2][1] * Y3 + Rs[i][2][2] * Z3 + ts[i][2]; - double mu3p = cx + fx * X3p / Z3p; - double mv3p = cy + fy * Y3p / Z3p; - double reproj = (mu3p - mu3) * (mu3p - mu3) + (mv3p - mv3) * (mv3p - mv3); - if (i == 0 || min_reproj > reproj) { - ns = i; - min_reproj = reproj; - } - } - for (int i = 0; i < 3; i++) { for (int j = 0; j < 3; j++) - R[i][j] = Rs[ns][i][j]; - t[i] = ts[ns][i]; + R[i][j] = Rs[0][i][j]; + t[i] = ts[0][i]; } return true; } -int ap3p::solve(double R[4][3][3], double t[4][3], double mu0, double mv0, double X0, double Y0, double Z0, double mu1, - double mv1, double X1, double Y1, double Z1, double mu2, double mv2, double X2, double Y2, double Z2) { +int ap3p::solve(double R[4][3][3], double t[4][3], + double mu0, double mv0, double X0, double Y0, double Z0, + double mu1, double mv1, double X1, double Y1, double Z1, + 
double mu2, double mv2, double X2, double Y2, double Z2, + double mu3, double mv3, double X3, double Y3, double Z3, + bool p4p) { double mk0, mk1, mk2; double norm; @@ -413,13 +437,17 @@ int ap3p::solve(double R[4][3][3], double t[4][3], double mu0, double mv0, doubl mu2 *= mk2; mv2 *= mk2; - double featureVectors[3][3] = {{mu0, mu1, mu2}, - {mv0, mv1, mv2}, - {mk0, mk1, mk2}}; - double worldPoints[3][3] = {{X0, X1, X2}, - {Y0, Y1, Y2}, - {Z0, Z1, Z2}}; + mu3 = inv_fx * mu3 - cx_fx; + mv3 = inv_fy * mv3 - cy_fy; + double mk3 = 1; //not used - return computePoses(featureVectors, worldPoints, R, t); + double featureVectors[3][4] = {{mu0, mu1, mu2, mu3}, + {mv0, mv1, mv2, mv3}, + {mk0, mk1, mk2, mk3}}; + double worldPoints[3][4] = {{X0, X1, X2, X3}, + {Y0, Y1, Y2, Y3}, + {Z0, Z1, Z2, Z3}}; + + return computePoses(featureVectors, worldPoints, R, t, p4p); } } diff --git a/modules/calib3d/src/ap3p.h b/modules/calib3d/src/ap3p.h index df44198115..c044c6fd32 100644 --- a/modules/calib3d/src/ap3p.h +++ b/modules/calib3d/src/ap3p.h @@ -1,7 +1,7 @@ #ifndef P3P_P3P_H #define P3P_P3P_H -#include "precomp.hpp" +#include namespace cv { class ap3p { @@ -18,7 +18,7 @@ private: void extract_points(const cv::Mat &opoints, const cv::Mat &ipoints, std::vector &points) { points.clear(); int npoints = std::max(opoints.checkVector(3, CV_32F), opoints.checkVector(3, CV_64F)); - points.resize(5*npoints); + points.resize(5*4); //resize vector to fit for p4p case for (int i = 0; i < npoints; i++) { points[i * 5] = ipoints.at(i).x * fx + cx; points[i * 5 + 1] = ipoints.at(i).y * fy + cy; @@ -26,6 +26,12 @@ private: points[i * 5 + 3] = opoints.at(i).y; points[i * 5 + 4] = opoints.at(i).z; } + //Fill vectors with unused values for p3p case + for (int i = npoints; i < 4; i++) { + for (int j = 0; j < 5; j++) { + points[i * 5 + j] = 0; + } + } } void init_inverse_parameters(); @@ -45,7 +51,9 @@ public: int solve(double R[4][3][3], double t[4][3], double mu0, double mv0, double X0, double Y0, double 
Z0, double mu1, double mv1, double X1, double Y1, double Z1, - double mu2, double mv2, double X2, double Y2, double Z2); + double mu2, double mv2, double X2, double Y2, double Z2, + double mu3, double mv3, double X3, double Y3, double Z3, + bool p4p); bool solve(double R[3][3], double t[3], double mu0, double mv0, double X0, double Y0, double Z0, @@ -59,8 +67,8 @@ public: // worldPoints: Positions of the 3 feature points stored as column vectors // solutionsR: 4 possible solutions of rotation matrix of the world w.r.t the camera frame // solutionsT: 4 possible solutions of translation of the world origin w.r.t the camera frame - int computePoses(const double featureVectors[3][3], const double worldPoints[3][3], double solutionsR[4][3][3], - double solutionsT[4][3]); + int computePoses(const double featureVectors[3][4], const double worldPoints[3][4], double solutionsR[4][3][3], + double solutionsT[4][3], bool p4p); }; } diff --git a/modules/calib3d/src/ippe.cpp b/modules/calib3d/src/ippe.cpp new file mode 100644 index 0000000000..74a2864525 --- /dev/null +++ b/modules/calib3d/src/ippe.cpp @@ -0,0 +1,1100 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html + +// This file is based on file issued with the following license: + +/*============================================================================ + +Copyright 2017 Toby Collins +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, this + list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +3. 
Neither the name of the copyright holder nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#include "precomp.hpp" +#include "ippe.hpp" + +namespace cv { +namespace IPPE { +PoseSolver::PoseSolver() : IPPE_SMALL(1e-3) +{ +} + +void PoseSolver::solveGeneric(InputArray _objectPoints, InputArray _imagePoints, OutputArray _rvec1, OutputArray _tvec1, + float& err1, OutputArray _rvec2, OutputArray _tvec2, float& err2) +{ + Mat normalizedImagePoints; + if (_imagePoints.getMat().type() == CV_32FC2) + { + _imagePoints.getMat().convertTo(normalizedImagePoints, CV_64F); + } + else + { + normalizedImagePoints = _imagePoints.getMat(); + } + + //solve: + Mat Ma, Mb; + solveGeneric(_objectPoints, normalizedImagePoints, Ma, Mb); + + //the two poses computed by IPPE (sorted): + Mat M1, M2; + + //sort poses by reprojection error: + sortPosesByReprojError(_objectPoints, normalizedImagePoints, Ma, Mb, M1, M2, err1, err2); + + //fill outputs + rot2vec(M1.colRange(0, 3).rowRange(0, 3), _rvec1); + rot2vec(M2.colRange(0, 3).rowRange(0, 3), _rvec2); + + M1.colRange(3, 4).rowRange(0, 3).copyTo(_tvec1); + M2.colRange(3, 4).rowRange(0, 
3).copyTo(_tvec2); +} + +void PoseSolver::solveGeneric(InputArray _objectPoints, InputArray _normalizedInputPoints, + OutputArray _Ma, OutputArray _Mb) +{ + //argument checking: + size_t n = static_cast(_objectPoints.rows() * _objectPoints.cols()); //number of object points + int objType = _objectPoints.type(); + int type_input = _normalizedInputPoints.type(); + + CV_CheckType(objType, objType == CV_32FC3 || objType == CV_64FC3, + "Type of _objectPoints must be CV_32FC3 or CV_64FC3" ); + CV_CheckType(type_input, type_input == CV_32FC2 || type_input == CV_64FC2, + "Type of _normalizedInputPoints must be CV_32FC2 or CV_64FC2" ); + CV_Assert(_objectPoints.rows() == 1 || _objectPoints.cols() == 1); + CV_Assert(_objectPoints.rows() >= 4 || _objectPoints.cols() >= 4); + CV_Assert(_normalizedInputPoints.rows() == 1 || _normalizedInputPoints.cols() == 1); + CV_Assert(static_cast(_normalizedInputPoints.rows() * _normalizedInputPoints.cols()) == n); //image and object point counts must match + + Mat normalizedInputPoints; + if (type_input == CV_32FC2) + { + _normalizedInputPoints.getMat().convertTo(normalizedInputPoints, CV_64F); + } + else + { + normalizedInputPoints = _normalizedInputPoints.getMat(); + } + + Mat objectInputPoints; + if (objType == CV_32FC3) //the object-point type (not the image-point type) decides this conversion + { + _objectPoints.getMat().convertTo(objectInputPoints, CV_64F); + } + else + { + objectInputPoints = _objectPoints.getMat(); + } + + Mat canonicalObjPoints; + Mat MmodelPoints2Canonical; + + //transform object points to the canonical position (zero centred and on the plane z=0): + makeCanonicalObjectPoints(objectInputPoints, canonicalObjPoints, MmodelPoints2Canonical); + + //compute the homography mapping the model's points to normalizedInputPoints + Matx33d H; + HomographyHO::homographyHO(canonicalObjPoints, _normalizedInputPoints, H); + + //now solve + Mat MaCanon, MbCanon; + solveCanonicalForm(canonicalObjPoints, normalizedInputPoints, H, MaCanon, MbCanon); + + //transform computed poses to account for canonical transform: + Mat Ma = MaCanon * MmodelPoints2Canonical; + Mat Mb = 
MbCanon * MmodelPoints2Canonical; + + //output poses: + Ma.copyTo(_Ma); + Mb.copyTo(_Mb); +} + +void PoseSolver::solveCanonicalForm(InputArray _canonicalObjPoints, InputArray _normalizedInputPoints, const Matx33d& H, + OutputArray _Ma, OutputArray _Mb) +{ + _Ma.create(4, 4, CV_64FC1); + _Mb.create(4, 4, CV_64FC1); + + Mat Ma = _Ma.getMat(); + Mat Mb = _Mb.getMat(); + + //initialise poses: + Ma.setTo(0); + Ma.at(3, 3) = 1; + Mb.setTo(0); + Mb.at(3, 3) = 1; + + //Compute the Jacobian J of the homography at (0,0): + double j00 = H(0, 0) - H(2, 0) * H(0, 2); + double j01 = H(0, 1) - H(2, 1) * H(0, 2); + double j10 = H(1, 0) - H(2, 0) * H(1, 2); + double j11 = H(1, 1) - H(2, 1) * H(1, 2); + + //Compute the transformation of (0,0) into the image: + double v0 = H(0, 2); + double v1 = H(1, 2); + + //compute the two rotation solutions: + Mat Ra = Ma.colRange(0, 3).rowRange(0, 3); + Mat Rb = Mb.colRange(0, 3).rowRange(0, 3); + computeRotations(j00, j01, j10, j11, v0, v1, Ra, Rb); + + //for each rotation solution, compute the corresponding translation solution: + Mat ta = Ma.colRange(3, 4).rowRange(0, 3); + Mat tb = Mb.colRange(3, 4).rowRange(0, 3); + computeTranslation(_canonicalObjPoints, _normalizedInputPoints, Ra, ta); + computeTranslation(_canonicalObjPoints, _normalizedInputPoints, Rb, tb); +} + +void PoseSolver::solveSquare(InputArray _objectPoints, InputArray _imagePoints, OutputArray _rvec1, OutputArray _tvec1, + float& err1, OutputArray _rvec2, OutputArray _tvec2, float& err2) +{ + //allocate outputs: + _rvec1.create(3, 1, CV_64FC1); + _tvec1.create(3, 1, CV_64FC1); + _rvec2.create(3, 1, CV_64FC1); + _tvec2.create(3, 1, CV_64FC1); + + Mat objectPoints2D; + + //generate the object points: + objectPoints2D.create(1, 4, CV_64FC2); + Mat objectPoints = _objectPoints.getMat(); + double squareLength; + if (objectPoints.depth() == CV_32F) + { + objectPoints2D.ptr(0)[0] = Vec2d(objectPoints.ptr(0)[0](0), objectPoints.ptr(0)[0](1)); + objectPoints2D.ptr(0)[1] = 
Vec2d(objectPoints.ptr(0)[1](0), objectPoints.ptr(0)[1](1)); + objectPoints2D.ptr(0)[2] = Vec2d(objectPoints.ptr(0)[2](0), objectPoints.ptr(0)[2](1)); + objectPoints2D.ptr(0)[3] = Vec2d(objectPoints.ptr(0)[3](0), objectPoints.ptr(0)[3](1)); + + squareLength = sqrt( (objectPoints.ptr(0)[1](0) - objectPoints.ptr(0)[0](0))* + (objectPoints.ptr(0)[1](0) - objectPoints.ptr(0)[0](0)) + + (objectPoints.ptr(0)[1](1) - objectPoints.ptr(0)[0](1))* + (objectPoints.ptr(0)[1](1) - objectPoints.ptr(0)[0](1)) ); + } + else + { + objectPoints2D.ptr(0)[0] = Vec2d(objectPoints.ptr(0)[0](0), objectPoints.ptr(0)[0](1)); + objectPoints2D.ptr(0)[1] = Vec2d(objectPoints.ptr(0)[1](0), objectPoints.ptr(0)[1](1)); + objectPoints2D.ptr(0)[2] = Vec2d(objectPoints.ptr(0)[2](0), objectPoints.ptr(0)[2](1)); + objectPoints2D.ptr(0)[3] = Vec2d(objectPoints.ptr(0)[3](0), objectPoints.ptr(0)[3](1)); + + squareLength = sqrt( (objectPoints.ptr(0)[1](0) - objectPoints.ptr(0)[0](0))* + (objectPoints.ptr(0)[1](0) - objectPoints.ptr(0)[0](0)) + + (objectPoints.ptr(0)[1](1) - objectPoints.ptr(0)[0](1))* + (objectPoints.ptr(0)[1](1) - objectPoints.ptr(0)[0](1)) ); + } + + Mat H; //homography from canonical object points to normalized pixels + + Mat normalizedInputPoints; + if (_imagePoints.getMat().type() == CV_32FC2) + { + _imagePoints.getMat().convertTo(normalizedInputPoints, CV_64F); + } + else + { + normalizedInputPoints = _imagePoints.getMat(); + } + + //compute H + homographyFromSquarePoints(normalizedInputPoints, squareLength / 2.0, H); + + //now solve + Mat Ma, Mb; + solveCanonicalForm(objectPoints2D, normalizedInputPoints, H, Ma, Mb); + + //sort poses according to reprojection error: + Mat M1, M2; + sortPosesByReprojError(_objectPoints, normalizedInputPoints, Ma, Mb, M1, M2, err1, err2); + + //fill outputs + rot2vec(M1.colRange(0, 3).rowRange(0, 3), _rvec1); + rot2vec(M2.colRange(0, 3).rowRange(0, 3), _rvec2); + + M1.colRange(3, 4).rowRange(0, 3).copyTo(_tvec1); + M2.colRange(3, 4).rowRange(0, 
3).copyTo(_tvec2); +} + +void PoseSolver::generateSquareObjectCorners3D(double squareLength, OutputArray _objectPoints) +{ + _objectPoints.create(1, 4, CV_64FC3); + Mat objectPoints = _objectPoints.getMat(); + objectPoints.ptr(0)[0] = Vec3d(-squareLength / 2.0, squareLength / 2.0, 0.0); + objectPoints.ptr(0)[1] = Vec3d(squareLength / 2.0, squareLength / 2.0, 0.0); + objectPoints.ptr(0)[2] = Vec3d(squareLength / 2.0, -squareLength / 2.0, 0.0); + objectPoints.ptr(0)[3] = Vec3d(-squareLength / 2.0, -squareLength / 2.0, 0.0); +} + +void PoseSolver::generateSquareObjectCorners2D(double squareLength, OutputArray _objectPoints) +{ + _objectPoints.create(1, 4, CV_64FC2); + Mat objectPoints = _objectPoints.getMat(); + objectPoints.ptr(0)[0] = Vec2d(-squareLength / 2.0, squareLength / 2.0); + objectPoints.ptr(0)[1] = Vec2d(squareLength / 2.0, squareLength / 2.0); + objectPoints.ptr(0)[2] = Vec2d(squareLength / 2.0, -squareLength / 2.0); + objectPoints.ptr(0)[3] = Vec2d(-squareLength / 2.0, -squareLength / 2.0); +} + +double PoseSolver::meanSceneDepth(InputArray _objectPoints, InputArray _rvec, InputArray _tvec) +{ + CV_CheckType(_objectPoints.type(), _objectPoints.type() == CV_64FC3, + "Type of _objectPoints must be CV_64FC3" ); + + size_t n = static_cast(_objectPoints.rows() * _objectPoints.cols()); + Mat R; + Mat q; + Rodrigues(_rvec, R); + double zBar = 0; + + for (size_t i = 0; i < n; i++) + { + Mat p(_objectPoints.getMat().at(static_cast(i))); + q = R * p + _tvec.getMat(); + double z; + if (q.depth() == CV_64F) + { + z = q.at(2); + } + else + { + z = static_cast(q.at(2)); + } + zBar += z; + } + return zBar / static_cast(n); +} + +void PoseSolver::rot2vec(InputArray _R, OutputArray _r) +{ + CV_CheckType(_R.type(), _R.type() == CV_64FC1, + "Type of _R must be CV_64FC1" ); + CV_Assert(_R.rows() == 3); + CV_Assert(_R.cols() == 3); + + _r.create(3, 1, CV_64FC1); + + Mat R = _R.getMat(); + Mat rvec = _r.getMat(); + + double trace = R.at(0, 0) + R.at(1, 1) + R.at(2, 2); + 
double w_norm = acos((trace - 1.0) / 2.0); + double eps = std::numeric_limits::epsilon(); + double d = 1 / (2 * sin(w_norm)) * w_norm; + if (w_norm < eps) //rotation is the identity + { + rvec.setTo(0); + } + else + { + double c0 = R.at(2, 1) - R.at(1, 2); + double c1 = R.at(0, 2) - R.at(2, 0); + double c2 = R.at(1, 0) - R.at(0, 1); + rvec.at(0) = d * c0; + rvec.at(1) = d * c1; + rvec.at(2) = d * c2; + } +} + +void PoseSolver::computeTranslation(InputArray _objectPoints, InputArray _normalizedImgPoints, InputArray _R, OutputArray _t) +{ + //This is solved by building the linear system At = b, where t corresponds to the (unknown) translation. + //This is then inverted with the associated normal equations to give t = inv(transpose(A)*A)*transpose(A)*b + //For efficiency we only store the coefficients of (transpose(A)*A) and (transpose(A)*b) + + CV_CheckType(_objectPoints.type(), _objectPoints.type() == CV_64FC2, + "Type of _objectPoints must be CV_64FC2" ); + CV_CheckType(_normalizedImgPoints.type(), _normalizedImgPoints.type() == CV_64FC2, + "Type of _normalizedImgPoints must be CV_64FC2" ); + CV_CheckType(_R.type(), _R.type() == CV_64FC1, + "Type of _R must be CV_64FC1" ); + CV_Assert(_R.rows() == 3 && _R.cols() == 3); + CV_Assert(_objectPoints.rows() == 1 || _objectPoints.cols() == 1); + CV_Assert(_normalizedImgPoints.rows() == 1 || _normalizedImgPoints.cols() == 1); + + size_t n = static_cast(_normalizedImgPoints.rows() * _normalizedImgPoints.cols()); + CV_Assert(n == static_cast(_objectPoints.rows() * _objectPoints.cols())); + + Mat objectPoints = _objectPoints.getMat(); + Mat imgPoints = _normalizedImgPoints.getMat(); + + _t.create(3, 1, CV_64FC1); + + Mat R = _R.getMat(); + + //coefficients of (transpose(A)*A) + double ATA00 = static_cast(n); + double ATA02 = 0; + double ATA11 = static_cast(n); + double ATA12 = 0; + double ATA20 = 0; + double ATA21 = 0; + double ATA22 = 0; + + //coefficients of (transpose(A)*b) + double ATb0 = 0; + double ATb1 = 0; + double 
ATb2 = 0; + + //now loop through each point and increment the coefficients: + for (int i = 0; i < static_cast(n); i++) + { + const Vec2d& objPt = objectPoints.at(i); + double rx = R.at(0, 0) * objPt(0) + R.at(0, 1) * objPt(1); + double ry = R.at(1, 0) * objPt(0) + R.at(1, 1) * objPt(1); + double rz = R.at(2, 0) * objPt(0) + R.at(2, 1) * objPt(1); + + const Vec2d& imgPt = imgPoints.at(i); + double a2 = -imgPt(0); + double b2 = -imgPt(1); + + ATA02 = ATA02 + a2; + ATA12 = ATA12 + b2; + ATA20 = ATA20 + a2; + ATA21 = ATA21 + b2; + ATA22 = ATA22 + a2 * a2 + b2 * b2; + + double bx = -a2 * rz - rx; + double by = -b2 * rz - ry; + + ATb0 = ATb0 + bx; + ATb1 = ATb1 + by; + ATb2 = ATb2 + a2 * bx + b2 * by; + } + + double detAInv = 1.0 / (ATA00 * ATA11 * ATA22 - ATA00 * ATA12 * ATA21 - ATA02 * ATA11 * ATA20); + + //S gives inv(transpose(A)*A)/det(A)^2 + //construct S: + double S00 = ATA11 * ATA22 - ATA12 * ATA21; + double S01 = ATA02 * ATA21; + double S02 = -ATA02 * ATA11; + double S10 = ATA12 * ATA20; + double S11 = ATA00 * ATA22 - ATA02 * ATA20; + double S12 = -ATA00 * ATA12; + double S20 = -ATA11 * ATA20; + double S21 = -ATA00 * ATA21; + double S22 = ATA00 * ATA11; + + //solve t: + Mat t = _t.getMat(); + t.at(0) = detAInv * (S00 * ATb0 + S01 * ATb1 + S02 * ATb2); + t.at(1) = detAInv * (S10 * ATb0 + S11 * ATb1 + S12 * ATb2); + t.at(2) = detAInv * (S20 * ATb0 + S21 * ATb1 + S22 * ATb2); +} + +void PoseSolver::computeRotations(double j00, double j01, double j10, double j11, double p, double q, OutputArray _R1, OutputArray _R2) +{ + //This is fairly optimized code which makes it hard to understand. The matlab code is certainly easier to read. 
+ _R1.create(3, 3, CV_64FC1); + _R2.create(3, 3, CV_64FC1); + + Matx33d Rv; + Matx31d v(p, q, 1); + rotateVec2ZAxis(v,Rv); + Rv = Rv.t(); + + //setup the 2x2 SVD decomposition: + double rv00 = Rv(0,0); + double rv01 = Rv(0,1); + double rv02 = Rv(0,2); + + double rv10 = Rv(1,0); + double rv11 = Rv(1,1); + double rv12 = Rv(1,2); + + double rv20 = Rv(2,0); + double rv21 = Rv(2,1); + double rv22 = Rv(2,2); + + double b00 = rv00 - p * rv20; + double b01 = rv01 - p * rv21; + double b10 = rv10 - q * rv20; + double b11 = rv11 - q * rv21; + + double dtinv = 1.0 / ((b00 * b11 - b01 * b10)); + + double binv00 = dtinv * b11; + double binv01 = -dtinv * b01; + double binv10 = -dtinv * b10; + double binv11 = dtinv * b00; + + double a00 = binv00 * j00 + binv01 * j10; + double a01 = binv00 * j01 + binv01 * j11; + double a10 = binv10 * j00 + binv11 * j10; + double a11 = binv10 * j01 + binv11 * j11; + + //compute the largest singular value of A: + double ata00 = a00 * a00 + a01 * a01; + double ata01 = a00 * a10 + a01 * a11; + double ata11 = a10 * a10 + a11 * a11; + + double gamma2 = 0.5 * (ata00 + ata11 + sqrt((ata00 - ata11) * (ata00 - ata11) + 4.0 * ata01 * ata01)); + if (gamma2 < 0) + CV_Error(Error::StsNoConv, "gamma2 is negative."); + + double gamma = sqrt(gamma2); + + if (std::fabs(gamma) < std::numeric_limits::epsilon()) + CV_Error(Error::StsNoConv, "gamma is zero."); + + //reconstruct the full rotation matrices: + double rtilde00 = a00 / gamma; + double rtilde01 = a01 / gamma; + double rtilde10 = a10 / gamma; + double rtilde11 = a11 / gamma; + + double rtilde00_2 = rtilde00 * rtilde00; + double rtilde01_2 = rtilde01 * rtilde01; + double rtilde10_2 = rtilde10 * rtilde10; + double rtilde11_2 = rtilde11 * rtilde11; + + double b0 = sqrt(-rtilde00_2 - rtilde10_2 + 1); + double b1 = sqrt(-rtilde01_2 - rtilde11_2 + 1); + double sp = (-rtilde00 * rtilde01 - rtilde10 * rtilde11); + + if (sp < 0) + { + b1 = -b1; + } + + //store results: + Mat R1 = _R1.getMat(); + Mat R2 = _R2.getMat(); 
+ + R1.at(0, 0) = (rtilde00)*rv00 + (rtilde10)*rv01 + (b0)*rv02; + R1.at(0, 1) = (rtilde01)*rv00 + (rtilde11)*rv01 + (b1)*rv02; + R1.at(0, 2) = (b1 * rtilde10 - b0 * rtilde11) * rv00 + (b0 * rtilde01 - b1 * rtilde00) * rv01 + (rtilde00 * rtilde11 - rtilde01 * rtilde10) * rv02; + R1.at(1, 0) = (rtilde00)*rv10 + (rtilde10)*rv11 + (b0)*rv12; + R1.at(1, 1) = (rtilde01)*rv10 + (rtilde11)*rv11 + (b1)*rv12; + R1.at(1, 2) = (b1 * rtilde10 - b0 * rtilde11) * rv10 + (b0 * rtilde01 - b1 * rtilde00) * rv11 + (rtilde00 * rtilde11 - rtilde01 * rtilde10) * rv12; + R1.at(2, 0) = (rtilde00)*rv20 + (rtilde10)*rv21 + (b0)*rv22; + R1.at(2, 1) = (rtilde01)*rv20 + (rtilde11)*rv21 + (b1)*rv22; + R1.at(2, 2) = (b1 * rtilde10 - b0 * rtilde11) * rv20 + (b0 * rtilde01 - b1 * rtilde00) * rv21 + (rtilde00 * rtilde11 - rtilde01 * rtilde10) * rv22; + + R2.at(0, 0) = (rtilde00)*rv00 + (rtilde10)*rv01 + (-b0) * rv02; + R2.at(0, 1) = (rtilde01)*rv00 + (rtilde11)*rv01 + (-b1) * rv02; + R2.at(0, 2) = (b0 * rtilde11 - b1 * rtilde10) * rv00 + (b1 * rtilde00 - b0 * rtilde01) * rv01 + (rtilde00 * rtilde11 - rtilde01 * rtilde10) * rv02; + R2.at(1, 0) = (rtilde00)*rv10 + (rtilde10)*rv11 + (-b0) * rv12; + R2.at(1, 1) = (rtilde01)*rv10 + (rtilde11)*rv11 + (-b1) * rv12; + R2.at(1, 2) = (b0 * rtilde11 - b1 * rtilde10) * rv10 + (b1 * rtilde00 - b0 * rtilde01) * rv11 + (rtilde00 * rtilde11 - rtilde01 * rtilde10) * rv12; + R2.at(2, 0) = (rtilde00)*rv20 + (rtilde10)*rv21 + (-b0) * rv22; + R2.at(2, 1) = (rtilde01)*rv20 + (rtilde11)*rv21 + (-b1) * rv22; + R2.at(2, 2) = (b0 * rtilde11 - b1 * rtilde10) * rv20 + (b1 * rtilde00 - b0 * rtilde01) * rv21 + (rtilde00 * rtilde11 - rtilde01 * rtilde10) * rv22; +} + +void PoseSolver::homographyFromSquarePoints(InputArray _targetPoints, double halfLength, OutputArray H_) +{ + CV_CheckType(_targetPoints.type(), _targetPoints.type() == CV_32FC2 || _targetPoints.type() == CV_64FC2, + "Type of _targetPoints must be CV_32FC2 or CV_64FC2" ); + + Mat pts = _targetPoints.getMat(); + + 
double p1x, p1y; + double p2x, p2y; + double p3x, p3y; + double p4x, p4y; + + if (_targetPoints.type() == CV_32FC2) + { + p1x = -pts.at(0)(0); + p1y = -pts.at(0)(1); + + p2x = -pts.at(1)(0); + p2y = -pts.at(1)(1); + + p3x = -pts.at(2)(0); + p3y = -pts.at(2)(1); + + p4x = -pts.at(3)(0); + p4y = -pts.at(3)(1); + } + else + { + p1x = -pts.at(0)(0); + p1y = -pts.at(0)(1); + + p2x = -pts.at(1)(0); + p2y = -pts.at(1)(1); + + p3x = -pts.at(2)(0); + p3y = -pts.at(2)(1); + + p4x = -pts.at(3)(0); + p4y = -pts.at(3)(1); + } + + //analytic solution: + double det = (halfLength * (p1x * p2y - p2x * p1y - p1x * p4y + p2x * p3y - p3x * p2y + p4x * p1y + p3x * p4y - p4x * p3y)); + if (abs(det) < 1e-9) + CV_Error(Error::StsNoConv, "Determinant is zero!"); + double detsInv = -1 / det; + + Matx33d H; + H(0, 0) = detsInv * (p1x * p3x * p2y - p2x * p3x * p1y - p1x * p4x * p2y + p2x * p4x * p1y - p1x * p3x * p4y + p1x * p4x * p3y + p2x * p3x * p4y - p2x * p4x * p3y); + H(0, 1) = detsInv * (p1x * p2x * p3y - p1x * p3x * p2y - p1x * p2x * p4y + p2x * p4x * p1y + p1x * p3x * p4y - p3x * p4x * p1y - p2x * p4x * p3y + p3x * p4x * p2y); + H(0, 2) = detsInv * halfLength * (p1x * p2x * p3y - p2x * p3x * p1y - p1x * p2x * p4y + p1x * p4x * p2y - p1x * p4x * p3y + p3x * p4x * p1y + p2x * p3x * p4y - p3x * p4x * p2y); + H(1, 0) = detsInv * (p1x * p2y * p3y - p2x * p1y * p3y - p1x * p2y * p4y + p2x * p1y * p4y - p3x * p1y * p4y + p4x * p1y * p3y + p3x * p2y * p4y - p4x * p2y * p3y); + H(1, 1) = detsInv * (p2x * p1y * p3y - p3x * p1y * p2y - p1x * p2y * p4y + p4x * p1y * p2y + p1x * p3y * p4y - p4x * p1y * p3y - p2x * p3y * p4y + p3x * p2y * p4y); + H(1, 2) = detsInv * halfLength * (p1x * p2y * p3y - p3x * p1y * p2y - p2x * p1y * p4y + p4x * p1y * p2y - p1x * p3y * p4y + p3x * p1y * p4y + p2x * p3y * p4y - p4x * p2y * p3y); + H(2, 0) = -detsInv * (p1x * p3y - p3x * p1y - p1x * p4y - p2x * p3y + p3x * p2y + p4x * p1y + p2x * p4y - p4x * p2y); + H(2, 1) = detsInv * (p1x * p2y - p2x * p1y - p1x * p3y + 
p3x * p1y + p2x * p4y - p4x * p2y - p3x * p4y + p4x * p3y); + H(2, 2) = 1.0; + + Mat(H, false).copyTo(H_); +} + +void PoseSolver::makeCanonicalObjectPoints(InputArray _objectPoints, OutputArray _canonicalObjPoints, OutputArray _MmodelPoints2Canonical) +{ + int objType = _objectPoints.type(); + CV_CheckType(objType, objType == CV_32FC3 || objType == CV_64FC3, + "Type of _objectPoints must be CV_32FC3 or CV_64FC3" ); + + int n = _objectPoints.rows() * _objectPoints.cols(); + + _canonicalObjPoints.create(1, n, CV_64FC2); + + Mat objectPoints = _objectPoints.getMat(); + Mat canonicalObjPoints = _canonicalObjPoints.getMat(); + + Mat UZero(3, n, CV_64FC1); + + double xBar = 0; + double yBar = 0; + double zBar = 0; + bool isOnZPlane = true; + for (int i = 0; i < n; i++) + { + double x, y, z; + if (objType == CV_32FC3) + { + x = static_cast(objectPoints.at(i)[0]); + y = static_cast(objectPoints.at(i)[1]); + z = static_cast(objectPoints.at(i)[2]); + } + else + { + x = objectPoints.at(i)[0]; + y = objectPoints.at(i)[1]; + z = objectPoints.at(i)[2]; + } + + if (abs(z) > IPPE_SMALL) + { + isOnZPlane = false; + } + + xBar += x; + yBar += y; + zBar += z; + + UZero.at(0, i) = x; + UZero.at(1, i) = y; + UZero.at(2, i) = z; + } + xBar = xBar / static_cast(n); + yBar = yBar / static_cast(n); + zBar = zBar / static_cast(n); + + for (int i = 0; i < n; i++) + { + UZero.at(0, i) -= xBar; + UZero.at(1, i) -= yBar; + UZero.at(2, i) -= zBar; + } + + Matx44d MCenter = Matx44d::eye(); + MCenter(0, 3) = -xBar; + MCenter(1, 3) = -yBar; + MCenter(2, 3) = -zBar; + + if (isOnZPlane) + { + //MmodelPoints2Canonical is given by MCenter + Mat(MCenter, false).copyTo(_MmodelPoints2Canonical); + for (int i = 0; i < n; i++) + { + canonicalObjPoints.at(i)[0] = UZero.at(0, i); + canonicalObjPoints.at(i)[1] = UZero.at(1, i); + } + } + else + { + Mat UZeroAligned(3, n, CV_64FC1); + Matx33d R; //rotation that rotates objectPoints to the plane z=0 + + if (!computeObjextSpaceR3Pts(objectPoints,R)) + { + //we 
could not compute R, problably because there is a duplicate point in {objectPoints(0),objectPoints(1),objectPoints(2)}. + //So we compute it with the SVD (which is slower): + computeObjextSpaceRSvD(UZero,R); + } + + UZeroAligned = R * UZero; + + for (int i = 0; i < n; i++) + { + canonicalObjPoints.at(i)[0] = UZeroAligned.at(0, i); + canonicalObjPoints.at(i)[1] = UZeroAligned.at(1, i); + if (abs(UZeroAligned.at(2, i)) > IPPE_SMALL) + CV_Error(Error::StsNoConv, "Cannot transform object points to the plane z=0!"); + } + + Matx44d MRot = Matx44d::zeros(); + MRot(3, 3) = 1; + + for (int i = 0; i < 3; i++) + { + for (int j = 0; j < 3; j++) + { + MRot(i,j) = R(i,j); + } + } + Matx44d Mb = MRot * MCenter; + Mat(Mb, false).copyTo(_MmodelPoints2Canonical); + } +} + +void PoseSolver::evalReprojError(InputArray _objectPoints, InputArray _imagePoints, InputArray _M, float& err) +{ + Mat projectedPoints; + Mat imagePoints = _imagePoints.getMat(); + Mat r; + rot2vec(_M.getMat().colRange(0, 3).rowRange(0, 3), r); + + Mat K = Mat::eye(3, 3, CV_64FC1); + Mat dist; + projectPoints(_objectPoints, r, _M.getMat().colRange(3, 4).rowRange(0, 3), K, dist, projectedPoints); + + err = 0; + int n = _objectPoints.rows() * _objectPoints.cols(); + + float dx, dy; + const int projPtsDepth = projectedPoints.depth(); + for (int i = 0; i < n; i++) + { + if (projPtsDepth == CV_32F) + { + dx = projectedPoints.at(i)[0] - static_cast(imagePoints.at(i)[0]); + dy = projectedPoints.at(i)[1] - static_cast(imagePoints.at(i)[1]); + } + else + { + dx = static_cast(projectedPoints.at(i)[0] - imagePoints.at(i)[0]); + dy = static_cast(projectedPoints.at(i)[1] - imagePoints.at(i)[1]); + } + + err += dx * dx + dy * dy; + } + err = sqrt(err / (2.0f * n)); +} + +void PoseSolver::sortPosesByReprojError(InputArray _objectPoints, InputArray _imagePoints, InputArray _Ma, InputArray _Mb, + OutputArray _M1, OutputArray _M2, float& err1, float& err2) +{ + float erra, errb; + evalReprojError(_objectPoints, _imagePoints, _Ma, 
erra); + evalReprojError(_objectPoints, _imagePoints, _Mb, errb); + if (erra < errb) + { + err1 = erra; + _Ma.copyTo(_M1); + + err2 = errb; + _Mb.copyTo(_M2); + } + else + { + err1 = errb; + _Mb.copyTo(_M1); + + err2 = erra; + _Ma.copyTo(_M2); + } +} + +void PoseSolver::rotateVec2ZAxis(const Matx31d& a, Matx33d& Ra) +{ + double ax = a(0); + double ay = a(1); + double az = a(2); + + double nrm = sqrt(ax*ax + ay*ay + az*az); + ax = ax/nrm; + ay = ay/nrm; + az = az/nrm; + + double c = az; + + if (abs(1.0+c) < std::numeric_limits::epsilon()) + { + Ra = Matx33d::zeros(); + Ra(0,0) = 1.0; + Ra(1,1) = 1.0; + Ra(2,2) = -1.0; + } + else + { + double d = 1.0/(1.0+c); + double ax2 = ax*ax; + double ay2 = ay*ay; + double axay = ax*ay; + + Ra(0,0) = -ax2*d + 1.0; + Ra(0,1) = -axay*d; + Ra(0,2) = -ax; + + Ra(1,0) = -axay*d; + Ra(1,1) = -ay2*d + 1.0; + Ra(1,2) = -ay; + + Ra(2,0) = ax; + Ra(2,1) = ay; + Ra(2,2) = 1.0 - (ax2 + ay2)*d; + } +} + +bool PoseSolver::computeObjextSpaceR3Pts(InputArray _objectPoints, Matx33d& R) +{ + bool ret; //return argument + double p1x,p1y,p1z; + double p2x,p2y,p2z; + double p3x,p3y,p3z; + + Mat objectPoints = _objectPoints.getMat(); + if (objectPoints.type() == CV_32FC3) + { + p1x = objectPoints.at(0)[0]; + p1y = objectPoints.at(0)[1]; + p1z = objectPoints.at(0)[2]; + + p2x = objectPoints.at(1)[0]; + p2y = objectPoints.at(1)[1]; + p2z = objectPoints.at(1)[2]; + + p3x = objectPoints.at(2)[0]; + p3y = objectPoints.at(2)[1]; + p3z = objectPoints.at(2)[2]; + } + else + { + p1x = objectPoints.at(0)[0]; + p1y = objectPoints.at(0)[1]; + p1z = objectPoints.at(0)[2]; + + p2x = objectPoints.at(1)[0]; + p2y = objectPoints.at(1)[1]; + p2z = objectPoints.at(1)[2]; + + p3x = objectPoints.at(2)[0]; + p3y = objectPoints.at(2)[1]; + p3z = objectPoints.at(2)[2]; + } + + double nx = (p1y - p2y)*(p1z - p3z) - (p1y - p3y)*(p1z - p2z); + double ny = (p1x - p3x)*(p1z - p2z) - (p1x - p2x)*(p1z - p3z); + double nz = (p1x - p2x)*(p1y - p3y) - (p1x - p3x)*(p1y - p2y); + + 
double nrm = sqrt(nx*nx+ ny*ny + nz*nz); + if (nrm > IPPE_SMALL) + { + nx = nx/nrm; + ny = ny/nrm; + nz = nz/nrm; + Matx31d v(nx, ny, nz); + rotateVec2ZAxis(v,R); + ret = true; + } + else + { + ret = false; + } + return ret; +} + +void PoseSolver::computeObjextSpaceRSvD(InputArray _objectPointsZeroMean, OutputArray _R) +{ + _R.create(3, 3, CV_64FC1); + Mat R = _R.getMat(); + + //we could not compute R with the first three points, so lets use the SVD + SVD s; + Mat W, U, VT; + s.compute(_objectPointsZeroMean.getMat() * _objectPointsZeroMean.getMat().t(), W, U, VT); + double s3 = W.at(2); + double s2 = W.at(1); + + //check if points are coplanar: + CV_Assert(s3 / s2 < IPPE_SMALL); + + R = U.t(); + if (determinant(R) < 0) + { + //this ensures R is a rotation matrix and not a general unitary matrix: + R.at(2, 0) = -R.at(2, 0); + R.at(2, 1) = -R.at(2, 1); + R.at(2, 2) = -R.at(2, 2); + } +} +} //namespace IPPE + +namespace HomographyHO { +void normalizeDataIsotropic(InputArray _Data, OutputArray _DataN, OutputArray _T, OutputArray _Ti) +{ + Mat Data = _Data.getMat(); + int numPoints = Data.rows * Data.cols; + CV_Assert(Data.rows == 1 || Data.cols == 1); + CV_Assert(Data.channels() == 2 || Data.channels() == 3); + CV_Assert(numPoints >= 4); + + int dataType = _Data.type(); + CV_CheckType(dataType, dataType == CV_32FC2 || dataType == CV_32FC3 || dataType == CV_64FC2 || dataType == CV_64FC3, + "Type of _Data must be one of CV_32FC2, CV_32FC3, CV_64FC2, CV_64FC3"); + + _DataN.create(2, numPoints, CV_64FC1); + + _T.create(3, 3, CV_64FC1); + _Ti.create(3, 3, CV_64FC1); + + Mat DataN = _DataN.getMat(); + Mat T = _T.getMat(); + Mat Ti = _Ti.getMat(); + + _T.setTo(0); + _Ti.setTo(0); + + int numChannels = Data.channels(); + double xm = 0; + double ym = 0; + for (int i = 0; i < numPoints; i++) + { + if (numChannels == 2) + { + if (dataType == CV_32FC2) + { + xm = xm + Data.at(i)[0]; + ym = ym + Data.at(i)[1]; + } + else + { + xm = xm + Data.at(i)[0]; + ym = ym + Data.at(i)[1]; + } 
+ } + else + { + if (dataType == CV_32FC3) + { + xm = xm + Data.at(i)[0]; + ym = ym + Data.at(i)[1]; + } + else + { + xm = xm + Data.at(i)[0]; + ym = ym + Data.at(i)[1]; + } + } + } + xm = xm / static_cast(numPoints); + ym = ym / static_cast(numPoints); + + double kappa = 0; + double xh, yh; + + for (int i = 0; i < numPoints; i++) + { + + if (numChannels == 2) + { + if (dataType == CV_32FC2) + { + xh = Data.at(i)[0] - xm; + yh = Data.at(i)[1] - ym; + } + else + { + xh = Data.at(i)[0] - xm; + yh = Data.at(i)[1] - ym; + } + } + else + { + if (dataType == CV_32FC3) + { + xh = Data.at(i)[0] - xm; + yh = Data.at(i)[1] - ym; + } + else + { + xh = Data.at(i)[0] - xm; + yh = Data.at(i)[1] - ym; + } + } + + DataN.at(0, i) = xh; + DataN.at(1, i) = yh; + kappa = kappa + xh * xh + yh * yh; + } + double beta = sqrt(2 * numPoints / kappa); + DataN = DataN * beta; + + T.at(0, 0) = 1.0 / beta; + T.at(1, 1) = 1.0 / beta; + + T.at(0, 2) = xm; + T.at(1, 2) = ym; + + T.at(2, 2) = 1; + + Ti.at(0, 0) = beta; + Ti.at(1, 1) = beta; + + Ti.at(0, 2) = -beta * xm; + Ti.at(1, 2) = -beta * ym; + + Ti.at(2, 2) = 1; +} + +void homographyHO(InputArray _srcPoints, InputArray _targPoints, Matx33d& H) +{ + Mat DataA, DataB, TA, TAi, TB, TBi; + + HomographyHO::normalizeDataIsotropic(_srcPoints, DataA, TA, TAi); + HomographyHO::normalizeDataIsotropic(_targPoints, DataB, TB, TBi); + + int n = DataA.cols; + CV_Assert(n == DataB.cols); + + Mat C1(1, n, CV_64FC1); + Mat C2(1, n, CV_64FC1); + Mat C3(1, n, CV_64FC1); + Mat C4(1, n, CV_64FC1); + + double mC1 = 0, mC2 = 0, mC3 = 0, mC4 = 0; + + for (int i = 0; i < n; i++) + { + C1.at(0, i) = -DataB.at(0, i) * DataA.at(0, i); + C2.at(0, i) = -DataB.at(0, i) * DataA.at(1, i); + C3.at(0, i) = -DataB.at(1, i) * DataA.at(0, i); + C4.at(0, i) = -DataB.at(1, i) * DataA.at(1, i); + + mC1 += C1.at(0, i); + mC2 += C2.at(0, i); + mC3 += C3.at(0, i); + mC4 += C4.at(0, i); + } + + mC1 /= n; + mC2 /= n; + mC3 /= n; + mC4 /= n; + + Mat Mx(n, 3, CV_64FC1); + Mat My(n, 3, 
CV_64FC1); + + for (int i = 0; i < n; i++) + { + Mx.at(i, 0) = C1.at(0, i) - mC1; + Mx.at(i, 1) = C2.at(0, i) - mC2; + Mx.at(i, 2) = -DataB.at(0, i); + + My.at(i, 0) = C3.at(0, i) - mC3; + My.at(i, 1) = C4.at(0, i) - mC4; + My.at(i, 2) = -DataB.at(1, i); + } + + Mat DataAT, DataADataAT; + + transpose(DataA, DataAT); + DataADataAT = DataA * DataAT; + double dt = DataADataAT.at(0, 0) * DataADataAT.at(1, 1) - DataADataAT.at(0, 1) * DataADataAT.at(1, 0); + + Mat DataADataATi(2, 2, CV_64FC1); + DataADataATi.at(0, 0) = DataADataAT.at(1, 1) / dt; + DataADataATi.at(0, 1) = -DataADataAT.at(0, 1) / dt; + DataADataATi.at(1, 0) = -DataADataAT.at(1, 0) / dt; + DataADataATi.at(1, 1) = DataADataAT.at(0, 0) / dt; + + Mat Pp = DataADataATi * DataA; + + Mat Bx = Pp * Mx; + Mat By = Pp * My; + + Mat Ex = DataAT * Bx; + Mat Ey = DataAT * By; + + Mat D(2 * n, 3, CV_64FC1); + + for (int i = 0; i < n; i++) + { + D.at(i, 0) = Mx.at(i, 0) - Ex.at(i, 0); + D.at(i, 1) = Mx.at(i, 1) - Ex.at(i, 1); + D.at(i, 2) = Mx.at(i, 2) - Ex.at(i, 2); + + D.at(i + n, 0) = My.at(i, 0) - Ey.at(i, 0); + D.at(i + n, 1) = My.at(i, 1) - Ey.at(i, 1); + D.at(i + n, 2) = My.at(i, 2) - Ey.at(i, 2); + } + + Mat DT, DDT; + transpose(D, DT); + DDT = DT * D; + + Mat S, U; + eigen(DDT, S, U); + + Mat h789(3, 1, CV_64FC1); + h789.at(0, 0) = U.at(2, 0); + h789.at(1, 0) = U.at(2, 1); + h789.at(2, 0) = U.at(2, 2); + + Mat h12 = -Bx * h789; + Mat h45 = -By * h789; + + double h3 = -(mC1 * h789.at(0, 0) + mC2 * h789.at(1, 0)); + double h6 = -(mC3 * h789.at(0, 0) + mC4 * h789.at(1, 0)); + + H(0, 0) = h12.at(0, 0); + H(0, 1) = h12.at(1, 0); + H(0, 2) = h3; + + H(1, 0) = h45.at(0, 0); + H(1, 1) = h45.at(1, 0); + H(1, 2) = h6; + + H(2, 0) = h789.at(0, 0); + H(2, 1) = h789.at(1, 0); + H(2, 2) = h789.at(2, 0); + + H = Mat(TB * H * TAi); + double h22_inv = 1 / H(2, 2); + H = H * h22_inv; +} +} +} //namespace cv diff --git a/modules/calib3d/src/ippe.hpp b/modules/calib3d/src/ippe.hpp new file mode 100644 index 0000000000..6dc76f59a6 
--- /dev/null +++ b/modules/calib3d/src/ippe.hpp @@ -0,0 +1,259 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html + +// This file is based on file issued with the following license: + +/*============================================================================ + +Copyright 2017 Toby Collins +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, this + list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/ + +#ifndef OPENCV_CALIB3D_IPPE_HPP +#define OPENCV_CALIB3D_IPPE_HPP + +#include + +namespace cv { +namespace IPPE { + +class PoseSolver { +public: + /** + * @brief PoseSolver constructor + */ + PoseSolver(); + + /** + * @brief Finds the two possible poses of a planar object given a set of correspondences and their respective reprojection errors. + * The poses are sorted with the first having the lowest reprojection error. + * @param objectPoints Array of 4 or more coplanar object points defined in object coordinates. + * 1xN/Nx1 3-channel (float or double) where N is the number of points + * @param imagePoints Array of corresponding image points, 1xN/Nx1 2-channel. Points are in normalized pixel coordinates. + * @param rvec1 First rotation solution (3x1 rotation vector) + * @param tvec1 First translation solution (3x1 vector) + * @param reprojErr1 Reprojection error of first solution + * @param rvec2 Second rotation solution (3x1 rotation vector) + * @param tvec2 Second translation solution (3x1 vector) + * @param reprojErr2 Reprojection error of second solution + */ + void solveGeneric(InputArray objectPoints, InputArray imagePoints, OutputArray rvec1, OutputArray tvec1, + float& reprojErr1, OutputArray rvec2, OutputArray tvec2, float& reprojErr2); + + /** + * @brief Finds the two possible poses of a square planar object and their respective reprojection errors using IPPE. + * The poses are sorted so that the first one is the one with the lowest reprojection error. + * + * @param objectPoints Array of 4 coplanar object points defined in the following object coordinates: + * - point 0: [-squareLength / 2.0, squareLength / 2.0, 0] + * - point 1: [squareLength / 2.0, squareLength / 2.0, 0] + * - point 2: [squareLength / 2.0, -squareLength / 2.0, 0] + * - point 3: [-squareLength / 2.0, -squareLength / 2.0, 0] + * 1xN/Nx1 3-channel (float or double) where N is the number of points + * @param imagePoints Array of corresponding image points, 1xN/Nx1 2-channel. 
Points are in normalized pixel coordinates. + * @param rvec1 First rotation solution (3x1 rotation vector) + * @param tvec1 First translation solution (3x1 vector) + * @param reprojErr1 Reprojection error of first solution + * @param rvec2 Second rotation solution (3x1 rotation vector) + * @param tvec2 Second translation solution (3x1 vector) + * @param reprojErr2 Reprojection error of second solution + */ + void solveSquare(InputArray objectPoints, InputArray imagePoints, OutputArray rvec1, OutputArray tvec1, + float& reprojErr1, OutputArray rvec2, OutputArray tvec2, float& reprojErr2); + +private: + /** + * @brief Finds the two possible poses of a planar object given a set of correspondences in normalized pixel coordinates. + * These poses are **NOT** sorted on reprojection error. Note that the returned poses are object-to-camera transforms, and not camera-to-object transforms. + * @param objectPoints Array of 4 or more coplanar object points defined in object coordinates. 1xN/Nx1 3-channel (float or double). + * @param normalizedImagePoints Array of corresponding image points in normalized pixel coordinates, 1xN/Nx1 2-channel (float or double). + * @param Ma First pose solution (unsorted) + * @param Mb Second pose solution (unsorted) + */ + void solveGeneric(InputArray objectPoints, InputArray normalizedImagePoints, OutputArray Ma, OutputArray Mb); + + /** + * @brief Finds the two possible poses of a planar object in its canonical position, given a set of correspondences in normalized pixel coordinates. + * These poses are **NOT** sorted on reprojection error. Note that the returned poses are object-to-camera transforms, and not camera-to-object transforms. + * @param canonicalObjPoints Array of 4 or more coplanar object points defined in object coordinates. 
1xN/Nx1 3-channel (double) where N is the number of points + * @param normalizedInputPoints Array of corresponding image points in normalized pixel coordinates, 1xN/Nx1 2-channel (double) where N is the number of points + * @param H Homography mapping canonicalObjPoints to normalizedInputPoints. + * @param Ma First pose solution (unsorted) + * @param Mb Second pose solution (unsorted) + */ + void solveCanonicalForm(InputArray canonicalObjPoints, InputArray normalizedInputPoints, const Matx33d& H, + OutputArray Ma, OutputArray Mb); + + /** + * @brief Computes the translation solution for a given rotation solution + * @param objectPoints Array of corresponding object points, 1xN/Nx1 3-channel where N is the number of points + * @param normalizedImgPoints Array of corresponding image points (undistorted), 1xN/Nx1 2-channel where N is the number of points + * @param R Rotation solution (3x1 rotation vector) + * @param t Translation solution (3x1 vector) + */ + void computeTranslation(InputArray objectPoints, InputArray normalizedImgPoints, InputArray R, OutputArray t); + + /** + * @brief Computes the two rotation solutions from the Jacobian of a homography matrix H at a point (ux,uy) on the object plane. + * For highest accuracy the Jacobian should be computed at the centroid of the point correspondences (see the IPPE paper for the explanation of this). + * For a point (ux,uy) on the object plane, suppose the homography H maps (ux,uy) to a point (p,q) in the image (in normalized pixel coordinates). + * The Jacobian matrix [J00, J01; J10,J11] is the Jacobian of the mapping evaluated at (ux,uy).
+ * @param j00 Homography jacobian coefficient at (ux,uy) + * @param j01 Homography jacobian coefficient at (ux,uy) + * @param j10 Homography jacobian coefficient at (ux,uy) + * @param j11 Homography jacobian coefficient at (ux,uy) + * @param p The x coordinate of point (ux,uy) mapped into the image (undistorted and normalized position) + * @param q The y coordinate of point (ux,uy) mapped into the image (undistorted and normalized position) + */ + void computeRotations(double j00, double j01, double j10, double j11, double p, double q, OutputArray _R1, OutputArray _R2); + + /** + * @brief Closed-form solution for the homography mapping with four corner correspondences of a square (it maps source points to target points). + * The source points are the four corners of a zero-centred square defined by: + * - point 0: [-squareLength / 2.0, squareLength / 2.0] + * - point 1: [squareLength / 2.0, squareLength / 2.0] + * - point 2: [squareLength / 2.0, -squareLength / 2.0] + * - point 3: [-squareLength / 2.0, -squareLength / 2.0] + * + * @param targetPoints Array of four corresponding target points, 1x4/4x1 2-channel. Note that the points should be ordered to correspond with points 0, 1, 2 and 3. + * @param halfLength The square's half length (i.e.
squareLength/2.0) + * @param H Homography mapping the source points to the target points, 3x3 single channel + */ + void homographyFromSquarePoints(InputArray targetPoints, double halfLength, OutputArray H); + + /** + * @brief Fast conversion from a rotation matrix to a rotation vector using Rodrigues' formula + * @param R Input rotation matrix, 3x3 1-channel (double) + * @param r Output rotation vector, 3x1/1x3 1-channel (double) + */ + void rot2vec(InputArray R, OutputArray r); + + /** + * @brief Takes a set of planar object points and transforms them to 'canonical' object coordinates. This is when they have zero mean and are on the plane z=0 + * @param objectPoints Array of 4 or more coplanar object points defined in object coordinates. 1xN/Nx1 3-channel (float or double) where N is the number of points + * @param canonicalObjectPoints Object points in canonical coordinates 1xN/Nx1 2-channel (double) + * @param MobjectPoints2Canonical Transform matrix mapping _objectPoints to _canonicalObjectPoints: 4x4 1-channel (double) + */ + void makeCanonicalObjectPoints(InputArray objectPoints, OutputArray canonicalObjectPoints, OutputArray MobjectPoints2Canonical); + + /** + * @brief Evaluates the Root Mean Squared (RMS) reprojection error of a pose solution. + * @param objectPoints Array of 4 or more coplanar object points defined in object coordinates. 1xN/Nx1 3-channel (float or double) where N is the number of points + * @param imagePoints Array of corresponding image points, 1xN/Nx1 2-channel. This can either be in pixel coordinates or normalized pixel coordinates. + * @param M Pose matrix from 3D object to camera coordinates: 4x4 1-channel (double) + * @param err RMS reprojection error + */ + void evalReprojError(InputArray objectPoints, InputArray imagePoints, InputArray M, float& err); + + /** + * @brief Sorts two pose solutions according to their RMS reprojection error (lowest first).
+ * @param objectPoints Array of 4 or more coplanar object points defined in object coordinates. 1xN/Nx1 3-channel (float or double) where N is the number of points + * @param imagePoints Array of corresponding image points, 1xN/Nx1 2-channel. This can either be in pixel coordinates or normalized pixel coordinates. + * @param Ma Pose matrix 1: 4x4 1-channel + * @param Mb Pose matrix 2: 4x4 1-channel + * @param M1 Member of {Ma,Mb} with lowest RMS reprojection error. Performs deep copy. + * @param M2 Member of {Ma,Mb} with highest RMS reprojection error. Performs deep copy. + * @param err1 RMS reprojection error of _M1 + * @param err2 RMS reprojection error of _M2 + */ + void sortPosesByReprojError(InputArray objectPoints, InputArray imagePoints, InputArray Ma, InputArray Mb, OutputArray M1, OutputArray M2, float& err1, float& err2); + + /** + * @brief Finds the rotation _Ra that rotates a vector _a to the z axis (0,0,1) + * @param a vector: 3x1 mat (double) + * @param Ra Rotation: 3x3 mat (double) + */ + void rotateVec2ZAxis(const Matx31d& a, Matx33d& Ra); + + /** + * @brief Computes the rotation _R that rotates the object points to the plane z=0. This uses the cross-product method with the first three object points. + * @param objectPoints Array of N>=3 coplanar object points defined in object coordinates. 1xN/Nx1 3-channel (float or double) where N is the number of points + * @param R Rotation Mat: 3x3 (double) + * @return Success (true) or failure (false) + */ + bool computeObjextSpaceR3Pts(InputArray objectPoints, Matx33d& R); + + /** + * @brief computeObjextSpaceRSvD Computes the rotation _R that rotates the object points to the plane z=0. This uses the SVD of the zero-mean object points and is the (slower) fallback used when the cross-product method with the first three object points fails.
+ * @param objectPointsZeroMean Zero-meaned coplanar object points: 3xN matrix (double) where N>=3 + * @param R Rotation Mat: 3x3 (double) + */ + void computeObjextSpaceRSvD(InputArray objectPointsZeroMean, OutputArray R); + + /** + * @brief Generates the 4 object points of a square planar object + * @param squareLength The square's length (which is also its width) in object coordinate units (e.g. millimeters, meters, etc.) + * @param objectPoints Set of 4 object points (1x4 3-channel double) + */ + void generateSquareObjectCorners3D(double squareLength, OutputArray objectPoints); + + /** + * @brief Generates the 4 object points of a square planar object, without including the z-component (which is z=0 for all points). + * @param squareLength The square's length (which is also its width) in object coordinate units (e.g. millimeters, meters, etc.) + * @param objectPoints Set of 4 object points (1x4 2-channel double) + */ + void generateSquareObjectCorners2D(double squareLength, OutputArray objectPoints); + + /** + * @brief Computes the average depth of an object given its pose in camera coordinates + * @param objectPoints Object points defined in 3D object space + * @param rvec Rotation component of pose + * @param tvec Translation component of pose + * @return Average depth of the object + */ + double meanSceneDepth(InputArray objectPoints, InputArray rvec, InputArray tvec); + + //! a small constant used to test 'small' values close to zero. + double IPPE_SMALL; +}; +} //namespace IPPE + +namespace HomographyHO { + +/** +* @brief Computes the best-fitting homography matrix from source to target points using Harker and O'Leary's method: +* Harker, M., O'Leary, P., Computation of Homographies, Proceedings of the British Machine Vision Conference 2005, Oxford, England. +* This is not the author's implementation.
+* @param srcPoints Array of source points: 1xN/Nx1 2-channel (float or double) where N is the number of points +* @param targPoints Array of target points: 1xN/Nx1 2-channel (float or double) +* @param H Homography from source to target: 3x3 1-channel (double) +*/ +void homographyHO(InputArray srcPoints, InputArray targPoints, Matx33d& H); + +/** +* @brief Performs data normalization before homography estimation. For details see Hartley, R., Zisserman, A., Multiple View Geometry in Computer Vision, +* Cambridge University Press, Cambridge, 2001 +* @param Data Array of source data points: 1xN/Nx1 2-channel or 3-channel (float or double) where N>=4 is the number of points; only the first two channels are used +* @param DataN Normalized data points: 2xN 1-channel (double) where N is the number of points +* @param T Homogeneous transform from normalized to source: 3x3 1-channel (double) +* @param Ti Homogeneous transform from source to normalized: 3x3 1-channel (double) +*/ +void normalizeDataIsotropic(InputArray Data, OutputArray DataN, OutputArray T, OutputArray Ti); + +} +} //namespace cv +#endif diff --git a/modules/calib3d/src/p3p.cpp b/modules/calib3d/src/p3p.cpp index 7521e6b167..8ee0f490c7 100644 --- a/modules/calib3d/src/p3p.cpp +++ b/modules/calib3d/src/p3p.cpp @@ -49,9 +49,11 @@ bool p3p::solve(cv::Mat& R, cv::Mat& tvec, const cv::Mat& opoints, const cv::Mat else extract_points(opoints, ipoints, points); - bool result = solve(rotation_matrix, translation, points[0], points[1], points[2], points[3], points[4], points[5], - points[6], points[7], points[8], points[9], points[10], points[11], points[12], points[13], points[14], - points[15], points[16], points[17], points[18], points[19]); + bool result = solve(rotation_matrix, translation, + points[0], points[1], points[2], points[3], points[4], + points[5], points[6], points[7], points[8], points[9], + points[10], points[11], points[12], points[13], points[14], + points[15], points[16], points[17], points[18], points[19]); cv::Mat(3, 1, CV_64F,
translation).copyTo(tvec); cv::Mat(3, 3, CV_64F, rotation_matrix).copyTo(R); return result; @@ -75,10 +77,13 @@ int p3p::solve(std::vector& Rs, std::vector& tvecs, const cv:: else extract_points(opoints, ipoints, points); + const bool p4p = std::max(opoints.checkVector(3, CV_32F), opoints.checkVector(3, CV_64F)) == 4; int solutions = solve(rotation_matrix, translation, points[0], points[1], points[2], points[3], points[4], points[5], points[6], points[7], points[8], points[9], - points[10], points[11], points[12], points[13], points[14]); + points[10], points[11], points[12], points[13], points[14], + points[15], points[16], points[17], points[18], points[19], + p4p); for (int i = 0; i < solutions; i++) { cv::Mat R, tvec; @@ -100,39 +105,27 @@ bool p3p::solve(double R[3][3], double t[3], { double Rs[4][3][3], ts[4][3]; - int n = solve(Rs, ts, mu0, mv0, X0, Y0, Z0, mu1, mv1, X1, Y1, Z1, mu2, mv2, X2, Y2, Z2); + const bool p4p = true; + int n = solve(Rs, ts, mu0, mv0, X0, Y0, Z0, mu1, mv1, X1, Y1, Z1, mu2, mv2, X2, Y2, Z2, mu3, mv3, X3, Y3, Z3, p4p); if (n == 0) return false; - int ns = 0; - double min_reproj = 0; - for(int i = 0; i < n; i++) { - double X3p = Rs[i][0][0] * X3 + Rs[i][0][1] * Y3 + Rs[i][0][2] * Z3 + ts[i][0]; - double Y3p = Rs[i][1][0] * X3 + Rs[i][1][1] * Y3 + Rs[i][1][2] * Z3 + ts[i][1]; - double Z3p = Rs[i][2][0] * X3 + Rs[i][2][1] * Y3 + Rs[i][2][2] * Z3 + ts[i][2]; - double mu3p = cx + fx * X3p / Z3p; - double mv3p = cy + fy * Y3p / Z3p; - double reproj = (mu3p - mu3) * (mu3p - mu3) + (mv3p - mv3) * (mv3p - mv3); - if (i == 0 || min_reproj > reproj) { - ns = i; - min_reproj = reproj; - } - } - for(int i = 0; i < 3; i++) { for(int j = 0; j < 3; j++) - R[i][j] = Rs[ns][i][j]; - t[i] = ts[ns][i]; + R[i][j] = Rs[0][i][j]; + t[i] = ts[0][i]; } return true; } int p3p::solve(double R[4][3][3], double t[4][3], - double mu0, double mv0, double X0, double Y0, double Z0, - double mu1, double mv1, double X1, double Y1, double Z1, - double mu2, double mv2, 
double X2, double Y2, double Z2) + double mu0, double mv0, double X0, double Y0, double Z0, + double mu1, double mv1, double X1, double Y1, double Z1, + double mu2, double mv2, double X2, double Y2, double Z2, + double mu3, double mv3, double X3, double Y3, double Z3, + bool p4p) { double mk0, mk1, mk2; double norm; @@ -152,6 +145,9 @@ int p3p::solve(double R[4][3][3], double t[4][3], norm = sqrt(mu2 * mu2 + mv2 * mv2 + 1); mk2 = 1. / norm; mu2 *= mk2; mv2 *= mk2; + mu3 = inv_fx * mu3 - cx_fx; + mv3 = inv_fy * mv3 - cy_fy; + double distances[3]; distances[0] = sqrt( (X1 - X2) * (X1 - X2) + (Y1 - Y2) * (Y1 - Y2) + (Z1 - Z2) * (Z1 - Z2) ); distances[1] = sqrt( (X0 - X2) * (X0 - X2) + (Y0 - Y2) * (Y0 - Y2) + (Z0 - Z2) * (Z0 - Z2) ); @@ -167,6 +163,7 @@ int p3p::solve(double R[4][3][3], double t[4][3], int n = solve_for_lengths(lengths, distances, cosines); int nb_solutions = 0; + double reproj_errors[4]; for(int i = 0; i < n; i++) { double M_orig[3][3]; @@ -185,9 +182,29 @@ int p3p::solve(double R[4][3][3], double t[4][3], if (!align(M_orig, X0, Y0, Z0, X1, Y1, Z1, X2, Y2, Z2, R[nb_solutions], t[nb_solutions])) continue; + if (p4p) { + double X3p = R[nb_solutions][0][0] * X3 + R[nb_solutions][0][1] * Y3 + R[nb_solutions][0][2] * Z3 + t[nb_solutions][0]; + double Y3p = R[nb_solutions][1][0] * X3 + R[nb_solutions][1][1] * Y3 + R[nb_solutions][1][2] * Z3 + t[nb_solutions][1]; + double Z3p = R[nb_solutions][2][0] * X3 + R[nb_solutions][2][1] * Y3 + R[nb_solutions][2][2] * Z3 + t[nb_solutions][2]; + double mu3p = X3p / Z3p; + double mv3p = Y3p / Z3p; + reproj_errors[nb_solutions] = (mu3p - mu3) * (mu3p - mu3) + (mv3p - mv3) * (mv3p - mv3); + } + nb_solutions++; } + if (p4p) { + //sort the solutions + for (int i = 1; i < nb_solutions; i++) { + for (int j = i; j > 0 && reproj_errors[j-1] > reproj_errors[j]; j--) { + std::swap(reproj_errors[j], reproj_errors[j-1]); + std::swap(R[j], R[j-1]); + std::swap(t[j], t[j-1]); + } + } + } + return nb_solutions; } diff --git 
a/modules/calib3d/src/p3p.h b/modules/calib3d/src/p3p.h index 9c7f7ec987..93e867d479 100644 --- a/modules/calib3d/src/p3p.h +++ b/modules/calib3d/src/p3p.h @@ -15,7 +15,9 @@ class p3p int solve(double R[4][3][3], double t[4][3], double mu0, double mv0, double X0, double Y0, double Z0, double mu1, double mv1, double X1, double Y1, double Z1, - double mu2, double mv2, double X2, double Y2, double Z2); + double mu2, double mv2, double X2, double Y2, double Z2, + double mu3, double mv3, double X3, double Y3, double Z3, + bool p4p); bool solve(double R[3][3], double t[3], double mu0, double mv0, double X0, double Y0, double Z0, double mu1, double mv1, double X1, double Y1, double Z1, @@ -36,7 +38,7 @@ class p3p { points.clear(); int npoints = std::max(opoints.checkVector(3, CV_32F), opoints.checkVector(3, CV_64F)); - points.resize(5*npoints); + points.resize(5*4); //resize vector to fit for p4p case for(int i = 0; i < npoints; i++) { points[i*5] = ipoints.at(i).x*fx + cx; @@ -45,6 +47,12 @@ class p3p points[i*5+3] = opoints.at(i).y; points[i*5+4] = opoints.at(i).z; } + //Fill vectors with unused values for p3p case + for (int i = npoints; i < 4; i++) { + for (int j = 0; j < 5; j++) { + points[i * 5 + j] = 0; + } + } } void init_inverse_parameters(); int solve_for_lengths(double lengths[4][3], double distances[3], double cosines[3]); diff --git a/modules/calib3d/src/solvepnp.cpp b/modules/calib3d/src/solvepnp.cpp index 58c16f40cc..aa7332d69f 100644 --- a/modules/calib3d/src/solvepnp.cpp +++ b/modules/calib3d/src/solvepnp.cpp @@ -46,12 +46,44 @@ #include "epnp.h" #include "p3p.h" #include "ap3p.h" +#include "ippe.hpp" #include "calib3d_c_api.h" -#include - namespace cv { +#if defined _DEBUG || defined CV_STATIC_ANALYSIS +static bool isPlanarObjectPoints(InputArray _objectPoints, double threshold) +{ + CV_CheckType(_objectPoints.type(), _objectPoints.type() == CV_32FC3 || _objectPoints.type() == CV_64FC3, + "Type of _objectPoints must be CV_32FC3 or CV_64FC3"); + Mat 
objectPoints; + if (_objectPoints.type() == CV_32FC3) + { + _objectPoints.getMat().convertTo(objectPoints, CV_64F); + } + else + { + objectPoints = _objectPoints.getMat(); + } + + Scalar meanValues = mean(objectPoints); + int nbPts = objectPoints.checkVector(3, CV_64F); + Mat objectPointsCentred = objectPoints - meanValues; + objectPointsCentred = objectPointsCentred.reshape(1, nbPts); + + Mat w, u, vt; + Mat MM = objectPointsCentred.t() * objectPointsCentred; + SVDecomp(MM, w, u, vt); + + return (w.at(2) < w.at(1) * threshold); +} + +static bool approxEqual(double a, double b, double eps) +{ + return std::fabs(a-b) < eps; +} +#endif + void drawFrameAxes(InputOutputArray image, InputArray cameraMatrix, InputArray distCoeffs, InputArray rvec, InputArray tvec, float length, int thickness) { @@ -80,120 +112,24 @@ void drawFrameAxes(InputOutputArray image, InputArray cameraMatrix, InputArray d line(image, imagePoints[0], imagePoints[3], Scalar(255, 0, 0), thickness); } -bool solvePnP( InputArray _opoints, InputArray _ipoints, - InputArray _cameraMatrix, InputArray _distCoeffs, - OutputArray _rvec, OutputArray _tvec, bool useExtrinsicGuess, int flags ) +bool solvePnP( InputArray opoints, InputArray ipoints, + InputArray cameraMatrix, InputArray distCoeffs, + OutputArray rvec, OutputArray tvec, bool useExtrinsicGuess, int flags ) { CV_INSTRUMENT_REGION(); - Mat opoints = _opoints.getMat(), ipoints = _ipoints.getMat(); - int npoints = std::max(opoints.checkVector(3, CV_32F), opoints.checkVector(3, CV_64F)); - CV_Assert( ( (npoints >= 4) || (npoints == 3 && flags == SOLVEPNP_ITERATIVE && useExtrinsicGuess) ) - && npoints == std::max(ipoints.checkVector(2, CV_32F), ipoints.checkVector(2, CV_64F)) ); + vector rvecs, tvecs; + int solutions = solvePnPGeneric(opoints, ipoints, cameraMatrix, distCoeffs, rvecs, tvecs, useExtrinsicGuess, (SolvePnPMethod)flags, rvec, tvec); - Mat rvec, tvec; - if( flags != SOLVEPNP_ITERATIVE ) - useExtrinsicGuess = false; - - if( useExtrinsicGuess 
) + if (solutions > 0) { - int rtype = _rvec.type(), ttype = _tvec.type(); - Size rsize = _rvec.size(), tsize = _tvec.size(); - CV_Assert( (rtype == CV_32F || rtype == CV_64F) && - (ttype == CV_32F || ttype == CV_64F) ); - CV_Assert( (rsize == Size(1, 3) || rsize == Size(3, 1)) && - (tsize == Size(1, 3) || tsize == Size(3, 1)) ); + int rdepth = rvec.empty() ? CV_64F : rvec.depth(); + int tdepth = tvec.empty() ? CV_64F : tvec.depth(); + rvecs[0].convertTo(rvec, rdepth); + tvecs[0].convertTo(tvec, tdepth); } - else - { - int mtype = CV_64F; - // use CV_32F if all PnP inputs are CV_32F and outputs are empty - if (_ipoints.depth() == _cameraMatrix.depth() && _ipoints.depth() == _opoints.depth() && - _rvec.empty() && _tvec.empty()) - mtype = _opoints.depth(); - _rvec.create(3, 1, mtype); - _tvec.create(3, 1, mtype); - } - rvec = _rvec.getMat(); - tvec = _tvec.getMat(); - - Mat cameraMatrix0 = _cameraMatrix.getMat(); - Mat distCoeffs0 = _distCoeffs.getMat(); - Mat cameraMatrix = Mat_(cameraMatrix0); - Mat distCoeffs = Mat_(distCoeffs0); - bool result = false; - - if (flags == SOLVEPNP_EPNP || flags == SOLVEPNP_DLS || flags == SOLVEPNP_UPNP) - { - Mat undistortedPoints; - undistortPoints(ipoints, undistortedPoints, cameraMatrix, distCoeffs); - epnp PnP(cameraMatrix, opoints, undistortedPoints); - - Mat R; - PnP.compute_pose(R, tvec); - Rodrigues(R, rvec); - result = true; - } - else if (flags == SOLVEPNP_P3P) - { - CV_Assert( npoints == 4); - Mat undistortedPoints; - undistortPoints(ipoints, undistortedPoints, cameraMatrix, distCoeffs); - p3p P3Psolver(cameraMatrix); - - Mat R; - result = P3Psolver.solve(R, tvec, opoints, undistortedPoints); - if (result) - Rodrigues(R, rvec); - } - else if (flags == SOLVEPNP_AP3P) - { - CV_Assert( npoints == 4); - Mat undistortedPoints; - undistortPoints(ipoints, undistortedPoints, cameraMatrix, distCoeffs); - ap3p P3Psolver(cameraMatrix); - - Mat R; - result = P3Psolver.solve(R, tvec, opoints, undistortedPoints); - if (result) - 
Rodrigues(R, rvec); - } - else if (flags == SOLVEPNP_ITERATIVE) - { - CvMat c_objectPoints = cvMat(opoints), c_imagePoints = cvMat(ipoints); - CvMat c_cameraMatrix = cvMat(cameraMatrix), c_distCoeffs = cvMat(distCoeffs); - CvMat c_rvec = cvMat(rvec), c_tvec = cvMat(tvec); - cvFindExtrinsicCameraParams2(&c_objectPoints, &c_imagePoints, &c_cameraMatrix, - (c_distCoeffs.rows && c_distCoeffs.cols) ? &c_distCoeffs : 0, - &c_rvec, &c_tvec, useExtrinsicGuess ); - result = true; - } - /*else if (flags == SOLVEPNP_DLS) - { - Mat undistortedPoints; - undistortPoints(ipoints, undistortedPoints, cameraMatrix, distCoeffs); - - dls PnP(opoints, undistortedPoints); - - Mat R, rvec = _rvec.getMat(), tvec = _tvec.getMat(); - bool result = PnP.compute_pose(R, tvec); - if (result) - Rodrigues(R, rvec); - return result; - } - else if (flags == SOLVEPNP_UPNP) - { - upnp PnP(cameraMatrix, opoints, ipoints); - - Mat R, rvec = _rvec.getMat(), tvec = _tvec.getMat(); - PnP.compute_pose(R, tvec); - Rodrigues(R, rvec); - return true; - }*/ - else - CV_Error(CV_StsBadArg, "The flags argument must be one of SOLVEPNP_ITERATIVE, SOLVEPNP_P3P, SOLVEPNP_EPNP or SOLVEPNP_DLS"); - return result; + return solutions > 0; } class PnPRansacCallback CV_FINAL : public PointSetRegistrator::Callback @@ -258,10 +194,10 @@ public: }; bool solvePnPRansac(InputArray _opoints, InputArray _ipoints, - InputArray _cameraMatrix, InputArray _distCoeffs, - OutputArray _rvec, OutputArray _tvec, bool useExtrinsicGuess, - int iterationsCount, float reprojectionError, double confidence, - OutputArray _inliers, int flags) + InputArray _cameraMatrix, InputArray _distCoeffs, + OutputArray _rvec, OutputArray _tvec, bool useExtrinsicGuess, + int iterationsCount, float reprojectionError, double confidence, + OutputArray _inliers, int flags) { CV_INSTRUMENT_REGION(); @@ -410,7 +346,8 @@ int solveP3P( InputArray _opoints, InputArray _ipoints, Mat opoints = _opoints.getMat(), ipoints = _ipoints.getMat(); int npoints = 
std::max(opoints.checkVector(3, CV_32F), opoints.checkVector(3, CV_64F)); - CV_Assert( npoints == 3 && npoints == std::max(ipoints.checkVector(2, CV_32F), ipoints.checkVector(2, CV_64F)) ); + CV_Assert( npoints == std::max(ipoints.checkVector(2, CV_32F), ipoints.checkVector(2, CV_64F)) ); + CV_Assert( npoints == 3 || npoints == 4 ); CV_Assert( flags == SOLVEPNP_P3P || flags == SOLVEPNP_AP3P ); Mat cameraMatrix0 = _cameraMatrix.getMat(); @@ -420,7 +357,7 @@ int solveP3P( InputArray _opoints, InputArray _ipoints, Mat undistortedPoints; undistortPoints(ipoints, undistortedPoints, cameraMatrix, distCoeffs); - std::vector Rs, ts; + std::vector Rs, ts, rvecs; int solutions = 0; if (flags == SOLVEPNP_P3P) @@ -438,19 +375,91 @@ int solveP3P( InputArray _opoints, InputArray _ipoints, return 0; } - if (_rvecs.needed()) { - _rvecs.create(solutions, 1, CV_64F); + Mat objPts, imgPts; + opoints.convertTo(objPts, CV_64F); + ipoints.convertTo(imgPts, CV_64F); + if (imgPts.cols > 1) + { + imgPts = imgPts.reshape(1); + imgPts = imgPts.t(); } + else + imgPts = imgPts.reshape(1, 2*imgPts.rows); - if (_tvecs.needed()) { - _tvecs.create(solutions, 1, CV_64F); - } - - for (int i = 0; i < solutions; i++) { + vector reproj_errors(solutions); + for (size_t i = 0; i < reproj_errors.size(); i++) + { Mat rvec; Rodrigues(Rs[i], rvec); - _tvecs.getMatRef(i) = ts[i]; - _rvecs.getMatRef(i) = rvec; + rvecs.push_back(rvec); + + Mat projPts; + projectPoints(objPts, rvec, ts[i], _cameraMatrix, _distCoeffs, projPts); + + projPts = projPts.reshape(1, 2*projPts.rows); + Mat err = imgPts - projPts; + + err = err.t() * err; + reproj_errors[i] = err.at(0,0); + } + + //sort the solutions + for (int i = 1; i < solutions; i++) + { + for (int j = i; j > 0 && reproj_errors[j-1] > reproj_errors[j]; j--) + { + std::swap(reproj_errors[j], reproj_errors[j-1]); + std::swap(rvecs[j], rvecs[j-1]); + std::swap(ts[j], ts[j-1]); + } + } + + int depthRot = _rvecs.fixedType() ? 
_rvecs.depth() : CV_64F; + int depthTrans = _tvecs.fixedType() ? _tvecs.depth() : CV_64F; + _rvecs.create(solutions, 1, CV_MAKETYPE(depthRot, _rvecs.fixedType() && _rvecs.kind() == _InputArray::STD_VECTOR ? 3 : 1)); + _tvecs.create(solutions, 1, CV_MAKETYPE(depthTrans, _tvecs.fixedType() && _tvecs.kind() == _InputArray::STD_VECTOR ? 3 : 1)); + + for (int i = 0; i < solutions; i++) + { + Mat rvec0, tvec0; + if (depthRot == CV_64F) + rvec0 = rvecs[i]; + else + rvecs[i].convertTo(rvec0, depthRot); + + if (depthTrans == CV_64F) + tvec0 = ts[i]; + else + ts[i].convertTo(tvec0, depthTrans); + + if (_rvecs.fixedType() && _rvecs.kind() == _InputArray::STD_VECTOR) + { + Mat rref = _rvecs.getMat_(); + + if (_rvecs.depth() == CV_32F) + rref.at(0,i) = Vec3f(rvec0.at(0,0), rvec0.at(1,0), rvec0.at(2,0)); + else + rref.at(0,i) = Vec3d(rvec0.at(0,0), rvec0.at(1,0), rvec0.at(2,0)); + } + else + { + _rvecs.getMatRef(i) = rvec0; + } + + if (_tvecs.fixedType() && _tvecs.kind() == _InputArray::STD_VECTOR) + { + + Mat tref = _tvecs.getMat_(); + + if (_tvecs.depth() == CV_32F) + tref.at(0,i) = Vec3f(tvec0.at(0,0), tvec0.at(1,0), tvec0.at(2,0)); + else + tref.at(0,i) = Vec3d(tvec0.at(0,0), tvec0.at(1,0), tvec0.at(2,0)); + } + else + { + _tvecs.getMatRef(i) = tvec0; + } } return solutions; @@ -723,4 +732,314 @@ void solvePnPRefineVVS(InputArray _objectPoints, InputArray _imagePoints, solvePnPRefine(_objectPoints, _imagePoints, _cameraMatrix, _distCoeffs, _rvec, _tvec, SOLVEPNP_REFINE_VVS, _criteria, _VVSlambda); } +int solvePnPGeneric( InputArray _opoints, InputArray _ipoints, + InputArray _cameraMatrix, InputArray _distCoeffs, + OutputArrayOfArrays _rvecs, OutputArrayOfArrays _tvecs, + bool useExtrinsicGuess, SolvePnPMethod flags, + InputArray _rvec, InputArray _tvec, + OutputArray reprojectionError) { + CV_INSTRUMENT_REGION(); + + Mat opoints = _opoints.getMat(), ipoints = _ipoints.getMat(); + int npoints = std::max(opoints.checkVector(3, CV_32F), opoints.checkVector(3, CV_64F)); + 
CV_Assert( ( (npoints >= 4) || (npoints == 3 && flags == SOLVEPNP_ITERATIVE && useExtrinsicGuess) ) + && npoints == std::max(ipoints.checkVector(2, CV_32F), ipoints.checkVector(2, CV_64F)) ); + + if( flags != SOLVEPNP_ITERATIVE ) + useExtrinsicGuess = false; + + if (useExtrinsicGuess) + CV_Assert( !_rvec.empty() && !_tvec.empty() ); + + if( useExtrinsicGuess ) + { + int rtype = _rvec.type(), ttype = _tvec.type(); + Size rsize = _rvec.size(), tsize = _tvec.size(); + CV_Assert( (rtype == CV_32FC1 || rtype == CV_64FC1) && + (ttype == CV_32FC1 || ttype == CV_64FC1) ); + CV_Assert( (rsize == Size(1, 3) || rsize == Size(3, 1)) && + (tsize == Size(1, 3) || tsize == Size(3, 1)) ); + } + + Mat cameraMatrix0 = _cameraMatrix.getMat(); + Mat distCoeffs0 = _distCoeffs.getMat(); + Mat cameraMatrix = Mat_(cameraMatrix0); + Mat distCoeffs = Mat_(distCoeffs0); + + vector vec_rvecs, vec_tvecs; + if (flags == SOLVEPNP_EPNP || flags == SOLVEPNP_DLS || flags == SOLVEPNP_UPNP) + { + Mat undistortedPoints; + undistortPoints(ipoints, undistortedPoints, cameraMatrix, distCoeffs); + epnp PnP(cameraMatrix, opoints, undistortedPoints); + + Mat rvec, tvec, R; + PnP.compute_pose(R, tvec); + Rodrigues(R, rvec); + + vec_rvecs.push_back(rvec); + vec_tvecs.push_back(tvec); + } + else if (flags == SOLVEPNP_P3P || flags == SOLVEPNP_AP3P) + { + vector rvecs, tvecs; + solveP3P(_opoints, _ipoints, _cameraMatrix, _distCoeffs, rvecs, tvecs, flags); + vec_rvecs.insert(vec_rvecs.end(), rvecs.begin(), rvecs.end()); + vec_tvecs.insert(vec_tvecs.end(), tvecs.begin(), tvecs.end()); + } + else if (flags == SOLVEPNP_ITERATIVE) + { + Mat rvec, tvec; + if (useExtrinsicGuess) + { + rvec = _rvec.getMat(); + tvec = _tvec.getMat(); + } + else + { + rvec.create(3, 1, CV_64FC1); + tvec.create(3, 1, CV_64FC1); + } + + CvMat c_objectPoints = cvMat(opoints), c_imagePoints = cvMat(ipoints); + CvMat c_cameraMatrix = cvMat(cameraMatrix), c_distCoeffs = cvMat(distCoeffs); + CvMat c_rvec = cvMat(rvec), c_tvec = cvMat(tvec); + 
cvFindExtrinsicCameraParams2(&c_objectPoints, &c_imagePoints, &c_cameraMatrix, + (c_distCoeffs.rows && c_distCoeffs.cols) ? &c_distCoeffs : 0, + &c_rvec, &c_tvec, useExtrinsicGuess ); + + vec_rvecs.push_back(rvec); + vec_tvecs.push_back(tvec); + } + else if (flags == SOLVEPNP_IPPE) + { + CV_DbgAssert(isPlanarObjectPoints(opoints, 1e-3)); + Mat undistortedPoints; + undistortPoints(ipoints, undistortedPoints, cameraMatrix, distCoeffs); + + IPPE::PoseSolver poseSolver; + Mat rvec1, tvec1, rvec2, tvec2; + float reprojErr1, reprojErr2; + try + { + poseSolver.solveGeneric(opoints, undistortedPoints, rvec1, tvec1, reprojErr1, rvec2, tvec2, reprojErr2); + + if (reprojErr1 < reprojErr2) + { + vec_rvecs.push_back(rvec1); + vec_tvecs.push_back(tvec1); + + vec_rvecs.push_back(rvec2); + vec_tvecs.push_back(tvec2); + } + else + { + vec_rvecs.push_back(rvec2); + vec_tvecs.push_back(tvec2); + + vec_rvecs.push_back(rvec1); + vec_tvecs.push_back(tvec1); + } + } + catch (...) { } + } + else if (flags == SOLVEPNP_IPPE_SQUARE) + { + CV_Assert(npoints == 4); + +#if defined _DEBUG || defined CV_STATIC_ANALYSIS + double Xs[4][3]; + if (opoints.depth() == CV_32F) + { + for (int i = 0; i < 4; i++) + { + for (int j = 0; j < 3; j++) + { + Xs[i][j] = opoints.ptr(0)[i](j); + } + } + } + else + { + for (int i = 0; i < 4; i++) + { + for (int j = 0; j < 3; j++) + { + Xs[i][j] = opoints.ptr(0)[i](j); + } + } + } + + const double equalThreshold = 1e-9; + //Z must be zero + for (int i = 0; i < 4; i++) + { + CV_DbgCheck(Xs[i][2], approxEqual(Xs[i][2], 0, equalThreshold), "Z object point coordinate must be zero!"); + } + //Y0 == Y1 && Y2 == Y3 + CV_DbgCheck(Xs[0][1], approxEqual(Xs[0][1], Xs[1][1], equalThreshold), "Object points must be: Y0 == Y1!"); + CV_DbgCheck(Xs[2][1], approxEqual(Xs[2][1], Xs[3][1], equalThreshold), "Object points must be: Y2 == Y3!"); + //X0 == X3 && X1 == X2 + CV_DbgCheck(Xs[0][0], approxEqual(Xs[0][0], Xs[3][0], equalThreshold), "Object points must be: X0 == X3!"); + 
CV_DbgCheck(Xs[1][0], approxEqual(Xs[1][0], Xs[2][0], equalThreshold), "Object points must be: X1 == X2!"); + //X1 == Y1 && X3 == Y3 + CV_DbgCheck(Xs[1][0], approxEqual(Xs[1][0], Xs[1][1], equalThreshold), "Object points must be: X1 == Y1!"); + CV_DbgCheck(Xs[3][0], approxEqual(Xs[3][0], Xs[3][1], equalThreshold), "Object points must be: X3 == Y3!"); +#endif + + Mat undistortedPoints; + undistortPoints(ipoints, undistortedPoints, cameraMatrix, distCoeffs); + + IPPE::PoseSolver poseSolver; + Mat rvec1, tvec1, rvec2, tvec2; + float reprojErr1, reprojErr2; + try + { + poseSolver.solveSquare(opoints, undistortedPoints, rvec1, tvec1, reprojErr1, rvec2, tvec2, reprojErr2); + + if (reprojErr1 < reprojErr2) + { + vec_rvecs.push_back(rvec1); + vec_tvecs.push_back(tvec1); + + vec_rvecs.push_back(rvec2); + vec_tvecs.push_back(tvec2); + } + else + { + vec_rvecs.push_back(rvec2); + vec_tvecs.push_back(tvec2); + + vec_rvecs.push_back(rvec1); + vec_tvecs.push_back(tvec1); + } + } catch (...) { } + } + /*else if (flags == SOLVEPNP_DLS) + { + Mat undistortedPoints; + undistortPoints(ipoints, undistortedPoints, cameraMatrix, distCoeffs); + + dls PnP(opoints, undistortedPoints); + + Mat rvec, tvec, R; + bool result = PnP.compute_pose(R, tvec); + if (result) + { + Rodrigues(R, rvec); + vec_rvecs.push_back(rvec); + vec_tvecs.push_back(tvec); + } + } + else if (flags == SOLVEPNP_UPNP) + { + upnp PnP(cameraMatrix, opoints, ipoints); + + Mat rvec, tvec, R; + PnP.compute_pose(R, tvec); + Rodrigues(R, rvec); + vec_rvecs.push_back(rvec); + vec_tvecs.push_back(tvec); + }*/ + else + CV_Error(CV_StsBadArg, "The flags argument must be one of SOLVEPNP_ITERATIVE, SOLVEPNP_P3P, SOLVEPNP_EPNP or SOLVEPNP_DLS"); + + CV_Assert(vec_rvecs.size() == vec_tvecs.size()); + + int solutions = static_cast(vec_rvecs.size()); + + int depthRot = _rvecs.fixedType() ? _rvecs.depth() : CV_64F; + int depthTrans = _tvecs.fixedType() ? 
_tvecs.depth() : CV_64F; + _rvecs.create(solutions, 1, CV_MAKETYPE(depthRot, _rvecs.fixedType() && _rvecs.kind() == _InputArray::STD_VECTOR ? 3 : 1)); + _tvecs.create(solutions, 1, CV_MAKETYPE(depthTrans, _tvecs.fixedType() && _tvecs.kind() == _InputArray::STD_VECTOR ? 3 : 1)); + + for (int i = 0; i < solutions; i++) + { + Mat rvec0, tvec0; + if (depthRot == CV_64F) + rvec0 = vec_rvecs[i]; + else + vec_rvecs[i].convertTo(rvec0, depthRot); + + if (depthTrans == CV_64F) + tvec0 = vec_tvecs[i]; + else + vec_tvecs[i].convertTo(tvec0, depthTrans); + + if (_rvecs.fixedType() && _rvecs.kind() == _InputArray::STD_VECTOR) + { + Mat rref = _rvecs.getMat_(); + + if (_rvecs.depth() == CV_32F) + rref.at(0,i) = Vec3f(rvec0.at(0,0), rvec0.at(1,0), rvec0.at(2,0)); + else + rref.at(0,i) = Vec3d(rvec0.at(0,0), rvec0.at(1,0), rvec0.at(2,0)); + } + else + { + _rvecs.getMatRef(i) = rvec0; + } + + if (_tvecs.fixedType() && _tvecs.kind() == _InputArray::STD_VECTOR) + { + + Mat tref = _tvecs.getMat_(); + + if (_tvecs.depth() == CV_32F) + tref.at(0,i) = Vec3f(tvec0.at(0,0), tvec0.at(1,0), tvec0.at(2,0)); + else + tref.at(0,i) = Vec3d(tvec0.at(0,0), tvec0.at(1,0), tvec0.at(2,0)); + } + else + { + _tvecs.getMatRef(i) = tvec0; + } + } + + if (reprojectionError.needed()) + { + int type = reprojectionError.type(); + reprojectionError.create(solutions, 1, type); + CV_CheckType(reprojectionError.type(), type == CV_32FC1 || type == CV_64FC1, + "Type of reprojectionError must be CV_32FC1 or CV_64FC1!"); + + Mat objectPoints, imagePoints; + if (_opoints.depth() == CV_32F) + { + _opoints.getMat().convertTo(objectPoints, CV_64F); + } + else + { + objectPoints = _opoints.getMat(); + } + if (_ipoints.depth() == CV_32F) + { + _ipoints.getMat().convertTo(imagePoints, CV_64F); + } + else + { + imagePoints = _ipoints.getMat(); + } + + for (size_t i = 0; i < vec_rvecs.size(); i++) + { + vector projectedPoints; + projectPoints(objectPoints, vec_rvecs[i], vec_tvecs[i], cameraMatrix, distCoeffs, 
projectedPoints); + double rmse = norm(projectedPoints, imagePoints, NORM_L2) / sqrt(2*projectedPoints.size()); + + Mat err = reprojectionError.getMat(); + if (type == CV_32F) + { + err.at(0,static_cast(i)) = static_cast(rmse); + } + else + { + err.at(0,static_cast(i)) = rmse; + } + } + } + + return solutions; +} + } diff --git a/modules/calib3d/test/test_solvepnp_ransac.cpp b/modules/calib3d/test/test_solvepnp_ransac.cpp index adf7758c92..77a5d5df8d 100644 --- a/modules/calib3d/test/test_solvepnp_ransac.cpp +++ b/modules/calib3d/test/test_solvepnp_ransac.cpp @@ -44,10 +44,161 @@ namespace opencv_test { namespace { +//Statistics Helpers +struct ErrorInfo +{ + ErrorInfo(double errT, double errR) : errorTrans(errT), errorRot(errR) + { + } + + bool operator<(const ErrorInfo& e) const + { + return sqrt(errorTrans*errorTrans + errorRot*errorRot) < + sqrt(e.errorTrans*e.errorTrans + e.errorRot*e.errorRot); + } + + double errorTrans; + double errorRot; +}; + +//Try to find the translation and rotation thresholds to achieve a predefined percentage of success. 
+//Since a success is defined by error_trans < trans_thresh && error_rot < rot_thresh +//this just gives an idea of the values to use +static void findThreshold(const std::vector& v_trans, const std::vector& v_rot, double percentage, + double& transThresh, double& rotThresh) +{ + if (v_trans.empty() || v_rot.empty() || v_trans.size() != v_rot.size()) + { + transThresh = -1; + rotThresh = -1; + return; + } + + std::vector error_info; + error_info.reserve(v_trans.size()); + for (size_t i = 0; i < v_trans.size(); i++) + { + error_info.push_back(ErrorInfo(v_trans[i], v_rot[i])); + } + + std::sort(error_info.begin(), error_info.end()); + size_t idx = static_cast(error_info.size() * percentage); + transThresh = error_info[idx].errorTrans; + rotThresh = error_info[idx].errorRot; +} + +static double getMax(const std::vector& v) +{ + return *std::max_element(v.begin(), v.end()); +} + +static double getMean(const std::vector& v) +{ + if (v.empty()) + { + return 0.0; + } + + double sum = std::accumulate(v.begin(), v.end(), 0.0); + return sum / v.size(); +} + +static double getMedian(const std::vector& v) +{ + if (v.empty()) + { + return 0.0; + } + + std::vector v_copy = v; + size_t size = v_copy.size(); + + size_t n = size / 2; + std::nth_element(v_copy.begin(), v_copy.begin() + n, v_copy.end()); + double val_n = v_copy[n]; + + if (size % 2 == 1) + { + return val_n; + } else + { + std::nth_element(v_copy.begin(), v_copy.begin() + n - 1, v_copy.end()); + return 0.5 * (val_n + v_copy[n - 1]); + } +} + +static void generatePose(const vector& points, Mat& rvec, Mat& tvec, RNG& rng, int nbTrials=10) +{ + const double minVal = 1.0e-3; + const double maxVal = 1.0; + rvec.create(3, 1, CV_64FC1); + tvec.create(3, 1, CV_64FC1); + + bool validPose = false; + for (int trial = 0; trial < nbTrials && !validPose; trial++) + { + for (int i = 0; i < 3; i++) + { + rvec.at(i,0) = rng.uniform(minVal, maxVal); + tvec.at(i,0) = (i == 2) ? 
rng.uniform(minVal*10, maxVal) : rng.uniform(-maxVal, maxVal); + } + + Mat R; + cv::Rodrigues(rvec, R); + bool positiveDepth = true; + for (size_t i = 0; i < points.size() && positiveDepth; i++) + { + Matx31d objPts(points[i].x, points[i].y, points[i].z); + Mat camPts = R*objPts + tvec; + if (camPts.at(2,0) <= 0) + { + positiveDepth = false; + } + } + validPose = positiveDepth; + } +} + +static void generatePose(const vector& points, Mat& rvec, Mat& tvec, RNG& rng, int nbTrials=10) +{ + vector points_double(points.size()); + + for (size_t i = 0; i < points.size(); i++) + { + points_double[i] = Point3d(points[i].x, points[i].y, points[i].z); + } + + generatePose(points_double, rvec, tvec, rng, nbTrials); +} + +static std::string printMethod(int method) +{ + switch (method) { + case 0: + return "SOLVEPNP_ITERATIVE"; + case 1: + return "SOLVEPNP_EPNP"; + case 2: + return "SOLVEPNP_P3P"; + case 3: + return "SOLVEPNP_DLS (remaped to SOLVEPNP_EPNP)"; + case 4: + return "SOLVEPNP_UPNP (remaped to SOLVEPNP_EPNP)"; + case 5: + return "SOLVEPNP_AP3P"; + case 6: + return "SOLVEPNP_IPPE"; + case 7: + return "SOLVEPNP_IPPE_SQUARE"; + default: + return "Unknown value"; + } +} + class CV_solvePnPRansac_Test : public cvtest::BaseTest { public: - CV_solvePnPRansac_Test() + CV_solvePnPRansac_Test(bool planar_=false, bool planarTag_=false) : planar(planar_), planarTag(planarTag_) { eps[SOLVEPNP_ITERATIVE] = 1.0e-2; eps[SOLVEPNP_EPNP] = 1.0e-2; @@ -61,10 +212,10 @@ public: ~CV_solvePnPRansac_Test() {} protected: void generate3DPointCloud(vector& points, - Point3f pmin = Point3f(-1, -1, 5), - Point3f pmax = Point3f(1, 1, 10)) + Point3f pmin = Point3f(-1, -1, 5), + Point3f pmax = Point3f(1, 1, 10)) { - RNG rng = cv::theRNG(); // fix the seed to use "fixed" input 3D points + RNG& rng = theRNG(); // fix the seed to use "fixed" input 3D points for (size_t i = 0; i < points.size(); i++) { @@ -75,6 +226,44 @@ protected: } } + void generatePlanarPointCloud(vector& points, + Point2f pmin = 
Point2f(-1, -1), + Point2f pmax = Point2f(1, 1)) + { + RNG& rng = theRNG(); // fix the seed to use "fixed" input 3D points + + if (planarTag) + { + const float squareLength_2 = rng.uniform(0.01f, pmax.x) / 2; + points.clear(); + points.push_back(Point3f(-squareLength_2, squareLength_2, 0)); + points.push_back(Point3f(squareLength_2, squareLength_2, 0)); + points.push_back(Point3f(squareLength_2, -squareLength_2, 0)); + points.push_back(Point3f(-squareLength_2, -squareLength_2, 0)); + } + else + { + Mat rvec_double, tvec_double; + generatePose(points, rvec_double, tvec_double, rng); + + Mat rvec, tvec, R; + rvec_double.convertTo(rvec, CV_32F); + tvec_double.convertTo(tvec, CV_32F); + cv::Rodrigues(rvec, R); + + for (size_t i = 0; i < points.size(); i++) + { + float x = rng.uniform(pmin.x, pmax.x); + float y = rng.uniform(pmin.y, pmax.y); + float z = 0; + + Matx31f pt(x, y, z); + Mat pt_trans = R * pt + tvec; + points[i] = Point3f(pt_trans.at(0,0), pt_trans.at(1,0), pt_trans.at(2,0)); + } + } + } + void generateCameraMatrix(Mat& cameraMatrix, RNG& rng) { const double fcMinVal = 1e-3; @@ -95,32 +284,34 @@ protected: distCoeffs.at(i,0) = rng.uniform(0.0, 1.0e-6); } - void generatePose(Mat& rvec, Mat& tvec, RNG& rng) + virtual bool runTest(RNG& rng, int mode, int method, const vector& points, double& errorTrans, double& errorRot) { - const double minVal = 1.0e-3; - const double maxVal = 1.0; - rvec.create(3, 1, CV_64FC1); - tvec.create(3, 1, CV_64FC1); - for (int i = 0; i < 3; i++) + if ((!planar && method == SOLVEPNP_IPPE) || method == SOLVEPNP_IPPE_SQUARE) { - rvec.at(i,0) = rng.uniform(minVal, maxVal); - tvec.at(i,0) = rng.uniform(minVal, maxVal/10); + return true; } - } - virtual bool runTest(RNG& rng, int mode, int method, const vector& points, const double* epsilon, double& maxError) - { Mat rvec, tvec; vector inliers; Mat trueRvec, trueTvec; Mat intrinsics, distCoeffs; generateCameraMatrix(intrinsics, rng); - if (method == 4) intrinsics.at(1,1) = 
intrinsics.at(0,0); + //UPnP is mapped to EPnP + //Uncomment this when UPnP is fixed +// if (method == SOLVEPNP_UPNP) +// { +// intrinsics.at(1,1) = intrinsics.at(0,0); +// } if (mode == 0) + { distCoeffs = Mat::zeros(4, 1, CV_64FC1); + } else + { generateDistCoeffs(distCoeffs, rng); - generatePose(trueRvec, trueTvec, rng); + } + + generatePose(points, trueRvec, trueTvec, rng); vector projectedPoints; projectedPoints.resize(points.size()); @@ -138,11 +329,9 @@ protected: bool isTestSuccess = inliers.size() >= points.size()*0.95; double rvecDiff = cvtest::norm(rvec, trueRvec, NORM_L2), tvecDiff = cvtest::norm(tvec, trueTvec, NORM_L2); - isTestSuccess = isTestSuccess && rvecDiff < epsilon[method] && tvecDiff < epsilon[method]; - double error = rvecDiff > tvecDiff ? rvecDiff : tvecDiff; - //cout << error << " " << inliers.size() << " " << eps[method] << endl; - if (error > maxError) - maxError = error; + isTestSuccess = isTestSuccess && rvecDiff < eps[method] && tvecDiff < eps[method]; + errorTrans = tvecDiff; + errorRot = rvecDiff; return isTestSuccess; } @@ -152,68 +341,184 @@ protected: ts->set_failed_test_info(cvtest::TS::OK); vector points, points_dls; - points.resize(pointsCount); - generate3DPointCloud(points); + points.resize(static_cast(pointsCount)); - RNG rng = ts->get_rng(); + if (planar || planarTag) + { + generatePlanarPointCloud(points); + } + else + { + generate3DPointCloud(points); + } + RNG& rng = ts->get_rng(); for (int mode = 0; mode < 2; mode++) { for (int method = 0; method < SOLVEPNP_MAX_COUNT; method++) { - double maxError = 0; + //To get the same input for each methods + RNG rngCopy = rng; + std::vector vec_errorTrans, vec_errorRot; + vec_errorTrans.reserve(static_cast(totalTestsCount)); + vec_errorRot.reserve(static_cast(totalTestsCount)); + int successfulTestsCount = 0; for (int testIndex = 0; testIndex < totalTestsCount; testIndex++) { - if (runTest(rng, mode, method, points, eps, maxError)) + double errorTrans, errorRot; + if 
(runTest(rngCopy, mode, method, points, errorTrans, errorRot)) { successfulTestsCount++; } + vec_errorTrans.push_back(errorTrans); + vec_errorRot.push_back(errorRot); } + + double maxErrorTrans = getMax(vec_errorTrans); + double maxErrorRot = getMax(vec_errorRot); + double meanErrorTrans = getMean(vec_errorTrans); + double meanErrorRot = getMean(vec_errorRot); + double medianErrorTrans = getMedian(vec_errorTrans); + double medianErrorRot = getMedian(vec_errorRot); + if (successfulTestsCount < 0.7*totalTestsCount) { - ts->printf( cvtest::TS::LOG, "Invalid accuracy for method %d, failed %d tests from %d, maximum error equals %f, distortion mode equals %d\n", - method, totalTestsCount - successfulTestsCount, totalTestsCount, maxError, mode); + ts->printf(cvtest::TS::LOG, "Invalid accuracy for %s, failed %d tests from %d, %s, " + "maxErrT: %f, maxErrR: %f, " + "meanErrT: %f, meanErrR: %f, " + "medErrT: %f, medErrR: %f\n", + printMethod(method).c_str(), totalTestsCount - successfulTestsCount, totalTestsCount, printMode(mode).c_str(), + maxErrorTrans, maxErrorRot, meanErrorTrans, meanErrorRot, medianErrorTrans, medianErrorRot); ts->set_failed_test_info(cvtest::TS::FAIL_BAD_ACCURACY); } - cout << "mode: " << mode << ", method: " << method << " -> " + cout << "mode: " << printMode(mode) << ", method: " << printMethod(method) << " -> " << ((double)successfulTestsCount / totalTestsCount) * 100 << "%" - << " (err < " << maxError << ")" << endl; + << " (maxErrT: " << maxErrorTrans << ", maxErrR: " << maxErrorRot + << ", meanErrT: " << meanErrorTrans << ", meanErrR: " << meanErrorRot + << ", medErrT: " << medianErrorTrans << ", medErrR: " << medianErrorRot << ")" << endl; + double transThres, rotThresh; + findThreshold(vec_errorTrans, vec_errorRot, 0.7, transThres, rotThresh); + cout << "approximate translation threshold for 0.7: " << transThres + << ", approximate rotation threshold for 0.7: " << rotThresh << endl; } + cout << endl; + } + } + std::string printMode(int mode) + 
{ + switch (mode) { + case 0: + return "no distortion"; + case 1: + default: + return "distorsion"; } } double eps[SOLVEPNP_MAX_COUNT]; int totalTestsCount; int pointsCount; + bool planar; + bool planarTag; }; class CV_solvePnP_Test : public CV_solvePnPRansac_Test { public: - CV_solvePnP_Test() + CV_solvePnP_Test(bool planar_=false, bool planarTag_=false) : CV_solvePnPRansac_Test(planar_, planarTag_) { eps[SOLVEPNP_ITERATIVE] = 1.0e-6; eps[SOLVEPNP_EPNP] = 1.0e-6; eps[SOLVEPNP_P3P] = 2.0e-4; eps[SOLVEPNP_AP3P] = 1.0e-4; - eps[SOLVEPNP_DLS] = 1.0e-4; - eps[SOLVEPNP_UPNP] = 1.0e-4; + eps[SOLVEPNP_DLS] = 1.0e-6; //DLS is remapped to EPnP, so we use the same threshold + eps[SOLVEPNP_UPNP] = 1.0e-6; //UPnP is remapped to EPnP, so we use the same threshold + eps[SOLVEPNP_IPPE] = 1.0e-6; + eps[SOLVEPNP_IPPE_SQUARE] = 1.0e-6; + totalTestsCount = 1000; + + if (planar || planarTag) + { + if (planarTag) + { + pointsCount = 4; + } + else + { + pointsCount = 30; + } + } + else + { + pointsCount = 500; + } } ~CV_solvePnP_Test() {} protected: - virtual bool runTest(RNG& rng, int mode, int method, const vector& points, const double* epsilon, double& maxError) + virtual bool runTest(RNG& rng, int mode, int method, const vector& points, double& errorTrans, double& errorRot) { - Mat rvec, tvec; + if ((!planar && (method == SOLVEPNP_IPPE || method == SOLVEPNP_IPPE_SQUARE)) || + (!planarTag && method == SOLVEPNP_IPPE_SQUARE)) + { + errorTrans = -1; + errorRot = -1; + //SOLVEPNP_IPPE and SOLVEPNP_IPPE_SQUARE need planar object + return true; + } + + //Tune thresholds... 
+ double epsilon_trans[SOLVEPNP_MAX_COUNT]; + memcpy(epsilon_trans, eps, SOLVEPNP_MAX_COUNT * sizeof(*epsilon_trans)); + + double epsilon_rot[SOLVEPNP_MAX_COUNT]; + memcpy(epsilon_rot, eps, SOLVEPNP_MAX_COUNT * sizeof(*epsilon_rot)); + + if (planar) + { + if (mode == 0) + { + epsilon_trans[SOLVEPNP_EPNP] = 5.0e-3; + epsilon_trans[SOLVEPNP_DLS] = 5.0e-3; + epsilon_trans[SOLVEPNP_UPNP] = 5.0e-3; + + epsilon_rot[SOLVEPNP_EPNP] = 5.0e-3; + epsilon_rot[SOLVEPNP_DLS] = 5.0e-3; + epsilon_rot[SOLVEPNP_UPNP] = 5.0e-3; + } + else + { + epsilon_trans[SOLVEPNP_ITERATIVE] = 1e-4; + epsilon_trans[SOLVEPNP_EPNP] = 5e-3; + epsilon_trans[SOLVEPNP_DLS] = 5e-3; + epsilon_trans[SOLVEPNP_UPNP] = 5e-3; + epsilon_trans[SOLVEPNP_P3P] = 1e-4; + epsilon_trans[SOLVEPNP_AP3P] = 1e-4; + epsilon_trans[SOLVEPNP_IPPE] = 1e-4; + epsilon_trans[SOLVEPNP_IPPE_SQUARE] = 1e-4; + + epsilon_rot[SOLVEPNP_ITERATIVE] = 1e-4; + epsilon_rot[SOLVEPNP_EPNP] = 5e-3; + epsilon_rot[SOLVEPNP_DLS] = 5e-3; + epsilon_rot[SOLVEPNP_UPNP] = 5e-3; + epsilon_rot[SOLVEPNP_P3P] = 1e-4; + epsilon_rot[SOLVEPNP_AP3P] = 1e-4; + epsilon_rot[SOLVEPNP_IPPE] = 1e-4; + epsilon_rot[SOLVEPNP_IPPE_SQUARE] = 1e-4; + } + } + Mat trueRvec, trueTvec; Mat intrinsics, distCoeffs; generateCameraMatrix(intrinsics, rng); - if (method == SOLVEPNP_DLS) - { - intrinsics.at(1,1) = intrinsics.at(0,0); - } + //UPnP is mapped to EPnP + //Uncomment this when UPnP is fixed +// if (method == SOLVEPNP_UPNP) +// { +// intrinsics.at(1,1) = intrinsics.at(0,0); +// } if (mode == 0) { distCoeffs = Mat::zeros(4, 1, CV_64FC1); @@ -222,7 +527,8 @@ protected: { generateDistCoeffs(distCoeffs, rng); } - generatePose(trueRvec, trueTvec, rng); + + generatePose(points, trueRvec, trueTvec, rng); std::vector opoints; switch(method) @@ -231,9 +537,18 @@ protected: case SOLVEPNP_AP3P: opoints = std::vector(points.begin(), points.begin()+4); break; - case SOLVEPNP_UPNP: - opoints = std::vector(points.begin(), points.begin()+50); - break; + //UPnP is mapped to EPnP + 
//Uncomment this when UPnP is fixed +// case SOLVEPNP_UPNP: +// if (points.size() > 50) +// { +// opoints = std::vector(points.begin(), points.begin()+50); +// } +// else +// { +// opoints = points; +// } +// break; default: opoints = points; break; @@ -243,20 +558,19 @@ protected: projectedPoints.resize(opoints.size()); projectPoints(opoints, trueRvec, trueTvec, intrinsics, distCoeffs, projectedPoints); + Mat rvec, tvec; bool isEstimateSuccess = solvePnP(opoints, projectedPoints, intrinsics, distCoeffs, rvec, tvec, false, method); - if (isEstimateSuccess == false) + + if (!isEstimateSuccess) { - return isEstimateSuccess; + return false; } double rvecDiff = cvtest::norm(rvec, trueRvec, NORM_L2), tvecDiff = cvtest::norm(tvec, trueTvec, NORM_L2); - bool isTestSuccess = rvecDiff < epsilon[method] && tvecDiff < epsilon[method]; + bool isTestSuccess = rvecDiff < epsilon_rot[method] && tvecDiff < epsilon_trans[method]; - double error = rvecDiff > tvecDiff ? rvecDiff : tvecDiff; - if (error > maxError) - { - maxError = error; - } + errorTrans = tvecDiff; + errorRot = rvecDiff; return isTestSuccess; } @@ -264,95 +578,129 @@ protected: class CV_solveP3P_Test : public CV_solvePnPRansac_Test { - public: - CV_solveP3P_Test() - { - eps[SOLVEPNP_P3P] = 2.0e-4; - eps[SOLVEPNP_AP3P] = 1.0e-4; - totalTestsCount = 1000; - } - - ~CV_solveP3P_Test() {} - protected: - virtual bool runTest(RNG& rng, int mode, int method, const vector& points, const double* epsilon, double& maxError) - { - std::vector rvecs, tvecs; - Mat trueRvec, trueTvec; - Mat intrinsics, distCoeffs; - generateCameraMatrix(intrinsics, rng); - if (mode == 0) - distCoeffs = Mat::zeros(4, 1, CV_64FC1); - else - generateDistCoeffs(distCoeffs, rng); - generatePose(trueRvec, trueTvec, rng); - - std::vector opoints; - opoints = std::vector(points.begin(), points.begin()+3); - - vector projectedPoints; - projectedPoints.resize(opoints.size()); - projectPoints(opoints, trueRvec, trueTvec, intrinsics, distCoeffs, 
projectedPoints); - - int num_of_solutions = solveP3P(opoints, projectedPoints, intrinsics, distCoeffs, rvecs, tvecs, method); - if (num_of_solutions != (int) rvecs.size() || num_of_solutions != (int) tvecs.size() || num_of_solutions == 0) - return false; - - bool isTestSuccess = false; - double error = DBL_MAX; - for (unsigned int i = 0; i < rvecs.size() && !isTestSuccess; ++i) { - double rvecDiff = cvtest::norm(rvecs[i], trueRvec, NORM_L2); - double tvecDiff = cvtest::norm(tvecs[i], trueTvec, NORM_L2); - isTestSuccess = rvecDiff < epsilon[method] && tvecDiff < epsilon[method]; - error = std::min(error, std::max(rvecDiff, tvecDiff)); - } - - if (error > maxError) - maxError = error; - - return isTestSuccess; - } - - virtual void run(int) - { - ts->set_failed_test_info(cvtest::TS::OK); - - vector points; - points.resize(pointsCount); - generate3DPointCloud(points); - - const int methodsCount = 2; - int methods[] = {SOLVEPNP_P3P, SOLVEPNP_AP3P}; - RNG rng = ts->get_rng(); - - for (int mode = 0; mode < 2; mode++) +public: + CV_solveP3P_Test() { - for (int method = 0; method < methodsCount; method++) - { - double maxError = 0; - int successfulTestsCount = 0; - for (int testIndex = 0; testIndex < totalTestsCount; testIndex++) - { - if (runTest(rng, mode, methods[method], points, eps, maxError)) - successfulTestsCount++; - } - if (successfulTestsCount < 0.7*totalTestsCount) - { - ts->printf( cvtest::TS::LOG, "Invalid accuracy for method %d, failed %d tests from %d, maximum error equals %f, distortion mode equals %d\n", - method, totalTestsCount - successfulTestsCount, totalTestsCount, maxError, mode); - ts->set_failed_test_info(cvtest::TS::FAIL_BAD_ACCURACY); - } - cout << "mode: " << mode << ", method: " << method << " -> " - << ((double)successfulTestsCount / totalTestsCount) * 100 << "%" - << " (err < " << maxError << ")" << endl; - } + eps[SOLVEPNP_P3P] = 2.0e-4; + eps[SOLVEPNP_AP3P] = 1.0e-4; + totalTestsCount = 1000; + } + + ~CV_solveP3P_Test() {} +protected: + 
virtual bool runTest(RNG& rng, int mode, int method, const vector& points, double& errorTrans, double& errorRot) + { + std::vector rvecs, tvecs; + Mat trueRvec, trueTvec; + Mat intrinsics, distCoeffs; + generateCameraMatrix(intrinsics, rng); + if (mode == 0) + { + distCoeffs = Mat::zeros(4, 1, CV_64FC1); + } + else + { + generateDistCoeffs(distCoeffs, rng); + } + generatePose(points, trueRvec, trueTvec, rng); + + std::vector opoints; + opoints = std::vector(points.begin(), points.begin()+3); + + vector projectedPoints; + projectedPoints.resize(opoints.size()); + projectPoints(opoints, trueRvec, trueTvec, intrinsics, distCoeffs, projectedPoints); + + int num_of_solutions = solveP3P(opoints, projectedPoints, intrinsics, distCoeffs, rvecs, tvecs, method); + if (num_of_solutions != (int) rvecs.size() || num_of_solutions != (int) tvecs.size() || num_of_solutions == 0) + { + return false; + } + + bool isTestSuccess = false; + for (size_t i = 0; i < rvecs.size() && !isTestSuccess; i++) { + double rvecDiff = cvtest::norm(rvecs[i], trueRvec, NORM_L2); + double tvecDiff = cvtest::norm(tvecs[i], trueTvec, NORM_L2); + isTestSuccess = rvecDiff < eps[method] && tvecDiff < eps[method]; + + errorTrans = std::min(errorTrans, tvecDiff); + errorRot = std::min(errorRot, rvecDiff); + } + + return isTestSuccess; + } + + virtual void run(int) + { + ts->set_failed_test_info(cvtest::TS::OK); + + vector points; + points.resize(static_cast(pointsCount)); + generate3DPointCloud(points); + + const int methodsCount = 2; + int methods[] = {SOLVEPNP_P3P, SOLVEPNP_AP3P}; + RNG rng = ts->get_rng(); + + for (int mode = 0; mode < 2; mode++) + { + //To get the same input for each methods + RNG rngCopy = rng; + for (int method = 0; method < methodsCount; method++) + { + std::vector vec_errorTrans, vec_errorRot; + vec_errorTrans.reserve(static_cast(totalTestsCount)); + vec_errorRot.reserve(static_cast(totalTestsCount)); + + int successfulTestsCount = 0; + for (int testIndex = 0; testIndex < 
totalTestsCount; testIndex++) + { + double errorTrans = 0, errorRot = 0; + if (runTest(rngCopy, mode, methods[method], points, errorTrans, errorRot)) + { + successfulTestsCount++; + } + vec_errorTrans.push_back(errorTrans); + vec_errorRot.push_back(errorRot); + } + + double maxErrorTrans = getMax(vec_errorTrans); + double maxErrorRot = getMax(vec_errorRot); + double meanErrorTrans = getMean(vec_errorTrans); + double meanErrorRot = getMean(vec_errorRot); + double medianErrorTrans = getMedian(vec_errorTrans); + double medianErrorRot = getMedian(vec_errorRot); + + if (successfulTestsCount < 0.7*totalTestsCount) + { + ts->printf(cvtest::TS::LOG, "Invalid accuracy for %s, failed %d tests from %d, %s, " + "maxErrT: %f, maxErrR: %f, " + "meanErrT: %f, meanErrR: %f, " + "medErrT: %f, medErrR: %f\n", + printMethod(methods[method]).c_str(), totalTestsCount - successfulTestsCount, totalTestsCount, printMode(mode).c_str(), + maxErrorTrans, maxErrorRot, meanErrorTrans, meanErrorRot, medianErrorTrans, medianErrorRot); + ts->set_failed_test_info(cvtest::TS::FAIL_BAD_ACCURACY); + } + cout << "mode: " << printMode(mode) << ", method: " << printMethod(methods[method]) << " -> " + << ((double)successfulTestsCount / totalTestsCount) * 100 << "%" + << " (maxErrT: " << maxErrorTrans << ", maxErrR: " << maxErrorRot + << ", meanErrT: " << meanErrorTrans << ", meanErrR: " << meanErrorRot + << ", medErrT: " << medianErrorTrans << ", medErrR: " << medianErrorRot << ")" << endl; + double transThres, rotThresh; + findThreshold(vec_errorTrans, vec_errorRot, 0.7, transThres, rotThresh); + cout << "approximate translation threshold for 0.7: " << transThres + << ", approximate rotation threshold for 0.7: " << rotThresh << endl; + } + } } - } }; TEST(Calib3d_SolveP3P, accuracy) { CV_solveP3P_Test test; test.safe_run();} TEST(Calib3d_SolvePnPRansac, accuracy) { CV_solvePnPRansac_Test test; test.safe_run(); } TEST(Calib3d_SolvePnP, accuracy) { CV_solvePnP_Test test; test.safe_run(); } 
+TEST(Calib3d_SolvePnP, accuracy_planar) { CV_solvePnP_Test test(true); test.safe_run(); } +TEST(Calib3d_SolvePnP, accuracy_planar_tag) { CV_solvePnP_Test test(true, true); test.safe_run(); } TEST(Calib3d_SolvePnPRansac, concurrency) { @@ -367,6 +715,7 @@ TEST(Calib3d_SolvePnPRansac, concurrency) camera_mat.at(1, 0) = 0.f; camera_mat.at(2, 0) = 0.f; camera_mat.at(2, 1) = 0.f; + camera_mat.at(2, 2) = 1.f; Mat dist_coef(1, 8, CV_32F, cv::Scalar::all(0)); @@ -420,7 +769,7 @@ TEST(Calib3d_SolvePnPRansac, input_type) { const int numPoints = 10; Matx33d intrinsics(5.4794130238156129e+002, 0., 2.9835545700043139e+002, 0., - 5.4817724002728005e+002, 2.3062194051986233e+002, 0., 0., 1.); + 5.4817724002728005e+002, 2.3062194051986233e+002, 0., 0., 1.); std::vector points3d; std::vector points2d; @@ -455,7 +804,7 @@ TEST(Calib3d_SolvePnPRansac, input_type) EXPECT_LE(cvtest::norm(t1, t4, NORM_INF), 1e-6); } -TEST(Calib3d_SolvePnP, double_support) +TEST(Calib3d_SolvePnPRansac, double_support) { Matx33d intrinsics(5.4794130238156129e+002, 0., 2.9835545700043139e+002, 0., 5.4817724002728005e+002, 2.3062194051986233e+002, 0., 0., 1.); @@ -466,15 +815,15 @@ TEST(Calib3d_SolvePnP, double_support) for (int i = 0; i < 10 ; i+=2) { points3d.push_back(cv::Point3d(5+i, 3, 2)); - points3dF.push_back(cv::Point3d(5+i, 3, 2)); + points3dF.push_back(cv::Point3f(static_cast(5+i), 3, 2)); points3d.push_back(cv::Point3d(5+i, 3+i, 2+i)); - points3dF.push_back(cv::Point3d(5+i, 3+i, 2+i)); + points3dF.push_back(cv::Point3f(static_cast(5+i), static_cast(3+i), static_cast(2+i))); points2d.push_back(cv::Point2d(0, i)); - points2dF.push_back(cv::Point2d(0, i)); + points2dF.push_back(cv::Point2f(0, static_cast(i))); points2d.push_back(cv::Point2d(-i, i)); - points2dF.push_back(cv::Point2d(-i, i)); + points2dF.push_back(cv::Point2f(static_cast(-i), static_cast(i))); } - Mat R,t, RF, tF; + Mat R, t, RF, tF; vector inliers; solvePnPRansac(points3dF, points2dF, intrinsics, cv::Mat(), RF, tF, true, 100, 8.f, 
0.999, inliers, cv::SOLVEPNP_P3P); @@ -484,6 +833,367 @@ TEST(Calib3d_SolvePnP, double_support) EXPECT_LE(cvtest::norm(t, Mat_(tF), NORM_INF), 1e-3); } +TEST(Calib3d_SolvePnP, input_type) +{ + Matx33d intrinsics(5.4794130238156129e+002, 0., 2.9835545700043139e+002, 0., + 5.4817724002728005e+002, 2.3062194051986233e+002, 0., 0., 1.); + vector points3d_; + vector points3dF_; + //Cube + const float l = -0.1f; + //Front face + points3d_.push_back(Point3d(-l, -l, -l)); + points3dF_.push_back(Point3f(-l, -l, -l)); + points3d_.push_back(Point3d(l, -l, -l)); + points3dF_.push_back(Point3f(l, -l, -l)); + points3d_.push_back(Point3d(l, l, -l)); + points3dF_.push_back(Point3f(l, l, -l)); + points3d_.push_back(Point3d(-l, l, -l)); + points3dF_.push_back(Point3f(-l, l, -l)); + //Back face + points3d_.push_back(Point3d(-l, -l, l)); + points3dF_.push_back(Point3f(-l, -l, l)); + points3d_.push_back(Point3d(l, -l, l)); + points3dF_.push_back(Point3f(l, -l, l)); + points3d_.push_back(Point3d(l, l, l)); + points3dF_.push_back(Point3f(l, l, l)); + points3d_.push_back(Point3d(-l, l, l)); + points3dF_.push_back(Point3f(-l, l, l)); + + Mat trueRvec = (Mat_(3,1) << 0.1, -0.25, 0.467); + Mat trueTvec = (Mat_(3,1) << -0.21, 0.12, 0.746); + + for (int method = 0; method < SOLVEPNP_MAX_COUNT; method++) + { + vector points3d; + vector points2d; + vector points3dF; + vector points2dF; + + if (method == SOLVEPNP_IPPE || method == SOLVEPNP_IPPE_SQUARE) + { + const float tagSize_2 = 0.05f / 2; + points3d.push_back(Point3d(-tagSize_2, tagSize_2, 0)); + points3d.push_back(Point3d( tagSize_2, tagSize_2, 0)); + points3d.push_back(Point3d( tagSize_2, -tagSize_2, 0)); + points3d.push_back(Point3d(-tagSize_2, -tagSize_2, 0)); + + points3dF.push_back(Point3f(-tagSize_2, tagSize_2, 0)); + points3dF.push_back(Point3f( tagSize_2, tagSize_2, 0)); + points3dF.push_back(Point3f( tagSize_2, -tagSize_2, 0)); + points3dF.push_back(Point3f(-tagSize_2, -tagSize_2, 0)); + } + else if (method == SOLVEPNP_P3P || method 
== SOLVEPNP_AP3P) + { + points3d = vector(points3d_.begin(), points3d_.begin()+4); + points3dF = vector(points3dF_.begin(), points3dF_.begin()+4); + } + else + { + points3d = points3d_; + points3dF = points3dF_; + } + + projectPoints(points3d, trueRvec, trueTvec, intrinsics, noArray(), points2d); + projectPoints(points3dF, trueRvec, trueTvec, intrinsics, noArray(), points2dF); + + //solvePnP + { + Mat R, t, RF, tF; + + solvePnP(points3dF, points2dF, Matx33f(intrinsics), Mat(), RF, tF, false, method); + solvePnP(points3d, points2d, intrinsics, Mat(), R, t, false, method); + + //By default rvec and tvec must be returned in double precision + EXPECT_EQ(RF.type(), tF.type()); + EXPECT_EQ(RF.type(), CV_64FC1); + + EXPECT_EQ(R.type(), t.type()); + EXPECT_EQ(R.type(), CV_64FC1); + + EXPECT_LE(cvtest::norm(R, RF, NORM_INF), 1e-3); + EXPECT_LE(cvtest::norm(t, tF, NORM_INF), 1e-3); + + EXPECT_LE(cvtest::norm(trueRvec, R, NORM_INF), 1e-3); + EXPECT_LE(cvtest::norm(trueTvec, t, NORM_INF), 1e-3); + EXPECT_LE(cvtest::norm(trueRvec, RF, NORM_INF), 1e-3); + EXPECT_LE(cvtest::norm(trueTvec, tF, NORM_INF), 1e-3); + } + { + Mat R1, t1, R2, t2; + + solvePnP(points3dF, points2d, intrinsics, Mat(), R1, t1, false, method); + solvePnP(points3d, points2dF, intrinsics, Mat(), R2, t2, false, method); + + //By default rvec and tvec must be returned in double precision + EXPECT_EQ(R1.type(), t1.type()); + EXPECT_EQ(R1.type(), CV_64FC1); + + EXPECT_EQ(R2.type(), t2.type()); + EXPECT_EQ(R2.type(), CV_64FC1); + + EXPECT_LE(cvtest::norm(R1, R2, NORM_INF), 1e-3); + EXPECT_LE(cvtest::norm(t1, t2, NORM_INF), 1e-3); + + EXPECT_LE(cvtest::norm(trueRvec, R1, NORM_INF), 1e-3); + EXPECT_LE(cvtest::norm(trueTvec, t1, NORM_INF), 1e-3); + EXPECT_LE(cvtest::norm(trueRvec, R2, NORM_INF), 1e-3); + EXPECT_LE(cvtest::norm(trueTvec, t2, NORM_INF), 1e-3); + } + { + Mat R1(3,1,CV_32FC1), t1(3,1,CV_64FC1); + Mat R2(3,1,CV_64FC1), t2(3,1,CV_32FC1); + + solvePnP(points3dF, points2d, intrinsics, Mat(), R1, t1, false, 
method); + solvePnP(points3d, points2dF, intrinsics, Mat(), R2, t2, false, method); + + //If not null, rvec and tvec must be returned in the same precision + EXPECT_EQ(R1.type(), CV_32FC1); + EXPECT_EQ(t1.type(), CV_64FC1); + + EXPECT_EQ(R2.type(), CV_64FC1); + EXPECT_EQ(t2.type(), CV_32FC1); + + EXPECT_LE(cvtest::norm(Mat_(R1), R2, NORM_INF), 1e-3); + EXPECT_LE(cvtest::norm(t1, Mat_(t2), NORM_INF), 1e-3); + + EXPECT_LE(cvtest::norm(trueRvec, Mat_(R1), NORM_INF), 1e-3); + EXPECT_LE(cvtest::norm(trueTvec, t1, NORM_INF), 1e-3); + EXPECT_LE(cvtest::norm(trueRvec, R2, NORM_INF), 1e-3); + EXPECT_LE(cvtest::norm(trueTvec, Mat_(t2), NORM_INF), 1e-3); + } + { + Matx31f R1, t2; + Matx31d R2, t1; + + solvePnP(points3dF, points2d, intrinsics, Mat(), R1, t1, false, method); + solvePnP(points3d, points2dF, intrinsics, Mat(), R2, t2, false, method); + + Matx31d R1d(R1(0), R1(1), R1(2)); + Matx31d t2d(t2(0), t2(1), t2(2)); + + EXPECT_LE(cvtest::norm(R1d, R2, NORM_INF), 1e-3); + EXPECT_LE(cvtest::norm(t1, t2d, NORM_INF), 1e-3); + + EXPECT_LE(cvtest::norm(trueRvec, R1d, NORM_INF), 1e-3); + EXPECT_LE(cvtest::norm(trueTvec, t1, NORM_INF), 1e-3); + EXPECT_LE(cvtest::norm(trueRvec, R2, NORM_INF), 1e-3); + EXPECT_LE(cvtest::norm(trueTvec, t2d, NORM_INF), 1e-3); + } + + //solvePnPGeneric + { + vector Rs, ts, RFs, tFs; + + int res1 = solvePnPGeneric(points3dF, points2dF, Matx33f(intrinsics), Mat(), RFs, tFs, false, (SolvePnPMethod)method); + int res2 = solvePnPGeneric(points3d, points2d, intrinsics, Mat(), Rs, ts, false, (SolvePnPMethod)method); + + EXPECT_GT(res1, 0); + EXPECT_GT(res2, 0); + + Mat R = Rs.front(), t = ts.front(), RF = RFs.front(), tF = tFs.front(); + + //By default rvecs and tvecs must be returned in double precision + EXPECT_EQ(RF.type(), tF.type()); + EXPECT_EQ(RF.type(), CV_64FC1); + + EXPECT_EQ(R.type(), t.type()); + EXPECT_EQ(R.type(), CV_64FC1); + + EXPECT_LE(cvtest::norm(R, RF, NORM_INF), 1e-3); + EXPECT_LE(cvtest::norm(t, tF, NORM_INF), 1e-3); + + 
EXPECT_LE(cvtest::norm(trueRvec, R, NORM_INF), 1e-3); + EXPECT_LE(cvtest::norm(trueTvec, t, NORM_INF), 1e-3); + EXPECT_LE(cvtest::norm(trueRvec, RF, NORM_INF), 1e-3); + EXPECT_LE(cvtest::norm(trueTvec, tF, NORM_INF), 1e-3); + } + { + vector R1s, t1s, R2s, t2s; + + int res1 = solvePnPGeneric(points3dF, points2d, intrinsics, Mat(), R1s, t1s, false, (SolvePnPMethod)method); + int res2 = solvePnPGeneric(points3d, points2dF, intrinsics, Mat(), R2s, t2s, false, (SolvePnPMethod)method); + + EXPECT_GT(res1, 0); + EXPECT_GT(res2, 0); + + Mat R1 = R1s.front(), t1 = t1s.front(), R2 = R2s.front(), t2 = t2s.front(); + + //By default rvecs and tvecs must be returned in double precision + EXPECT_EQ(R1.type(), t1.type()); + EXPECT_EQ(R1.type(), CV_64FC1); + + EXPECT_EQ(R2.type(), t2.type()); + EXPECT_EQ(R2.type(), CV_64FC1); + + EXPECT_LE(cvtest::norm(R1, R2, NORM_INF), 1e-3); + EXPECT_LE(cvtest::norm(t1, t2, NORM_INF), 1e-3); + + EXPECT_LE(cvtest::norm(trueRvec, R1, NORM_INF), 1e-3); + EXPECT_LE(cvtest::norm(trueTvec, t1, NORM_INF), 1e-3); + EXPECT_LE(cvtest::norm(trueRvec, R2, NORM_INF), 1e-3); + EXPECT_LE(cvtest::norm(trueTvec, t2, NORM_INF), 1e-3); + } + { + vector > R1s, t2s; + vector > R2s, t1s; + + int res1 = solvePnPGeneric(points3dF, points2d, intrinsics, Mat(), R1s, t1s, false, (SolvePnPMethod)method); + int res2 = solvePnPGeneric(points3d, points2dF, intrinsics, Mat(), R2s, t2s, false, (SolvePnPMethod)method); + + EXPECT_GT(res1, 0); + EXPECT_GT(res2, 0); + + Mat R1 = R1s.front(), t1 = t1s.front(); + Mat R2 = R2s.front(), t2 = t2s.front(); + + //If not null, rvecs and tvecs must be returned in the same precision + EXPECT_EQ(R1.type(), CV_32FC1); + EXPECT_EQ(t1.type(), CV_64FC1); + + EXPECT_EQ(R2.type(), CV_64FC1); + EXPECT_EQ(t2.type(), CV_32FC1); + + EXPECT_LE(cvtest::norm(Mat_(R1), R2, NORM_INF), 1e-3); + EXPECT_LE(cvtest::norm(t1, Mat_(t2), NORM_INF), 1e-3); + + EXPECT_LE(cvtest::norm(trueRvec, Mat_(R1), NORM_INF), 1e-3); + EXPECT_LE(cvtest::norm(trueTvec, t1, 
NORM_INF), 1e-3); + EXPECT_LE(cvtest::norm(trueRvec, R2, NORM_INF), 1e-3); + EXPECT_LE(cvtest::norm(trueTvec, Mat_(t2), NORM_INF), 1e-3); + } + { + vector R1s, t2s; + vector R2s, t1s; + + int res1 = solvePnPGeneric(points3dF, points2d, intrinsics, Mat(), R1s, t1s, false, (SolvePnPMethod)method); + int res2 = solvePnPGeneric(points3d, points2dF, intrinsics, Mat(), R2s, t2s, false, (SolvePnPMethod)method); + + EXPECT_GT(res1, 0); + EXPECT_GT(res2, 0); + + Matx31f R1 = R1s.front(), t2 = t2s.front(); + Matx31d R2 = R2s.front(), t1 = t1s.front(); + Matx31d R1d(R1(0), R1(1), R1(2)), t2d(t2(0), t2(1), t2(2)); + + EXPECT_LE(cvtest::norm(R1d, R2, NORM_INF), 1e-3); + EXPECT_LE(cvtest::norm(t1, t2d, NORM_INF), 1e-3); + + EXPECT_LE(cvtest::norm(trueRvec, R1d, NORM_INF), 1e-3); + EXPECT_LE(cvtest::norm(trueTvec, t1, NORM_INF), 1e-3); + EXPECT_LE(cvtest::norm(trueRvec, R2, NORM_INF), 1e-3); + EXPECT_LE(cvtest::norm(trueTvec, t2d, NORM_INF), 1e-3); + } + + if (method == SOLVEPNP_P3P || method == SOLVEPNP_AP3P) + { + //solveP3P + { + vector Rs, ts, RFs, tFs; + + int res1 = solveP3P(points3dF, points2dF, Matx33f(intrinsics), Mat(), RFs, tFs, (SolvePnPMethod)method); + int res2 = solveP3P(points3d, points2d, intrinsics, Mat(), Rs, ts, (SolvePnPMethod)method); + + EXPECT_GT(res1, 0); + EXPECT_GT(res2, 0); + + Mat R = Rs.front(), t = ts.front(), RF = RFs.front(), tF = tFs.front(); + + //By default rvecs and tvecs must be returned in double precision + EXPECT_EQ(RF.type(), tF.type()); + EXPECT_EQ(RF.type(), CV_64FC1); + + EXPECT_EQ(R.type(), t.type()); + EXPECT_EQ(R.type(), CV_64FC1); + + EXPECT_LE(cvtest::norm(R, RF, NORM_INF), 1e-3); + EXPECT_LE(cvtest::norm(t, tF, NORM_INF), 1e-3); + + EXPECT_LE(cvtest::norm(trueRvec, R, NORM_INF), 1e-3); + EXPECT_LE(cvtest::norm(trueTvec, t, NORM_INF), 1e-3); + EXPECT_LE(cvtest::norm(trueRvec, RF, NORM_INF), 1e-3); + EXPECT_LE(cvtest::norm(trueTvec, tF, NORM_INF), 1e-3); + } + { + vector R1s, t1s, R2s, t2s; + + int res1 = solveP3P(points3dF, 
points2d, intrinsics, Mat(), R1s, t1s, (SolvePnPMethod)method); + int res2 = solveP3P(points3d, points2dF, intrinsics, Mat(), R2s, t2s, (SolvePnPMethod)method); + + EXPECT_GT(res1, 0); + EXPECT_GT(res2, 0); + + Mat R1 = R1s.front(), t1 = t1s.front(), R2 = R2s.front(), t2 = t2s.front(); + + //By default rvecs and tvecs must be returned in double precision + EXPECT_EQ(R1.type(), t1.type()); + EXPECT_EQ(R1.type(), CV_64FC1); + + EXPECT_EQ(R2.type(), t2.type()); + EXPECT_EQ(R2.type(), CV_64FC1); + + EXPECT_LE(cvtest::norm(R1, R2, NORM_INF), 1e-3); + EXPECT_LE(cvtest::norm(t1, t2, NORM_INF), 1e-3); + + EXPECT_LE(cvtest::norm(trueRvec, R1, NORM_INF), 1e-3); + EXPECT_LE(cvtest::norm(trueTvec, t1, NORM_INF), 1e-3); + EXPECT_LE(cvtest::norm(trueRvec, R2, NORM_INF), 1e-3); + EXPECT_LE(cvtest::norm(trueTvec, t2, NORM_INF), 1e-3); + } + { + vector > R1s, t2s; + vector > R2s, t1s; + + int res1 = solveP3P(points3dF, points2d, intrinsics, Mat(), R1s, t1s, (SolvePnPMethod)method); + int res2 = solveP3P(points3d, points2dF, intrinsics, Mat(), R2s, t2s, (SolvePnPMethod)method); + + EXPECT_GT(res1, 0); + EXPECT_GT(res2, 0); + + Mat R1 = R1s.front(), t1 = t1s.front(); + Mat R2 = R2s.front(), t2 = t2s.front(); + + //If not null, rvecs and tvecs must be returned in the same precision + EXPECT_EQ(R1.type(), CV_32FC1); + EXPECT_EQ(t1.type(), CV_64FC1); + + EXPECT_EQ(R2.type(), CV_64FC1); + EXPECT_EQ(t2.type(), CV_32FC1); + + EXPECT_LE(cvtest::norm(Mat_(R1), R2, NORM_INF), 1e-3); + EXPECT_LE(cvtest::norm(t1, Mat_(t2), NORM_INF), 1e-3); + + EXPECT_LE(cvtest::norm(trueRvec, Mat_(R1), NORM_INF), 1e-3); + EXPECT_LE(cvtest::norm(trueTvec, t1, NORM_INF), 1e-3); + EXPECT_LE(cvtest::norm(trueRvec, R2, NORM_INF), 1e-3); + EXPECT_LE(cvtest::norm(trueTvec, Mat_(t2), NORM_INF), 1e-3); + } + { + vector R1s, t2s; + vector R2s, t1s; + + int res1 = solveP3P(points3dF, points2d, intrinsics, Mat(), R1s, t1s, (SolvePnPMethod)method); + int res2 = solveP3P(points3d, points2dF, intrinsics, Mat(), R2s, t2s, 
(SolvePnPMethod)method); + + EXPECT_GT(res1, 0); + EXPECT_GT(res2, 0); + + Matx31f R1 = R1s.front(), t2 = t2s.front(); + Matx31d R2 = R2s.front(), t1 = t1s.front(); + Matx31d R1d(R1(0), R1(1), R1(2)), t2d(t2(0), t2(1), t2(2)); + + EXPECT_LE(cvtest::norm(R1d, R2, NORM_INF), 1e-3); + EXPECT_LE(cvtest::norm(t1, t2d, NORM_INF), 1e-3); + + EXPECT_LE(cvtest::norm(trueRvec, R1d, NORM_INF), 1e-3); + EXPECT_LE(cvtest::norm(trueTvec, t1, NORM_INF), 1e-3); + EXPECT_LE(cvtest::norm(trueRvec, R2, NORM_INF), 1e-3); + EXPECT_LE(cvtest::norm(trueTvec, t2d, NORM_INF), 1e-3); + } + } + } +} + TEST(Calib3d_SolvePnP, translation) { Mat cameraIntrinsic = Mat::eye(3,3, CV_32FC1); @@ -548,13 +1258,16 @@ TEST(Calib3d_SolvePnP, iterativeInitialGuess3pts) solvePnP(p3d, p2d, intrinsics, noArray(), rvec_est, tvec_est, true, SOLVEPNP_ITERATIVE); - std::cout << "rvec_ground_truth: " << rvec_ground_truth.t() << std::endl; - std::cout << "rvec_est: " << rvec_est.t() << std::endl; - std::cout << "tvec_ground_truth: " << tvec_ground_truth.t() << std::endl; - std::cout << "tvec_est: " << tvec_est.t() << std::endl; + cout << "rvec_ground_truth: " << rvec_ground_truth.t() << std::endl; + cout << "rvec_est: " << rvec_est.t() << std::endl; + cout << "tvec_ground_truth: " << tvec_ground_truth.t() << std::endl; + cout << "tvec_est: " << tvec_est.t() << std::endl; EXPECT_LE(cvtest::norm(rvec_ground_truth, rvec_est, NORM_INF), 1e-6); EXPECT_LE(cvtest::norm(tvec_ground_truth, tvec_est, NORM_INF), 1e-6); + + EXPECT_EQ(rvec_est.type(), CV_64FC1); + EXPECT_EQ(tvec_est.type(), CV_64FC1); } { @@ -579,13 +1292,230 @@ TEST(Calib3d_SolvePnP, iterativeInitialGuess3pts) solvePnP(p3d, p2d, intrinsics, noArray(), rvec_est, tvec_est, true, SOLVEPNP_ITERATIVE); - std::cout << "rvec_ground_truth: " << rvec_ground_truth.t() << std::endl; - std::cout << "rvec_est: " << rvec_est.t() << std::endl; - std::cout << "tvec_ground_truth: " << tvec_ground_truth.t() << std::endl; - std::cout << "tvec_est: " << tvec_est.t() << 
std::endl; + cout << "rvec_ground_truth: " << rvec_ground_truth.t() << std::endl; + cout << "rvec_est: " << rvec_est.t() << std::endl; + cout << "tvec_ground_truth: " << tvec_ground_truth.t() << std::endl; + cout << "tvec_est: " << tvec_est.t() << std::endl; EXPECT_LE(cvtest::norm(rvec_ground_truth, rvec_est, NORM_INF), 1e-6); EXPECT_LE(cvtest::norm(tvec_ground_truth, tvec_est, NORM_INF), 1e-6); + + EXPECT_EQ(rvec_est.type(), CV_32FC1); + EXPECT_EQ(tvec_est.type(), CV_32FC1); + } +} + +TEST(Calib3d_SolvePnP, iterativeInitialGuess) +{ + { + Matx33d intrinsics(605.4, 0.0, 317.35, + 0.0, 601.2, 242.63, + 0.0, 0.0, 1.0); + + double L = 0.1; + vector p3d; + p3d.push_back(Point3d(-L, -L, 0.0)); + p3d.push_back(Point3d(L, -L, 0.0)); + p3d.push_back(Point3d(L, L, 0.0)); + p3d.push_back(Point3d(-L, L, L/2)); + p3d.push_back(Point3d(0, 0, -L/2)); + + Mat rvec_ground_truth = (Mat_(3,1) << 0.3, -0.2, 0.75); + Mat tvec_ground_truth = (Mat_(3,1) << 0.15, -0.2, 1.5); + + vector p2d; + projectPoints(p3d, rvec_ground_truth, tvec_ground_truth, intrinsics, noArray(), p2d); + + Mat rvec_est = (Mat_(3,1) << 0.1, -0.1, 0.1); + Mat tvec_est = (Mat_(3,1) << 0.0, -0.5, 1.0); + + solvePnP(p3d, p2d, intrinsics, noArray(), rvec_est, tvec_est, true, SOLVEPNP_ITERATIVE); + + cout << "rvec_ground_truth: " << rvec_ground_truth.t() << std::endl; + cout << "rvec_est: " << rvec_est.t() << std::endl; + cout << "tvec_ground_truth: " << tvec_ground_truth.t() << std::endl; + cout << "tvec_est: " << tvec_est.t() << std::endl; + + EXPECT_LE(cvtest::norm(rvec_ground_truth, rvec_est, NORM_INF), 1e-6); + EXPECT_LE(cvtest::norm(tvec_ground_truth, tvec_est, NORM_INF), 1e-6); + + EXPECT_EQ(rvec_est.type(), CV_64FC1); + EXPECT_EQ(tvec_est.type(), CV_64FC1); + } + + { + Matx33f intrinsics(605.4f, 0.0f, 317.35f, + 0.0f, 601.2f, 242.63f, + 0.0f, 0.0f, 1.0f); + + float L = 0.1f; + vector p3d; + p3d.push_back(Point3f(-L, -L, 0.0f)); + p3d.push_back(Point3f(L, -L, 0.0f)); + p3d.push_back(Point3f(L, L, 0.0f)); + 
p3d.push_back(Point3f(-L, L, L/2)); + p3d.push_back(Point3f(0, 0, -L/2)); + + Mat rvec_ground_truth = (Mat_(3,1) << -0.75f, 0.4f, 0.34f); + Mat tvec_ground_truth = (Mat_(3,1) << -0.15f, 0.35f, 1.58f); + + vector p2d; + projectPoints(p3d, rvec_ground_truth, tvec_ground_truth, intrinsics, noArray(), p2d); + + Mat rvec_est = (Mat_(3,1) << -0.1f, 0.1f, 0.1f); + Mat tvec_est = (Mat_(3,1) << 0.0f, 0.0f, 1.0f); + + solvePnP(p3d, p2d, intrinsics, noArray(), rvec_est, tvec_est, true, SOLVEPNP_ITERATIVE); + + cout << "rvec_ground_truth: " << rvec_ground_truth.t() << std::endl; + cout << "rvec_est: " << rvec_est.t() << std::endl; + cout << "tvec_ground_truth: " << tvec_ground_truth.t() << std::endl; + cout << "tvec_est: " << tvec_est.t() << std::endl; + + EXPECT_LE(cvtest::norm(rvec_ground_truth, rvec_est, NORM_INF), 1e-6); + EXPECT_LE(cvtest::norm(tvec_ground_truth, tvec_est, NORM_INF), 1e-6); + + EXPECT_EQ(rvec_est.type(), CV_32FC1); + EXPECT_EQ(tvec_est.type(), CV_32FC1); + } +} + +TEST(Calib3d_SolvePnP, generic) +{ + { + Matx33d intrinsics(605.4, 0.0, 317.35, + 0.0, 601.2, 242.63, + 0.0, 0.0, 1.0); + + double L = 0.1; + vector p3d_; + p3d_.push_back(Point3d(-L, L, 0)); + p3d_.push_back(Point3d(L, L, 0)); + p3d_.push_back(Point3d(L, -L, 0)); + p3d_.push_back(Point3d(-L, -L, 0)); + p3d_.push_back(Point3d(-L, L, L/2)); + p3d_.push_back(Point3d(0, 0, -L/2)); + + const int ntests = 10; + for (int numTest = 0; numTest < ntests; numTest++) + { + Mat rvec_ground_truth; + Mat tvec_ground_truth; + generatePose(p3d_, rvec_ground_truth, tvec_ground_truth, theRNG()); + + vector p2d_; + projectPoints(p3d_, rvec_ground_truth, tvec_ground_truth, intrinsics, noArray(), p2d_); + + for (int method = 0; method < SOLVEPNP_MAX_COUNT; method++) + { + vector rvecs_est; + vector tvecs_est; + + vector p3d; + vector p2d; + if (method == SOLVEPNP_P3P || method == SOLVEPNP_AP3P || + method == SOLVEPNP_IPPE || method == SOLVEPNP_IPPE_SQUARE) + { + p3d = vector(p3d_.begin(), p3d_.begin()+4); + p2d = 
vector(p2d_.begin(), p2d_.begin()+4); + } + else + { + p3d = p3d_; + p2d = p2d_; + } + + vector reprojectionErrors; + solvePnPGeneric(p3d, p2d, intrinsics, noArray(), rvecs_est, tvecs_est, false, (SolvePnPMethod)method, + noArray(), noArray(), reprojectionErrors); + + EXPECT_TRUE(!rvecs_est.empty()); + EXPECT_TRUE(rvecs_est.size() == tvecs_est.size() && tvecs_est.size() == reprojectionErrors.size()); + + for (size_t i = 0; i < reprojectionErrors.size()-1; i++) + { + EXPECT_GE(reprojectionErrors[i+1], reprojectionErrors[i]); + } + + bool isTestSuccess = false; + for (size_t i = 0; i < rvecs_est.size() && !isTestSuccess; i++) { + double rvecDiff = cvtest::norm(rvecs_est[i], rvec_ground_truth, NORM_L2); + double tvecDiff = cvtest::norm(tvecs_est[i], tvec_ground_truth, NORM_L2); + const double threshold = method == SOLVEPNP_P3P ? 1e-2 : 1e-4; + isTestSuccess = rvecDiff < threshold && tvecDiff < threshold; + } + + EXPECT_TRUE(isTestSuccess); + } + } + } + + { + Matx33f intrinsics(605.4f, 0.0f, 317.35f, + 0.0f, 601.2f, 242.63f, + 0.0f, 0.0f, 1.0f); + + float L = 0.1f; + vector p3f_; + p3f_.push_back(Point3f(-L, L, 0)); + p3f_.push_back(Point3f(L, L, 0)); + p3f_.push_back(Point3f(L, -L, 0)); + p3f_.push_back(Point3f(-L, -L, 0)); + p3f_.push_back(Point3f(-L, L, L/2)); + p3f_.push_back(Point3f(0, 0, -L/2)); + + const int ntests = 10; + for (int numTest = 0; numTest < ntests; numTest++) + { + Mat rvec_ground_truth; + Mat tvec_ground_truth; + generatePose(p3f_, rvec_ground_truth, tvec_ground_truth, theRNG()); + + vector p2f_; + projectPoints(p3f_, rvec_ground_truth, tvec_ground_truth, intrinsics, noArray(), p2f_); + + for (int method = 0; method < SOLVEPNP_MAX_COUNT; method++) + { + vector rvecs_est; + vector tvecs_est; + + vector p3f; + vector p2f; + if (method == SOLVEPNP_P3P || method == SOLVEPNP_AP3P || + method == SOLVEPNP_IPPE || method == SOLVEPNP_IPPE_SQUARE) + { + p3f = vector(p3f_.begin(), p3f_.begin()+4); + p2f = vector(p2f_.begin(), p2f_.begin()+4); + } + else + { 
+ p3f = p3f_; + p2f = p2f_; + } + + vector reprojectionErrors; + solvePnPGeneric(p3f, p2f, intrinsics, noArray(), rvecs_est, tvecs_est, false, (SolvePnPMethod)method, + noArray(), noArray(), reprojectionErrors); + + EXPECT_TRUE(!rvecs_est.empty()); + EXPECT_TRUE(rvecs_est.size() == tvecs_est.size() && tvecs_est.size() == reprojectionErrors.size()); + + for (size_t i = 0; i < reprojectionErrors.size()-1; i++) + { + EXPECT_GE(reprojectionErrors[i+1], reprojectionErrors[i]); + } + + bool isTestSuccess = false; + for (size_t i = 0; i < rvecs_est.size() && !isTestSuccess; i++) { + double rvecDiff = cvtest::norm(rvecs_est[i], rvec_ground_truth, NORM_L2); + double tvecDiff = cvtest::norm(tvecs_est[i], tvec_ground_truth, NORM_L2); + const double threshold = method == SOLVEPNP_P3P ? 1e-2 : 1e-4; + isTestSuccess = rvecDiff < threshold && tvecDiff < threshold; + } + + EXPECT_TRUE(isTestSuccess); + } + } } } diff --git a/modules/core/include/opencv2/core/base.hpp b/modules/core/include/opencv2/core/base.hpp index f484e9e108..5d76f52494 100644 --- a/modules/core/include/opencv2/core/base.hpp +++ b/modules/core/include/opencv2/core/base.hpp @@ -188,7 +188,7 @@ enum NormTypes { norm = \forkthree { \| \texttt{src1} \| _{L_2} ^{2} = \sum_I \texttt{src1}(I)^2} {if \(\texttt{normType} = \texttt{NORM_L2SQR}\)} { \| \texttt{src1} - \texttt{src2} \| _{L_2} ^{2} = \sum_I (\texttt{src1}(I) - \texttt{src2}(I))^2 }{if \(\texttt{normType} = \texttt{NORM_L2SQR}\) } - { \left(\frac{\|\texttt{src1}-\texttt{src2}\|_{L_2} }{\|\texttt{src2}\|_{L_2}}\right)^2 }{if \(\texttt{normType} = \texttt{NORM_RELATIVE | NORM_L2}\) } + { \left(\frac{\|\texttt{src1}-\texttt{src2}\|_{L_2} }{\|\texttt{src2}\|_{L_2}}\right)^2 }{if \(\texttt{normType} = \texttt{NORM_RELATIVE | NORM_L2SQR}\) } \f] */ NORM_L2SQR = 5, diff --git a/modules/core/misc/java/src/java/core+CvType.java b/modules/core/misc/java/src/java/core+CvType.java index a03b794bb9..fcf616fe02 100644 --- a/modules/core/misc/java/src/java/core+CvType.java 
+++ b/modules/core/misc/java/src/java/core+CvType.java @@ -34,11 +34,11 @@ public final class CvType { public static final int makeType(int depth, int channels) { if (channels <= 0 || channels >= CV_CN_MAX) { - throw new java.lang.UnsupportedOperationException( + throw new UnsupportedOperationException( "Channels count should be 1.." + (CV_CN_MAX - 1)); } if (depth < 0 || depth >= CV_DEPTH_MAX) { - throw new java.lang.UnsupportedOperationException( + throw new UnsupportedOperationException( "Data type depth should be 0.." + (CV_DEPTH_MAX - 1)); } return (depth & (CV_DEPTH_MAX - 1)) + ((channels - 1) << CV_CN_SHIFT); @@ -103,7 +103,7 @@ public final class CvType { case CV_64F: return 8 * channels(type); default: - throw new java.lang.UnsupportedOperationException( + throw new UnsupportedOperationException( "Unsupported CvType value: " + type); } } @@ -136,7 +136,7 @@ public final class CvType { s = "CV_16F"; break; default: - throw new java.lang.UnsupportedOperationException( + throw new UnsupportedOperationException( "Unsupported CvType value: " + type); } diff --git a/modules/core/misc/java/src/java/core+Mat.java b/modules/core/misc/java/src/java/core+Mat.java index e42fca9897..3bcb1ee9f7 100644 --- a/modules/core/misc/java/src/java/core+Mat.java +++ b/modules/core/misc/java/src/java/core+Mat.java @@ -11,7 +11,7 @@ public class Mat { public Mat(long addr) { if (addr == 0) - throw new java.lang.UnsupportedOperationException("Native object address is NULL"); + throw new UnsupportedOperationException("Native object address is NULL"); nativeObj = addr; } @@ -1074,7 +1074,7 @@ public class Mat { public int put(int row, int col, double... data) { int t = type(); if (data == null || data.length % CvType.channels(t) != 0) - throw new java.lang.UnsupportedOperationException( + throw new UnsupportedOperationException( "Provided data element number (" + (data == null ? 
0 : data.length) + ") should be multiple of the Mat channels count (" + @@ -1086,7 +1086,7 @@ public class Mat { public int put(int[] idx, double... data) { int t = type(); if (data == null || data.length % CvType.channels(t) != 0) - throw new java.lang.UnsupportedOperationException( + throw new UnsupportedOperationException( "Provided data element number (" + (data == null ? 0 : data.length) + ") should be multiple of the Mat channels count (" + @@ -1100,7 +1100,7 @@ public class Mat { public int put(int row, int col, float[] data) { int t = type(); if (data == null || data.length % CvType.channels(t) != 0) - throw new java.lang.UnsupportedOperationException( + throw new UnsupportedOperationException( "Provided data element number (" + (data == null ? 0 : data.length) + ") should be multiple of the Mat channels count (" + @@ -1108,14 +1108,14 @@ public class Mat { if (CvType.depth(t) == CvType.CV_32F) { return nPutF(nativeObj, row, col, data.length, data); } - throw new java.lang.UnsupportedOperationException("Mat data type is not compatible: " + t); + throw new UnsupportedOperationException("Mat data type is not compatible: " + t); } // javadoc:Mat::put(idx,data) public int put(int[] idx, float[] data) { int t = type(); if (data == null || data.length % CvType.channels(t) != 0) - throw new java.lang.UnsupportedOperationException( + throw new UnsupportedOperationException( "Provided data element number (" + (data == null ? 
0 : data.length) + ") should be multiple of the Mat channels count (" + @@ -1125,14 +1125,14 @@ public class Mat { if (CvType.depth(t) == CvType.CV_32F) { return nPutFIdx(nativeObj, idx, data.length, data); } - throw new java.lang.UnsupportedOperationException("Mat data type is not compatible: " + t); + throw new UnsupportedOperationException("Mat data type is not compatible: " + t); } // javadoc:Mat::put(row,col,data) public int put(int row, int col, int[] data) { int t = type(); if (data == null || data.length % CvType.channels(t) != 0) - throw new java.lang.UnsupportedOperationException( + throw new UnsupportedOperationException( "Provided data element number (" + (data == null ? 0 : data.length) + ") should be multiple of the Mat channels count (" + @@ -1140,14 +1140,14 @@ public class Mat { if (CvType.depth(t) == CvType.CV_32S) { return nPutI(nativeObj, row, col, data.length, data); } - throw new java.lang.UnsupportedOperationException("Mat data type is not compatible: " + t); + throw new UnsupportedOperationException("Mat data type is not compatible: " + t); } // javadoc:Mat::put(idx,data) public int put(int[] idx, int[] data) { int t = type(); if (data == null || data.length % CvType.channels(t) != 0) - throw new java.lang.UnsupportedOperationException( + throw new UnsupportedOperationException( "Provided data element number (" + (data == null ? 
0 : data.length) + ") should be multiple of the Mat channels count (" + @@ -1157,14 +1157,14 @@ public class Mat { if (CvType.depth(t) == CvType.CV_32S) { return nPutIIdx(nativeObj, idx, data.length, data); } - throw new java.lang.UnsupportedOperationException("Mat data type is not compatible: " + t); + throw new UnsupportedOperationException("Mat data type is not compatible: " + t); } // javadoc:Mat::put(row,col,data) public int put(int row, int col, short[] data) { int t = type(); if (data == null || data.length % CvType.channels(t) != 0) - throw new java.lang.UnsupportedOperationException( + throw new UnsupportedOperationException( "Provided data element number (" + (data == null ? 0 : data.length) + ") should be multiple of the Mat channels count (" + @@ -1172,14 +1172,14 @@ public class Mat { if (CvType.depth(t) == CvType.CV_16U || CvType.depth(t) == CvType.CV_16S) { return nPutS(nativeObj, row, col, data.length, data); } - throw new java.lang.UnsupportedOperationException("Mat data type is not compatible: " + t); + throw new UnsupportedOperationException("Mat data type is not compatible: " + t); } // javadoc:Mat::put(idx,data) public int put(int[] idx, short[] data) { int t = type(); if (data == null || data.length % CvType.channels(t) != 0) - throw new java.lang.UnsupportedOperationException( + throw new UnsupportedOperationException( "Provided data element number (" + (data == null ? 
0 : data.length) + ") should be multiple of the Mat channels count (" + @@ -1189,14 +1189,14 @@ public class Mat { if (CvType.depth(t) == CvType.CV_16U || CvType.depth(t) == CvType.CV_16S) { return nPutSIdx(nativeObj, idx, data.length, data); } - throw new java.lang.UnsupportedOperationException("Mat data type is not compatible: " + t); + throw new UnsupportedOperationException("Mat data type is not compatible: " + t); } // javadoc:Mat::put(row,col,data) public int put(int row, int col, byte[] data) { int t = type(); if (data == null || data.length % CvType.channels(t) != 0) - throw new java.lang.UnsupportedOperationException( + throw new UnsupportedOperationException( "Provided data element number (" + (data == null ? 0 : data.length) + ") should be multiple of the Mat channels count (" + @@ -1204,14 +1204,14 @@ public class Mat { if (CvType.depth(t) == CvType.CV_8U || CvType.depth(t) == CvType.CV_8S) { return nPutB(nativeObj, row, col, data.length, data); } - throw new java.lang.UnsupportedOperationException("Mat data type is not compatible: " + t); + throw new UnsupportedOperationException("Mat data type is not compatible: " + t); } // javadoc:Mat::put(idx,data) public int put(int[] idx, byte[] data) { int t = type(); if (data == null || data.length % CvType.channels(t) != 0) - throw new java.lang.UnsupportedOperationException( + throw new UnsupportedOperationException( "Provided data element number (" + (data == null ? 
0 : data.length) + ") should be multiple of the Mat channels count (" + @@ -1221,14 +1221,14 @@ public class Mat { if (CvType.depth(t) == CvType.CV_8U || CvType.depth(t) == CvType.CV_8S) { return nPutBIdx(nativeObj, idx, data.length, data); } - throw new java.lang.UnsupportedOperationException("Mat data type is not compatible: " + t); + throw new UnsupportedOperationException("Mat data type is not compatible: " + t); } // javadoc:Mat::put(row,col,data,offset,length) public int put(int row, int col, byte[] data, int offset, int length) { int t = type(); if (data == null || length % CvType.channels(t) != 0) - throw new java.lang.UnsupportedOperationException( + throw new UnsupportedOperationException( "Provided data element number (" + (data == null ? 0 : data.length) + ") should be multiple of the Mat channels count (" + @@ -1236,14 +1236,14 @@ public class Mat { if (CvType.depth(t) == CvType.CV_8U || CvType.depth(t) == CvType.CV_8S) { return nPutBwOffset(nativeObj, row, col, length, offset, data); } - throw new java.lang.UnsupportedOperationException("Mat data type is not compatible: " + t); + throw new UnsupportedOperationException("Mat data type is not compatible: " + t); } // javadoc:Mat::put(idx,data,offset,length) public int put(int[] idx, byte[] data, int offset, int length) { int t = type(); if (data == null || length % CvType.channels(t) != 0) - throw new java.lang.UnsupportedOperationException( + throw new UnsupportedOperationException( "Provided data element number (" + (data == null ? 
0 : data.length) + ") should be multiple of the Mat channels count (" + @@ -1253,14 +1253,14 @@ public class Mat { if (CvType.depth(t) == CvType.CV_8U || CvType.depth(t) == CvType.CV_8S) { return nPutBwIdxOffset(nativeObj, idx, length, offset, data); } - throw new java.lang.UnsupportedOperationException("Mat data type is not compatible: " + t); + throw new UnsupportedOperationException("Mat data type is not compatible: " + t); } // javadoc:Mat::get(row,col,data) public int get(int row, int col, byte[] data) { int t = type(); if (data == null || data.length % CvType.channels(t) != 0) - throw new java.lang.UnsupportedOperationException( + throw new UnsupportedOperationException( "Provided data element number (" + (data == null ? 0 : data.length) + ") should be multiple of the Mat channels count (" + @@ -1268,14 +1268,14 @@ public class Mat { if (CvType.depth(t) == CvType.CV_8U || CvType.depth(t) == CvType.CV_8S) { return nGetB(nativeObj, row, col, data.length, data); } - throw new java.lang.UnsupportedOperationException("Mat data type is not compatible: " + t); + throw new UnsupportedOperationException("Mat data type is not compatible: " + t); } // javadoc:Mat::get(idx,data) public int get(int[] idx, byte[] data) { int t = type(); if (data == null || data.length % CvType.channels(t) != 0) - throw new java.lang.UnsupportedOperationException( + throw new UnsupportedOperationException( "Provided data element number (" + (data == null ? 
0 : data.length) + ") should be multiple of the Mat channels count (" + @@ -1285,14 +1285,14 @@ public class Mat { if (CvType.depth(t) == CvType.CV_8U || CvType.depth(t) == CvType.CV_8S) { return nGetBIdx(nativeObj, idx, data.length, data); } - throw new java.lang.UnsupportedOperationException("Mat data type is not compatible: " + t); + throw new UnsupportedOperationException("Mat data type is not compatible: " + t); } // javadoc:Mat::get(row,col,data) public int get(int row, int col, short[] data) { int t = type(); if (data == null || data.length % CvType.channels(t) != 0) - throw new java.lang.UnsupportedOperationException( + throw new UnsupportedOperationException( "Provided data element number (" + (data == null ? 0 : data.length) + ") should be multiple of the Mat channels count (" + @@ -1300,14 +1300,14 @@ public class Mat { if (CvType.depth(t) == CvType.CV_16U || CvType.depth(t) == CvType.CV_16S) { return nGetS(nativeObj, row, col, data.length, data); } - throw new java.lang.UnsupportedOperationException("Mat data type is not compatible: " + t); + throw new UnsupportedOperationException("Mat data type is not compatible: " + t); } // javadoc:Mat::get(idx,data) public int get(int[] idx, short[] data) { int t = type(); if (data == null || data.length % CvType.channels(t) != 0) - throw new java.lang.UnsupportedOperationException( + throw new UnsupportedOperationException( "Provided data element number (" + (data == null ? 
0 : data.length) + ") should be multiple of the Mat channels count (" + @@ -1317,14 +1317,14 @@ public class Mat { if (CvType.depth(t) == CvType.CV_16U || CvType.depth(t) == CvType.CV_16S) { return nGetSIdx(nativeObj, idx, data.length, data); } - throw new java.lang.UnsupportedOperationException("Mat data type is not compatible: " + t); + throw new UnsupportedOperationException("Mat data type is not compatible: " + t); } // javadoc:Mat::get(row,col,data) public int get(int row, int col, int[] data) { int t = type(); if (data == null || data.length % CvType.channels(t) != 0) - throw new java.lang.UnsupportedOperationException( + throw new UnsupportedOperationException( "Provided data element number (" + (data == null ? 0 : data.length) + ") should be multiple of the Mat channels count (" + @@ -1332,14 +1332,14 @@ public class Mat { if (CvType.depth(t) == CvType.CV_32S) { return nGetI(nativeObj, row, col, data.length, data); } - throw new java.lang.UnsupportedOperationException("Mat data type is not compatible: " + t); + throw new UnsupportedOperationException("Mat data type is not compatible: " + t); } // javadoc:Mat::get(idx,data) public int get(int[] idx, int[] data) { int t = type(); if (data == null || data.length % CvType.channels(t) != 0) - throw new java.lang.UnsupportedOperationException( + throw new UnsupportedOperationException( "Provided data element number (" + (data == null ? 
0 : data.length) + ") should be multiple of the Mat channels count (" + @@ -1349,14 +1349,14 @@ public class Mat { if (CvType.depth(t) == CvType.CV_32S) { return nGetIIdx(nativeObj, idx, data.length, data); } - throw new java.lang.UnsupportedOperationException("Mat data type is not compatible: " + t); + throw new UnsupportedOperationException("Mat data type is not compatible: " + t); } // javadoc:Mat::get(row,col,data) public int get(int row, int col, float[] data) { int t = type(); if (data == null || data.length % CvType.channels(t) != 0) - throw new java.lang.UnsupportedOperationException( + throw new UnsupportedOperationException( "Provided data element number (" + (data == null ? 0 : data.length) + ") should be multiple of the Mat channels count (" + @@ -1364,14 +1364,14 @@ public class Mat { if (CvType.depth(t) == CvType.CV_32F) { return nGetF(nativeObj, row, col, data.length, data); } - throw new java.lang.UnsupportedOperationException("Mat data type is not compatible: " + t); + throw new UnsupportedOperationException("Mat data type is not compatible: " + t); } // javadoc:Mat::get(idx,data) public int get(int[] idx, float[] data) { int t = type(); if (data == null || data.length % CvType.channels(t) != 0) - throw new java.lang.UnsupportedOperationException( + throw new UnsupportedOperationException( "Provided data element number (" + (data == null ? 
0 : data.length) + ") should be multiple of the Mat channels count (" + @@ -1381,14 +1381,14 @@ public class Mat { if (CvType.depth(t) == CvType.CV_32F) { return nGetFIdx(nativeObj, idx, data.length, data); } - throw new java.lang.UnsupportedOperationException("Mat data type is not compatible: " + t); + throw new UnsupportedOperationException("Mat data type is not compatible: " + t); } // javadoc:Mat::get(row,col,data) public int get(int row, int col, double[] data) { int t = type(); if (data == null || data.length % CvType.channels(t) != 0) - throw new java.lang.UnsupportedOperationException( + throw new UnsupportedOperationException( "Provided data element number (" + (data == null ? 0 : data.length) + ") should be multiple of the Mat channels count (" + @@ -1396,14 +1396,14 @@ public class Mat { if (CvType.depth(t) == CvType.CV_64F) { return nGetD(nativeObj, row, col, data.length, data); } - throw new java.lang.UnsupportedOperationException("Mat data type is not compatible: " + t); + throw new UnsupportedOperationException("Mat data type is not compatible: " + t); } // javadoc:Mat::get(idx,data) public int get(int[] idx, double[] data) { int t = type(); if (data == null || data.length % CvType.channels(t) != 0) - throw new java.lang.UnsupportedOperationException( + throw new UnsupportedOperationException( "Provided data element number (" + (data == null ? 
0 : data.length) + ") should be multiple of the Mat channels count (" + @@ -1413,7 +1413,7 @@ public class Mat { if (CvType.depth(t) == CvType.CV_64F) { return nGetDIdx(nativeObj, idx, data.length, data); } - throw new java.lang.UnsupportedOperationException("Mat data type is not compatible: " + t); + throw new UnsupportedOperationException("Mat data type is not compatible: " + t); } // javadoc:Mat::get(row,col) diff --git a/modules/dnn/include/opencv2/dnn/dnn.hpp b/modules/dnn/include/opencv2/dnn/dnn.hpp index 02cc825cff..dce2bd7b73 100644 --- a/modules/dnn/include/opencv2/dnn/dnn.hpp +++ b/modules/dnn/include/opencv2/dnn/dnn.hpp @@ -816,6 +816,7 @@ CV__DNN_INLINE_NS_BEGIN * * `*.t7` | `*.net` (Torch, http://torch.ch/) * * `*.weights` (Darknet, https://pjreddie.com/darknet/) * * `*.bin` (DLDT, https://software.intel.com/openvino-toolkit) + * * `*.onnx` (ONNX, https://onnx.ai/) * @param[in] config Text file contains network configuration. It could be a * file with the following extensions: * * `*.prototxt` (Caffe, http://caffe.berkeleyvision.org/) @@ -864,6 +865,23 @@ CV__DNN_INLINE_NS_BEGIN */ CV_EXPORTS_W Net readNetFromONNX(const String &onnxFile); + /** @brief Reads a network model from ONNX + * in-memory buffer. + * @param buffer memory address of the first byte of the buffer. + * @param sizeBuffer size of the buffer. + * @returns Network object that ready to do forward, throw an exception + * in failure cases. + */ + CV_EXPORTS Net readNetFromONNX(const char* buffer, size_t sizeBuffer); + + /** @brief Reads a network model from ONNX + * in-memory buffer. + * @param buffer in-memory buffer that stores the ONNX model bytes. + * @returns Network object that ready to do forward, throw an exception + * in failure cases. + */ + CV_EXPORTS_W Net readNetFromONNX(const std::vector& buffer); + /** @brief Creates blob from .pb file. * @param path to the .pb file with input tensor. * @returns Mat. 
diff --git a/modules/dnn/src/layers/batch_norm_layer.cpp b/modules/dnn/src/layers/batch_norm_layer.cpp index 4c69c247c4..b2fd75aef1 100644 --- a/modules/dnn/src/layers/batch_norm_layer.cpp +++ b/modules/dnn/src/layers/batch_norm_layer.cpp @@ -29,6 +29,8 @@ class BatchNormLayerImpl CV_FINAL : public BatchNormLayer public: Mat weights_, bias_; UMat umat_weight, umat_bias; + mutable int dims; + BatchNormLayerImpl(const LayerParams& params) { @@ -142,6 +144,7 @@ public: std::vector &outputs, std::vector &internals) const CV_OVERRIDE { + dims = inputs[0].size(); if (!useGlobalStats && inputs[0][0] != 1) CV_Error(Error::StsNotImplemented, "Batch normalization in training mode with batch size > 1"); Layer::getMemoryShapes(inputs, requiredOutputs, outputs, internals); @@ -150,9 +153,9 @@ public: virtual bool supportBackend(int backendId) CV_OVERRIDE { - return backendId == DNN_BACKEND_OPENCV || + return (backendId == DNN_BACKEND_OPENCV) || (backendId == DNN_BACKEND_HALIDE && haveHalide()) || - (backendId == DNN_BACKEND_INFERENCE_ENGINE && haveInfEngine()); + (backendId == DNN_BACKEND_INFERENCE_ENGINE && haveInfEngine() && (preferableTarget == DNN_TARGET_CPU || dims == 4)); } #ifdef HAVE_OPENCL @@ -178,11 +181,12 @@ public: } UMat &inpBlob = inputs[0]; - CV_Assert(inpBlob.dims == 2 || inpBlob.dims == 4); int groups = inpBlob.size[0]; int channels = inpBlob.size[1]; - int rows = inpBlob.dims > 2 ? inpBlob.size[2] : 1; - int cols = inpBlob.dims > 2 ? inpBlob.size[3] : 1; + int planeSize = 1; + for (size_t i = 2; i < inpBlob.dims; i++) { + planeSize *= inpBlob.size[i]; + } String opts = (use_half) ? " -DDtype=half" : " -DDtype=float"; for (size_t ii = 0; ii < outputs.size(); ii++) @@ -196,7 +200,7 @@ public: } else { - MatShape s = shape(groups * channels, rows * cols); + MatShape s = shape(groups * channels, planeSize); UMat src = inputs[ii].reshape(1, s.size(), &s[0]); UMat dst = outputs[ii].reshape(1, s.size(), &s[0]); int number = (s[1] % 8 == 0) ? 8 : ((s[1] % 4 == 0) ? 
4 : 1); @@ -248,9 +252,10 @@ public: CV_Assert(inputs.size() == 1); Mat &inpBlob = inputs[0]; - CV_Assert(inpBlob.dims == 2 || inpBlob.dims == 4); - int rows = inpBlob.dims > 2 ? inpBlob.size[2] : 1; - int cols = inpBlob.dims > 2 ? inpBlob.size[3] : 1; + int planeSize = 1; + for (size_t i = 2; i < inpBlob.dims; i++) { + planeSize *= inpBlob.size[i]; + } for (size_t ii = 0; ii < outputs.size(); ii++) { @@ -262,8 +267,8 @@ public: { float w = weights_.at(n); float b = bias_.at(n); - Mat inpBlobPlane(rows, cols, CV_32F, inpBlob.ptr(num, n)); - Mat outBlobPlane(rows, cols, CV_32F, outBlob.ptr(num, n)); + Mat inpBlobPlane(1, planeSize, CV_32F, inpBlob.ptr(num, n)); + Mat outBlobPlane(1, planeSize, CV_32F, outBlob.ptr(num, n)); inpBlobPlane.convertTo(outBlobPlane, CV_32F, w, b); } } diff --git a/modules/dnn/src/onnx/onnx_importer.cpp b/modules/dnn/src/onnx/onnx_importer.cpp index e55d7cd5a4..ac91907c5d 100644 --- a/modules/dnn/src/onnx/onnx_importer.cpp +++ b/modules/dnn/src/onnx/onnx_importer.cpp @@ -57,6 +57,24 @@ public: CV_Error(Error::StsUnsupportedFormat, "Failed to parse onnx model"); } + ONNXImporter(const char* buffer, size_t sizeBuffer) + { + struct _Buf : public std::streambuf + { + _Buf(const char* buffer, size_t sizeBuffer) + { + char* p = const_cast(buffer); + setg(p, p, p + sizeBuffer); + } + }; + + _Buf buf(buffer, sizeBuffer); + std::istream input(&buf); + + if (!model_proto.ParseFromIstream(&input)) + CV_Error(Error::StsUnsupportedFormat, "Failed to parse onnx model from in-memory byte array."); + } + void populateNet(Net dstNet); }; @@ -768,37 +786,42 @@ void ONNXImporter::populateNet(Net dstNet) } replaceLayerParam(layerParams, "mode", "interpolation"); } + else if (layer_type == "LogSoftmax") + { + layerParams.type = "Softmax"; + layerParams.set("log_softmax", true); + } else { for (int j = 0; j < node_proto.input_size(); j++) { if (layer_id.find(node_proto.input(j)) == layer_id.end()) layerParams.blobs.push_back(getBlob(node_proto, constBlobs, j)); 
} - } + } - int id = dstNet.addLayer(layerParams.name, layerParams.type, layerParams); - layer_id.insert(std::make_pair(layerParams.name, LayerInfo(id, 0))); + int id = dstNet.addLayer(layerParams.name, layerParams.type, layerParams); + layer_id.insert(std::make_pair(layerParams.name, LayerInfo(id, 0))); - std::vector layerInpShapes, layerOutShapes, layerInternalShapes; - for (int j = 0; j < node_proto.input_size(); j++) { - layerId = layer_id.find(node_proto.input(j)); - if (layerId != layer_id.end()) { - dstNet.connect(layerId->second.layerId, layerId->second.outputId, id, j); - // Collect input shapes. - shapeIt = outShapes.find(node_proto.input(j)); - CV_Assert(shapeIt != outShapes.end()); - layerInpShapes.push_back(shapeIt->second); - } - } + std::vector layerInpShapes, layerOutShapes, layerInternalShapes; + for (int j = 0; j < node_proto.input_size(); j++) { + layerId = layer_id.find(node_proto.input(j)); + if (layerId != layer_id.end()) { + dstNet.connect(layerId->second.layerId, layerId->second.outputId, id, j); + // Collect input shapes. + shapeIt = outShapes.find(node_proto.input(j)); + CV_Assert(shapeIt != outShapes.end()); + layerInpShapes.push_back(shapeIt->second); + } + } - // Compute shape of output blob for this layer. - Ptr layer = dstNet.getLayer(id); - layer->getMemoryShapes(layerInpShapes, 0, layerOutShapes, layerInternalShapes); - CV_Assert(!layerOutShapes.empty()); - outShapes[layerParams.name] = layerOutShapes[0]; - } - } + // Compute shape of output blob for this layer. 
+ Ptr layer = dstNet.getLayer(id); + layer->getMemoryShapes(layerInpShapes, 0, layerOutShapes, layerInternalShapes); + CV_Assert(!layerOutShapes.empty()); + outShapes[layerParams.name] = layerOutShapes[0]; + } +} Net readNetFromONNX(const String& onnxFile) { @@ -808,6 +831,19 @@ Net readNetFromONNX(const String& onnxFile) return net; } +Net readNetFromONNX(const char* buffer, size_t sizeBuffer) +{ + ONNXImporter onnxImporter(buffer, sizeBuffer); + Net net; + onnxImporter.populateNet(net); + return net; +} + +Net readNetFromONNX(const std::vector& buffer) +{ + return readNetFromONNX(reinterpret_cast(buffer.data()), buffer.size()); +} + Mat readTensorFromONNX(const String& path) { opencv_onnx::TensorProto tensor_proto = opencv_onnx::TensorProto(); diff --git a/modules/dnn/src/tensorflow/tf_importer.cpp b/modules/dnn/src/tensorflow/tf_importer.cpp index 41985c834d..5f34096534 100644 --- a/modules/dnn/src/tensorflow/tf_importer.cpp +++ b/modules/dnn/src/tensorflow/tf_importer.cpp @@ -1423,6 +1423,43 @@ void TFImporter::populateNet(Net dstNet) connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0); } + else if (type == "StridedSlice") + { + CV_Assert(layer.input_size() == 4); + Mat begins = getTensorContent(getConstBlob(layer, value_id, 1)); + Mat ends = getTensorContent(getConstBlob(layer, value_id, 2)); + Mat strides = getTensorContent(getConstBlob(layer, value_id, 3)); + CV_CheckTypeEQ(begins.type(), CV_32SC1, ""); + CV_CheckTypeEQ(ends.type(), CV_32SC1, ""); + CV_CheckTypeEQ(strides.type(), CV_32SC1, ""); + const int num = begins.total(); + CV_Assert_N(num == ends.total(), num == strides.total()); + + int end_mask = getLayerAttr(layer, "end_mask").i(); + for (int i = 0; i < num; ++i) + { + if (end_mask & (1 << i)) + ends.at(i) = -1; + if (strides.at(i) != 1) + CV_Error(Error::StsNotImplemented, + format("StridedSlice with stride %d", strides.at(i))); + } + if (begins.total() == 4 && getDataLayout(name, data_layouts) == DATA_LAYOUT_NHWC) + { + // Swap NHWC 
parameters' order to NCHW. + std::swap(begins.at(2), begins.at(3)); + std::swap(begins.at(1), begins.at(2)); + std::swap(ends.at(2), ends.at(3)); + std::swap(ends.at(1), ends.at(2)); + } + layerParams.set("begin", DictValue::arrayInt((int*)begins.data, begins.total())); + layerParams.set("end", DictValue::arrayInt((int*)ends.data, ends.total())); + + int id = dstNet.addLayer(name, "Slice", layerParams); + layer_id[name] = id; + + connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0); + } else if (type == "Mul") { bool haveConst = false; diff --git a/modules/dnn/test/test_onnx_importer.cpp b/modules/dnn/test/test_onnx_importer.cpp index f1b0a81e8e..e66012c304 100644 --- a/modules/dnn/test/test_onnx_importer.cpp +++ b/modules/dnn/test/test_onnx_importer.cpp @@ -167,6 +167,13 @@ TEST_P(Test_ONNX_layers, BatchNormalization) testONNXModels("batch_norm"); } +TEST_P(Test_ONNX_layers, BatchNormalization3D) +{ + if (backend == DNN_BACKEND_INFERENCE_ENGINE && target != DNN_TARGET_CPU) + throw SkipTestException(""); + testONNXModels("batch_norm_3d"); +} + TEST_P(Test_ONNX_layers, Transpose) { if (backend == DNN_BACKEND_INFERENCE_ENGINE && @@ -238,6 +245,12 @@ TEST_P(Test_ONNX_layers, Reshape) testONNXModels("unsqueeze"); } +TEST_P(Test_ONNX_layers, Softmax) +{ + testONNXModels("softmax"); + testONNXModels("log_softmax", npy, 0, 0, false, false); +} + INSTANTIATE_TEST_CASE_P(/*nothing*/, Test_ONNX_layers, dnnBackendsAndTargets()); class Test_ONNX_nets : public Test_ONNX_layers {}; diff --git a/modules/dnn/test/test_tf_importer.cpp b/modules/dnn/test/test_tf_importer.cpp index e662da53e7..1a70e8f471 100644 --- a/modules/dnn/test/test_tf_importer.cpp +++ b/modules/dnn/test/test_tf_importer.cpp @@ -188,6 +188,13 @@ TEST_P(Test_TensorFlow_layers, batch_norm) runTensorFlowNet("mvn_batch_norm_1x1"); } +TEST_P(Test_TensorFlow_layers, batch_norm3D) +{ + if (backend == DNN_BACKEND_INFERENCE_ENGINE && target != DNN_TARGET_CPU) + throw SkipTestException(""); + 
runTensorFlowNet("batch_norm3d"); +} + TEST_P(Test_TensorFlow_layers, slim_batch_norm) { if (backend == DNN_BACKEND_INFERENCE_ENGINE) @@ -656,6 +663,7 @@ TEST_P(Test_TensorFlow_layers, slice) (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16)) throw SkipTestException(""); runTensorFlowNet("slice_4d"); + runTensorFlowNet("strided_slice"); } TEST_P(Test_TensorFlow_layers, softmax) diff --git a/modules/imgproc/misc/java/src/java/imgproc+Moments.java b/modules/imgproc/misc/java/src/java/imgproc+Moments.java index 2eeebc9875..5c3d94c78c 100644 --- a/modules/imgproc/misc/java/src/java/imgproc+Moments.java +++ b/modules/imgproc/misc/java/src/java/imgproc+Moments.java @@ -1,7 +1,5 @@ package org.opencv.imgproc; -import java.lang.Math; - //javadoc:Moments public class Moments { diff --git a/modules/imgproc/src/color_lab.cpp b/modules/imgproc/src/color_lab.cpp index cb5c0fdf53..e488d26a8e 100644 --- a/modules/imgproc/src/color_lab.cpp +++ b/modules/imgproc/src/color_lab.cpp @@ -56,63 +56,42 @@ template static inline _Tp splineInterpolate(_Tp x, const _Tp* tab return ((tab[3]*x + tab[2])*x + tab[1])*x + tab[0]; } -#if CV_NEON -template static inline void splineInterpolate(float32x4_t& v_x, const _Tp* tab, int n) +#if CV_SIMD + +template static inline cv::v_float32 splineInterpolate(const cv::v_float32& x, const _Tp* tab, int n) { - int32x4_t v_ix = vcvtq_s32_f32(vminq_f32(vmaxq_f32(v_x, vdupq_n_f32(0)), vdupq_n_f32(n - 1))); - v_x = vsubq_f32(v_x, vcvtq_f32_s32(v_ix)); - v_ix = vshlq_n_s32(v_ix, 2); + using namespace cv; + v_int32 ix = v_min(v_max(v_trunc(x), vx_setzero_s32()), vx_setall_s32(n-1)); + cv::v_float32 xx = x - v_cvt_f32(ix); + ix = ix << 2; - int CV_DECL_ALIGNED(16) ix[4]; - vst1q_s32(ix, v_ix); + v_float32 t[4]; + // assume that v_float32::nlanes == v_int32::nlanes + if(v_float32::nlanes == 4) + { +#if CV_SIMD_WIDTH == 16 + int32_t CV_DECL_ALIGNED(CV_SIMD_WIDTH) idx[4]; + v_store_aligned(idx, ix); + v_float32x4 tt[4]; + tt[0] = v_load(tab + 
idx[0]); + tt[1] = v_load(tab + idx[1]); + tt[2] = v_load(tab + idx[2]); + tt[3] = v_load(tab + idx[3]); + v_transpose4x4(tt[0], tt[1], tt[2], tt[3], + t[0], t[1], t[2], t[3]); +#endif + } + else + { + t[0] = v_lut(tab + 0, ix); + t[1] = v_lut(tab + 1, ix); + t[2] = v_lut(tab + 2, ix); + t[3] = v_lut(tab + 3, ix); + } - float32x4_t v_tab0 = vld1q_f32(tab + ix[0]); - float32x4_t v_tab1 = vld1q_f32(tab + ix[1]); - float32x4_t v_tab2 = vld1q_f32(tab + ix[2]); - float32x4_t v_tab3 = vld1q_f32(tab + ix[3]); - - float32x4x2_t v01 = vtrnq_f32(v_tab0, v_tab1); - float32x4x2_t v23 = vtrnq_f32(v_tab2, v_tab3); - - v_tab0 = vcombine_f32(vget_low_f32(v01.val[0]), vget_low_f32(v23.val[0])); - v_tab1 = vcombine_f32(vget_low_f32(v01.val[1]), vget_low_f32(v23.val[1])); - v_tab2 = vcombine_f32(vget_high_f32(v01.val[0]), vget_high_f32(v23.val[0])); - v_tab3 = vcombine_f32(vget_high_f32(v01.val[1]), vget_high_f32(v23.val[1])); - - v_x = vmlaq_f32(v_tab0, vmlaq_f32(v_tab1, vmlaq_f32(v_tab2, v_tab3, v_x), v_x), v_x); + return v_fma(v_fma(v_fma(t[3], xx, t[2]), xx, t[1]), xx, t[0]); } -#elif CV_SSE2 -template static inline void splineInterpolate(__m128& v_x, const _Tp* tab, int n) -{ - __m128i v_ix = _mm_cvttps_epi32(_mm_min_ps(_mm_max_ps(v_x, _mm_setzero_ps()), _mm_set1_ps(float(n - 1)))); - v_x = _mm_sub_ps(v_x, _mm_cvtepi32_ps(v_ix)); - v_ix = _mm_slli_epi32(v_ix, 2); - int CV_DECL_ALIGNED(16) ix[4]; - _mm_store_si128((__m128i *)ix, v_ix); - - __m128 v_tab0 = _mm_loadu_ps(tab + ix[0]); - __m128 v_tab1 = _mm_loadu_ps(tab + ix[1]); - __m128 v_tab2 = _mm_loadu_ps(tab + ix[2]); - __m128 v_tab3 = _mm_loadu_ps(tab + ix[3]); - - __m128 v_tmp0 = _mm_unpacklo_ps(v_tab0, v_tab1); - __m128 v_tmp1 = _mm_unpacklo_ps(v_tab2, v_tab3); - __m128 v_tmp2 = _mm_unpackhi_ps(v_tab0, v_tab1); - __m128 v_tmp3 = _mm_unpackhi_ps(v_tab2, v_tab3); - - v_tab0 = _mm_shuffle_ps(v_tmp0, v_tmp1, 0x44); - v_tab2 = _mm_shuffle_ps(v_tmp2, v_tmp3, 0x44); - v_tab1 = _mm_shuffle_ps(v_tmp0, v_tmp1, 0xee); - v_tab3 = 
_mm_shuffle_ps(v_tmp2, v_tmp3, 0xee); - - __m128 v_l = _mm_mul_ps(v_x, v_tab3); - v_l = _mm_add_ps(v_l, v_tab2); - v_l = _mm_mul_ps(v_l, v_x); - v_l = _mm_add_ps(v_l, v_tab1); - v_l = _mm_mul_ps(v_l, v_x); - v_x = _mm_add_ps(v_l, v_tab0); -} #endif namespace cv @@ -201,7 +180,6 @@ template struct RGB2XYZ_f float coeffs[9]; }; -#if CV_NEON template <> struct RGB2XYZ_f @@ -218,175 +196,59 @@ struct RGB2XYZ_f std::swap(coeffs[3], coeffs[5]); std::swap(coeffs[6], coeffs[8]); } - - v_c0 = vdupq_n_f32(coeffs[0]); - v_c1 = vdupq_n_f32(coeffs[1]); - v_c2 = vdupq_n_f32(coeffs[2]); - v_c3 = vdupq_n_f32(coeffs[3]); - v_c4 = vdupq_n_f32(coeffs[4]); - v_c5 = vdupq_n_f32(coeffs[5]); - v_c6 = vdupq_n_f32(coeffs[6]); - v_c7 = vdupq_n_f32(coeffs[7]); - v_c8 = vdupq_n_f32(coeffs[8]); } void operator()(const float* src, float* dst, int n) const { - int scn = srccn, i = 0; + CV_INSTRUMENT_REGION(); + + int scn = srccn; float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5], C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8]; - - n *= 3; - - if (scn == 3) - for ( ; i <= n - 12; i += 12, src += 12) + int i = 0; +#if CV_SIMD + const int vsize = v_float32::nlanes; + v_float32 vc0 = vx_setall_f32(C0), vc1 = vx_setall_f32(C1), vc2 = vx_setall_f32(C2); + v_float32 vc3 = vx_setall_f32(C3), vc4 = vx_setall_f32(C4), vc5 = vx_setall_f32(C5); + v_float32 vc6 = vx_setall_f32(C6), vc7 = vx_setall_f32(C7), vc8 = vx_setall_f32(C8); + for( ; i <= n-vsize; + i += vsize, src += scn*vsize, dst += vsize*3) + { + v_float32 b, g, r, a; + if(scn == 4) { - float32x4x3_t v_src = vld3q_f32(src), v_dst; - v_dst.val[0] = vmlaq_f32(vmlaq_f32(vmulq_f32(v_src.val[0], v_c0), v_src.val[1], v_c1), v_src.val[2], v_c2); - v_dst.val[1] = vmlaq_f32(vmlaq_f32(vmulq_f32(v_src.val[0], v_c3), v_src.val[1], v_c4), v_src.val[2], v_c5); - v_dst.val[2] = vmlaq_f32(vmlaq_f32(vmulq_f32(v_src.val[0], v_c6), v_src.val[1], v_c7), v_src.val[2], v_c8); - vst3q_f32(dst + i, v_dst); + 
v_load_deinterleave(src, b, g, r, a); } - else - for ( ; i <= n - 12; i += 12, src += 16) + else // scn == 3 { - float32x4x4_t v_src = vld4q_f32(src); - float32x4x3_t v_dst; - v_dst.val[0] = vmlaq_f32(vmlaq_f32(vmulq_f32(v_src.val[0], v_c0), v_src.val[1], v_c1), v_src.val[2], v_c2); - v_dst.val[1] = vmlaq_f32(vmlaq_f32(vmulq_f32(v_src.val[0], v_c3), v_src.val[1], v_c4), v_src.val[2], v_c5); - v_dst.val[2] = vmlaq_f32(vmlaq_f32(vmulq_f32(v_src.val[0], v_c6), v_src.val[1], v_c7), v_src.val[2], v_c8); - vst3q_f32(dst + i, v_dst); + v_load_deinterleave(src, b, g, r); } - for ( ; i < n; i += 3, src += scn) - { - float X = saturate_cast(src[0]*C0 + src[1]*C1 + src[2]*C2); - float Y = saturate_cast(src[0]*C3 + src[1]*C4 + src[2]*C5); - float Z = saturate_cast(src[0]*C6 + src[1]*C7 + src[2]*C8); - dst[i] = X; dst[i+1] = Y; dst[i+2] = Z; + v_float32 x, y, z; + x = v_fma(b, vc0, v_fma(g, vc1, r*vc2)); + y = v_fma(b, vc3, v_fma(g, vc4, r*vc5)); + z = v_fma(b, vc6, v_fma(g, vc7, r*vc8)); + + v_store_interleave(dst, x, y, z); } - } - - int srccn; - float coeffs[9]; - float32x4_t v_c0, v_c1, v_c2, v_c3, v_c4, v_c5, v_c6, v_c7, v_c8; -}; - -#elif CV_SSE2 - -template <> -struct RGB2XYZ_f -{ - typedef float channel_type; - - RGB2XYZ_f(int _srccn, int blueIdx, const float* _coeffs) : srccn(_srccn) - { - for(int i = 0; i < 9; i++) - coeffs[i] = _coeffs ? 
_coeffs[i] : (float)sRGB2XYZ_D65[i]; - if(blueIdx == 0) - { - std::swap(coeffs[0], coeffs[2]); - std::swap(coeffs[3], coeffs[5]); - std::swap(coeffs[6], coeffs[8]); - } - - v_c0 = _mm_set1_ps(coeffs[0]); - v_c1 = _mm_set1_ps(coeffs[1]); - v_c2 = _mm_set1_ps(coeffs[2]); - v_c3 = _mm_set1_ps(coeffs[3]); - v_c4 = _mm_set1_ps(coeffs[4]); - v_c5 = _mm_set1_ps(coeffs[5]); - v_c6 = _mm_set1_ps(coeffs[6]); - v_c7 = _mm_set1_ps(coeffs[7]); - v_c8 = _mm_set1_ps(coeffs[8]); - - haveSIMD = checkHardwareSupport(CV_CPU_SSE2); - } - - void process(__m128 v_r, __m128 v_g, __m128 v_b, - __m128 & v_x, __m128 & v_y, __m128 & v_z) const - { - v_x = _mm_mul_ps(v_r, v_c0); - v_x = _mm_add_ps(v_x, _mm_mul_ps(v_g, v_c1)); - v_x = _mm_add_ps(v_x, _mm_mul_ps(v_b, v_c2)); - - v_y = _mm_mul_ps(v_r, v_c3); - v_y = _mm_add_ps(v_y, _mm_mul_ps(v_g, v_c4)); - v_y = _mm_add_ps(v_y, _mm_mul_ps(v_b, v_c5)); - - v_z = _mm_mul_ps(v_r, v_c6); - v_z = _mm_add_ps(v_z, _mm_mul_ps(v_g, v_c7)); - v_z = _mm_add_ps(v_z, _mm_mul_ps(v_b, v_c8)); - } - - void operator()(const float* src, float* dst, int n) const - { - int scn = srccn, i = 0; - float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], - C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5], - C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8]; - - n *= 3; - - if (haveSIMD) - { - for ( ; i <= n - 24; i += 24, src += 8 * scn) - { - __m128 v_r0 = _mm_loadu_ps(src); - __m128 v_r1 = _mm_loadu_ps(src + 4); - __m128 v_g0 = _mm_loadu_ps(src + 8); - __m128 v_g1 = _mm_loadu_ps(src + 12); - __m128 v_b0 = _mm_loadu_ps(src + 16); - __m128 v_b1 = _mm_loadu_ps(src + 20); - - if (scn == 4) - { - __m128 v_a0 = _mm_loadu_ps(src + 24); - __m128 v_a1 = _mm_loadu_ps(src + 28); - - _mm_deinterleave_ps(v_r0, v_r1, v_g0, v_g1, - v_b0, v_b1, v_a0, v_a1); - } - else - _mm_deinterleave_ps(v_r0, v_r1, v_g0, v_g1, v_b0, v_b1); - - __m128 v_x0, v_y0, v_z0; - process(v_r0, v_g0, v_b0, - v_x0, v_y0, v_z0); - - __m128 v_x1, v_y1, v_z1; - process(v_r1, v_g1, v_b1, - v_x1, v_y1, v_z1); - - 
_mm_interleave_ps(v_x0, v_x1, v_y0, v_y1, v_z0, v_z1); - - _mm_storeu_ps(dst + i, v_x0); - _mm_storeu_ps(dst + i + 4, v_x1); - _mm_storeu_ps(dst + i + 8, v_y0); - _mm_storeu_ps(dst + i + 12, v_y1); - _mm_storeu_ps(dst + i + 16, v_z0); - _mm_storeu_ps(dst + i + 20, v_z1); - } - } - - for ( ; i < n; i += 3, src += scn) - { - float X = saturate_cast(src[0]*C0 + src[1]*C1 + src[2]*C2); - float Y = saturate_cast(src[0]*C3 + src[1]*C4 + src[2]*C5); - float Z = saturate_cast(src[0]*C6 + src[1]*C7 + src[2]*C8); - dst[i] = X; dst[i+1] = Y; dst[i+2] = Z; - } - } - - int srccn; - float coeffs[9]; - __m128 v_c0, v_c1, v_c2, v_c3, v_c4, v_c5, v_c6, v_c7, v_c8; - bool haveSIMD; -}; - - #endif + for( ; i < n; i++, src += scn, dst += 3) + { + float b = src[0], g = src[1], r = src[2]; + + float X = saturate_cast(b*C0 + g*C1 + r*C2); + float Y = saturate_cast(b*C3 + g*C4 + r*C5); + float Z = saturate_cast(b*C6 + g*C7 + r*C8); + + dst[0] = X; dst[1] = Y; dst[2] = Z; + } + } + + int srccn; + float coeffs[9]; +}; + template struct RGB2XYZ_i { @@ -424,235 +286,244 @@ template struct RGB2XYZ_i int coeffs[9]; }; -#if CV_NEON template <> struct RGB2XYZ_i { typedef uchar channel_type; + static const int shift = xyz_shift; RGB2XYZ_i(int _srccn, int blueIdx, const float* _coeffs) : srccn(_srccn) { for( int i = 0; i < 9; i++ ) - coeffs[i] = _coeffs ? cvRound(_coeffs[i]*(1 << xyz_shift)) : sRGB2XYZ_D65_i[i]; + coeffs[i] = _coeffs ? 
cvRound(_coeffs[i]*(1 << shift)) : sRGB2XYZ_D65_i[i]; if(blueIdx == 0) { std::swap(coeffs[0], coeffs[2]); std::swap(coeffs[3], coeffs[5]); std::swap(coeffs[6], coeffs[8]); } - - v_c0 = vdup_n_u16(coeffs[0]); - v_c1 = vdup_n_u16(coeffs[1]); - v_c2 = vdup_n_u16(coeffs[2]); - v_c3 = vdup_n_u16(coeffs[3]); - v_c4 = vdup_n_u16(coeffs[4]); - v_c5 = vdup_n_u16(coeffs[5]); - v_c6 = vdup_n_u16(coeffs[6]); - v_c7 = vdup_n_u16(coeffs[7]); - v_c8 = vdup_n_u16(coeffs[8]); - v_delta = vdupq_n_u32(1 << (xyz_shift - 1)); } void operator()(const uchar * src, uchar * dst, int n) const { + CV_INSTRUMENT_REGION(); + int scn = srccn, i = 0; int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5], C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8]; - n *= 3; - for ( ; i <= n - 24; i += 24, src += scn * 8) +#if CV_SIMD + const int vsize = v_uint8::nlanes; + int descaleShift = 1 << (shift-1); + v_int16 vdescale = vx_setall_s16((short)descaleShift); + v_int16 cxbg, cxr1, cybg, cyr1, czbg, czr1; + v_int16 dummy; + v_zip(vx_setall_s16((short)C0), vx_setall_s16((short)C1), cxbg, dummy); + v_zip(vx_setall_s16((short)C2), vx_setall_s16( 1), cxr1, dummy); + v_zip(vx_setall_s16((short)C3), vx_setall_s16((short)C4), cybg, dummy); + v_zip(vx_setall_s16((short)C5), vx_setall_s16( 1), cyr1, dummy); + v_zip(vx_setall_s16((short)C6), vx_setall_s16((short)C7), czbg, dummy); + v_zip(vx_setall_s16((short)C8), vx_setall_s16( 1), czr1, dummy); + + for( ; i <= n-vsize; + i += vsize, src += scn*vsize, dst += 3*vsize) { - uint8x8x3_t v_dst; - uint16x8x3_t v_src16; - - if (scn == 3) + v_uint8 b, g, r, a; + if(scn == 4) { - uint8x8x3_t v_src = vld3_u8(src); - v_src16.val[0] = vmovl_u8(v_src.val[0]); - v_src16.val[1] = vmovl_u8(v_src.val[1]); - v_src16.val[2] = vmovl_u8(v_src.val[2]); + v_load_deinterleave(src, b, g, r, a); } - else + else // scn == 3 { - uint8x8x4_t v_src = vld4_u8(src); - v_src16.val[0] = vmovl_u8(v_src.val[0]); - v_src16.val[1] = vmovl_u8(v_src.val[1]); - 
v_src16.val[2] = vmovl_u8(v_src.val[2]); + v_load_deinterleave(src, b, g, r); } - uint16x4_t v_s0 = vget_low_u16(v_src16.val[0]), - v_s1 = vget_low_u16(v_src16.val[1]), - v_s2 = vget_low_u16(v_src16.val[2]); + v_uint16 b0, b1, g0, g1, r0, r1; + v_expand(b, b0, b1); + v_expand(g, g0, g1); + v_expand(r, r0, r1); - uint32x4_t v_X0 = vmlal_u16(vmlal_u16(vmull_u16(v_s0, v_c0), v_s1, v_c1), v_s2, v_c2); - uint32x4_t v_Y0 = vmlal_u16(vmlal_u16(vmull_u16(v_s0, v_c3), v_s1, v_c4), v_s2, v_c5); - uint32x4_t v_Z0 = vmlal_u16(vmlal_u16(vmull_u16(v_s0, v_c6), v_s1, v_c7), v_s2, v_c8); - v_X0 = vshrq_n_u32(vaddq_u32(v_X0, v_delta), xyz_shift); - v_Y0 = vshrq_n_u32(vaddq_u32(v_Y0, v_delta), xyz_shift); - v_Z0 = vshrq_n_u32(vaddq_u32(v_Z0, v_delta), xyz_shift); + v_int16 sb0, sb1, sg0, sg1, sr0, sr1; + sr0 = v_reinterpret_as_s16(r0); sr1 = v_reinterpret_as_s16(r1); + sg0 = v_reinterpret_as_s16(g0); sg1 = v_reinterpret_as_s16(g1); + sb0 = v_reinterpret_as_s16(b0); sb1 = v_reinterpret_as_s16(b1); - v_s0 = vget_high_u16(v_src16.val[0]), - v_s1 = vget_high_u16(v_src16.val[1]), - v_s2 = vget_high_u16(v_src16.val[2]); + v_int16 bg[4], rd[4]; + v_zip(sb0, sg0, bg[0], bg[1]); + v_zip(sb1, sg1, bg[2], bg[3]); + v_zip(sr0, vdescale, rd[0], rd[1]); + v_zip(sr1, vdescale, rd[2], rd[3]); - uint32x4_t v_X1 = vmlal_u16(vmlal_u16(vmull_u16(v_s0, v_c0), v_s1, v_c1), v_s2, v_c2); - uint32x4_t v_Y1 = vmlal_u16(vmlal_u16(vmull_u16(v_s0, v_c3), v_s1, v_c4), v_s2, v_c5); - uint32x4_t v_Z1 = vmlal_u16(vmlal_u16(vmull_u16(v_s0, v_c6), v_s1, v_c7), v_s2, v_c8); - v_X1 = vshrq_n_u32(vaddq_u32(v_X1, v_delta), xyz_shift); - v_Y1 = vshrq_n_u32(vaddq_u32(v_Y1, v_delta), xyz_shift); - v_Z1 = vshrq_n_u32(vaddq_u32(v_Z1, v_delta), xyz_shift); + v_uint32 vx[4], vy[4], vz[4]; + for(int j = 0; j < 4; j++) + { + vx[j] = v_reinterpret_as_u32(v_dotprod(bg[j], cxbg) + v_dotprod(rd[j], cxr1)) >> shift; + vy[j] = v_reinterpret_as_u32(v_dotprod(bg[j], cybg) + v_dotprod(rd[j], cyr1)) >> shift; + vz[j] = 
v_reinterpret_as_u32(v_dotprod(bg[j], czbg) + v_dotprod(rd[j], czr1)) >> shift; + } - v_dst.val[0] = vqmovn_u16(vcombine_u16(vmovn_u32(v_X0), vmovn_u32(v_X1))); - v_dst.val[1] = vqmovn_u16(vcombine_u16(vmovn_u32(v_Y0), vmovn_u32(v_Y1))); - v_dst.val[2] = vqmovn_u16(vcombine_u16(vmovn_u32(v_Z0), vmovn_u32(v_Z1))); + v_uint16 x0, x1, y0, y1, z0, z1; + x0 = v_pack(vx[0], vx[1]); + x1 = v_pack(vx[2], vx[3]); + y0 = v_pack(vy[0], vy[1]); + y1 = v_pack(vy[2], vy[3]); + z0 = v_pack(vz[0], vz[1]); + z1 = v_pack(vz[2], vz[3]); - vst3_u8(dst + i, v_dst); + v_uint8 x, y, z; + x = v_pack(x0, x1); + y = v_pack(y0, y1); + z = v_pack(z0, z1); + + v_store_interleave(dst, x, y, z); } +#endif - for ( ; i < n; i += 3, src += scn) + for ( ; i < n; i++, src += scn, dst += 3) { - int X = CV_DESCALE(src[0]*C0 + src[1]*C1 + src[2]*C2, xyz_shift); - int Y = CV_DESCALE(src[0]*C3 + src[1]*C4 + src[2]*C5, xyz_shift); - int Z = CV_DESCALE(src[0]*C6 + src[1]*C7 + src[2]*C8, xyz_shift); - dst[i] = saturate_cast(X); - dst[i+1] = saturate_cast(Y); - dst[i+2] = saturate_cast(Z); + uchar b = src[0], g = src[1], r = src[2]; + + int X = CV_DESCALE(b*C0 + g*C1 + r*C2, shift); + int Y = CV_DESCALE(b*C3 + g*C4 + r*C5, shift); + int Z = CV_DESCALE(b*C6 + g*C7 + r*C8, shift); + dst[0] = saturate_cast(X); + dst[1] = saturate_cast(Y); + dst[2] = saturate_cast(Z); } } int srccn, coeffs[9]; - uint16x4_t v_c0, v_c1, v_c2, v_c3, v_c4, v_c5, v_c6, v_c7, v_c8; - uint32x4_t v_delta; }; + template <> struct RGB2XYZ_i { typedef ushort channel_type; + static const int shift = xyz_shift; + static const int fix_shift = (int)(sizeof(short)*8 - shift); RGB2XYZ_i(int _srccn, int blueIdx, const float* _coeffs) : srccn(_srccn) { for( int i = 0; i < 9; i++ ) - coeffs[i] = _coeffs ? cvRound(_coeffs[i]*(1 << xyz_shift)) : sRGB2XYZ_D65_i[i]; + coeffs[i] = _coeffs ? 
cvRound(_coeffs[i]*(1 << shift)) : sRGB2XYZ_D65_i[i]; if(blueIdx == 0) { std::swap(coeffs[0], coeffs[2]); std::swap(coeffs[3], coeffs[5]); std::swap(coeffs[6], coeffs[8]); } - - v_c0 = vdup_n_u16(coeffs[0]); - v_c1 = vdup_n_u16(coeffs[1]); - v_c2 = vdup_n_u16(coeffs[2]); - v_c3 = vdup_n_u16(coeffs[3]); - v_c4 = vdup_n_u16(coeffs[4]); - v_c5 = vdup_n_u16(coeffs[5]); - v_c6 = vdup_n_u16(coeffs[6]); - v_c7 = vdup_n_u16(coeffs[7]); - v_c8 = vdup_n_u16(coeffs[8]); - v_delta = vdupq_n_u32(1 << (xyz_shift - 1)); } void operator()(const ushort * src, ushort * dst, int n) const { + CV_INSTRUMENT_REGION(); + int scn = srccn, i = 0; int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5], C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8]; - n *= 3; +#if CV_SIMD + const int vsize = v_uint16::nlanes; + const int descaleShift = 1 << (shift-1); + v_int16 vdescale = vx_setall_s16(descaleShift); + v_int16 vc0 = vx_setall_s16((short)C0), vc1 = vx_setall_s16((short)C1), vc2 = vx_setall_s16((short)C2); + v_int16 vc3 = vx_setall_s16((short)C3), vc4 = vx_setall_s16((short)C4), vc5 = vx_setall_s16((short)C5); + v_int16 vc6 = vx_setall_s16((short)C6), vc7 = vx_setall_s16((short)C7), vc8 = vx_setall_s16((short)C8); + v_int16 zero = vx_setzero_s16(), one = vx_setall_s16(1); + v_int16 cxbg, cxr1, cybg, cyr1, czbg, czr1; + v_int16 dummy; + v_zip(vc0, vc1, cxbg, dummy); + v_zip(vc2, one, cxr1, dummy); + v_zip(vc3, vc4, cybg, dummy); + v_zip(vc5, one, cyr1, dummy); + v_zip(vc6, vc7, czbg, dummy); + v_zip(vc8, one, czr1, dummy); - for ( ; i <= n - 24; i += 24, src += scn * 8) + for (; i <= n-vsize; + i += vsize, src += scn*vsize, dst += 3*vsize) { - uint16x8x3_t v_src, v_dst; - - if (scn == 3) - v_src = vld3q_u16(src); - else + v_uint16 b, g, r, a; + if(scn == 4) { - uint16x8x4_t v_src4 = vld4q_u16(src); - v_src.val[0] = v_src4.val[0]; - v_src.val[1] = v_src4.val[1]; - v_src.val[2] = v_src4.val[2]; + v_load_deinterleave(src, b, g, r, a); + } + else // scn 
== 3 + { + v_load_deinterleave(src, b, g, r); } - uint16x4_t v_s0 = vget_low_u16(v_src.val[0]), - v_s1 = vget_low_u16(v_src.val[1]), - v_s2 = vget_low_u16(v_src.val[2]); + v_int16 sb, sg, sr; + sr = v_reinterpret_as_s16(r); + sg = v_reinterpret_as_s16(g); + sb = v_reinterpret_as_s16(b); - uint32x4_t v_X0 = vmlal_u16(vmlal_u16(vmull_u16(v_s0, v_c0), v_s1, v_c1), v_s2, v_c2); - uint32x4_t v_Y0 = vmlal_u16(vmlal_u16(vmull_u16(v_s0, v_c3), v_s1, v_c4), v_s2, v_c5); - uint32x4_t v_Z0 = vmlal_u16(vmlal_u16(vmull_u16(v_s0, v_c6), v_s1, v_c7), v_s2, v_c8); - v_X0 = vshrq_n_u32(vaddq_u32(v_X0, v_delta), xyz_shift); - v_Y0 = vshrq_n_u32(vaddq_u32(v_Y0, v_delta), xyz_shift); - v_Z0 = vshrq_n_u32(vaddq_u32(v_Z0, v_delta), xyz_shift); + // fixing 16bit signed multiplication + v_int16 xmr, xmg, xmb; + v_int16 ymr, ymg, ymb; + v_int16 zmr, zmg, zmb; - v_s0 = vget_high_u16(v_src.val[0]), - v_s1 = vget_high_u16(v_src.val[1]), - v_s2 = vget_high_u16(v_src.val[2]); + v_int16 mr = sr < zero, mg = sg < zero, mb = sb < zero; - uint32x4_t v_X1 = vmlal_u16(vmlal_u16(vmull_u16(v_s0, v_c0), v_s1, v_c1), v_s2, v_c2); - uint32x4_t v_Y1 = vmlal_u16(vmlal_u16(vmull_u16(v_s0, v_c3), v_s1, v_c4), v_s2, v_c5); - uint32x4_t v_Z1 = vmlal_u16(vmlal_u16(vmull_u16(v_s0, v_c6), v_s1, v_c7), v_s2, v_c8); - v_X1 = vshrq_n_u32(vaddq_u32(v_X1, v_delta), xyz_shift); - v_Y1 = vshrq_n_u32(vaddq_u32(v_Y1, v_delta), xyz_shift); - v_Z1 = vshrq_n_u32(vaddq_u32(v_Z1, v_delta), xyz_shift); + xmb = mb & vc0; + xmg = mg & vc1; + xmr = mr & vc2; + ymb = mb & vc3; + ymg = mg & vc4; + ymr = mr & vc5; + zmb = mb & vc6; + zmg = mg & vc7; + zmr = mr & vc8; - v_dst.val[0] = vcombine_u16(vqmovn_u32(v_X0), vqmovn_u32(v_X1)); - v_dst.val[1] = vcombine_u16(vqmovn_u32(v_Y0), vqmovn_u32(v_Y1)); - v_dst.val[2] = vcombine_u16(vqmovn_u32(v_Z0), vqmovn_u32(v_Z1)); + v_int32 xfix0, xfix1, yfix0, yfix1, zfix0, zfix1; + v_expand(xmr + xmg + xmb, xfix0, xfix1); + v_expand(ymr + ymg + ymb, yfix0, yfix1); + v_expand(zmr + zmg + zmb, zfix0, 
zfix1); - vst3q_u16(dst + i, v_dst); + xfix0 = xfix0 << 16; + xfix1 = xfix1 << 16; + yfix0 = yfix0 << 16; + yfix1 = yfix1 << 16; + zfix0 = zfix0 << 16; + zfix1 = zfix1 << 16; + + v_int16 bg0, bg1, rd0, rd1; + v_zip(sb, sg, bg0, bg1); + v_zip(sr, vdescale, rd0, rd1); + + v_uint32 x0, x1, y0, y1, z0, z1; + + x0 = v_reinterpret_as_u32(v_dotprod(bg0, cxbg) + v_dotprod(rd0, cxr1) + xfix0) >> shift; + x1 = v_reinterpret_as_u32(v_dotprod(bg1, cxbg) + v_dotprod(rd1, cxr1) + xfix1) >> shift; + y0 = v_reinterpret_as_u32(v_dotprod(bg0, cybg) + v_dotprod(rd0, cyr1) + yfix0) >> shift; + y1 = v_reinterpret_as_u32(v_dotprod(bg1, cybg) + v_dotprod(rd1, cyr1) + yfix1) >> shift; + z0 = v_reinterpret_as_u32(v_dotprod(bg0, czbg) + v_dotprod(rd0, czr1) + zfix0) >> shift; + z1 = v_reinterpret_as_u32(v_dotprod(bg1, czbg) + v_dotprod(rd1, czr1) + zfix1) >> shift; + + v_uint16 x, y, z; + x = v_pack(x0, x1); + y = v_pack(y0, y1); + z = v_pack(z0, z1); + + v_store_interleave(dst, x, y, z); } - - for ( ; i <= n - 12; i += 12, src += scn * 4) +#endif + for ( ; i < n; i++, src += scn, dst += 3) { - uint16x4x3_t v_dst; - uint16x4_t v_s0, v_s1, v_s2; - - if (scn == 3) - { - uint16x4x3_t v_src = vld3_u16(src); - v_s0 = v_src.val[0]; - v_s1 = v_src.val[1]; - v_s2 = v_src.val[2]; - } - else - { - uint16x4x4_t v_src = vld4_u16(src); - v_s0 = v_src.val[0]; - v_s1 = v_src.val[1]; - v_s2 = v_src.val[2]; - } - - uint32x4_t v_X = vmlal_u16(vmlal_u16(vmull_u16(v_s0, v_c0), v_s1, v_c1), v_s2, v_c2); - uint32x4_t v_Y = vmlal_u16(vmlal_u16(vmull_u16(v_s0, v_c3), v_s1, v_c4), v_s2, v_c5); - uint32x4_t v_Z = vmlal_u16(vmlal_u16(vmull_u16(v_s0, v_c6), v_s1, v_c7), v_s2, v_c8); - - v_dst.val[0] = vqmovn_u32(vshrq_n_u32(vaddq_u32(v_X, v_delta), xyz_shift)); - v_dst.val[1] = vqmovn_u32(vshrq_n_u32(vaddq_u32(v_Y, v_delta), xyz_shift)); - v_dst.val[2] = vqmovn_u32(vshrq_n_u32(vaddq_u32(v_Z, v_delta), xyz_shift)); - - vst3_u16(dst + i, v_dst); - } - - for ( ; i < n; i += 3, src += scn) - { - int X = 
CV_DESCALE(src[0]*C0 + src[1]*C1 + src[2]*C2, xyz_shift); - int Y = CV_DESCALE(src[0]*C3 + src[1]*C4 + src[2]*C5, xyz_shift); - int Z = CV_DESCALE(src[0]*C6 + src[1]*C7 + src[2]*C8, xyz_shift); - dst[i] = saturate_cast(X); - dst[i+1] = saturate_cast(Y); - dst[i+2] = saturate_cast(Z); + ushort b = src[0], g = src[1], r = src[2]; + int X = CV_DESCALE(b*C0 + g*C1 + r*C2, shift); + int Y = CV_DESCALE(b*C3 + g*C4 + r*C5, shift); + int Z = CV_DESCALE(b*C6 + g*C7 + r*C8, shift); + dst[0] = saturate_cast(X); + dst[1] = saturate_cast(Y); + dst[2] = saturate_cast(Z); } } int srccn, coeffs[9]; - uint16x4_t v_c0, v_c1, v_c2, v_c3, v_c4, v_c5, v_c6, v_c7, v_c8; - uint32x4_t v_delta; }; -#endif template struct XYZ2RGB_f { @@ -693,7 +564,6 @@ template struct XYZ2RGB_f float coeffs[9]; }; -#if CV_SSE2 template <> struct XYZ2RGB_f @@ -711,113 +581,61 @@ struct XYZ2RGB_f std::swap(coeffs[1], coeffs[7]); std::swap(coeffs[2], coeffs[8]); } - - v_c0 = _mm_set1_ps(coeffs[0]); - v_c1 = _mm_set1_ps(coeffs[1]); - v_c2 = _mm_set1_ps(coeffs[2]); - v_c3 = _mm_set1_ps(coeffs[3]); - v_c4 = _mm_set1_ps(coeffs[4]); - v_c5 = _mm_set1_ps(coeffs[5]); - v_c6 = _mm_set1_ps(coeffs[6]); - v_c7 = _mm_set1_ps(coeffs[7]); - v_c8 = _mm_set1_ps(coeffs[8]); - - v_alpha = _mm_set1_ps(ColorChannel::max()); - - haveSIMD = checkHardwareSupport(CV_CPU_SSE2); - } - - void process(__m128 v_x, __m128 v_y, __m128 v_z, - __m128 & v_r, __m128 & v_g, __m128 & v_b) const - { - v_b = _mm_mul_ps(v_x, v_c0); - v_b = _mm_add_ps(v_b, _mm_mul_ps(v_y, v_c1)); - v_b = _mm_add_ps(v_b, _mm_mul_ps(v_z, v_c2)); - - v_g = _mm_mul_ps(v_x, v_c3); - v_g = _mm_add_ps(v_g, _mm_mul_ps(v_y, v_c4)); - v_g = _mm_add_ps(v_g, _mm_mul_ps(v_z, v_c5)); - - v_r = _mm_mul_ps(v_x, v_c6); - v_r = _mm_add_ps(v_r, _mm_mul_ps(v_y, v_c7)); - v_r = _mm_add_ps(v_r, _mm_mul_ps(v_z, v_c8)); } void operator()(const float* src, float* dst, int n) const { + CV_INSTRUMENT_REGION(); + int dcn = dstcn; float alpha = ColorChannel::max(); float C0 = coeffs[0], C1 = 
coeffs[1], C2 = coeffs[2], C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5], C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8]; - n *= 3; int i = 0; - - if (haveSIMD) +#if CV_SIMD + const int vsize = v_float32::nlanes; + v_float32 valpha = vx_setall_f32(alpha); + v_float32 vc0 = vx_setall_f32(C0), vc1 = vx_setall_f32(C1), vc2 = vx_setall_f32(C2); + v_float32 vc3 = vx_setall_f32(C3), vc4 = vx_setall_f32(C4), vc5 = vx_setall_f32(C5); + v_float32 vc6 = vx_setall_f32(C6), vc7 = vx_setall_f32(C7), vc8 = vx_setall_f32(C8); + for( ; i <= n-vsize; + i += vsize, src += 3*vsize, dst += dcn*vsize) { - for ( ; i <= n - 24; i += 24, dst += 8 * dcn) + v_float32 x, y, z; + v_load_deinterleave(src, x, y, z); + + v_float32 b, g, r; + b = v_fma(x, vc0, v_fma(y, vc1, z*vc2)); + g = v_fma(x, vc3, v_fma(y, vc4, z*vc5)); + r = v_fma(x, vc6, v_fma(y, vc7, z*vc8)); + + if(dcn == 4) { - __m128 v_x0 = _mm_loadu_ps(src + i); - __m128 v_x1 = _mm_loadu_ps(src + i + 4); - __m128 v_y0 = _mm_loadu_ps(src + i + 8); - __m128 v_y1 = _mm_loadu_ps(src + i + 12); - __m128 v_z0 = _mm_loadu_ps(src + i + 16); - __m128 v_z1 = _mm_loadu_ps(src + i + 20); - - _mm_deinterleave_ps(v_x0, v_x1, v_y0, v_y1, v_z0, v_z1); - - __m128 v_r0, v_g0, v_b0; - process(v_x0, v_y0, v_z0, - v_r0, v_g0, v_b0); - - __m128 v_r1, v_g1, v_b1; - process(v_x1, v_y1, v_z1, - v_r1, v_g1, v_b1); - - __m128 v_a0 = v_alpha, v_a1 = v_alpha; - - if (dcn == 4) - _mm_interleave_ps(v_b0, v_b1, v_g0, v_g1, - v_r0, v_r1, v_a0, v_a1); - else - _mm_interleave_ps(v_b0, v_b1, v_g0, v_g1, v_r0, v_r1); - - _mm_storeu_ps(dst, v_b0); - _mm_storeu_ps(dst + 4, v_b1); - _mm_storeu_ps(dst + 8, v_g0); - _mm_storeu_ps(dst + 12, v_g1); - _mm_storeu_ps(dst + 16, v_r0); - _mm_storeu_ps(dst + 20, v_r1); - - if (dcn == 4) - { - _mm_storeu_ps(dst + 24, v_a0); - _mm_storeu_ps(dst + 28, v_a1); - } + v_store_interleave(dst, b, g, r, valpha); + } + else // dcn == 3 + { + v_store_interleave(dst, b, g, r); } - } - - for( ; i < n; i += 3, dst += dcn) +#endif + for( ; i < n; 
i++, src += 3, dst += dcn) { - float B = src[i]*C0 + src[i+1]*C1 + src[i+2]*C2; - float G = src[i]*C3 + src[i+1]*C4 + src[i+2]*C5; - float R = src[i]*C6 + src[i+1]*C7 + src[i+2]*C8; + float x = src[0], y = src[1], z = src[2]; + float B = saturate_cast(x*C0 + y*C1 + z*C2); + float G = saturate_cast(x*C3 + y*C4 + z*C5); + float R = saturate_cast(x*C6 + y*C7 + z*C8); dst[0] = B; dst[1] = G; dst[2] = R; if( dcn == 4 ) dst[3] = alpha; } } + int dstcn, blueIdx; float coeffs[9]; - - __m128 v_c0, v_c1, v_c2, v_c3, v_c4, v_c5, v_c6, v_c7, v_c8; - __m128 v_alpha; - bool haveSIMD; }; -#endif // CV_SSE2 - template struct XYZ2RGB_i { @@ -859,18 +677,18 @@ template struct XYZ2RGB_i int coeffs[9]; }; -#if CV_NEON template <> struct XYZ2RGB_i { typedef uchar channel_type; + static const int shift = xyz_shift; XYZ2RGB_i(int _dstcn, int _blueIdx, const int* _coeffs) : dstcn(_dstcn), blueIdx(_blueIdx) { for(int i = 0; i < 9; i++) - coeffs[i] = _coeffs ? cvRound(_coeffs[i]*(1 << xyz_shift)) : XYZ2sRGB_D65_i[i]; + coeffs[i] = _coeffs ? 
cvRound(_coeffs[i]*(1 << shift)) : XYZ2sRGB_D65_i[i]; if(blueIdx == 0) { @@ -878,87 +696,90 @@ struct XYZ2RGB_i std::swap(coeffs[1], coeffs[7]); std::swap(coeffs[2], coeffs[8]); } - - v_c0 = vdup_n_s16(coeffs[0]); - v_c1 = vdup_n_s16(coeffs[1]); - v_c2 = vdup_n_s16(coeffs[2]); - v_c3 = vdup_n_s16(coeffs[3]); - v_c4 = vdup_n_s16(coeffs[4]); - v_c5 = vdup_n_s16(coeffs[5]); - v_c6 = vdup_n_s16(coeffs[6]); - v_c7 = vdup_n_s16(coeffs[7]); - v_c8 = vdup_n_s16(coeffs[8]); - v_delta = vdupq_n_s32(1 << (xyz_shift - 1)); - v_alpha = vmovn_u16(vdupq_n_u16(ColorChannel::max())); } void operator()(const uchar* src, uchar* dst, int n) const { + CV_INSTRUMENT_REGION(); + int dcn = dstcn, i = 0; uchar alpha = ColorChannel::max(); int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5], C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8]; - n *= 3; +#if CV_SIMD + const int vsize = v_uint8::nlanes; + const int descaleShift = 1 << (shift - 1); + v_uint8 valpha = vx_setall_u8(alpha); + v_int16 vdescale = vx_setall_s16(descaleShift); + v_int16 cbxy, cbz1, cgxy, cgz1, crxy, crz1; + v_int16 dummy; + v_zip(vx_setall_s16((short)C0), vx_setall_s16((short)C1), cbxy, dummy); + v_zip(vx_setall_s16((short)C2), vx_setall_s16( 1), cbz1, dummy); + v_zip(vx_setall_s16((short)C3), vx_setall_s16((short)C4), cgxy, dummy); + v_zip(vx_setall_s16((short)C5), vx_setall_s16( 1), cgz1, dummy); + v_zip(vx_setall_s16((short)C6), vx_setall_s16((short)C7), crxy, dummy); + v_zip(vx_setall_s16((short)C8), vx_setall_s16( 1), crz1, dummy); - for ( ; i <= n - 24; i += 24, dst += dcn * 8) + for ( ; i <= n-vsize; + i += vsize, src += 3*vsize, dst += dcn*vsize) { - uint8x8x3_t v_src = vld3_u8(src + i); - int16x8x3_t v_src16; - v_src16.val[0] = vreinterpretq_s16_u16(vmovl_u8(v_src.val[0])); - v_src16.val[1] = vreinterpretq_s16_u16(vmovl_u8(v_src.val[1])); - v_src16.val[2] = vreinterpretq_s16_u16(vmovl_u8(v_src.val[2])); + v_uint8 x, y, z; + v_load_deinterleave(src, x, y, z); - 
int16x4_t v_s0 = vget_low_s16(v_src16.val[0]), - v_s1 = vget_low_s16(v_src16.val[1]), - v_s2 = vget_low_s16(v_src16.val[2]); + v_uint16 ux0, ux1, uy0, uy1, uz0, uz1; + v_expand(x, ux0, ux1); + v_expand(y, uy0, uy1); + v_expand(z, uz0, uz1); + v_int16 x0, x1, y0, y1, z0, z1; + x0 = v_reinterpret_as_s16(ux0); + x1 = v_reinterpret_as_s16(ux1); + y0 = v_reinterpret_as_s16(uy0); + y1 = v_reinterpret_as_s16(uy1); + z0 = v_reinterpret_as_s16(uz0); + z1 = v_reinterpret_as_s16(uz1); - int32x4_t v_X0 = vmlal_s16(vmlal_s16(vmull_s16(v_s0, v_c0), v_s1, v_c1), v_s2, v_c2); - int32x4_t v_Y0 = vmlal_s16(vmlal_s16(vmull_s16(v_s0, v_c3), v_s1, v_c4), v_s2, v_c5); - int32x4_t v_Z0 = vmlal_s16(vmlal_s16(vmull_s16(v_s0, v_c6), v_s1, v_c7), v_s2, v_c8); - v_X0 = vshrq_n_s32(vaddq_s32(v_X0, v_delta), xyz_shift); - v_Y0 = vshrq_n_s32(vaddq_s32(v_Y0, v_delta), xyz_shift); - v_Z0 = vshrq_n_s32(vaddq_s32(v_Z0, v_delta), xyz_shift); + v_int32 b[4], g[4], r[4]; - v_s0 = vget_high_s16(v_src16.val[0]), - v_s1 = vget_high_s16(v_src16.val[1]), - v_s2 = vget_high_s16(v_src16.val[2]); + v_int16 xy[4], zd[4]; + v_zip(x0, y0, xy[0], xy[1]); + v_zip(x1, y1, xy[2], xy[3]); + v_zip(z0, vdescale, zd[0], zd[1]); + v_zip(z1, vdescale, zd[2], zd[3]); - int32x4_t v_X1 = vmlal_s16(vmlal_s16(vmull_s16(v_s0, v_c0), v_s1, v_c1), v_s2, v_c2); - int32x4_t v_Y1 = vmlal_s16(vmlal_s16(vmull_s16(v_s0, v_c3), v_s1, v_c4), v_s2, v_c5); - int32x4_t v_Z1 = vmlal_s16(vmlal_s16(vmull_s16(v_s0, v_c6), v_s1, v_c7), v_s2, v_c8); - v_X1 = vshrq_n_s32(vaddq_s32(v_X1, v_delta), xyz_shift); - v_Y1 = vshrq_n_s32(vaddq_s32(v_Y1, v_delta), xyz_shift); - v_Z1 = vshrq_n_s32(vaddq_s32(v_Z1, v_delta), xyz_shift); - - uint8x8_t v_b = vqmovun_s16(vcombine_s16(vqmovn_s32(v_X0), vqmovn_s32(v_X1))); - uint8x8_t v_g = vqmovun_s16(vcombine_s16(vqmovn_s32(v_Y0), vqmovn_s32(v_Y1))); - uint8x8_t v_r = vqmovun_s16(vcombine_s16(vqmovn_s32(v_Z0), vqmovn_s32(v_Z1))); - - if (dcn == 3) + for(int j = 0; j < 4; j++) { - uint8x8x3_t v_dst; - v_dst.val[0] 
= v_b; - v_dst.val[1] = v_g; - v_dst.val[2] = v_r; - vst3_u8(dst, v_dst); + b[j] = (v_dotprod(xy[j], cbxy) + v_dotprod(zd[j], cbz1)) >> shift; + g[j] = (v_dotprod(xy[j], cgxy) + v_dotprod(zd[j], cgz1)) >> shift; + r[j] = (v_dotprod(xy[j], crxy) + v_dotprod(zd[j], crz1)) >> shift; } - else + + v_uint16 b0, b1, g0, g1, r0, r1; + b0 = v_pack_u(b[0], b[1]); b1 = v_pack_u(b[2], b[3]); + g0 = v_pack_u(g[0], g[1]); g1 = v_pack_u(g[2], g[3]); + r0 = v_pack_u(r[0], r[1]); r1 = v_pack_u(r[2], r[3]); + + v_uint8 bb, gg, rr; + bb = v_pack(b0, b1); + gg = v_pack(g0, g1); + rr = v_pack(r0, r1); + + if(dcn == 4) { - uint8x8x4_t v_dst; - v_dst.val[0] = v_b; - v_dst.val[1] = v_g; - v_dst.val[2] = v_r; - v_dst.val[3] = v_alpha; - vst4_u8(dst, v_dst); + v_store_interleave(dst, bb, gg, rr, valpha); + } + else // dcn == 3 + { + v_store_interleave(dst, bb, gg, rr); } } - - for ( ; i < n; i += 3, dst += dcn) +#endif + for ( ; i < n; i++, src += 3, dst += dcn) { - int B = CV_DESCALE(src[i]*C0 + src[i+1]*C1 + src[i+2]*C2, xyz_shift); - int G = CV_DESCALE(src[i]*C3 + src[i+1]*C4 + src[i+2]*C5, xyz_shift); - int R = CV_DESCALE(src[i]*C6 + src[i+1]*C7 + src[i+2]*C8, xyz_shift); + uchar x = src[0], y = src[1], z = src[2]; + int B = CV_DESCALE(x*C0 + y*C1 + z*C2, shift); + int G = CV_DESCALE(x*C3 + y*C4 + z*C5, shift); + int R = CV_DESCALE(x*C6 + y*C7 + z*C8, shift); dst[0] = saturate_cast(B); dst[1] = saturate_cast(G); dst[2] = saturate_cast(R); if( dcn == 4 ) @@ -967,22 +788,20 @@ struct XYZ2RGB_i } int dstcn, blueIdx; int coeffs[9]; - - int16x4_t v_c0, v_c1, v_c2, v_c3, v_c4, v_c5, v_c6, v_c7, v_c8; - uint8x8_t v_alpha; - int32x4_t v_delta; }; + template <> struct XYZ2RGB_i { typedef ushort channel_type; + static const int shift = xyz_shift; XYZ2RGB_i(int _dstcn, int _blueIdx, const int* _coeffs) : dstcn(_dstcn), blueIdx(_blueIdx) { for(int i = 0; i < 9; i++) - coeffs[i] = _coeffs ? cvRound(_coeffs[i]*(1 << xyz_shift)) : XYZ2sRGB_D65_i[i]; + coeffs[i] = _coeffs ? 
cvRound(_coeffs[i]*(1 << shift)) : XYZ2sRGB_D65_i[i]; if(blueIdx == 0) { @@ -990,120 +809,104 @@ struct XYZ2RGB_i std::swap(coeffs[1], coeffs[7]); std::swap(coeffs[2], coeffs[8]); } - - v_c0 = vdupq_n_s32(coeffs[0]); - v_c1 = vdupq_n_s32(coeffs[1]); - v_c2 = vdupq_n_s32(coeffs[2]); - v_c3 = vdupq_n_s32(coeffs[3]); - v_c4 = vdupq_n_s32(coeffs[4]); - v_c5 = vdupq_n_s32(coeffs[5]); - v_c6 = vdupq_n_s32(coeffs[6]); - v_c7 = vdupq_n_s32(coeffs[7]); - v_c8 = vdupq_n_s32(coeffs[8]); - v_delta = vdupq_n_s32(1 << (xyz_shift - 1)); - v_alpha = vdupq_n_u16(ColorChannel::max()); - v_alpha2 = vget_low_u16(v_alpha); } void operator()(const ushort* src, ushort* dst, int n) const { + CV_INSTRUMENT_REGION(); + int dcn = dstcn, i = 0; ushort alpha = ColorChannel::max(); int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5], C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8]; - n *= 3; +#if CV_SIMD + const int vsize = v_uint16::nlanes; + const int descaleShift = 1 << (shift-1); + v_uint16 valpha = vx_setall_u16(alpha); + v_int16 vdescale = vx_setall_s16(descaleShift); + v_int16 vc0 = vx_setall_s16((short)C0), vc1 = vx_setall_s16((short)C1), vc2 = vx_setall_s16((short)C2); + v_int16 vc3 = vx_setall_s16((short)C3), vc4 = vx_setall_s16((short)C4), vc5 = vx_setall_s16((short)C5); + v_int16 vc6 = vx_setall_s16((short)C6), vc7 = vx_setall_s16((short)C7), vc8 = vx_setall_s16((short)C8); + v_int16 zero = vx_setzero_s16(), one = vx_setall_s16(1); + v_int16 cbxy, cbz1, cgxy, cgz1, crxy, crz1; + v_int16 dummy; + v_zip(vc0, vc1, cbxy, dummy); + v_zip(vc2, one, cbz1, dummy); + v_zip(vc3, vc4, cgxy, dummy); + v_zip(vc5, one, cgz1, dummy); + v_zip(vc6, vc7, crxy, dummy); + v_zip(vc8, one, crz1, dummy); - for ( ; i <= n - 24; i += 24, dst += dcn * 8) + for( ; i <= n-vsize; + i += vsize, src += 3*vsize, dst += dcn*vsize) { - uint16x8x3_t v_src = vld3q_u16(src + i); - int32x4_t v_s0 = vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(v_src.val[0]))), - v_s1 = 
vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(v_src.val[1]))), - v_s2 = vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(v_src.val[2]))); + v_uint16 x, y, z; + v_load_deinterleave(src, x, y, z); - int32x4_t v_X0 = vmlaq_s32(vmlaq_s32(vmulq_s32(v_s0, v_c0), v_s1, v_c1), v_s2, v_c2); - int32x4_t v_Y0 = vmlaq_s32(vmlaq_s32(vmulq_s32(v_s0, v_c3), v_s1, v_c4), v_s2, v_c5); - int32x4_t v_Z0 = vmlaq_s32(vmlaq_s32(vmulq_s32(v_s0, v_c6), v_s1, v_c7), v_s2, v_c8); - v_X0 = vshrq_n_s32(vaddq_s32(v_X0, v_delta), xyz_shift); - v_Y0 = vshrq_n_s32(vaddq_s32(v_Y0, v_delta), xyz_shift); - v_Z0 = vshrq_n_s32(vaddq_s32(v_Z0, v_delta), xyz_shift); + v_int16 sx, sy, sz; + sx = v_reinterpret_as_s16(x); + sy = v_reinterpret_as_s16(y); + sz = v_reinterpret_as_s16(z); - v_s0 = vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(v_src.val[0]))); - v_s1 = vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(v_src.val[1]))); - v_s2 = vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(v_src.val[2]))); + // fixing 16bit signed multiplication + v_int16 mx = sx < zero, my = sy < zero, mz = sz < zero; - int32x4_t v_X1 = vmlaq_s32(vmlaq_s32(vmulq_s32(v_s0, v_c0), v_s1, v_c1), v_s2, v_c2); - int32x4_t v_Y1 = vmlaq_s32(vmlaq_s32(vmulq_s32(v_s0, v_c3), v_s1, v_c4), v_s2, v_c5); - int32x4_t v_Z1 = vmlaq_s32(vmlaq_s32(vmulq_s32(v_s0, v_c6), v_s1, v_c7), v_s2, v_c8); - v_X1 = vshrq_n_s32(vaddq_s32(v_X1, v_delta), xyz_shift); - v_Y1 = vshrq_n_s32(vaddq_s32(v_Y1, v_delta), xyz_shift); - v_Z1 = vshrq_n_s32(vaddq_s32(v_Z1, v_delta), xyz_shift); + v_int16 bmx, bmy, bmz; + v_int16 gmx, gmy, gmz; + v_int16 rmx, rmy, rmz; - uint16x8_t v_b = vcombine_u16(vqmovun_s32(v_X0), vqmovun_s32(v_X1)); - uint16x8_t v_g = vcombine_u16(vqmovun_s32(v_Y0), vqmovun_s32(v_Y1)); - uint16x8_t v_r = vcombine_u16(vqmovun_s32(v_Z0), vqmovun_s32(v_Z1)); + bmx = mx & vc0; + bmy = my & vc1; + bmz = mz & vc2; + gmx = mx & vc3; + gmy = my & vc4; + gmz = mz & vc5; + rmx = mx & vc6; + rmy = my & vc7; + rmz = mz & vc8; - if (dcn == 3) + v_int32 bfix0, bfix1, 
gfix0, gfix1, rfix0, rfix1; + v_expand(bmx + bmy + bmz, bfix0, bfix1); + v_expand(gmx + gmy + gmz, gfix0, gfix1); + v_expand(rmx + rmy + rmz, rfix0, rfix1); + + bfix0 = bfix0 << 16; bfix1 = bfix1 << 16; + gfix0 = gfix0 << 16; gfix1 = gfix1 << 16; + rfix0 = rfix0 << 16; rfix1 = rfix1 << 16; + + v_int16 xy0, xy1, zd0, zd1; + v_zip(sx, sy, xy0, xy1); + v_zip(sz, vdescale, zd0, zd1); + + v_int32 b0, b1, g0, g1, r0, r1; + + b0 = (v_dotprod(xy0, cbxy) + v_dotprod(zd0, cbz1) + bfix0) >> shift; + b1 = (v_dotprod(xy1, cbxy) + v_dotprod(zd1, cbz1) + bfix1) >> shift; + g0 = (v_dotprod(xy0, cgxy) + v_dotprod(zd0, cgz1) + gfix0) >> shift; + g1 = (v_dotprod(xy1, cgxy) + v_dotprod(zd1, cgz1) + gfix1) >> shift; + r0 = (v_dotprod(xy0, crxy) + v_dotprod(zd0, crz1) + rfix0) >> shift; + r1 = (v_dotprod(xy1, crxy) + v_dotprod(zd1, crz1) + rfix1) >> shift; + + v_uint16 b, g, r; + b = v_pack_u(b0, b1); g = v_pack_u(g0, g1); r = v_pack_u(r0, r1); + + if(dcn == 4) { - uint16x8x3_t v_dst; - v_dst.val[0] = v_b; - v_dst.val[1] = v_g; - v_dst.val[2] = v_r; - vst3q_u16(dst, v_dst); + v_store_interleave(dst, b, g, r, valpha); } - else + else // dcn == 3 { - uint16x8x4_t v_dst; - v_dst.val[0] = v_b; - v_dst.val[1] = v_g; - v_dst.val[2] = v_r; - v_dst.val[3] = v_alpha; - vst4q_u16(dst, v_dst); + v_store_interleave(dst, b, g, r); } } - - for ( ; i <= n - 12; i += 12, dst += dcn * 4) +#endif + for ( ; i < n; i++, src += 3, dst += dcn) { - uint16x4x3_t v_src = vld3_u16(src + i); - int32x4_t v_s0 = vreinterpretq_s32_u32(vmovl_u16(v_src.val[0])), - v_s1 = vreinterpretq_s32_u32(vmovl_u16(v_src.val[1])), - v_s2 = vreinterpretq_s32_u32(vmovl_u16(v_src.val[2])); - - int32x4_t v_X = vmlaq_s32(vmlaq_s32(vmulq_s32(v_s0, v_c0), v_s1, v_c1), v_s2, v_c2); - int32x4_t v_Y = vmlaq_s32(vmlaq_s32(vmulq_s32(v_s0, v_c3), v_s1, v_c4), v_s2, v_c5); - int32x4_t v_Z = vmlaq_s32(vmlaq_s32(vmulq_s32(v_s0, v_c6), v_s1, v_c7), v_s2, v_c8); - v_X = vshrq_n_s32(vaddq_s32(v_X, v_delta), xyz_shift); - v_Y = 
vshrq_n_s32(vaddq_s32(v_Y, v_delta), xyz_shift); - v_Z = vshrq_n_s32(vaddq_s32(v_Z, v_delta), xyz_shift); - - uint16x4_t v_b = vqmovun_s32(v_X); - uint16x4_t v_g = vqmovun_s32(v_Y); - uint16x4_t v_r = vqmovun_s32(v_Z); - - if (dcn == 3) - { - uint16x4x3_t v_dst; - v_dst.val[0] = v_b; - v_dst.val[1] = v_g; - v_dst.val[2] = v_r; - vst3_u16(dst, v_dst); - } - else - { - uint16x4x4_t v_dst; - v_dst.val[0] = v_b; - v_dst.val[1] = v_g; - v_dst.val[2] = v_r; - v_dst.val[3] = v_alpha2; - vst4_u16(dst, v_dst); - } - } - - for ( ; i < n; i += 3, dst += dcn) - { - int B = CV_DESCALE(src[i]*C0 + src[i+1]*C1 + src[i+2]*C2, xyz_shift); - int G = CV_DESCALE(src[i]*C3 + src[i+1]*C4 + src[i+2]*C5, xyz_shift); - int R = CV_DESCALE(src[i]*C6 + src[i+1]*C7 + src[i+2]*C8, xyz_shift); + ushort x = src[0], y = src[1], z = src[2]; + int B = CV_DESCALE(x*C0 + y*C1 + z*C2, shift); + int G = CV_DESCALE(x*C3 + y*C4 + z*C5, shift); + int R = CV_DESCALE(x*C6 + y*C7 + z*C8, shift); dst[0] = saturate_cast(B); dst[1] = saturate_cast(G); dst[2] = saturate_cast(R); if( dcn == 4 ) @@ -1112,16 +915,8 @@ struct XYZ2RGB_i } int dstcn, blueIdx; int coeffs[9]; - - int32x4_t v_c0, v_c1, v_c2, v_c3, v_c4, v_c5, v_c6, v_c7, v_c8, v_delta; - uint16x4_t v_alpha2; - uint16x8_t v_alpha; }; -#endif - - - ///////////////////////////////////// RGB <-> L*a*b* ///////////////////////////////////// @@ -1482,8 +1277,8 @@ static void initLabTabs() y = cvRound(fy*fy*fy/softfloat(LUT_BASE*LUT_BASE)); } - LabToYF_b[i*2 ] = (ushort)y; // 2260 <= y <= BASE - LabToYF_b[i*2+1] = (ushort)ify; // 0 <= ify <= BASE + LabToYF_b[i*2 ] = (ushort)y; // 0 <= y <= BASE + LabToYF_b[i*2+1] = (ushort)ify; // 2260 <= ify <= BASE } //Lookup table for a,b to x,z conversion @@ -1563,7 +1358,7 @@ static inline void trilinearInterpolate(int cx, int cy, int cz, const int16_t* L c = CV_DESCALE(c, trilinear_shift*3); } -#if CV_SIMD128 +#if CV_SIMD_WIDTH == 16 // 8 inValues are in [0; LAB_BASE] static inline void trilinearPackedInterpolate(const 
v_uint16x8& inX, const v_uint16x8& inY, const v_uint16x8& inZ, @@ -1652,7 +1447,93 @@ static inline void trilinearPackedInterpolate(const v_uint16x8& inX, const v_uin #undef DOT_SHIFT_PACK } -#endif // CV_SIMD128 +#elif CV_SIMD + +// inValues are in [0; LAB_BASE] +static inline void trilinearPackedInterpolate(const v_uint16& inX, const v_uint16& inY, const v_uint16& inZ, + const int16_t* LUT, + v_uint16& outA, v_uint16& outB, v_uint16& outC) +{ + const int vsize = v_uint16::nlanes; + + // LUT idx of origin pt of cube + v_uint16 tx = inX >> (lab_base_shift - lab_lut_shift); + v_uint16 ty = inY >> (lab_base_shift - lab_lut_shift); + v_uint16 tz = inZ >> (lab_base_shift - lab_lut_shift); + + v_uint32 btmp00, btmp01, btmp10, btmp11, btmp20, btmp21; + v_uint32 baseIdx0, baseIdx1; + // baseIdx = tx*(3*8)+ty*(3*8*LAB_LUT_DIM)+tz*(3*8*LAB_LUT_DIM*LAB_LUT_DIM) + v_mul_expand(tx, vx_setall_u16(3*8), btmp00, btmp01); + v_mul_expand(ty, vx_setall_u16(3*8*LAB_LUT_DIM), btmp10, btmp11); + v_mul_expand(tz, vx_setall_u16(3*8*LAB_LUT_DIM*LAB_LUT_DIM), btmp20, btmp21); + baseIdx0 = btmp00 + btmp10 + btmp20; + baseIdx1 = btmp01 + btmp11 + btmp21; + + uint32_t CV_DECL_ALIGNED(CV_SIMD_WIDTH) vbaseIdx[vsize]; + v_store_aligned(vbaseIdx + 0*vsize/2, baseIdx0); + v_store_aligned(vbaseIdx + 1*vsize/2, baseIdx1); + + // fracX, fracY, fracZ are [0; TRILINEAR_BASE) + const uint16_t bitMask = (1 << trilinear_shift) - 1; + v_uint16 bitMaskReg = vx_setall_u16(bitMask); + v_uint16 fracX = (inX >> (lab_base_shift - 8 - 1)) & bitMaskReg; + v_uint16 fracY = (inY >> (lab_base_shift - 8 - 1)) & bitMaskReg; + v_uint16 fracZ = (inZ >> (lab_base_shift - 8 - 1)) & bitMaskReg; + + // trilinearIdx = 8*x + 8*TRILINEAR_BASE*y + 8*TRILINEAR_BASE*TRILINEAR_BASE*z + v_uint32 trilinearIdx0, trilinearIdx1; + v_uint32 fracX0, fracX1, fracY0, fracY1, fracZ0, fracZ1; + v_expand(fracX, fracX0, fracX1); + v_expand(fracY, fracY0, fracY1); + v_expand(fracZ, fracZ0, fracZ1); + + trilinearIdx0 = (fracX0 << 3) + (fracY0 << 
(3+trilinear_shift)) + (fracZ0 << (3+trilinear_shift*2)); + trilinearIdx1 = (fracX1 << 3) + (fracY1 << (3+trilinear_shift)) + (fracZ1 << (3+trilinear_shift*2)); + + uint32_t CV_DECL_ALIGNED(CV_SIMD_WIDTH) vtrilinearIdx[vsize]; + v_store_aligned(vtrilinearIdx + 0*vsize/2, trilinearIdx0); + v_store_aligned(vtrilinearIdx + 1*vsize/2, trilinearIdx1); + + v_uint32 a0, a1, b0, b1, c0, c1; + + uint32_t CV_DECL_ALIGNED(CV_SIMD_WIDTH) va[vsize], vb[vsize], vc[vsize]; + for(int j = 0; j < vsize; j++) + { + const int16_t* baseLUT = LUT + vbaseIdx[j]; + + v_int16x8 aa, bb, cc; + aa = v_load(baseLUT); + bb = v_load(baseLUT + 8); + cc = v_load(baseLUT + 16); + + v_int16x8 w = v_load(trilinearLUT + vtrilinearIdx[j]); + + va[j] = v_reduce_sum(v_dotprod(aa, w)); + vb[j] = v_reduce_sum(v_dotprod(bb, w)); + vc[j] = v_reduce_sum(v_dotprod(cc, w)); + } + + a0 = vx_load_aligned(va + 0*vsize/2); + a1 = vx_load_aligned(va + 1*vsize/2); + b0 = vx_load_aligned(vb + 0*vsize/2); + b1 = vx_load_aligned(vb + 1*vsize/2); + c0 = vx_load_aligned(vc + 0*vsize/2); + c1 = vx_load_aligned(vc + 1*vsize/2); + + // CV_DESCALE + const v_uint32 descaleShift = vx_setall_u32(1 << (trilinear_shift*3 - 1)); + a0 = (a0 + descaleShift) >> (trilinear_shift*3); + a1 = (a1 + descaleShift) >> (trilinear_shift*3); + b0 = (b0 + descaleShift) >> (trilinear_shift*3); + b1 = (b1 + descaleShift) >> (trilinear_shift*3); + c0 = (c0 + descaleShift) >> (trilinear_shift*3); + c1 = (c1 + descaleShift) >> (trilinear_shift*3); + + outA = v_pack(a0, a1); outB = v_pack(b0, b1); outC = v_pack(c0, c1); +} + +#endif // CV_SIMD struct RGB2Lab_b @@ -1663,7 +1544,6 @@ struct RGB2Lab_b const float* _whitept, bool _srgb) : srccn(_srccn), srgb(_srgb) { - static volatile int _3 = 3; initLabTabs(); softdouble whitePt[3]; @@ -1674,7 +1554,7 @@ struct RGB2Lab_b whitePt[i] = D65[i]; static const softdouble lshift(1 << lab_shift); - for( int i = 0; i < _3; i++ ) + for( int i = 0; i < 3; i++ ) { softdouble c[3]; for(int j = 0; j < 3; j++) @@ 
-1693,6 +1573,8 @@ struct RGB2Lab_b void operator()(const uchar* src, uchar* dst, int n) const { + CV_INSTRUMENT_REGION(); + const int Lscale = (116*255+50)/100; const int Lshift = -((16*255*(1 << lab_shift2) + 50)/100); const ushort* tab = srgb ? sRGBGammaTab_b : linearGammaTab_b; @@ -1700,10 +1582,158 @@ struct RGB2Lab_b int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5], C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8]; - n *= 3; i = 0; - for(; i < n; i += 3, src += scn ) + +#if CV_SIMD + const int vsize = v_uint8::nlanes; + const int xyzDescaleShift = 1 << (lab_shift - 1); + v_int16 vXYZdescale = vx_setall_s16(xyzDescaleShift); + v_int16 cxrg, cxb1, cyrg, cyb1, czrg, czb1; + v_int16 dummy; + v_zip(vx_setall_s16((short)C0), vx_setall_s16((short)C1), cxrg, dummy); + v_zip(vx_setall_s16((short)C2), vx_setall_s16( 1), cxb1, dummy); + v_zip(vx_setall_s16((short)C3), vx_setall_s16((short)C4), cyrg, dummy); + v_zip(vx_setall_s16((short)C5), vx_setall_s16( 1), cyb1, dummy); + v_zip(vx_setall_s16((short)C6), vx_setall_s16((short)C7), czrg, dummy); + v_zip(vx_setall_s16((short)C8), vx_setall_s16( 1), czb1, dummy); + const int labDescaleShift = 1 << (lab_shift2 - 1); + + for( ; i <= n - vsize; + i += vsize , src += scn*vsize, dst += 3*vsize) + { + v_uint8 R, G, B, A; + if(scn == 4) + { + v_load_deinterleave(src, R, G, B, A); + } + else // scn == 3 + { + v_load_deinterleave(src, R, G, B); + } + + // gamma substitution using tab + v_uint16 drgb[6]; + // [0 1 2 3 4 5 6] => [R0 R1 G0 G1 B0 B1] + v_expand(R, drgb[0], drgb[1]); + v_expand(G, drgb[2], drgb[3]); + v_expand(B, drgb[4], drgb[5]); + + // [0 1 2 3 4 5 6 7 8 9 10 11 12] => [4 per R, 4 per G, 4 per B] + v_uint32 qrgb[12]; + for(int k = 0; k < 6; k++) + { + v_expand(drgb[k], qrgb[k*2+0], qrgb[k*2+1]); + } + + uint32_t CV_DECL_ALIGNED(CV_SIMD_WIDTH) vdrgb[vsize*3]; + for(int k = 0; k < 12; k++) + { + v_store_aligned(vdrgb + k*vsize/4, qrgb[k]); + } + + v_uint16 trgb[6]; + 
for(int k = 0; k < 6; k++) + { + trgb[k] = vx_lut(tab, (const int*)vdrgb + k*vsize/2); + } + + v_int16 rgbs[6]; + for(int k = 0; k < 6; k++) + { + rgbs[k] = v_reinterpret_as_s16(trgb[k]); + } + v_int16 sB0, sB1, sG0, sG1, sR0, sR1; + sR0 = rgbs[0]; sR1 = rgbs[1]; + sG0 = rgbs[2]; sG1 = rgbs[3]; + sB0 = rgbs[4]; sB1 = rgbs[5]; + + v_int16 rg[4], bd[4]; + v_zip(sR0, sG0, rg[0], rg[1]); + v_zip(sR1, sG1, rg[2], rg[3]); + v_zip(sB0, vXYZdescale, bd[0], bd[1]); + v_zip(sB1, vXYZdescale, bd[2], bd[3]); + + // [X, Y, Z] = CV_DESCALE(R*C_ + G*C_ + B*C_, lab_shift) + v_uint32 x[4], y[4], z[4]; + for(int j = 0; j < 4; j++) + { + x[j] = v_reinterpret_as_u32(v_dotprod(rg[j], cxrg) + v_dotprod(bd[j], cxb1)) >> lab_shift; + y[j] = v_reinterpret_as_u32(v_dotprod(rg[j], cyrg) + v_dotprod(bd[j], cyb1)) >> lab_shift; + z[j] = v_reinterpret_as_u32(v_dotprod(rg[j], czrg) + v_dotprod(bd[j], czb1)) >> lab_shift; + } + + // [fX, fY, fZ] = LabCbrtTab_b[vx, vy, vz] + // [4 per X, 4 per Y, 4 per Z] + uint32_t CV_DECL_ALIGNED(CV_SIMD_WIDTH) vxyz[vsize*3]; + for(int j = 0; j < 4; j++) + { + v_store_aligned(vxyz + (0*4+j)*vsize/4, x[j]); + v_store_aligned(vxyz + (1*4+j)*vsize/4, y[j]); + v_store_aligned(vxyz + (2*4+j)*vsize/4, z[j]); + } + // [X0, X1, Y0, Y1, Z0, Z1] + v_uint16 fxyz[2*3]; + for(int j = 0; j < 2*3; j++) + { + fxyz[j] = vx_lut(LabCbrtTab_b, (const int*)vxyz + j*vsize/2); + } + + v_int16 fX0, fX1, fY0, fY1, fZ0, fZ1; + fX0 = v_reinterpret_as_s16(fxyz[0]), fX1 = v_reinterpret_as_s16(fxyz[1]); + fY0 = v_reinterpret_as_s16(fxyz[2]), fY1 = v_reinterpret_as_s16(fxyz[3]); + fZ0 = v_reinterpret_as_s16(fxyz[4]), fZ1 = v_reinterpret_as_s16(fxyz[5]); + + v_uint16 Ldiff0 = fxyz[2], Ldiff1 = fxyz[3]; + + v_uint8 L, a, b; + + // L = (Lscale*Ldiff + (Lshift + labDescaleShift)) >> lab_shift2; + v_uint32 vL[4]; + v_uint16 vLscale = vx_setall_u16(Lscale); + v_mul_expand(Ldiff0, vLscale, vL[0], vL[1]); + v_mul_expand(Ldiff1, vLscale, vL[2], vL[3]); + v_uint32 vLshift = 
vx_setall_u32((uint32_t)(Lshift + labDescaleShift)); + for(int k = 0; k < 4; k++) + { + vL[k] = (vL[k] + vLshift) >> lab_shift2; + } + v_uint16 L0, L1; + L0 = v_pack(vL[0], vL[1]); + L1 = v_pack(vL[2], vL[3]); + + L = v_pack(L0, L1); + + // a = (500*(fX - fY) + (128*(1 << lab_shift2) + labDescaleShift)) >> lab_shift2; + // b = (200*(fY - fZ) + (128*(1 << lab_shift2) + labDescaleShift)) >> lab_shift2; + v_int16 adiff0 = v_sub_wrap(fX0, fY0), adiff1 = v_sub_wrap(fX1, fY1); + v_int16 bdiff0 = v_sub_wrap(fY0, fZ0), bdiff1 = v_sub_wrap(fY1, fZ1); + + // [4 for a, 4 for b] + v_int32 ab[8]; + v_int16 v500 = vx_setall_s16(500); + v_mul_expand(adiff0, v500, ab[0], ab[1]); + v_mul_expand(adiff1, v500, ab[2], ab[3]); + v_int16 v200 = vx_setall_s16(200); + v_mul_expand(bdiff0, v200, ab[4], ab[5]); + v_mul_expand(bdiff1, v200, ab[6], ab[7]); + v_int32 abShift = vx_setall_s32(128*(1 << lab_shift2) + labDescaleShift); + for(int k = 0; k < 8; k++) + { + ab[k] = (ab[k] + abShift) >> lab_shift2; + } + v_int16 a0, a1, b0, b1; + a0 = v_pack(ab[0], ab[1]); a1 = v_pack(ab[2], ab[3]); + b0 = v_pack(ab[4], ab[5]); b1 = v_pack(ab[6], ab[7]); + + a = v_pack_u(a0, a1); + b = v_pack_u(b0, b1); + + v_store_interleave(dst, L, a, b); + } +#endif + + for(; i < n; i++, src += scn, dst += 3 ) { int R = tab[src[0]], G = tab[src[1]], B = tab[src[2]]; int fX = LabCbrtTab_b[CV_DESCALE(R*C0 + G*C1 + B*C2, lab_shift)]; @@ -1714,9 +1744,9 @@ struct RGB2Lab_b int a = CV_DESCALE( 500*(fX - fY) + 128*(1 << lab_shift2), lab_shift2 ); int b = CV_DESCALE( 200*(fY - fZ) + 128*(1 << lab_shift2), lab_shift2 ); - dst[i] = saturate_cast(L); - dst[i+1] = saturate_cast(a); - dst[i+2] = saturate_cast(b); + dst[0] = saturate_cast(L); + dst[1] = saturate_cast(a); + dst[2] = saturate_cast(b); } } @@ -1734,7 +1764,6 @@ struct RGB2Lab_f const float* _whitept, bool _srgb) : srccn(_srccn), srgb(_srgb), blueIdx(_blueIdx) { - volatile int _3 = 3; initLabTabs(); useInterpolation = (!_coeffs && !_whitept && srgb && 
enableRGB2LabInterpolation); @@ -1750,7 +1779,7 @@ struct RGB2Lab_f softdouble::one(), softdouble::one() / whitePt[2] }; - for( int i = 0; i < _3; i++ ) + for( int i = 0; i < 3; i++ ) { softfloat c[3]; for(int k = 0; k < 3; k++) @@ -1769,44 +1798,47 @@ struct RGB2Lab_f void operator()(const float* src, float* dst, int n) const { - int i, scn = srccn, bIdx = blueIdx; + CV_INSTRUMENT_REGION(); + + int scn = srccn, bIdx = blueIdx; float gscale = GammaTabScale; const float* gammaTab = srgb ? sRGBGammaTab : 0; float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5], C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8]; - n *= 3; - i = 0; if(useInterpolation) { + int i = 0; + n *= 3; -#if CV_SIMD128 +#if CV_SIMD if(enablePackedLab) { - static const int nPixels = 4*2; + const int vsize = v_float32::nlanes; + static const int nPixels = vsize*2; for(; i < n - 3*nPixels; i += 3*nPixels, src += scn*nPixels) { - v_float32x4 rvec0, gvec0, bvec0, rvec1, gvec1, bvec1; - v_float32x4 dummy0, dummy1; + v_float32 rvec0, gvec0, bvec0, rvec1, gvec1, bvec1; if(scn == 3) { - v_load_deinterleave(src, rvec0, gvec0, bvec0); - v_load_deinterleave(src + scn*4, rvec1, gvec1, bvec1); + v_load_deinterleave(src + 0*vsize, rvec0, gvec0, bvec0); + v_load_deinterleave(src + 3*vsize, rvec1, gvec1, bvec1); } else // scn == 4 { - v_load_deinterleave(src, rvec0, gvec0, bvec0, dummy0); - v_load_deinterleave(src + scn*4, rvec1, gvec1, bvec1, dummy1); + v_float32 dummy0, dummy1; + v_load_deinterleave(src + 0*vsize, rvec0, gvec0, bvec0, dummy0); + v_load_deinterleave(src + 4*vsize, rvec1, gvec1, bvec1, dummy1); } if(bIdx) { - dummy0 = rvec0; rvec0 = bvec0; bvec0 = dummy0; - dummy1 = rvec1; rvec1 = bvec1; bvec1 = dummy1; + swap(rvec0, bvec0); + swap(rvec1, bvec1); } - v_float32x4 zerof = v_setzero_f32(), onef = v_setall_f32(1.0f); + v_float32 zerof = vx_setzero_f32(), onef = vx_setall_f32(1.0f); /* clip() */ #define clipv(r) (r) = v_min(v_max((r), zerof), onef) 
clipv(rvec0); clipv(rvec1); @@ -1814,58 +1846,55 @@ struct RGB2Lab_f clipv(bvec0); clipv(bvec1); #undef clipv /* int iR = R*LAB_BASE, iG = G*LAB_BASE, iB = B*LAB_BASE, iL, ia, ib; */ - v_float32x4 basef = v_setall_f32(LAB_BASE); + v_float32 basef = vx_setall_f32(LAB_BASE); rvec0 *= basef, gvec0 *= basef, bvec0 *= basef; rvec1 *= basef, gvec1 *= basef, bvec1 *= basef; - v_int32x4 irvec0, igvec0, ibvec0, irvec1, igvec1, ibvec1; + v_int32 irvec0, igvec0, ibvec0, irvec1, igvec1, ibvec1; irvec0 = v_round(rvec0); irvec1 = v_round(rvec1); igvec0 = v_round(gvec0); igvec1 = v_round(gvec1); ibvec0 = v_round(bvec0); ibvec1 = v_round(bvec1); - v_int16x8 irvec, igvec, ibvec; - irvec = v_pack(irvec0, irvec1); - igvec = v_pack(igvec0, igvec1); - ibvec = v_pack(ibvec0, ibvec1); + v_uint16 uirvec = v_pack_u(irvec0, irvec1); + v_uint16 uigvec = v_pack_u(igvec0, igvec1); + v_uint16 uibvec = v_pack_u(ibvec0, ibvec1); - v_uint16x8 uirvec = v_reinterpret_as_u16(irvec); - v_uint16x8 uigvec = v_reinterpret_as_u16(igvec); - v_uint16x8 uibvec = v_reinterpret_as_u16(ibvec); - - v_uint16x8 ui_lvec, ui_avec, ui_bvec; + v_uint16 ui_lvec, ui_avec, ui_bvec; trilinearPackedInterpolate(uirvec, uigvec, uibvec, LABLUVLUTs16.RGB2LabLUT_s16, ui_lvec, ui_avec, ui_bvec); - v_int16x8 i_lvec = v_reinterpret_as_s16(ui_lvec); - v_int16x8 i_avec = v_reinterpret_as_s16(ui_avec); - v_int16x8 i_bvec = v_reinterpret_as_s16(ui_bvec); + v_int16 i_lvec = v_reinterpret_as_s16(ui_lvec); + v_int16 i_avec = v_reinterpret_as_s16(ui_avec); + v_int16 i_bvec = v_reinterpret_as_s16(ui_bvec); /* float L = iL*1.0f/LAB_BASE, a = ia*1.0f/LAB_BASE, b = ib*1.0f/LAB_BASE; */ - v_int32x4 i_lvec0, i_avec0, i_bvec0, i_lvec1, i_avec1, i_bvec1; + v_int32 i_lvec0, i_avec0, i_bvec0, i_lvec1, i_avec1, i_bvec1; v_expand(i_lvec, i_lvec0, i_lvec1); v_expand(i_avec, i_avec0, i_avec1); v_expand(i_bvec, i_bvec0, i_bvec1); - v_float32x4 l_vec0, a_vec0, b_vec0, l_vec1, a_vec1, b_vec1; + v_float32 l_vec0, a_vec0, b_vec0, l_vec1, a_vec1, b_vec1; 
l_vec0 = v_cvt_f32(i_lvec0); l_vec1 = v_cvt_f32(i_lvec1); a_vec0 = v_cvt_f32(i_avec0); a_vec1 = v_cvt_f32(i_avec1); b_vec0 = v_cvt_f32(i_bvec0); b_vec1 = v_cvt_f32(i_bvec1); /* dst[i] = L*100.0f */ - l_vec0 = l_vec0*v_setall_f32(100.0f/LAB_BASE); - l_vec1 = l_vec1*v_setall_f32(100.0f/LAB_BASE); + v_float32 v100dBase = vx_setall_f32(100.0f/LAB_BASE); + l_vec0 = l_vec0*v100dBase; + l_vec1 = l_vec1*v100dBase; /* dst[i + 1] = a*256.0f - 128.0f; dst[i + 2] = b*256.0f - 128.0f; */ - a_vec0 = a_vec0*v_setall_f32(256.0f/LAB_BASE) - v_setall_f32(128.0f); - a_vec1 = a_vec1*v_setall_f32(256.0f/LAB_BASE) - v_setall_f32(128.0f); - b_vec0 = b_vec0*v_setall_f32(256.0f/LAB_BASE) - v_setall_f32(128.0f); - b_vec1 = b_vec1*v_setall_f32(256.0f/LAB_BASE) - v_setall_f32(128.0f); + v_float32 v256dBase = vx_setall_f32(256.0f/LAB_BASE), vm128 = vx_setall_f32(-128.f); + a_vec0 = v_fma(a_vec0, v256dBase, vm128); + a_vec1 = v_fma(a_vec1, v256dBase, vm128); + b_vec0 = v_fma(b_vec0, v256dBase, vm128); + b_vec1 = v_fma(b_vec1, v256dBase, vm128); - v_store_interleave(dst + i, l_vec0, a_vec0, b_vec0); - v_store_interleave(dst + i + 3*4, l_vec1, a_vec1, b_vec1); + v_store_interleave(dst + i + 0*vsize, l_vec0, a_vec0, b_vec0); + v_store_interleave(dst + i + 3*vsize, l_vec1, a_vec1, b_vec1); } } -#endif // CV_SIMD128 +#endif // CV_SIMD for(; i < n; i += 3, src += scn) { @@ -1883,35 +1912,112 @@ struct RGB2Lab_f dst[i + 2] = b*256.0f - 128.0f; } } - - static const float _a = (softfloat(16) / softfloat(116)); - for (; i < n; i += 3, src += scn ) + else { - float R = clip(src[0]); - float G = clip(src[1]); - float B = clip(src[2]); - - if (gammaTab) + static const float _a = (softfloat(16) / softfloat(116)); + int i = 0; +#if CV_SIMD + const int vsize = v_float32::nlanes; + const int nrepeats = vsize == 4 ? 
2 : 1; + v_float32 vc0 = vx_setall_f32(C0), vc1 = vx_setall_f32(C1), vc2 = vx_setall_f32(C2); + v_float32 vc3 = vx_setall_f32(C3), vc4 = vx_setall_f32(C4), vc5 = vx_setall_f32(C5); + v_float32 vc6 = vx_setall_f32(C6), vc7 = vx_setall_f32(C7), vc8 = vx_setall_f32(C8); + for( ; i <= n - vsize*nrepeats; + i += vsize*nrepeats, src += scn*vsize*nrepeats, dst += 3*vsize*nrepeats) { - R = splineInterpolate(R * gscale, gammaTab, GAMMA_TAB_SIZE); - G = splineInterpolate(G * gscale, gammaTab, GAMMA_TAB_SIZE); - B = splineInterpolate(B * gscale, gammaTab, GAMMA_TAB_SIZE); + v_float32 R[nrepeats], G[nrepeats], B[nrepeats], A; + if(scn == 4) + { + for (int k = 0; k < nrepeats; k++) + { + v_load_deinterleave(src + k*4*vsize, R[k], G[k], B[k], A); + } + } + else // scn == 3 + { + for (int k = 0; k < nrepeats; k++) + { + v_load_deinterleave(src + k*3*vsize, R[k], G[k], B[k]); + } + } + + v_float32 one = vx_setall_f32(1.0f), z = vx_setzero_f32(); + for (int k = 0; k < nrepeats; k++) + { + R[k] = v_max(z, v_min(R[k], one)); + G[k] = v_max(z, v_min(G[k], one)); + B[k] = v_max(z, v_min(B[k], one)); + } + + if(gammaTab) + { + v_float32 vgscale = vx_setall_f32(gscale); + for (int k = 0; k < nrepeats; k++) + { + R[k] = splineInterpolate(R[k]*vgscale, gammaTab, GAMMA_TAB_SIZE); + G[k] = splineInterpolate(G[k]*vgscale, gammaTab, GAMMA_TAB_SIZE); + B[k] = splineInterpolate(B[k]*vgscale, gammaTab, GAMMA_TAB_SIZE); + } + } + + v_float32 X[nrepeats], Y[nrepeats], Z[nrepeats]; + v_float32 FX[nrepeats], FY[nrepeats], FZ[nrepeats]; + for (int k = 0; k < nrepeats; k++) + { + X[k] = v_fma(R[k], vc0, v_fma(G[k], vc1, B[k]*vc2)); + Y[k] = v_fma(R[k], vc3, v_fma(G[k], vc4, B[k]*vc5)); + Z[k] = v_fma(R[k], vc6, v_fma(G[k], vc7, B[k]*vc8)); + + // use spline interpolation instead of direct calculation + v_float32 vTabScale = vx_setall_f32(LabCbrtTabScale); + FX[k] = splineInterpolate(X[k]*vTabScale, LabCbrtTab, LAB_CBRT_TAB_SIZE); + FY[k] = splineInterpolate(Y[k]*vTabScale, LabCbrtTab, 
LAB_CBRT_TAB_SIZE); + FZ[k] = splineInterpolate(Z[k]*vTabScale, LabCbrtTab, LAB_CBRT_TAB_SIZE); + } + + v_float32 L[nrepeats], a[nrepeats], b[nrepeats]; + for (int k = 0; k < nrepeats; k++) + { + // 7.787f = (29/3)^3/(29*4), 0.008856f = (6/29)^3, 903.3 = (29/3)^3 + v_float32 mask = Y[k] > (vx_setall_f32(0.008856f)); + v_float32 v116 = vx_setall_f32(116.f), vm16 = vx_setall_f32(-16.f); + L[k] = v_select(mask, v_fma(v116, FY[k], vm16), vx_setall_f32(903.3f)*Y[k]); + a[k] = vx_setall_f32(500.f) * (FX[k] - FY[k]); + b[k] = vx_setall_f32(200.f) * (FY[k] - FZ[k]); + + v_store_interleave(dst + k*3*vsize, L[k], a[k], b[k]); + } } - float X = R*C0 + G*C1 + B*C2; - float Y = R*C3 + G*C4 + B*C5; - float Z = R*C6 + G*C7 + B*C8; - // 7.787f = (29/3)^3/(29*4), 0.008856f = (6/29)^3, 903.3 = (29/3)^3 - float FX = X > 0.008856f ? cubeRoot(X) : (7.787f * X + _a); - float FY = Y > 0.008856f ? cubeRoot(Y) : (7.787f * Y + _a); - float FZ = Z > 0.008856f ? cubeRoot(Z) : (7.787f * Z + _a); +#endif - float L = Y > 0.008856f ? (116.f * FY - 16.f) : (903.3f * Y); - float a = 500.f * (FX - FY); - float b = 200.f * (FY - FZ); + for (; i < n; i++, src += scn, dst += 3 ) + { + float R = clip(src[0]); + float G = clip(src[1]); + float B = clip(src[2]); - dst[i] = L; - dst[i + 1] = a; - dst[i + 2] = b; + if (gammaTab) + { + R = splineInterpolate(R * gscale, gammaTab, GAMMA_TAB_SIZE); + G = splineInterpolate(G * gscale, gammaTab, GAMMA_TAB_SIZE); + B = splineInterpolate(B * gscale, gammaTab, GAMMA_TAB_SIZE); + } + float X = R*C0 + G*C1 + B*C2; + float Y = R*C3 + G*C4 + B*C5; + float Z = R*C6 + G*C7 + B*C8; + // 7.787f = (29/3)^3/(29*4), 0.008856f = (6/29)^3, 903.3 = (29/3)^3 + float FX = X > 0.008856f ? cubeRoot(X) : (7.787f * X + _a); + float FY = Y > 0.008856f ? cubeRoot(Y) : (7.787f * Y + _a); + float FZ = Z > 0.008856f ? cubeRoot(Z) : (7.787f * Z + _a); + + float L = Y > 0.008856f ? 
(116.f * FY - 16.f) : (903.3f * Y); + float a = 500.f * (FX - FY); + float b = 200.f * (FY - FZ); + + dst[0] = L; + dst[1] = a; + dst[2] = b; + } } } @@ -1957,104 +2063,12 @@ struct Lab2RGBfloat lThresh = softfloat(8); // 0.008856f * 903.3f = (6/29)^3*(29/3)^3 = 8 fThresh = softfloat(6)/softfloat(29); // 7.787f * 0.008856f + 16.0f / 116.0f = 6/29 - - #if CV_SSE2 - haveSIMD = checkHardwareSupport(CV_CPU_SSE2); - #endif } - #if CV_SSE2 - void process(__m128& v_li0, __m128& v_li1, __m128& v_ai0, - __m128& v_ai1, __m128& v_bi0, __m128& v_bi1) const - { - // 903.3 = (29/3)^3, 7.787 = (29/3)^3/(29*4) - __m128 v_y00 = _mm_mul_ps(v_li0, _mm_set1_ps(1.0f/903.3f)); - __m128 v_y01 = _mm_mul_ps(v_li1, _mm_set1_ps(1.0f/903.3f)); - __m128 v_fy00 = _mm_add_ps(_mm_mul_ps(_mm_set1_ps(7.787f), v_y00), _mm_set1_ps(16.0f/116.0f)); - __m128 v_fy01 = _mm_add_ps(_mm_mul_ps(_mm_set1_ps(7.787f), v_y01), _mm_set1_ps(16.0f/116.0f)); - - __m128 v_fy10 = _mm_mul_ps(_mm_add_ps(v_li0, _mm_set1_ps(16.0f)), _mm_set1_ps(1.0f/116.0f)); - __m128 v_fy11 = _mm_mul_ps(_mm_add_ps(v_li1, _mm_set1_ps(16.0f)), _mm_set1_ps(1.0f/116.0f)); - __m128 v_y10 = _mm_mul_ps(_mm_mul_ps(v_fy10, v_fy10), v_fy10); - __m128 v_y11 = _mm_mul_ps(_mm_mul_ps(v_fy11, v_fy11), v_fy11); - - __m128 v_cmpli0 = _mm_cmple_ps(v_li0, _mm_set1_ps(lThresh)); - __m128 v_cmpli1 = _mm_cmple_ps(v_li1, _mm_set1_ps(lThresh)); - v_y00 = _mm_and_ps(v_cmpli0, v_y00); - v_y01 = _mm_and_ps(v_cmpli1, v_y01); - v_fy00 = _mm_and_ps(v_cmpli0, v_fy00); - v_fy01 = _mm_and_ps(v_cmpli1, v_fy01); - v_y10 = _mm_andnot_ps(v_cmpli0, v_y10); - v_y11 = _mm_andnot_ps(v_cmpli1, v_y11); - v_fy10 = _mm_andnot_ps(v_cmpli0, v_fy10); - v_fy11 = _mm_andnot_ps(v_cmpli1, v_fy11); - __m128 v_y0 = _mm_or_ps(v_y00, v_y10); - __m128 v_y1 = _mm_or_ps(v_y01, v_y11); - __m128 v_fy0 = _mm_or_ps(v_fy00, v_fy10); - __m128 v_fy1 = _mm_or_ps(v_fy01, v_fy11); - - __m128 v_fxz00 = _mm_add_ps(v_fy0, _mm_mul_ps(v_ai0, _mm_set1_ps(0.002f))); - __m128 v_fxz01 = _mm_add_ps(v_fy1, 
_mm_mul_ps(v_ai1, _mm_set1_ps(0.002f))); - __m128 v_fxz10 = _mm_sub_ps(v_fy0, _mm_mul_ps(v_bi0, _mm_set1_ps(0.005f))); - __m128 v_fxz11 = _mm_sub_ps(v_fy1, _mm_mul_ps(v_bi1, _mm_set1_ps(0.005f))); - - __m128 v_fxz000 = _mm_mul_ps(_mm_sub_ps(v_fxz00, _mm_set1_ps(16.0f/116.0f)), _mm_set1_ps(1.0f/7.787f)); - __m128 v_fxz001 = _mm_mul_ps(_mm_sub_ps(v_fxz01, _mm_set1_ps(16.0f/116.0f)), _mm_set1_ps(1.0f/7.787f)); - __m128 v_fxz010 = _mm_mul_ps(_mm_sub_ps(v_fxz10, _mm_set1_ps(16.0f/116.0f)), _mm_set1_ps(1.0f/7.787f)); - __m128 v_fxz011 = _mm_mul_ps(_mm_sub_ps(v_fxz11, _mm_set1_ps(16.0f/116.0f)), _mm_set1_ps(1.0f/7.787f)); - - __m128 v_fxz100 = _mm_mul_ps(_mm_mul_ps(v_fxz00, v_fxz00), v_fxz00); - __m128 v_fxz101 = _mm_mul_ps(_mm_mul_ps(v_fxz01, v_fxz01), v_fxz01); - __m128 v_fxz110 = _mm_mul_ps(_mm_mul_ps(v_fxz10, v_fxz10), v_fxz10); - __m128 v_fxz111 = _mm_mul_ps(_mm_mul_ps(v_fxz11, v_fxz11), v_fxz11); - - __m128 v_cmpfxz00 = _mm_cmple_ps(v_fxz00, _mm_set1_ps(fThresh)); - __m128 v_cmpfxz01 = _mm_cmple_ps(v_fxz01, _mm_set1_ps(fThresh)); - __m128 v_cmpfxz10 = _mm_cmple_ps(v_fxz10, _mm_set1_ps(fThresh)); - __m128 v_cmpfxz11 = _mm_cmple_ps(v_fxz11, _mm_set1_ps(fThresh)); - v_fxz000 = _mm_and_ps(v_cmpfxz00, v_fxz000); - v_fxz001 = _mm_and_ps(v_cmpfxz01, v_fxz001); - v_fxz010 = _mm_and_ps(v_cmpfxz10, v_fxz010); - v_fxz011 = _mm_and_ps(v_cmpfxz11, v_fxz011); - v_fxz100 = _mm_andnot_ps(v_cmpfxz00, v_fxz100); - v_fxz101 = _mm_andnot_ps(v_cmpfxz01, v_fxz101); - v_fxz110 = _mm_andnot_ps(v_cmpfxz10, v_fxz110); - v_fxz111 = _mm_andnot_ps(v_cmpfxz11, v_fxz111); - __m128 v_x0 = _mm_or_ps(v_fxz000, v_fxz100); - __m128 v_x1 = _mm_or_ps(v_fxz001, v_fxz101); - __m128 v_z0 = _mm_or_ps(v_fxz010, v_fxz110); - __m128 v_z1 = _mm_or_ps(v_fxz011, v_fxz111); - - __m128 v_ro0 = _mm_mul_ps(_mm_set1_ps(coeffs[0]), v_x0); - __m128 v_ro1 = _mm_mul_ps(_mm_set1_ps(coeffs[0]), v_x1); - __m128 v_go0 = _mm_mul_ps(_mm_set1_ps(coeffs[3]), v_x0); - __m128 v_go1 = _mm_mul_ps(_mm_set1_ps(coeffs[3]), v_x1); - 
__m128 v_bo0 = _mm_mul_ps(_mm_set1_ps(coeffs[6]), v_x0); - __m128 v_bo1 = _mm_mul_ps(_mm_set1_ps(coeffs[6]), v_x1); - v_ro0 = _mm_add_ps(v_ro0, _mm_mul_ps(_mm_set1_ps(coeffs[1]), v_y0)); - v_ro1 = _mm_add_ps(v_ro1, _mm_mul_ps(_mm_set1_ps(coeffs[1]), v_y1)); - v_go0 = _mm_add_ps(v_go0, _mm_mul_ps(_mm_set1_ps(coeffs[4]), v_y0)); - v_go1 = _mm_add_ps(v_go1, _mm_mul_ps(_mm_set1_ps(coeffs[4]), v_y1)); - v_bo0 = _mm_add_ps(v_bo0, _mm_mul_ps(_mm_set1_ps(coeffs[7]), v_y0)); - v_bo1 = _mm_add_ps(v_bo1, _mm_mul_ps(_mm_set1_ps(coeffs[7]), v_y1)); - v_ro0 = _mm_add_ps(v_ro0, _mm_mul_ps(_mm_set1_ps(coeffs[2]), v_z0)); - v_ro1 = _mm_add_ps(v_ro1, _mm_mul_ps(_mm_set1_ps(coeffs[2]), v_z1)); - v_go0 = _mm_add_ps(v_go0, _mm_mul_ps(_mm_set1_ps(coeffs[5]), v_z0)); - v_go1 = _mm_add_ps(v_go1, _mm_mul_ps(_mm_set1_ps(coeffs[5]), v_z1)); - v_bo0 = _mm_add_ps(v_bo0, _mm_mul_ps(_mm_set1_ps(coeffs[8]), v_z0)); - v_bo1 = _mm_add_ps(v_bo1, _mm_mul_ps(_mm_set1_ps(coeffs[8]), v_z1)); - - v_li0 = _mm_min_ps(_mm_max_ps(v_ro0, _mm_setzero_ps()), _mm_set1_ps(1.0f)); - v_li1 = _mm_min_ps(_mm_max_ps(v_ro1, _mm_setzero_ps()), _mm_set1_ps(1.0f)); - v_ai0 = _mm_min_ps(_mm_max_ps(v_go0, _mm_setzero_ps()), _mm_set1_ps(1.0f)); - v_ai1 = _mm_min_ps(_mm_max_ps(v_go1, _mm_setzero_ps()), _mm_set1_ps(1.0f)); - v_bi0 = _mm_min_ps(_mm_max_ps(v_bo0, _mm_setzero_ps()), _mm_set1_ps(1.0f)); - v_bi1 = _mm_min_ps(_mm_max_ps(v_bo1, _mm_setzero_ps()), _mm_set1_ps(1.0f)); - } - #endif - void operator()(const float* src, float* dst, int n) const { + CV_INSTRUMENT_REGION(); + int i = 0, dcn = dstcn; const float* gammaTab = srgb ? 
sRGBInvGammaTab : 0; float gscale = GammaTabScale; @@ -2062,76 +2076,137 @@ struct Lab2RGBfloat C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5], C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8]; float alpha = ColorChannel::max(); - n *= 3; - #if CV_SSE2 - if (haveSIMD) +#if CV_SIMD + const int vsize = v_float32::nlanes; + const int nrepeats = 2; + v_float32 v16_116 = vx_setall_f32(16.0f / 116.0f); + for( ; i <= n-vsize*nrepeats; + i += vsize*nrepeats, src += 3*vsize*nrepeats, dst += dcn*vsize*nrepeats) { - for (; i <= n - 24; i += 24, dst += dcn * 8) + v_float32 li[nrepeats], ai[nrepeats], bi[nrepeats]; + for(int k = 0; k < nrepeats; k++) { - __m128 v_li0 = _mm_loadu_ps(src + i + 0); - __m128 v_li1 = _mm_loadu_ps(src + i + 4); - __m128 v_ai0 = _mm_loadu_ps(src + i + 8); - __m128 v_ai1 = _mm_loadu_ps(src + i + 12); - __m128 v_bi0 = _mm_loadu_ps(src + i + 16); - __m128 v_bi1 = _mm_loadu_ps(src + i + 20); + v_load_deinterleave(src + k*3*vsize, li[k], ai[k], bi[k]); + } - _mm_deinterleave_ps(v_li0, v_li1, v_ai0, v_ai1, v_bi0, v_bi1); + v_float32 x[nrepeats], y[nrepeats], z[nrepeats], fy[nrepeats]; + v_float32 limask[nrepeats]; + v_float32 vlThresh = vx_setall_f32(lThresh); + for(int k = 0; k < nrepeats; k++) + { + limask[k] = li[k] <= vlThresh; + } + v_float32 ylo[nrepeats], yhi[nrepeats], fylo[nrepeats], fyhi[nrepeats]; + // 903.3 = (29/3)^3, 7.787 = (29/3)^3/(29*4) + v_float32 vinv903 = vx_setall_f32(1.f/903.3f); + for(int k = 0; k < nrepeats; k++) + { + ylo[k] = li[k] * vinv903; + } + v_float32 v7787 = vx_setall_f32(7.787f); + for(int k = 0; k < nrepeats; k++) + { + fylo[k] = v_fma(v7787, ylo[k], v16_116); + } + v_float32 v16 = vx_setall_f32(16.0f), vinv116 = vx_setall_f32(1.f/116.0f); + for(int k = 0; k < nrepeats; k++) + { + fyhi[k] = (li[k] + v16) * vinv116; + } + for(int k = 0; k < nrepeats; k++) + { + yhi[k] = fyhi[k] * fyhi[k] * fyhi[k]; + } + for(int k = 0; k < nrepeats; k++) + { + y[k] = v_select(limask[k], ylo[k], yhi[k]); + fy[k] = v_select(limask[k], 
fylo[k], fyhi[k]); + } - process(v_li0, v_li1, v_ai0, v_ai1, v_bi0, v_bi1); - - if (gammaTab) + v_float32 fxz[nrepeats*2]; + v_float32 vpinv500 = vx_setall_f32( 1.f/500.f); + v_float32 vninv200 = vx_setall_f32(-1.f/200.f); + for(int k = 0; k < nrepeats; k++) + { + fxz[k*2+0] = v_fma(ai[k], vpinv500, fy[k]); + fxz[k*2+1] = v_fma(bi[k], vninv200, fy[k]); + } + v_float32 vfTresh = vx_setall_f32(fThresh); + v_float32 vinv7787 = vx_setall_f32(1.f/7.787f); + for(int k = 0; k < nrepeats; k++) + { + for (int j = 0; j < 2; j++) { - __m128 v_gscale = _mm_set1_ps(gscale); - v_li0 = _mm_mul_ps(v_li0, v_gscale); - v_li1 = _mm_mul_ps(v_li1, v_gscale); - v_ai0 = _mm_mul_ps(v_ai0, v_gscale); - v_ai1 = _mm_mul_ps(v_ai1, v_gscale); - v_bi0 = _mm_mul_ps(v_bi0, v_gscale); - v_bi1 = _mm_mul_ps(v_bi1, v_gscale); + v_float32 f = fxz[k*2+j]; + v_float32 fmask = f <= vfTresh; + v_float32 flo = (f - v16_116) * vinv7787; + v_float32 fhi = f*f*f; + fxz[k*2+j] = v_select(fmask, flo, fhi); + } + } + for(int k = 0; k < nrepeats; k++) + { + x[k] = fxz[k*2+0], z[k] = fxz[k*2+1]; + } + v_float32 ro[nrepeats], go[nrepeats], bo[nrepeats]; + v_float32 vc0 = vx_setall_f32(C0), vc1 = vx_setall_f32(C1), vc2 = vx_setall_f32(C2); + v_float32 vc3 = vx_setall_f32(C3), vc4 = vx_setall_f32(C4), vc5 = vx_setall_f32(C5); + v_float32 vc6 = vx_setall_f32(C6), vc7 = vx_setall_f32(C7), vc8 = vx_setall_f32(C8); + for(int k = 0; k < nrepeats; k++) + { + ro[k] = v_fma(vc0, x[k], v_fma(vc1, y[k], vc2 * z[k])); + go[k] = v_fma(vc3, x[k], v_fma(vc4, y[k], vc5 * z[k])); + bo[k] = v_fma(vc6, x[k], v_fma(vc7, y[k], vc8 * z[k])); + } + v_float32 one = vx_setall_f32(1.f), zero = vx_setzero_f32(); + for(int k = 0; k < nrepeats; k++) + { + ro[k] = v_max(zero, v_min(ro[k], one)); + go[k] = v_max(zero, v_min(go[k], one)); + bo[k] = v_max(zero, v_min(bo[k], one)); + } - splineInterpolate(v_li0, gammaTab, GAMMA_TAB_SIZE); - splineInterpolate(v_li1, gammaTab, GAMMA_TAB_SIZE); - splineInterpolate(v_ai0, gammaTab, GAMMA_TAB_SIZE); - 
splineInterpolate(v_ai1, gammaTab, GAMMA_TAB_SIZE); - splineInterpolate(v_bi0, gammaTab, GAMMA_TAB_SIZE); - splineInterpolate(v_bi1, gammaTab, GAMMA_TAB_SIZE); + if (gammaTab) + { + v_float32 vgscale = vx_setall_f32(gscale); + for(int k = 0; k < nrepeats; k++) + { + ro[k] *= vgscale; + go[k] *= vgscale; + bo[k] *= vgscale; } - if( dcn == 4 ) + for(int k = 0; k < nrepeats; k++) { - __m128 v_a0 = _mm_set1_ps(alpha); - __m128 v_a1 = _mm_set1_ps(alpha); - _mm_interleave_ps(v_li0, v_li1, v_ai0, v_ai1, v_bi0, v_bi1, v_a0, v_a1); - - _mm_storeu_ps(dst + 0, v_li0); - _mm_storeu_ps(dst + 4, v_li1); - _mm_storeu_ps(dst + 8, v_ai0); - _mm_storeu_ps(dst + 12, v_ai1); - _mm_storeu_ps(dst + 16, v_bi0); - _mm_storeu_ps(dst + 20, v_bi1); - _mm_storeu_ps(dst + 24, v_a0); - _mm_storeu_ps(dst + 28, v_a1); + ro[k] = splineInterpolate(ro[k], gammaTab, GAMMA_TAB_SIZE); + go[k] = splineInterpolate(go[k], gammaTab, GAMMA_TAB_SIZE); + bo[k] = splineInterpolate(bo[k], gammaTab, GAMMA_TAB_SIZE); } - else - { - _mm_interleave_ps(v_li0, v_li1, v_ai0, v_ai1, v_bi0, v_bi1); + } - _mm_storeu_ps(dst + 0, v_li0); - _mm_storeu_ps(dst + 4, v_li1); - _mm_storeu_ps(dst + 8, v_ai0); - _mm_storeu_ps(dst + 12, v_ai1); - _mm_storeu_ps(dst + 16, v_bi0); - _mm_storeu_ps(dst + 20, v_bi1); + if(dcn == 4) + { + v_float32 valpha = vx_setall_f32(alpha); + for(int k = 0; k < nrepeats; k++) + { + v_store_interleave(dst + 4*vsize*k, ro[k], go[k], bo[k], valpha); + } + } + else // dcn == 3 + { + for(int k = 0; k < nrepeats; k++) + { + v_store_interleave(dst + 3*vsize*k, ro[k], go[k], bo[k]); } } } - #endif - for (; i < n; i += 3, dst += dcn) +#endif + for (; i < n; i++, src += 3, dst += dcn) { - float li = src[i]; - float ai = src[i + 1]; - float bi = src[i + 2]; + float li = src[0]; + float ai = src[1]; + float bi = src[2]; // 903.3 = (29/3)^3, 7.787 = (29/3)^3/(29*4) float y, fy; @@ -2180,9 +2255,6 @@ struct Lab2RGBfloat bool srgb; float lThresh; float fThresh; - #if CV_SSE2 - bool haveSIMD; - #endif int blueIdx; 
}; @@ -2204,7 +2276,7 @@ struct Lab2RGBinteger Lab2RGBinteger( int _dstcn, int blueIdx, const float* _coeffs, const float* _whitept, bool srgb ) - : dstcn(_dstcn) + : dstcn(_dstcn), issRGB(srgb) { softdouble whitePt[3]; for(int i = 0; i < 3; i++) @@ -2227,8 +2299,6 @@ struct Lab2RGBinteger coeffs[i+3] = cvRound(lshift*c[1]*whitePt[i]); coeffs[i+(blueIdx^2)*3] = cvRound(lshift*c[2]*whitePt[i]); } - - tab = srgb ? sRGBInvGammaTab_b : linearInvGammaTab_b; } // L, a, b should be in their natural range @@ -2268,63 +2338,75 @@ struct Lab2RGBinteger go = max(0, min((int)INV_GAMMA_TAB_SIZE-1, go)); bo = max(0, min((int)INV_GAMMA_TAB_SIZE-1, bo)); - ro = tab[ro]; - go = tab[go]; - bo = tab[bo]; + if(issRGB) + { + ushort* tab = sRGBInvGammaTab_b; + ro = tab[ro]; + go = tab[go]; + bo = tab[bo]; + } + else + { + // rgb = (rgb*255) >> inv_gamma_shift + ro = ((ro << 8) - ro) >> inv_gamma_shift; + go = ((go << 8) - go) >> inv_gamma_shift; + bo = ((bo << 8) - bo) >> inv_gamma_shift; + } } - // L, a, b should be in their natural range - inline void processLabToXYZ(const v_uint8x16& lv, const v_uint8x16& av, const v_uint8x16& bv, - v_int32x4& xiv00, v_int32x4& yiv00, v_int32x4& ziv00, - v_int32x4& xiv01, v_int32x4& yiv01, v_int32x4& ziv01, - v_int32x4& xiv10, v_int32x4& yiv10, v_int32x4& ziv10, - v_int32x4& xiv11, v_int32x4& yiv11, v_int32x4& ziv11) const +#if CV_SIMD + inline void processLabToXYZ(const v_uint8& l, const v_uint8& a, const v_uint8& b, + v_int32 (&xiv)[4], v_int32 (&y)[4], v_int32 (&ziv)[4]) const { - v_uint16x8 lv0, lv1; - v_expand(lv, lv0, lv1); + v_uint16 l0, l1; + v_expand(l, l0, l1); + v_int32 lq[4]; + v_expand(v_reinterpret_as_s16(l0), lq[0], lq[1]); + v_expand(v_reinterpret_as_s16(l1), lq[2], lq[3]); + // Load Y and IFY values from lookup-table // y = LabToYF_b[L_value*2], ify = LabToYF_b[L_value*2 + 1] - // LabToYF_b[i*2 ] = y; // 2260 <= y <= BASE - // LabToYF_b[i*2+1] = ify; // 0 <= ify <= BASE - uint16_t CV_DECL_ALIGNED(16) v_lv0[8], v_lv1[8]; - 
v_store_aligned(v_lv0, (lv0 << 1)); v_store_aligned(v_lv1, (lv1 << 1)); - v_int16x8 ify0, ify1; + // LabToYF_b[i*2 ] = y; // 0 <= y <= BASE + // LabToYF_b[i*2+1] = ify; // 2260 <= ify <= BASE + v_int32 yf[4]; + v_int32 ify[4]; + v_int32 mask16 = vx_setall_s32(0xFFFF); + for(int k = 0; k < 4; k++) + { + yf[k] = v_lut((const int*)LabToYF_b, lq[k]); + y[k] = yf[k] & mask16; + ify[k] = v_reinterpret_as_s32(v_reinterpret_as_u32(yf[k]) >> 16); + } - yiv00 = v_int32x4(LabToYF_b[v_lv0[0] ], LabToYF_b[v_lv0[1] ], LabToYF_b[v_lv0[2] ], LabToYF_b[v_lv0[3] ]); - yiv01 = v_int32x4(LabToYF_b[v_lv0[4] ], LabToYF_b[v_lv0[5] ], LabToYF_b[v_lv0[6] ], LabToYF_b[v_lv0[7] ]); - yiv10 = v_int32x4(LabToYF_b[v_lv1[0] ], LabToYF_b[v_lv1[1] ], LabToYF_b[v_lv1[2] ], LabToYF_b[v_lv1[3] ]); - yiv11 = v_int32x4(LabToYF_b[v_lv1[4] ], LabToYF_b[v_lv1[5] ], LabToYF_b[v_lv1[6] ], LabToYF_b[v_lv1[7] ]); + v_int16 ify0, ify1; + ify0 = v_pack(ify[0], ify[1]); + ify1 = v_pack(ify[2], ify[3]); - ify0 = v_int16x8(LabToYF_b[v_lv0[0]+1], LabToYF_b[v_lv0[1]+1], LabToYF_b[v_lv0[2]+1], LabToYF_b[v_lv0[3]+1], - LabToYF_b[v_lv0[4]+1], LabToYF_b[v_lv0[5]+1], LabToYF_b[v_lv0[6]+1], LabToYF_b[v_lv0[7]+1]); - ify1 = v_int16x8(LabToYF_b[v_lv1[0]+1], LabToYF_b[v_lv1[1]+1], LabToYF_b[v_lv1[2]+1], LabToYF_b[v_lv1[3]+1], - LabToYF_b[v_lv1[4]+1], LabToYF_b[v_lv1[5]+1], LabToYF_b[v_lv1[6]+1], LabToYF_b[v_lv1[7]+1]); - - v_int16x8 adiv0, adiv1, bdiv0, bdiv1; - v_uint16x8 av0, av1, bv0, bv1; - v_expand(av, av0, av1); v_expand(bv, bv0, bv1); + v_int16 adiv0, adiv1, bdiv0, bdiv1; + v_uint16 a0, a1, b0, b1; + v_expand(a, a0, a1); v_expand(b, b0, b1); //adiv = aa*BASE/500 - 128*BASE/500, bdiv = bb*BASE/200 - 128*BASE/200; //approximations with reasonable precision - v_uint16x8 mulA = v_setall_u16(53687); - v_uint32x4 ma00, ma01, ma10, ma11; - v_uint32x4 addA = v_setall_u32(1 << 7); - v_mul_expand((av0 + (av0 << 2)), mulA, ma00, ma01); - v_mul_expand((av1 + (av1 << 2)), mulA, ma10, ma11); - adiv0 = 
v_reinterpret_as_s16(v_pack(((ma00 + addA) >> 13), ((ma01 + addA) >> 13))); - adiv1 = v_reinterpret_as_s16(v_pack(((ma10 + addA) >> 13), ((ma11 + addA) >> 13))); + v_uint16 mulA = vx_setall_u16(53687); + v_uint32 ma[4]; + v_uint32 addA = vx_setall_u32(1 << 7); + v_mul_expand((a0 + (a0 << 2)), mulA, ma[0], ma[1]); + v_mul_expand((a1 + (a1 << 2)), mulA, ma[2], ma[3]); + adiv0 = v_reinterpret_as_s16(v_pack(((ma[0] + addA) >> 13), ((ma[1] + addA) >> 13))); + adiv1 = v_reinterpret_as_s16(v_pack(((ma[2] + addA) >> 13), ((ma[3] + addA) >> 13))); - v_uint16x8 mulB = v_setall_u16(41943); - v_uint32x4 mb00, mb01, mb10, mb11; - v_uint32x4 addB = v_setall_u32(1 << 4); - v_mul_expand(bv0, mulB, mb00, mb01); - v_mul_expand(bv1, mulB, mb10, mb11); - bdiv0 = v_reinterpret_as_s16(v_pack((mb00 + addB) >> 9, (mb01 + addB) >> 9)); - bdiv1 = v_reinterpret_as_s16(v_pack((mb10 + addB) >> 9, (mb11 + addB) >> 9)); + v_uint16 mulB = vx_setall_u16(41943); + v_uint32 mb[4]; + v_uint32 addB = vx_setall_u32(1 << 4); + v_mul_expand(b0, mulB, mb[0], mb[1]); + v_mul_expand(b1, mulB, mb[2], mb[3]); + bdiv0 = v_reinterpret_as_s16(v_pack((mb[0] + addB) >> 9, (mb[1] + addB) >> 9)); + bdiv1 = v_reinterpret_as_s16(v_pack((mb[2] + addB) >> 9, (mb[3] + addB) >> 9)); // 0 <= adiv <= 8356, 0 <= bdiv <= 20890 /* x = ifxz[0]; y = y; z = ifxz[1]; */ - v_uint16x8 xiv0, xiv1, ziv0, ziv1; - v_int16x8 vSubA = v_setall_s16(-128*BASE/500 - minABvalue), vSubB = v_setall_s16(128*BASE/200-1 - minABvalue); + v_uint16 xiv0, xiv1, ziv0, ziv1; + v_int16 vSubA = vx_setall_s16(-128*BASE/500 - minABvalue), vSubB = vx_setall_s16(128*BASE/200-1 - minABvalue); // int ifxz[] = {ify + adiv, ify - bdiv}; // ifxz[k] = abToXZ_b[ifxz[k]-minABvalue]; @@ -2333,214 +2415,131 @@ struct Lab2RGBinteger ziv0 = v_reinterpret_as_u16(v_add_wrap(v_sub_wrap(ify0, bdiv0), vSubB)); ziv1 = v_reinterpret_as_u16(v_add_wrap(v_sub_wrap(ify1, bdiv1), vSubB)); - uint16_t CV_DECL_ALIGNED(16) v_x0[8], v_x1[8], v_z0[8], v_z1[8]; - v_store_aligned(v_x0, xiv0 
); v_store_aligned(v_x1, xiv1 ); - v_store_aligned(v_z0, ziv0 ); v_store_aligned(v_z1, ziv1 ); + v_uint32 uxiv[4], uziv[4]; + v_expand(xiv0, uxiv[0], uxiv[1]); + v_expand(xiv1, uxiv[2], uxiv[3]); + v_expand(ziv0, uziv[0], uziv[1]); + v_expand(ziv1, uziv[2], uziv[3]); - xiv00 = v_int32x4(abToXZ_b[v_x0[0]], abToXZ_b[v_x0[1]], abToXZ_b[v_x0[2]], abToXZ_b[v_x0[3]]); - xiv01 = v_int32x4(abToXZ_b[v_x0[4]], abToXZ_b[v_x0[5]], abToXZ_b[v_x0[6]], abToXZ_b[v_x0[7]]); - xiv10 = v_int32x4(abToXZ_b[v_x1[0]], abToXZ_b[v_x1[1]], abToXZ_b[v_x1[2]], abToXZ_b[v_x1[3]]); - xiv11 = v_int32x4(abToXZ_b[v_x1[4]], abToXZ_b[v_x1[5]], abToXZ_b[v_x1[6]], abToXZ_b[v_x1[7]]); - ziv00 = v_int32x4(abToXZ_b[v_z0[0]], abToXZ_b[v_z0[1]], abToXZ_b[v_z0[2]], abToXZ_b[v_z0[3]]); - ziv01 = v_int32x4(abToXZ_b[v_z0[4]], abToXZ_b[v_z0[5]], abToXZ_b[v_z0[6]], abToXZ_b[v_z0[7]]); - ziv10 = v_int32x4(abToXZ_b[v_z1[0]], abToXZ_b[v_z1[1]], abToXZ_b[v_z1[2]], abToXZ_b[v_z1[3]]); - ziv11 = v_int32x4(abToXZ_b[v_z1[4]], abToXZ_b[v_z1[5]], abToXZ_b[v_z1[6]], abToXZ_b[v_z1[7]]); + for(int k = 0; k < 4; k++) + { + xiv[k] = v_lut(abToXZ_b, v_reinterpret_as_s32(uxiv[k])); + ziv[k] = v_lut(abToXZ_b, v_reinterpret_as_s32(uziv[k])); + } // abToXZ_b[i-minABvalue] = v; // -1335 <= v <= 88231 } - - void operator()(const float* src, float* dst, int n) const - { - int dcn = dstcn; - float alpha = ColorChannel::max(); - - int i = 0; - -#if CV_SIMD128 - if(enablePackedLab) - { - v_float32x4 vldiv = v_setall_f32(256.f/100.0f); - v_float32x4 vf255 = v_setall_f32(255.f); - static const int nPixels = 16; - for(; i <= n*3-3*nPixels; i += 3*nPixels, dst += dcn*nPixels) - { - /* - int L = saturate_cast(src[i]*BASE/100.0f); - int a = saturate_cast(src[i + 1]*BASE/256); - int b = saturate_cast(src[i + 2]*BASE/256); - */ - v_float32x4 vl[4], va[4], vb[4]; - for(int k = 0; k < 4; k++) - { - v_load_deinterleave(src + i + k*3*4, vl[k], va[k], vb[k]); - vl[k] *= vldiv; - } - - v_int32x4 ivl[4], iva[4], ivb[4]; - for(int k = 0; k < 4; k++) - { 
- ivl[k] = v_round(vl[k]), iva[k] = v_round(va[k]), ivb[k] = v_round(vb[k]); - } - v_int16x8 ivl16[2], iva16[2], ivb16[2]; - ivl16[0] = v_pack(ivl[0], ivl[1]); iva16[0] = v_pack(iva[0], iva[1]); ivb16[0] = v_pack(ivb[0], ivb[1]); - ivl16[1] = v_pack(ivl[2], ivl[3]); iva16[1] = v_pack(iva[2], iva[3]); ivb16[1] = v_pack(ivb[2], ivb[3]); - v_uint8x16 ivl8, iva8, ivb8; - ivl8 = v_reinterpret_as_u8(v_pack(ivl16[0], ivl16[1])); - iva8 = v_reinterpret_as_u8(v_pack(iva16[0], iva16[1])); - ivb8 = v_reinterpret_as_u8(v_pack(ivb16[0], ivb16[1])); - - v_int32x4 ixv[4], iyv[4], izv[4]; - - processLabToXYZ(ivl8, iva8, ivb8, ixv[0], iyv[0], izv[0], - ixv[1], iyv[1], izv[1], - ixv[2], iyv[2], izv[2], - ixv[3], iyv[3], izv[3]); - /* - ro = CV_DESCALE(C0 * x + C1 * y + C2 * z, shift); - go = CV_DESCALE(C3 * x + C4 * y + C5 * z, shift); - bo = CV_DESCALE(C6 * x + C7 * y + C8 * z, shift); - */ - v_int32x4 C0 = v_setall_s32(coeffs[0]), C1 = v_setall_s32(coeffs[1]), C2 = v_setall_s32(coeffs[2]); - v_int32x4 C3 = v_setall_s32(coeffs[3]), C4 = v_setall_s32(coeffs[4]), C5 = v_setall_s32(coeffs[5]); - v_int32x4 C6 = v_setall_s32(coeffs[6]), C7 = v_setall_s32(coeffs[7]), C8 = v_setall_s32(coeffs[8]); - v_int32x4 descaleShift = v_setall_s32(1 << (shift-1)), tabsz = v_setall_s32((int)INV_GAMMA_TAB_SIZE-1); - for(int k = 0; k < 4; k++) - { - v_int32x4 i_r, i_g, i_b; - v_uint32x4 r_vecs, g_vecs, b_vecs; - i_r = (ixv[k]*C0 + iyv[k]*C1 + izv[k]*C2 + descaleShift) >> shift; - i_g = (ixv[k]*C3 + iyv[k]*C4 + izv[k]*C5 + descaleShift) >> shift; - i_b = (ixv[k]*C6 + iyv[k]*C7 + izv[k]*C8 + descaleShift) >> shift; - - //limit indices in table and then substitute - //ro = tab[ro]; go = tab[go]; bo = tab[bo]; - int32_t CV_DECL_ALIGNED(16) rshifts[4], gshifts[4], bshifts[4]; - v_int32x4 rs = v_max(v_setzero_s32(), v_min(tabsz, i_r)); - v_int32x4 gs = v_max(v_setzero_s32(), v_min(tabsz, i_g)); - v_int32x4 bs = v_max(v_setzero_s32(), v_min(tabsz, i_b)); - - v_store_aligned(rshifts, rs); - 
v_store_aligned(gshifts, gs); - v_store_aligned(bshifts, bs); - - r_vecs = v_uint32x4(tab[rshifts[0]], tab[rshifts[1]], tab[rshifts[2]], tab[rshifts[3]]); - g_vecs = v_uint32x4(tab[gshifts[0]], tab[gshifts[1]], tab[gshifts[2]], tab[gshifts[3]]); - b_vecs = v_uint32x4(tab[bshifts[0]], tab[bshifts[1]], tab[bshifts[2]], tab[bshifts[3]]); - - v_float32x4 v_r, v_g, v_b; - v_r = v_cvt_f32(v_reinterpret_as_s32(r_vecs))/vf255; - v_g = v_cvt_f32(v_reinterpret_as_s32(g_vecs))/vf255; - v_b = v_cvt_f32(v_reinterpret_as_s32(b_vecs))/vf255; - - if(dcn == 4) - { - v_store_interleave(dst + k*dcn*4, v_b, v_g, v_r, v_setall_f32(alpha)); - } - else // dcn == 3 - { - v_store_interleave(dst + k*dcn*4, v_b, v_g, v_r); - } - } - } - } #endif - for(; i < n*3; i += 3, dst += dcn) - { - int ro, go, bo; - process((uchar)(src[i + 0]*255.f/100.f), (uchar)src[i + 1], (uchar)src[i + 2], ro, go, bo); - - dst[0] = bo/255.f; - dst[1] = go/255.f; - dst[2] = ro/255.f; - if(dcn == 4) - dst[3] = alpha; - } - } - void operator()(const uchar* src, uchar* dst, int n) const { + CV_INSTRUMENT_REGION(); + int i, dcn = dstcn; uchar alpha = ColorChannel::max(); + i = 0; -#if CV_SIMD128 +#if CV_SIMD if(enablePackedLab) { - static const int nPixels = 8*2; - for(; i <= n*3-3*nPixels; i += 3*nPixels, dst += dcn*nPixels) + bool srgb = issRGB; + ushort* tab = sRGBInvGammaTab_b; + const int vsize = v_uint8::nlanes; + v_uint8 valpha = vx_setall_u8(alpha); + v_int32 vc[9]; + for(int k = 0; k < 9; k++) { - /* - int L = src[i + 0]; - int a = src[i + 1]; - int b = src[i + 2]; - */ - v_uint8x16 u8l, u8a, u8b; - v_load_deinterleave(src + i, u8l, u8a, u8b); + vc[k] = vx_setall_s32(coeffs[k]); + } + const int descaleShift = 1 << (shift-1); + v_int32 vdescale = vx_setall_s32(descaleShift); + for ( ; i <= n-vsize; + i += vsize, src += 3*vsize, dst += dcn*vsize) + { + v_uint8 l, a, b; + v_load_deinterleave(src, l, a, b); - v_int32x4 xiv[4], yiv[4], ziv[4]; - processLabToXYZ(u8l, u8a, u8b, xiv[0], yiv[0], ziv[0], - xiv[1], 
yiv[1], ziv[1], - xiv[2], yiv[2], ziv[2], - xiv[3], yiv[3], ziv[3]); - /* - ro = CV_DESCALE(C0 * x + C1 * y + C2 * z, shift); - go = CV_DESCALE(C3 * x + C4 * y + C5 * z, shift); - bo = CV_DESCALE(C6 * x + C7 * y + C8 * z, shift); - */ - v_int32x4 C0 = v_setall_s32(coeffs[0]), C1 = v_setall_s32(coeffs[1]), C2 = v_setall_s32(coeffs[2]); - v_int32x4 C3 = v_setall_s32(coeffs[3]), C4 = v_setall_s32(coeffs[4]), C5 = v_setall_s32(coeffs[5]); - v_int32x4 C6 = v_setall_s32(coeffs[6]), C7 = v_setall_s32(coeffs[7]), C8 = v_setall_s32(coeffs[8]); - v_int32x4 descaleShift = v_setall_s32(1 << (shift-1)); - v_int32x4 tabsz = v_setall_s32((int)INV_GAMMA_TAB_SIZE-1); - v_uint32x4 r_vecs[4], g_vecs[4], b_vecs[4]; + v_int32 xq[4], yq[4], zq[4]; + processLabToXYZ(l, a, b, xq, yq, zq); + + // x, y, z exceed 2^16 so we cannot do v_mul_expand or v_dotprod + v_int32 rq[4], gq[4], bq[4]; for(int k = 0; k < 4; k++) { - v_int32x4 i_r, i_g, i_b; - i_r = (xiv[k]*C0 + yiv[k]*C1 + ziv[k]*C2 + descaleShift) >> shift; - i_g = (xiv[k]*C3 + yiv[k]*C4 + ziv[k]*C5 + descaleShift) >> shift; - i_b = (xiv[k]*C6 + yiv[k]*C7 + ziv[k]*C8 + descaleShift) >> shift; - - //limit indices in table and then substitute - //ro = tab[ro]; go = tab[go]; bo = tab[bo]; - int32_t CV_DECL_ALIGNED(16) rshifts[4], gshifts[4], bshifts[4]; - v_int32x4 rs = v_max(v_setzero_s32(), v_min(tabsz, i_r)); - v_int32x4 gs = v_max(v_setzero_s32(), v_min(tabsz, i_g)); - v_int32x4 bs = v_max(v_setzero_s32(), v_min(tabsz, i_b)); - - v_store_aligned(rshifts, rs); - v_store_aligned(gshifts, gs); - v_store_aligned(bshifts, bs); - - r_vecs[k] = v_uint32x4(tab[rshifts[0]], tab[rshifts[1]], tab[rshifts[2]], tab[rshifts[3]]); - g_vecs[k] = v_uint32x4(tab[gshifts[0]], tab[gshifts[1]], tab[gshifts[2]], tab[gshifts[3]]); - b_vecs[k] = v_uint32x4(tab[bshifts[0]], tab[bshifts[1]], tab[bshifts[2]], tab[bshifts[3]]); + rq[k] = (vc[0] * xq[k] + vc[1] * yq[k] + vc[2] * zq[k] + vdescale) >> shift; + gq[k] = (vc[3] * xq[k] + vc[4] * yq[k] + vc[5] * zq[k] + 
vdescale) >> shift; + bq[k] = (vc[6] * xq[k] + vc[7] * yq[k] + vc[8] * zq[k] + vdescale) >> shift; } - v_uint16x8 u_rvec0 = v_pack(r_vecs[0], r_vecs[1]), u_rvec1 = v_pack(r_vecs[2], r_vecs[3]); - v_uint16x8 u_gvec0 = v_pack(g_vecs[0], g_vecs[1]), u_gvec1 = v_pack(g_vecs[2], g_vecs[3]); - v_uint16x8 u_bvec0 = v_pack(b_vecs[0], b_vecs[1]), u_bvec1 = v_pack(b_vecs[2], b_vecs[3]); - - v_uint8x16 u8_b, u8_g, u8_r; - u8_b = v_pack(u_bvec0, u_bvec1); - u8_g = v_pack(u_gvec0, u_gvec1); - u8_r = v_pack(u_rvec0, u_rvec1); - - if(dcn == 4) + //limit indices in table and then substitute + //ro = tab[ro]; go = tab[go]; bo = tab[bo]; + v_int32 z = vx_setzero_s32(), up = vx_setall_s32((int)INV_GAMMA_TAB_SIZE-1); + for (int k = 0; k < 4; k++) { - v_store_interleave(dst, u8_b, u8_g, u8_r, v_setall_u8(alpha)); + rq[k] = v_max(z, v_min(up, rq[k])); + gq[k] = v_max(z, v_min(up, gq[k])); + bq[k] = v_max(z, v_min(up, bq[k])); + } + + v_uint16 rgb[6]; + if(srgb) + { + // [RRR... , GGG... , BBB...] + int32_t CV_DECL_ALIGNED(CV_SIMD_WIDTH) vidx[vsize*3]; + for (int k = 0; k < 4; k++) + v_store_aligned(vidx + 0*vsize + k*vsize/4, rq[k]); + for (int k = 0; k < 4; k++) + v_store_aligned(vidx + 1*vsize + k*vsize/4, gq[k]); + for (int k = 0; k < 4; k++) + v_store_aligned(vidx + 2*vsize + k*vsize/4, bq[k]); + + rgb[0] = vx_lut(tab, vidx + 0*vsize/2); + rgb[1] = vx_lut(tab, vidx + 1*vsize/2); + rgb[2] = vx_lut(tab, vidx + 2*vsize/2); + rgb[3] = vx_lut(tab, vidx + 3*vsize/2); + rgb[4] = vx_lut(tab, vidx + 4*vsize/2); + rgb[5] = vx_lut(tab, vidx + 5*vsize/2); } else { - v_store_interleave(dst, u8_b, u8_g, u8_r); + // rgb = (rgb*255) >> inv_gamma_shift + for(int k = 0; k < 4; k++) + { + rq[k] = ((rq[k] << 8) - rq[k]) >> inv_gamma_shift; + gq[k] = ((gq[k] << 8) - gq[k]) >> inv_gamma_shift; + bq[k] = ((bq[k] << 8) - bq[k]) >> inv_gamma_shift; + } + rgb[0] = v_reinterpret_as_u16(v_pack(rq[0], rq[1])); + rgb[1] = v_reinterpret_as_u16(v_pack(rq[2], rq[3])); + rgb[2] = v_reinterpret_as_u16(v_pack(gq[0], 
gq[1])); + rgb[3] = v_reinterpret_as_u16(v_pack(gq[2], gq[3])); + rgb[4] = v_reinterpret_as_u16(v_pack(bq[0], bq[1])); + rgb[5] = v_reinterpret_as_u16(v_pack(bq[2], bq[3])); + } + + v_uint16 R0, R1, G0, G1, B0, B1; + + v_uint8 R, G, B; + R = v_pack(rgb[0], rgb[1]); + G = v_pack(rgb[2], rgb[3]); + B = v_pack(rgb[4], rgb[5]); + + if(dcn == 4) + { + v_store_interleave(dst, B, G, R, valpha); + } + else // dcn == 3 + { + v_store_interleave(dst, B, G, R); } } } #endif - for (; i < n*3; i += 3, dst += dcn) + for (; i < n; i++, src += 3, dst += dcn) { int ro, go, bo; - process(src[i + 0], src[i + 1], src[i + 2], ro, go, bo); + process(src[0], src[1], src[2], ro, go, bo); dst[0] = saturate_cast(bo); dst[1] = saturate_cast(go); @@ -2552,7 +2551,7 @@ struct Lab2RGBinteger int dstcn; int coeffs[9]; - ushort* tab; + bool issRGB; }; @@ -2582,63 +2581,12 @@ struct Lab2RGB_b Lab2RGB_b( int _dstcn, int _blueIdx, const float* _coeffs, const float* _whitept, bool _srgb ) : fcvt(3, _blueIdx, _coeffs, _whitept, _srgb ), icvt(_dstcn, _blueIdx, _coeffs, _whitept, _srgb), dstcn(_dstcn) - { - #if CV_NEON - v_scale_inv = vdupq_n_f32(100.f/255.f); - v_scale = vdupq_n_f32(255.f); - v_alpha = vdup_n_u8(ColorChannel::max()); - v_128 = vdupq_n_f32(128.0f); - #elif CV_SSE2 - v_scale = _mm_set1_ps(255.f); - v_alpha = _mm_set1_ps(ColorChannel::max()); - v_zero = _mm_setzero_si128(); - haveSIMD = checkHardwareSupport(CV_CPU_SSE2); - #endif - } - - #if CV_SSE2 - // 16s x 8 - void process(__m128i v_r, __m128i v_g, __m128i v_b, - const __m128& v_coeffs_, const __m128& v_res_, - float * buf) const - { - __m128 v_r0 = _mm_cvtepi32_ps(_mm_unpacklo_epi16(v_r, v_zero)); - __m128 v_g0 = _mm_cvtepi32_ps(_mm_unpacklo_epi16(v_g, v_zero)); - __m128 v_b0 = _mm_cvtepi32_ps(_mm_unpacklo_epi16(v_b, v_zero)); - - __m128 v_r1 = _mm_cvtepi32_ps(_mm_unpackhi_epi16(v_r, v_zero)); - __m128 v_g1 = _mm_cvtepi32_ps(_mm_unpackhi_epi16(v_g, v_zero)); - __m128 v_b1 = _mm_cvtepi32_ps(_mm_unpackhi_epi16(v_b, v_zero)); - - __m128 
v_coeffs = v_coeffs_; - __m128 v_res = v_res_; - - v_r0 = _mm_sub_ps(_mm_mul_ps(v_r0, v_coeffs), v_res); - v_g1 = _mm_sub_ps(_mm_mul_ps(v_g1, v_coeffs), v_res); - - v_coeffs = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(v_coeffs), 0x49)); - v_res = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(v_res), 0x49)); - - v_r1 = _mm_sub_ps(_mm_mul_ps(v_r1, v_coeffs), v_res); - v_b0 = _mm_sub_ps(_mm_mul_ps(v_b0, v_coeffs), v_res); - - v_coeffs = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(v_coeffs), 0x49)); - v_res = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(v_res), 0x49)); - - v_g0 = _mm_sub_ps(_mm_mul_ps(v_g0, v_coeffs), v_res); - v_b1 = _mm_sub_ps(_mm_mul_ps(v_b1, v_coeffs), v_res); - - _mm_store_ps(buf, v_r0); - _mm_store_ps(buf + 4, v_r1); - _mm_store_ps(buf + 8, v_g0); - _mm_store_ps(buf + 12, v_g1); - _mm_store_ps(buf + 16, v_b0); - _mm_store_ps(buf + 20, v_b1); - } - #endif + { } void operator()(const uchar* src, uchar* dst, int n) const { + CV_INSTRUMENT_REGION(); + if(enableBitExactness) { icvt(src, dst, n); @@ -2647,11 +2595,31 @@ struct Lab2RGB_b int i, j, dcn = dstcn; uchar alpha = ColorChannel::max(); +#if CV_SIMD + float CV_DECL_ALIGNED(CV_SIMD_WIDTH) buf[3*BLOCK_SIZE]; +#else float CV_DECL_ALIGNED(16) buf[3*BLOCK_SIZE]; - #if CV_SSE2 - __m128 v_coeffs = _mm_set_ps(100.f/255.f, 1.f, 1.f, 100.f/255.f); - __m128 v_res = _mm_set_ps(0.f, 128.f, 128.f, 0.f); - #endif +#endif + + static const softfloat fl = softfloat(100)/f255; + +#if CV_SIMD + const int fsize = v_float32::nlanes; + v_float32 vl = vx_setall_f32((float)fl); + v_float32 va = vx_setall_f32(1.f); + v_float32 vb = vx_setall_f32(1.f); + v_float32 vaLow = vx_setall_f32(-128.f), vbLow = vx_setall_f32(-128.f); + //TODO: fix that when v_interleave is available + float CV_DECL_ALIGNED(CV_SIMD_WIDTH) interTmpM[fsize*3], interTmpA[fsize*3]; + v_store_interleave(interTmpM, vl, va, vb); + v_store_interleave(interTmpA, vx_setzero_f32(), vaLow, vbLow); + v_float32 mluv[3], aluv[3]; + 
for(int k = 0; k < 3; k++) + { + mluv[k] = vx_load_aligned(interTmpM + k*fsize); + aluv[k] = vx_load_aligned(interTmpA + k*fsize); + } +#endif i = 0; for(; i < n; i += BLOCK_SIZE, src += BLOCK_SIZE*3 ) @@ -2659,129 +2627,89 @@ struct Lab2RGB_b int dn = std::min(n - i, (int)BLOCK_SIZE); j = 0; - #if CV_NEON - for ( ; j <= (dn - 8) * 3; j += 24) +#if CV_SIMD + const int vsize = v_uint8::nlanes; + for( ; j <= (dn - vsize)*3; j += 3*vsize ) { - uint8x8x3_t v_src = vld3_u8(src + j); - uint16x8_t v_t0 = vmovl_u8(v_src.val[0]), - v_t1 = vmovl_u8(v_src.val[1]), - v_t2 = vmovl_u8(v_src.val[2]); + v_uint8 s0, s1, s2; + s0 = vx_load(src + j + 0*vsize); + s1 = vx_load(src + j + 1*vsize); + s2 = vx_load(src + j + 2*vsize); - float32x4x3_t v_dst; - v_dst.val[0] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_t0))), v_scale_inv); - v_dst.val[1] = vsubq_f32(vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_t1))), v_128); - v_dst.val[2] = vsubq_f32(vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_t2))), v_128); - vst3q_f32(buf + j, v_dst); - - v_dst.val[0] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_t0))), v_scale_inv); - v_dst.val[1] = vsubq_f32(vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_t1))), v_128); - v_dst.val[2] = vsubq_f32(vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_t2))), v_128); - vst3q_f32(buf + j + 12, v_dst); - } - #elif CV_SSE2 - if (haveSIMD) - { - for ( ; j <= (dn - 8) * 3; j += 24) + v_uint16 ss[6]; + v_expand(s0, ss[0], ss[1]); + v_expand(s1, ss[2], ss[3]); + v_expand(s2, ss[4], ss[5]); + v_int32 vs[12]; + for(int k = 0; k < 6; k++) { - __m128i v_src0 = _mm_loadu_si128((__m128i const *)(src + j)); - __m128i v_src1 = _mm_loadl_epi64((__m128i const *)(src + j + 16)); + v_expand(v_reinterpret_as_s16(ss[k]), vs[k*2+0], vs[k*2+1]); + } - process(_mm_unpacklo_epi8(v_src0, v_zero), - _mm_unpackhi_epi8(v_src0, v_zero), - _mm_unpacklo_epi8(v_src1, v_zero), - v_coeffs, v_res, - buf + j); + for(int bufp = 0; bufp < 12; bufp++) + { + v_store_aligned(buf + j + bufp, 
v_muladd(v_cvt_f32(vs[bufp]), mluv[bufp%3], aluv[bufp%3])); } } - #endif +#endif for( ; j < dn*3; j += 3 ) { - buf[j] = src[j]*(100.f/255.f); - buf[j+1] = (float)(src[j+1] - 128); - buf[j+2] = (float)(src[j+2] - 128); + buf[j] = src[j]*((float)fl); + buf[j+1] = (float)(src[j+1] - 128.f); + buf[j+2] = (float)(src[j+2] - 128.f); } + fcvt(buf, buf, dn); + j = 0; - #if CV_NEON - for ( ; j <= (dn - 8) * 3; j += 24, dst += dcn * 8) +#if CV_SIMD + static const int nBlock = 4*fsize; + v_float32 v255 = vx_setall_f32(255.f); + if(dcn == 4) { - float32x4x3_t v_src0 = vld3q_f32(buf + j), v_src1 = vld3q_f32(buf + j + 12); - uint8x8_t v_dst0 = vqmovn_u16(vcombine_u16(vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src0.val[0], v_scale))), - vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src1.val[0], v_scale))))); - uint8x8_t v_dst1 = vqmovn_u16(vcombine_u16(vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src0.val[1], v_scale))), - vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src1.val[1], v_scale))))); - uint8x8_t v_dst2 = vqmovn_u16(vcombine_u16(vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src0.val[2], v_scale))), - vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src1.val[2], v_scale))))); + v_uint8 valpha = vx_setall_u8(alpha); + for( ; j <= (dn-nBlock)*3; + j += nBlock*3, dst += nBlock) + { + v_float32 vf[4*3]; + for(int k = 0; k < 4; k++) + { + v_load_deinterleave(buf + j, vf[k*3+0], vf[k*3+1], vf[k*3+2]); + } - if (dcn == 4) - { - uint8x8x4_t v_dst; - v_dst.val[0] = v_dst0; - v_dst.val[1] = v_dst1; - v_dst.val[2] = v_dst2; - v_dst.val[3] = v_alpha; - vst4_u8(dst, v_dst); - } - else - { - uint8x8x3_t v_dst; - v_dst.val[0] = v_dst0; - v_dst.val[1] = v_dst1; - v_dst.val[2] = v_dst2; - vst3_u8(dst, v_dst); + v_int32 vi[4*3]; + for(int k = 0; k < 4*3; k++) + { + vi[k] = v_round(vf[k]*v255); + } + + v_uint8 rgb[3]; + for(int k = 0; k < 3; k++) + { + rgb[k] = v_pack_u(v_pack(vi[0*3+k], vi[1*3+k]), + v_pack(vi[2*3+k], vi[3*3+k])); + } + + v_store_interleave(dst, rgb[0], rgb[1], rgb[2], valpha); } } - #elif CV_SSE2 - if 
(dcn == 3 && haveSIMD) + else // dcn == 3 { - for ( ; j <= (dn * 3 - 16); j += 16, dst += 16) + for(; j < dn*3 - nBlock; j += nBlock, dst += nBlock) { - __m128 v_src0 = _mm_mul_ps(_mm_load_ps(buf + j), v_scale); - __m128 v_src1 = _mm_mul_ps(_mm_load_ps(buf + j + 4), v_scale); - __m128 v_src2 = _mm_mul_ps(_mm_load_ps(buf + j + 8), v_scale); - __m128 v_src3 = _mm_mul_ps(_mm_load_ps(buf + j + 12), v_scale); - - __m128i v_dst0 = _mm_packs_epi32(_mm_cvtps_epi32(v_src0), - _mm_cvtps_epi32(v_src1)); - __m128i v_dst1 = _mm_packs_epi32(_mm_cvtps_epi32(v_src2), - _mm_cvtps_epi32(v_src3)); - - _mm_storeu_si128((__m128i *)dst, _mm_packus_epi16(v_dst0, v_dst1)); + v_float32 vf[4]; + v_int32 vi[4]; + for(int k = 0; k < 4; k++) + { + vf[k] = vx_load_aligned(buf + j + k*fsize); + vi[k] = v_round(vf[k]*v255); + } + v_store(dst, v_pack_u(v_pack(vi[0], vi[1]), v_pack(vi[2], vi[3]))); } - - int jr = j % 3; - if (jr) - dst -= jr, j -= jr; } - else if (dcn == 4 && haveSIMD) - { - for ( ; j <= (dn * 3 - 12); j += 12, dst += 16) - { - __m128 v_buf0 = _mm_mul_ps(_mm_load_ps(buf + j), v_scale); - __m128 v_buf1 = _mm_mul_ps(_mm_load_ps(buf + j + 4), v_scale); - __m128 v_buf2 = _mm_mul_ps(_mm_load_ps(buf + j + 8), v_scale); - - __m128 v_ba0 = _mm_unpackhi_ps(v_buf0, v_alpha); - __m128 v_ba1 = _mm_unpacklo_ps(v_buf2, v_alpha); - - __m128i v_src0 = _mm_cvtps_epi32(_mm_shuffle_ps(v_buf0, v_ba0, 0x44)); - __m128i v_src1 = _mm_shuffle_epi32(_mm_cvtps_epi32(_mm_shuffle_ps(v_ba0, v_buf1, 0x4e)), 0x78); - __m128i v_src2 = _mm_cvtps_epi32(_mm_shuffle_ps(v_buf1, v_ba1, 0x4e)); - __m128i v_src3 = _mm_shuffle_epi32(_mm_cvtps_epi32(_mm_shuffle_ps(v_ba1, v_buf2, 0xee)), 0x78); - - __m128i v_dst0 = _mm_packs_epi32(v_src0, v_src1); - __m128i v_dst1 = _mm_packs_epi32(v_src2, v_src3); - - _mm_storeu_si128((__m128i *)dst, _mm_packus_epi16(v_dst0, v_dst1)); - } - - int jr = j % 3; - if (jr) - dst -= jr, j -= jr; - } - #endif +#endif for( ; j < dn*3; j += 3, dst += dcn ) { @@ -2796,15 +2724,6 @@ struct Lab2RGB_b 
Lab2RGBfloat fcvt; Lab2RGBinteger icvt; - #if CV_NEON - float32x4_t v_scale, v_scale_inv, v_128; - uint8x8_t v_alpha; - #elif CV_SSE2 - __m128 v_scale; - __m128 v_alpha; - __m128i v_zero; - bool haveSIMD; - #endif int dstcn; }; @@ -2818,17 +2737,16 @@ struct RGB2Luvfloat const float* whitept, bool _srgb ) : srccn(_srccn), srgb(_srgb) { - volatile int i; initLabTabs(); softdouble whitePt[3]; - for( i = 0; i < 3; i++ ) + for(int i = 0; i < 3; i++ ) if(whitept) whitePt[i] = softdouble(whitept[i]); else whitePt[i] = D65[i]; - for( i = 0; i < 3; i++ ) + for(int i = 0; i < 3; i++ ) { for(int j = 0; j < 3; j++) if(_coeffs) @@ -2851,241 +2769,105 @@ struct RGB2Luvfloat un = d*softfloat(13*4)*whitePt[0]; vn = d*softfloat(13*9)*whitePt[1]; - #if CV_SSE2 - haveSIMD = checkHardwareSupport(CV_CPU_SSE2); - #endif - CV_Assert(whitePt[1] == softdouble::one()); } - #if CV_NEON - void process(float32x4x3_t& v_src) const - { - float32x4_t v_x = vmlaq_f32(vmlaq_f32(vmulq_f32(v_src.val[0], vdupq_n_f32(coeffs[0])), v_src.val[1], vdupq_n_f32(coeffs[1])), v_src.val[2], vdupq_n_f32(coeffs[2])); - float32x4_t v_y = vmlaq_f32(vmlaq_f32(vmulq_f32(v_src.val[0], vdupq_n_f32(coeffs[3])), v_src.val[1], vdupq_n_f32(coeffs[4])), v_src.val[2], vdupq_n_f32(coeffs[5])); - float32x4_t v_z = vmlaq_f32(vmlaq_f32(vmulq_f32(v_src.val[0], vdupq_n_f32(coeffs[6])), v_src.val[1], vdupq_n_f32(coeffs[7])), v_src.val[2], vdupq_n_f32(coeffs[8])); - - v_src.val[0] = vmulq_f32(v_y, vdupq_n_f32(LabCbrtTabScale)); - splineInterpolate(v_src.val[0], LabCbrtTab, LAB_CBRT_TAB_SIZE); - - v_src.val[0] = vmlaq_f32(vdupq_n_f32(-16.f), v_src.val[0], vdupq_n_f32(116.f)); - - float32x4_t v_div = vmaxq_f32(vmlaq_f32(vmlaq_f32(v_x, vdupq_n_f32(15.f), v_y), vdupq_n_f32(3.f), v_z), vdupq_n_f32(FLT_EPSILON)); - float32x4_t v_reciprocal = vrecpeq_f32(v_div); - v_reciprocal = vmulq_f32(vrecpsq_f32(v_div, v_reciprocal), v_reciprocal); - v_reciprocal = vmulq_f32(vrecpsq_f32(v_div, v_reciprocal), v_reciprocal); - float32x4_t v_d = 
vmulq_f32(vdupq_n_f32(52.f), v_reciprocal); - - v_src.val[1] = vmulq_f32(v_src.val[0], vmlaq_f32(vdupq_n_f32(-un), v_x, v_d)); - v_src.val[2] = vmulq_f32(v_src.val[0], vmlaq_f32(vdupq_n_f32(-vn), vmulq_f32(vdupq_n_f32(2.25f), v_y), v_d)); - } - #elif CV_SSE2 - void process(__m128& v_r0, __m128& v_r1, __m128& v_g0, - __m128& v_g1, __m128& v_b0, __m128& v_b1) const - { - __m128 v_x0 = _mm_mul_ps(v_r0, _mm_set1_ps(coeffs[0])); - __m128 v_x1 = _mm_mul_ps(v_r1, _mm_set1_ps(coeffs[0])); - __m128 v_y0 = _mm_mul_ps(v_r0, _mm_set1_ps(coeffs[3])); - __m128 v_y1 = _mm_mul_ps(v_r1, _mm_set1_ps(coeffs[3])); - __m128 v_z0 = _mm_mul_ps(v_r0, _mm_set1_ps(coeffs[6])); - __m128 v_z1 = _mm_mul_ps(v_r1, _mm_set1_ps(coeffs[6])); - - v_x0 = _mm_add_ps(v_x0, _mm_mul_ps(v_g0, _mm_set1_ps(coeffs[1]))); - v_x1 = _mm_add_ps(v_x1, _mm_mul_ps(v_g1, _mm_set1_ps(coeffs[1]))); - v_y0 = _mm_add_ps(v_y0, _mm_mul_ps(v_g0, _mm_set1_ps(coeffs[4]))); - v_y1 = _mm_add_ps(v_y1, _mm_mul_ps(v_g1, _mm_set1_ps(coeffs[4]))); - v_z0 = _mm_add_ps(v_z0, _mm_mul_ps(v_g0, _mm_set1_ps(coeffs[7]))); - v_z1 = _mm_add_ps(v_z1, _mm_mul_ps(v_g1, _mm_set1_ps(coeffs[7]))); - - v_x0 = _mm_add_ps(v_x0, _mm_mul_ps(v_b0, _mm_set1_ps(coeffs[2]))); - v_x1 = _mm_add_ps(v_x1, _mm_mul_ps(v_b1, _mm_set1_ps(coeffs[2]))); - v_y0 = _mm_add_ps(v_y0, _mm_mul_ps(v_b0, _mm_set1_ps(coeffs[5]))); - v_y1 = _mm_add_ps(v_y1, _mm_mul_ps(v_b1, _mm_set1_ps(coeffs[5]))); - v_z0 = _mm_add_ps(v_z0, _mm_mul_ps(v_b0, _mm_set1_ps(coeffs[8]))); - v_z1 = _mm_add_ps(v_z1, _mm_mul_ps(v_b1, _mm_set1_ps(coeffs[8]))); - - __m128 v_l0 = _mm_mul_ps(v_y0, _mm_set1_ps(LabCbrtTabScale)); - __m128 v_l1 = _mm_mul_ps(v_y1, _mm_set1_ps(LabCbrtTabScale)); - splineInterpolate(v_l0, LabCbrtTab, LAB_CBRT_TAB_SIZE); - splineInterpolate(v_l1, LabCbrtTab, LAB_CBRT_TAB_SIZE); - - v_l0 = _mm_mul_ps(v_l0, _mm_set1_ps(116.0f)); - v_l1 = _mm_mul_ps(v_l1, _mm_set1_ps(116.0f)); - v_r0 = _mm_sub_ps(v_l0, _mm_set1_ps(16.0f)); - v_r1 = _mm_sub_ps(v_l1, _mm_set1_ps(16.0f)); - - v_z0 = 
_mm_mul_ps(v_z0, _mm_set1_ps(3.0f)); - v_z1 = _mm_mul_ps(v_z1, _mm_set1_ps(3.0f)); - v_z0 = _mm_add_ps(v_z0, v_x0); - v_z1 = _mm_add_ps(v_z1, v_x1); - v_z0 = _mm_add_ps(v_z0, _mm_mul_ps(v_y0, _mm_set1_ps(15.0f))); - v_z1 = _mm_add_ps(v_z1, _mm_mul_ps(v_y1, _mm_set1_ps(15.0f))); - v_z0 = _mm_max_ps(v_z0, _mm_set1_ps(FLT_EPSILON)); - v_z1 = _mm_max_ps(v_z1, _mm_set1_ps(FLT_EPSILON)); - __m128 v_d0 = _mm_div_ps(_mm_set1_ps(52.0f), v_z0); - __m128 v_d1 = _mm_div_ps(_mm_set1_ps(52.0f), v_z1); - - v_x0 = _mm_mul_ps(v_x0, v_d0); - v_x1 = _mm_mul_ps(v_x1, v_d1); - v_x0 = _mm_sub_ps(v_x0, _mm_set1_ps(un)); - v_x1 = _mm_sub_ps(v_x1, _mm_set1_ps(un)); - v_g0 = _mm_mul_ps(v_x0, v_r0); - v_g1 = _mm_mul_ps(v_x1, v_r1); - - v_y0 = _mm_mul_ps(v_y0, v_d0); - v_y1 = _mm_mul_ps(v_y1, v_d1); - v_y0 = _mm_mul_ps(v_y0, _mm_set1_ps(2.25f)); - v_y1 = _mm_mul_ps(v_y1, _mm_set1_ps(2.25f)); - v_y0 = _mm_sub_ps(v_y0, _mm_set1_ps(vn)); - v_y1 = _mm_sub_ps(v_y1, _mm_set1_ps(vn)); - v_b0 = _mm_mul_ps(v_y0, v_r0); - v_b1 = _mm_mul_ps(v_y1, v_r1); - } - #endif - void operator()(const float* src, float* dst, int n) const { + CV_INSTRUMENT_REGION(); + int i = 0, scn = srccn; float gscale = GammaTabScale; const float* gammaTab = srgb ? sRGBGammaTab : 0; float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5], C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8]; - n *= 3; - #if CV_NEON - if (scn == 3) +#if CV_SIMD + const int vsize = v_float32::nlanes; + const int nrepeats = vsize == 4 ? 
2 : 1; + for( ; i <= n-vsize*nrepeats; + i+= vsize*nrepeats, src += scn*vsize*nrepeats, dst += 3*vsize*nrepeats) { - for( ; i <= n - 12; i += 12, src += scn * 4 ) + v_float32 R[nrepeats], G[nrepeats], B[nrepeats], A; + if(scn == 4) { - float32x4x3_t v_src = vld3q_f32(src); - - v_src.val[0] = vmaxq_f32(v_src.val[0], vdupq_n_f32(0)); - v_src.val[1] = vmaxq_f32(v_src.val[1], vdupq_n_f32(0)); - v_src.val[2] = vmaxq_f32(v_src.val[2], vdupq_n_f32(0)); - - v_src.val[0] = vminq_f32(v_src.val[0], vdupq_n_f32(1)); - v_src.val[1] = vminq_f32(v_src.val[1], vdupq_n_f32(1)); - v_src.val[2] = vminq_f32(v_src.val[2], vdupq_n_f32(1)); - - if( gammaTab ) + for (int k = 0; k < nrepeats; k++) { - v_src.val[0] = vmulq_f32(v_src.val[0], vdupq_n_f32(gscale)); - v_src.val[1] = vmulq_f32(v_src.val[1], vdupq_n_f32(gscale)); - v_src.val[2] = vmulq_f32(v_src.val[2], vdupq_n_f32(gscale)); - splineInterpolate(v_src.val[0], gammaTab, GAMMA_TAB_SIZE); - splineInterpolate(v_src.val[1], gammaTab, GAMMA_TAB_SIZE); - splineInterpolate(v_src.val[2], gammaTab, GAMMA_TAB_SIZE); + v_load_deinterleave(src + k*4*vsize, R[k], G[k], B[k], A); + } + } + else // scn == 3 + { + for (int k = 0; k < nrepeats; k++) + { + v_load_deinterleave(src + k*3*vsize, R[k], G[k], B[k]); + } + } + + v_float32 zero = vx_setzero_f32(), one = vx_setall_f32(1.f); + for (int k = 0; k < nrepeats; k++) + { + R[k] = v_min(v_max(R[k], zero), one); + G[k] = v_min(v_max(G[k], zero), one); + B[k] = v_min(v_max(B[k], zero), one); + } + + if(gammaTab) + { + v_float32 vgscale = vx_setall_f32(gscale); + for (int k = 0; k < nrepeats; k++) + { + R[k] *= vgscale; + G[k] *= vgscale; + B[k] *= vgscale; } - process(v_src); + for (int k = 0; k < nrepeats; k++) + { + R[k] = splineInterpolate(R[k], gammaTab, GAMMA_TAB_SIZE); + G[k] = splineInterpolate(G[k], gammaTab, GAMMA_TAB_SIZE); + B[k] = splineInterpolate(B[k], gammaTab, GAMMA_TAB_SIZE); + } + } - vst3q_f32(dst + i, v_src); + v_float32 X[nrepeats], Y[nrepeats], Z[nrepeats]; + v_float32 vc0 = 
vx_setall_f32(C0), vc1 = vx_setall_f32(C1), vc2 = vx_setall_f32(C2); + v_float32 vc3 = vx_setall_f32(C3), vc4 = vx_setall_f32(C4), vc5 = vx_setall_f32(C5); + v_float32 vc6 = vx_setall_f32(C6), vc7 = vx_setall_f32(C7), vc8 = vx_setall_f32(C8); + for (int k = 0; k < nrepeats; k++) + { + X[k] = v_fma(R[k], vc0, v_fma(G[k], vc1, B[k]*vc2)); + Y[k] = v_fma(R[k], vc3, v_fma(G[k], vc4, B[k]*vc5)); + Z[k] = v_fma(R[k], vc6, v_fma(G[k], vc7, B[k]*vc8)); + } + + v_float32 L[nrepeats], u[nrepeats], v[nrepeats]; + v_float32 vmun = vx_setall_f32(-un), vmvn = vx_setall_f32(-vn); + for (int k = 0; k < nrepeats; k++) + { + L[k] = splineInterpolate(Y[k]*vx_setall_f32(LabCbrtTabScale), LabCbrtTab, LAB_CBRT_TAB_SIZE); + // L = 116.f*L - 16.f; + L[k] = v_fma(L[k], vx_setall_f32(116.f), vx_setall_f32(-16.f)); + + v_float32 d; + // d = (4*13) / max(X + 15 * Y + 3 * Z, FLT_EPSILON) + d = v_fma(Y[k], vx_setall_f32(15.f), v_fma(Z[k], vx_setall_f32(3.f), X[k])); + d = vx_setall_f32(4.f*13.f) / v_max(d, vx_setall_f32(FLT_EPSILON)); + // u = L*(X*d - un) + u[k] = L[k]*v_fma(X[k], d, vmun); + // v = L*((9*0.25f)*Y*d - vn); + v[k] = L[k]*v_fma(vx_setall_f32(9.f*0.25f)*Y[k], d, vmvn); + } + + for (int k = 0; k < nrepeats; k++) + { + v_store_interleave(dst + k*3*vsize, L[k], u[k], v[k]); } } - else - { - for( ; i <= n - 12; i += 12, src += scn * 4 ) - { - float32x4x4_t v_src = vld4q_f32(src); +#endif - v_src.val[0] = vmaxq_f32(v_src.val[0], vdupq_n_f32(0)); - v_src.val[1] = vmaxq_f32(v_src.val[1], vdupq_n_f32(0)); - v_src.val[2] = vmaxq_f32(v_src.val[2], vdupq_n_f32(0)); - - v_src.val[0] = vminq_f32(v_src.val[0], vdupq_n_f32(1)); - v_src.val[1] = vminq_f32(v_src.val[1], vdupq_n_f32(1)); - v_src.val[2] = vminq_f32(v_src.val[2], vdupq_n_f32(1)); - - if( gammaTab ) - { - v_src.val[0] = vmulq_f32(v_src.val[0], vdupq_n_f32(gscale)); - v_src.val[1] = vmulq_f32(v_src.val[1], vdupq_n_f32(gscale)); - v_src.val[2] = vmulq_f32(v_src.val[2], vdupq_n_f32(gscale)); - splineInterpolate(v_src.val[0], gammaTab, 
GAMMA_TAB_SIZE); - splineInterpolate(v_src.val[1], gammaTab, GAMMA_TAB_SIZE); - splineInterpolate(v_src.val[2], gammaTab, GAMMA_TAB_SIZE); - } - - float32x4x3_t v_dst; - v_dst.val[0] = v_src.val[0]; - v_dst.val[1] = v_src.val[1]; - v_dst.val[2] = v_src.val[2]; - process(v_dst); - - vst3q_f32(dst + i, v_dst); - } - } - #elif CV_SSE2 - if (haveSIMD) - { - for( ; i <= n - 24; i += 24, src += scn * 8 ) - { - __m128 v_r0 = _mm_loadu_ps(src + 0); - __m128 v_r1 = _mm_loadu_ps(src + 4); - __m128 v_g0 = _mm_loadu_ps(src + 8); - __m128 v_g1 = _mm_loadu_ps(src + 12); - __m128 v_b0 = _mm_loadu_ps(src + 16); - __m128 v_b1 = _mm_loadu_ps(src + 20); - - if (scn == 3) - { - _mm_deinterleave_ps(v_r0, v_r1, v_g0, v_g1, v_b0, v_b1); - } - else - { - __m128 v_a0 = _mm_loadu_ps(src + 24); - __m128 v_a1 = _mm_loadu_ps(src + 28); - - _mm_deinterleave_ps(v_r0, v_r1, v_g0, v_g1, v_b0, v_b1, v_a0, v_a1); - } - - v_r0 = _mm_max_ps(v_r0, _mm_setzero_ps()); - v_r1 = _mm_max_ps(v_r1, _mm_setzero_ps()); - v_g0 = _mm_max_ps(v_g0, _mm_setzero_ps()); - v_g1 = _mm_max_ps(v_g1, _mm_setzero_ps()); - v_b0 = _mm_max_ps(v_b0, _mm_setzero_ps()); - v_b1 = _mm_max_ps(v_b1, _mm_setzero_ps()); - - v_r0 = _mm_min_ps(v_r0, _mm_set1_ps(1.f)); - v_r1 = _mm_min_ps(v_r1, _mm_set1_ps(1.f)); - v_g0 = _mm_min_ps(v_g0, _mm_set1_ps(1.f)); - v_g1 = _mm_min_ps(v_g1, _mm_set1_ps(1.f)); - v_b0 = _mm_min_ps(v_b0, _mm_set1_ps(1.f)); - v_b1 = _mm_min_ps(v_b1, _mm_set1_ps(1.f)); - - if ( gammaTab ) - { - __m128 v_gscale = _mm_set1_ps(gscale); - v_r0 = _mm_mul_ps(v_r0, v_gscale); - v_r1 = _mm_mul_ps(v_r1, v_gscale); - v_g0 = _mm_mul_ps(v_g0, v_gscale); - v_g1 = _mm_mul_ps(v_g1, v_gscale); - v_b0 = _mm_mul_ps(v_b0, v_gscale); - v_b1 = _mm_mul_ps(v_b1, v_gscale); - - splineInterpolate(v_r0, gammaTab, GAMMA_TAB_SIZE); - splineInterpolate(v_r1, gammaTab, GAMMA_TAB_SIZE); - splineInterpolate(v_g0, gammaTab, GAMMA_TAB_SIZE); - splineInterpolate(v_g1, gammaTab, GAMMA_TAB_SIZE); - splineInterpolate(v_b0, gammaTab, GAMMA_TAB_SIZE); - 
splineInterpolate(v_b1, gammaTab, GAMMA_TAB_SIZE); - } - - process(v_r0, v_r1, v_g0, v_g1, v_b0, v_b1); - - _mm_interleave_ps(v_r0, v_r1, v_g0, v_g1, v_b0, v_b1); - - _mm_storeu_ps(dst + i + 0, v_r0); - _mm_storeu_ps(dst + i + 4, v_r1); - _mm_storeu_ps(dst + i + 8, v_g0); - _mm_storeu_ps(dst + i + 12, v_g1); - _mm_storeu_ps(dst + i + 16, v_b0); - _mm_storeu_ps(dst + i + 20, v_b1); - } - } - #endif - for( ; i < n; i += 3, src += scn ) + for( ; i < n; i++, src += scn, dst += 3 ) { float R = src[0], G = src[1], B = src[2]; R = std::min(std::max(R, 0.f), 1.f); @@ -3109,16 +2891,13 @@ struct RGB2Luvfloat float u = L*(X*d - un); float v = L*((9*0.25f)*Y*d - vn); - dst[i] = L; dst[i+1] = u; dst[i+2] = v; + dst[0] = L; dst[1] = u; dst[2] = v; } } int srccn; float coeffs[9], un, vn; bool srgb; - #if CV_SSE2 - bool haveSIMD; - #endif }; struct RGB2Luv_f @@ -3176,95 +2955,14 @@ struct Luv2RGBfloat d = softfloat::one()/max(d, softfloat::eps()); un = softfloat(4*13)*d*whitePt[0]; vn = softfloat(9*13)*d*whitePt[1]; - #if CV_SSE2 - haveSIMD = checkHardwareSupport(CV_CPU_SSE2); - #endif CV_Assert(whitePt[1] == softdouble::one()); } - #if CV_SSE2 - void process(__m128& v_l0, __m128& v_l1, __m128& v_u0, - __m128& v_u1, __m128& v_v0, __m128& v_v1) const - { - // L*(3./29.)^3 - __m128 v_y00 = _mm_mul_ps(v_l0, _mm_set1_ps(1.0f/903.3f)); - __m128 v_y01 = _mm_mul_ps(v_l1, _mm_set1_ps(1.0f/903.3f)); - // ((L + 16)/116)^3 - __m128 v_y10 = _mm_mul_ps(_mm_add_ps(v_l0, _mm_set1_ps(16.0f)), _mm_set1_ps(1.f/116.f)); - __m128 v_y11 = _mm_mul_ps(_mm_add_ps(v_l1, _mm_set1_ps(16.0f)), _mm_set1_ps(1.f/116.f)); - v_y10 = _mm_mul_ps(_mm_mul_ps(v_y10, v_y10), v_y10); - v_y11 = _mm_mul_ps(_mm_mul_ps(v_y11, v_y11), v_y11); - // Y = (L <= 8) ? 
Y0 : Y1; - __m128 v_cmpl0 = _mm_cmplt_ps(v_l0, _mm_set1_ps(8.f)); - __m128 v_cmpl1 = _mm_cmplt_ps(v_l1, _mm_set1_ps(8.f)); - v_y00 = _mm_and_ps(v_cmpl0, v_y00); - v_y01 = _mm_and_ps(v_cmpl1, v_y01); - v_y10 = _mm_andnot_ps(v_cmpl0, v_y10); - v_y11 = _mm_andnot_ps(v_cmpl1, v_y11); - __m128 v_y0 = _mm_or_ps(v_y00, v_y10); - __m128 v_y1 = _mm_or_ps(v_y01, v_y11); - // up = 3*(u + L*_un); - __m128 v_up0 = _mm_mul_ps(_mm_set1_ps(3.f), _mm_add_ps(v_u0, _mm_mul_ps(v_l0, _mm_set1_ps(un)))); - __m128 v_up1 = _mm_mul_ps(_mm_set1_ps(3.f), _mm_add_ps(v_u1, _mm_mul_ps(v_l1, _mm_set1_ps(un)))); - // vp = 0.25/(v + L*_vn); - __m128 v_vp0 = _mm_div_ps(_mm_set1_ps(0.25f), _mm_add_ps(v_v0, _mm_mul_ps(v_l0, _mm_set1_ps(vn)))); - __m128 v_vp1 = _mm_div_ps(_mm_set1_ps(0.25f), _mm_add_ps(v_v1, _mm_mul_ps(v_l1, _mm_set1_ps(vn)))); - // vp = max(-0.25, min(0.25, vp)); - v_vp0 = _mm_max_ps(v_vp0, _mm_set1_ps(-0.25f)); - v_vp1 = _mm_max_ps(v_vp1, _mm_set1_ps(-0.25f)); - v_vp0 = _mm_min_ps(v_vp0, _mm_set1_ps( 0.25f)); - v_vp1 = _mm_min_ps(v_vp1, _mm_set1_ps( 0.25f)); - //X = 3*up*vp; // (*Y) is done later - __m128 v_x0 = _mm_mul_ps(_mm_set1_ps(3.f), _mm_mul_ps(v_up0, v_vp0)); - __m128 v_x1 = _mm_mul_ps(_mm_set1_ps(3.f), _mm_mul_ps(v_up1, v_vp1)); - //Z = ((12*13*L - up)*vp - 5); // (*Y) is done later - __m128 v_z0 = _mm_sub_ps(_mm_mul_ps(_mm_sub_ps(_mm_mul_ps(_mm_set1_ps(12.f*13.f), v_l0), v_up0), v_vp0), _mm_set1_ps(5.f)); - __m128 v_z1 = _mm_sub_ps(_mm_mul_ps(_mm_sub_ps(_mm_mul_ps(_mm_set1_ps(12.f*13.f), v_l1), v_up1), v_vp1), _mm_set1_ps(5.f)); - - // R = (X*C0 + C1 + Z*C2)*Y; // here (*Y) is done - v_l0 = _mm_mul_ps(v_x0, _mm_set1_ps(coeffs[0])); - v_l1 = _mm_mul_ps(v_x1, _mm_set1_ps(coeffs[0])); - v_u0 = _mm_mul_ps(v_x0, _mm_set1_ps(coeffs[3])); - v_u1 = _mm_mul_ps(v_x1, _mm_set1_ps(coeffs[3])); - v_v0 = _mm_mul_ps(v_x0, _mm_set1_ps(coeffs[6])); - v_v1 = _mm_mul_ps(v_x1, _mm_set1_ps(coeffs[6])); - v_l0 = _mm_add_ps(v_l0, _mm_set1_ps(coeffs[1])); - v_l1 = _mm_add_ps(v_l1, 
_mm_set1_ps(coeffs[1])); - v_u0 = _mm_add_ps(v_u0, _mm_set1_ps(coeffs[4])); - v_u1 = _mm_add_ps(v_u1, _mm_set1_ps(coeffs[4])); - v_v0 = _mm_add_ps(v_v0, _mm_set1_ps(coeffs[7])); - v_v1 = _mm_add_ps(v_v1, _mm_set1_ps(coeffs[7])); - v_l0 = _mm_add_ps(v_l0, _mm_mul_ps(v_z0, _mm_set1_ps(coeffs[2]))); - v_l1 = _mm_add_ps(v_l1, _mm_mul_ps(v_z1, _mm_set1_ps(coeffs[2]))); - v_u0 = _mm_add_ps(v_u0, _mm_mul_ps(v_z0, _mm_set1_ps(coeffs[5]))); - v_u1 = _mm_add_ps(v_u1, _mm_mul_ps(v_z1, _mm_set1_ps(coeffs[5]))); - v_v0 = _mm_add_ps(v_v0, _mm_mul_ps(v_z0, _mm_set1_ps(coeffs[8]))); - v_v1 = _mm_add_ps(v_v1, _mm_mul_ps(v_z1, _mm_set1_ps(coeffs[8]))); - v_l0 = _mm_mul_ps(v_l0, v_y0); - v_l1 = _mm_mul_ps(v_l1, v_y1); - v_u0 = _mm_mul_ps(v_u0, v_y0); - v_u1 = _mm_mul_ps(v_u1, v_y1); - v_v0 = _mm_mul_ps(v_v0, v_y0); - v_v1 = _mm_mul_ps(v_v1, v_y1); - - v_l0 = _mm_max_ps(v_l0, _mm_setzero_ps()); - v_l1 = _mm_max_ps(v_l1, _mm_setzero_ps()); - v_u0 = _mm_max_ps(v_u0, _mm_setzero_ps()); - v_u1 = _mm_max_ps(v_u1, _mm_setzero_ps()); - v_v0 = _mm_max_ps(v_v0, _mm_setzero_ps()); - v_v1 = _mm_max_ps(v_v1, _mm_setzero_ps()); - v_l0 = _mm_min_ps(v_l0, _mm_set1_ps(1.f)); - v_l1 = _mm_min_ps(v_l1, _mm_set1_ps(1.f)); - v_u0 = _mm_min_ps(v_u0, _mm_set1_ps(1.f)); - v_u1 = _mm_min_ps(v_u1, _mm_set1_ps(1.f)); - v_v0 = _mm_min_ps(v_v0, _mm_set1_ps(1.f)); - v_v1 = _mm_min_ps(v_v1, _mm_set1_ps(1.f)); - } - #endif - void operator()(const float* src, float* dst, int n) const { + CV_INSTRUMENT_REGION(); + int i = 0, dcn = dstcn; const float* gammaTab = srgb ? sRGBInvGammaTab : 0; float gscale = GammaTabScale; @@ -3273,73 +2971,111 @@ struct Luv2RGBfloat C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8]; float alpha = ColorChannel::max(); float _un = un, _vn = vn; - n *= 3; - #if CV_SSE2 - if (haveSIMD) +#if CV_SIMD + const int vsize = v_float32::nlanes; + const int nrepeats = vsize == 4 ? 
2 : 1; + for( ; i <= n - vsize*nrepeats; + i += vsize*nrepeats, src += vsize*3*nrepeats, dst += dcn*vsize*nrepeats) { - for( ; i <= n - 24; i += 24, dst += dcn * 8 ) + v_float32 L[nrepeats], u[nrepeats], v[nrepeats]; + for (int k = 0; k < nrepeats; k++) { - __m128 v_l0 = _mm_loadu_ps(src + i + 0); - __m128 v_l1 = _mm_loadu_ps(src + i + 4); - __m128 v_u0 = _mm_loadu_ps(src + i + 8); - __m128 v_u1 = _mm_loadu_ps(src + i + 12); - __m128 v_v0 = _mm_loadu_ps(src + i + 16); - __m128 v_v1 = _mm_loadu_ps(src + i + 20); + v_load_deinterleave(src + k*vsize*3, L[k], u[k], v[k]); + } - _mm_deinterleave_ps(v_l0, v_l1, v_u0, v_u1, v_v0, v_v1); + v_float32 X[nrepeats], Y[nrepeats], Z[nrepeats]; - process(v_l0, v_l1, v_u0, v_u1, v_v0, v_v1); + v_float32 v16 = vx_setall_f32(16.f); + v_float32 v116inv = vx_setall_f32(1.f/116.f); + v_float32 v903inv = vx_setall_f32(1.0f/903.296296f); //(3./29.)^3 + for (int k = 0; k < nrepeats; k++) + { + v_float32 Ylo, Yhi; - if( gammaTab ) + // ((L + 16)/116)^3 + Ylo = (L[k] + v16) * v116inv; + Ylo = Ylo*Ylo*Ylo; + // L*(3./29.)^3 + Yhi = L[k] * v903inv; + + // Y = (L <= 8) ? 
Y0 : Y1; + Y[k] = v_select(L[k] >= vx_setall_f32(8.f), Ylo, Yhi); + } + + v_float32 v4inv = vx_setall_f32(0.25f), v3 = vx_setall_f32(3.f); + for(int k = 0; k < nrepeats; k++) + { + v_float32 up, vp; + + // up = 3*(u + L*_un); + up = v3*(v_fma(L[k], vx_setall_f32(_un), u[k])); + // vp = 0.25/(v + L*_vn); + vp = v4inv/(v_fma(L[k], vx_setall_f32(_vn), v[k])); + + // vp = max(-0.25, min(0.25, vp)); + vp = v_max(vx_setall_f32(-0.25f), v_min(v4inv, vp)); + + //X = 3*up*vp; // (*Y) is done later + X[k] = v3*up*vp; + //Z = ((12*13*L - up)*vp - 5); // (*Y) is done later + // xor flips the sign, works like unary minus + Z[k] = v_fma(v_fma(L[k], vx_setall_f32(12.f*13.f), (vx_setall_f32(-0.f) ^ up)), vp, vx_setall_f32(-5.f)); + } + + v_float32 R[nrepeats], G[nrepeats], B[nrepeats]; + v_float32 vc0 = vx_setall_f32(C0), vc1 = vx_setall_f32(C1), vc2 = vx_setall_f32(C2); + v_float32 vc3 = vx_setall_f32(C3), vc4 = vx_setall_f32(C4), vc5 = vx_setall_f32(C5); + v_float32 vc6 = vx_setall_f32(C6), vc7 = vx_setall_f32(C7), vc8 = vx_setall_f32(C8); + for(int k = 0; k < nrepeats; k++) + { + // R = (X*C0 + C1 + Z*C2)*Y; // here (*Y) is done + R[k] = v_fma(Z[k], vc2, v_fma(X[k], vc0, vc1))*Y[k]; + G[k] = v_fma(Z[k], vc5, v_fma(X[k], vc3, vc4))*Y[k]; + B[k] = v_fma(Z[k], vc8, v_fma(X[k], vc6, vc7))*Y[k]; + } + + v_float32 vzero = vx_setzero_f32(), v1 = vx_setall_f32(1.f); + for(int k = 0; k < nrepeats; k++) + { + R[k] = v_min(v_max(R[k], vzero), v1); + G[k] = v_min(v_max(G[k], vzero), v1); + B[k] = v_min(v_max(B[k], vzero), v1); + } + + if(gammaTab) + { + v_float32 vgscale = vx_setall_f32(gscale); + for(int k = 0; k < nrepeats; k++) { - __m128 v_gscale = _mm_set1_ps(gscale); - v_l0 = _mm_mul_ps(v_l0, v_gscale); - v_l1 = _mm_mul_ps(v_l1, v_gscale); - v_u0 = _mm_mul_ps(v_u0, v_gscale); - v_u1 = _mm_mul_ps(v_u1, v_gscale); - v_v0 = _mm_mul_ps(v_v0, v_gscale); - v_v1 = _mm_mul_ps(v_v1, v_gscale); - splineInterpolate(v_l0, gammaTab, GAMMA_TAB_SIZE); - splineInterpolate(v_l1, gammaTab, 
GAMMA_TAB_SIZE); - splineInterpolate(v_u0, gammaTab, GAMMA_TAB_SIZE); - splineInterpolate(v_u1, gammaTab, GAMMA_TAB_SIZE); - splineInterpolate(v_v0, gammaTab, GAMMA_TAB_SIZE); - splineInterpolate(v_v1, gammaTab, GAMMA_TAB_SIZE); + R[k] *= vgscale; + G[k] *= vgscale; + B[k] *= vgscale; } - - if( dcn == 4 ) + for(int k = 0; k < nrepeats; k++) { - __m128 v_a0 = _mm_set1_ps(alpha); - __m128 v_a1 = _mm_set1_ps(alpha); - _mm_interleave_ps(v_l0, v_l1, v_u0, v_u1, v_v0, v_v1, v_a0, v_a1); - - _mm_storeu_ps(dst + 0, v_l0); - _mm_storeu_ps(dst + 4, v_l1); - _mm_storeu_ps(dst + 8, v_u0); - _mm_storeu_ps(dst + 12, v_u1); - _mm_storeu_ps(dst + 16, v_v0); - _mm_storeu_ps(dst + 20, v_v1); - _mm_storeu_ps(dst + 24, v_a0); - _mm_storeu_ps(dst + 28, v_a1); + R[k] = splineInterpolate(R[k], gammaTab, GAMMA_TAB_SIZE); + G[k] = splineInterpolate(G[k], gammaTab, GAMMA_TAB_SIZE); + B[k] = splineInterpolate(B[k], gammaTab, GAMMA_TAB_SIZE); } - else + } + for(int k = 0; k < nrepeats; k++) + { + if(dcn == 4) { - _mm_interleave_ps(v_l0, v_l1, v_u0, v_u1, v_v0, v_v1); - - _mm_storeu_ps(dst + 0, v_l0); - _mm_storeu_ps(dst + 4, v_l1); - _mm_storeu_ps(dst + 8, v_u0); - _mm_storeu_ps(dst + 12, v_u1); - _mm_storeu_ps(dst + 16, v_v0); - _mm_storeu_ps(dst + 20, v_v1); + v_store_interleave(dst + k*vsize*4, R[k], G[k], B[k], vx_setall_f32(alpha)); + } + else // dcn == 3 + { + v_store_interleave(dst + k*vsize*3, R[k], G[k], B[k]); } } } - #endif - for( ; i < n; i += 3, dst += dcn ) +#endif + + for( ; i < n; i++, src += 3, dst += dcn ) { - float L = src[i], u = src[i+1], v = src[i+2], X, Y, Z; + float L = src[0], u = src[1], v = src[2], X, Y, Z; if(L >= 8) { Y = (L + 16.f) * (1.f/116.f); @@ -3380,9 +3116,6 @@ struct Luv2RGBfloat int dstcn; float coeffs[9], un, vn; bool srgb; - #if CV_SSE2 - bool haveSIMD; - #endif }; @@ -3417,69 +3150,72 @@ struct RGB2Luvinterpolate void operator()(const uchar* src, uchar* dst, int n) const { + CV_INSTRUMENT_REGION(); + int i, scn = srccn, bIdx = blueIdx; i = 0; n *= 3; 
-#if CV_SIMD128 +#if CV_SIMD if(enablePackedRGB2Luv) { - static const int nPixels = 8*2; + const int vsize = v_uint16::nlanes; + static const int nPixels = vsize*2; for(; i < n - 3*nPixels; i += 3*nPixels, src += scn*nPixels) { /* int R = src[bIdx], G = src[1], B = src[bIdx^2]; - */ - v_uint8x16 r16, g16, b16, dummy16; + */ + v_uint8 r, g, b, dummy; if(scn == 3) { - v_load_deinterleave(src, r16, g16, b16); + v_load_deinterleave(src, r, g, b); } else // scn == 4 { - v_load_deinterleave(src, r16, g16, b16, dummy16); + v_load_deinterleave(src, r, g, b, dummy); } if(bIdx) { - dummy16 = r16; r16 = b16; b16 = dummy16; + swap(r, b); } /* static const int baseDiv = LAB_BASE/256; R = R*baseDiv, G = G*baseDiv, B = B*baseDiv; - */ - v_uint16x8 r80, r81, g80, g81, b80, b81; - v_expand(r16, r80, r81); - v_expand(g16, g80, g81); - v_expand(b16, b80, b81); - r80 = r80 << (lab_base_shift - 8); r81 = r81 << (lab_base_shift - 8); - g80 = g80 << (lab_base_shift - 8); g81 = g81 << (lab_base_shift - 8); - b80 = b80 << (lab_base_shift - 8); b81 = b81 << (lab_base_shift - 8); + */ + v_uint16 r0, r1, g0, g1, b0, b1; + v_expand(r, r0, r1); + v_expand(g, g0, g1); + v_expand(b, b0, b1); + r0 = r0 << (lab_base_shift - 8); r1 = r1 << (lab_base_shift - 8); + g0 = g0 << (lab_base_shift - 8); g1 = g1 << (lab_base_shift - 8); + b0 = b0 << (lab_base_shift - 8); b1 = b1 << (lab_base_shift - 8); /* int L, u, v; trilinearInterpolate(R, G, B, RGB2LuvLUT_s16, L, u, v); - */ - v_uint16x8 l80, u80, v80, l81, u81, v81; - trilinearPackedInterpolate(r80, g80, b80, LABLUVLUTs16.RGB2LuvLUT_s16, l80, u80, v80); - trilinearPackedInterpolate(r81, g81, b81, LABLUVLUTs16.RGB2LuvLUT_s16, l81, u81, v81); + */ + v_uint16 l0, u0, v0, l1, u1, v1; + trilinearPackedInterpolate(r0, g0, b0, LABLUVLUTs16.RGB2LuvLUT_s16, l0, u0, v0); + trilinearPackedInterpolate(r1, g1, b1, LABLUVLUTs16.RGB2LuvLUT_s16, l1, u1, v1); /* - dst[i] = saturate_cast(L/baseDiv); + dst[i] = saturate_cast(L/baseDiv); dst[i+1] = 
saturate_cast(u/baseDiv); dst[i+2] = saturate_cast(v/baseDiv); - */ - l80 = l80 >> (lab_base_shift - 8); l81 = l81 >> (lab_base_shift - 8); - u80 = u80 >> (lab_base_shift - 8); u81 = u81 >> (lab_base_shift - 8); - v80 = v80 >> (lab_base_shift - 8); v81 = v81 >> (lab_base_shift - 8); - v_uint8x16 l16 = v_pack(l80, l81); - v_uint8x16 u16 = v_pack(u80, u81); - v_uint8x16 v16 = v_pack(v80, v81); - v_store_interleave(dst + i, l16, u16, v16); + */ + l0 = l0 >> (lab_base_shift - 8); l1 = l1 >> (lab_base_shift - 8); + u0 = u0 >> (lab_base_shift - 8); u1 = u1 >> (lab_base_shift - 8); + v0 = v0 >> (lab_base_shift - 8); v1 = v1 >> (lab_base_shift - 8); + v_uint8 l = v_pack(l0, l1); + v_uint8 u = v_pack(u0, u1); + v_uint8 v = v_pack(v0, v1); + v_store_interleave(dst + i, l, u, v); } } -#endif // CV_SIMD128 +#endif // CV_SIMD for(; i < n; i += 3, src += scn) { @@ -3506,60 +3242,24 @@ struct RGB2Luvinterpolate struct RGB2Luv_b { typedef uchar channel_type; + static const int bufChannels = 3; RGB2Luv_b( int _srccn, int blueIdx, const float* _coeffs, const float* _whitept, bool _srgb ) : srccn(_srccn), - fcvt(3, blueIdx, _coeffs, _whitept, _srgb), + fcvt(bufChannels, blueIdx, _coeffs, _whitept, _srgb), icvt(_srccn, blueIdx, _coeffs, _whitept, _srgb) { + // using interpolation for LRGB gives error up to 8 of 255, don't use it useInterpolation = (!_coeffs && !_whitept && _srgb && enableBitExactness && enableRGB2LuvInterpolation); - - #if CV_NEON - v_scale_inv = vdupq_n_f32(softfloat::one()/f255); - v_scale = vdupq_n_f32(f255/softfloat(100)); - v_coeff1 = vdupq_n_f32(f255/uRange); - v_coeff2 = vdupq_n_f32(-uLow*f255/uRange); - v_coeff3 = vdupq_n_f32(f255/vRange); - v_coeff4 = vdupq_n_f32(-vLow*f255/vRange); - v_alpha = vdup_n_u8(ColorChannel::max()); - #elif CV_SSE2 - v_zero = _mm_setzero_si128(); - v_scale_inv = _mm_set1_ps(softfloat::one()/f255); - haveSIMD = checkHardwareSupport(CV_CPU_SSE2); - #endif } - #if CV_SSE2 - void process(const float * buf, - __m128 & v_coeffs, __m128 & 
v_res, uchar * dst) const - { - __m128 v_l0f = _mm_load_ps(buf); - __m128 v_l1f = _mm_load_ps(buf + 4); - __m128 v_u0f = _mm_load_ps(buf + 8); - __m128 v_u1f = _mm_load_ps(buf + 12); - - v_l0f = _mm_add_ps(_mm_mul_ps(v_l0f, v_coeffs), v_res); - v_u1f = _mm_add_ps(_mm_mul_ps(v_u1f, v_coeffs), v_res); - v_coeffs = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(v_coeffs), 0x92)); - v_res = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(v_res), 0x92)); - v_u0f = _mm_add_ps(_mm_mul_ps(v_u0f, v_coeffs), v_res); - v_coeffs = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(v_coeffs), 0x92)); - v_res = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(v_res), 0x92)); - v_l1f = _mm_add_ps(_mm_mul_ps(v_l1f, v_coeffs), v_res); - - __m128i v_l = _mm_packs_epi32(_mm_cvtps_epi32(v_l0f), _mm_cvtps_epi32(v_l1f)); - __m128i v_u = _mm_packs_epi32(_mm_cvtps_epi32(v_u0f), _mm_cvtps_epi32(v_u1f)); - __m128i v_l0 = _mm_packus_epi16(v_l, v_u); - - _mm_storeu_si128((__m128i *)(dst), v_l0); - } - #endif - void operator()(const uchar* src, uchar* dst, int n) const { + CV_INSTRUMENT_REGION(); + if(useInterpolation) { icvt(src, dst, n); @@ -3567,92 +3267,90 @@ struct RGB2Luv_b } int i, j, scn = srccn; - float CV_DECL_ALIGNED(16) buf[3*BLOCK_SIZE]; +#if CV_SIMD + float CV_DECL_ALIGNED(CV_SIMD_WIDTH) buf[bufChannels*BLOCK_SIZE]; +#else + float CV_DECL_ALIGNED(16) buf[bufChannels*BLOCK_SIZE]; +#endif - #if CV_SSE2 - __m128 v_coeffs = _mm_set_ps(f255/softfloat(100), f255/vRange, f255/uRange, f255/softfloat(100)); - __m128 v_res = _mm_set_ps(0.f, -vLow*f255/vRange, -uLow*f255/uRange, 0.f); - #endif + static const softfloat fL = f255/softfloat(100); + static const softfloat fu = f255/uRange; + static const softfloat fv = f255/vRange; + static const softfloat su = -uLow*f255/uRange; + static const softfloat sv = -vLow*f255/vRange; +#if CV_SIMD + const int fsize = v_float32::nlanes; + v_float32 ml = vx_setall_f32((float)fL), al = vx_setzero_f32(); + v_float32 mu = 
vx_setall_f32((float)fu), au = vx_setall_f32((float)su); + v_float32 mv = vx_setall_f32((float)fv), av = vx_setall_f32((float)sv); + //TODO: fix that when v_interleave is available + float CV_DECL_ALIGNED(CV_SIMD_WIDTH) interTmpM[fsize*3], interTmpA[fsize*3]; + v_store_interleave(interTmpM, ml, mu, mv); + v_store_interleave(interTmpA, al, au, av); + v_float32 mluv[3], aluv[3]; + for(int k = 0; k < 3; k++) + { + mluv[k] = vx_load_aligned(interTmpM + k*fsize); + aluv[k] = vx_load_aligned(interTmpA + k*fsize); + } +#endif - for( i = 0; i < n; i += BLOCK_SIZE, dst += BLOCK_SIZE*3 ) + for( i = 0; i < n; i += BLOCK_SIZE, dst += BLOCK_SIZE*bufChannels ) { int dn = std::min(n - i, (int)BLOCK_SIZE); j = 0; - #if CV_NEON - for ( ; j <= (dn - 8) * 3; j += 24, src += 8 * scn) - { - uint16x8_t v_t0, v_t1, v_t2; - - if (scn == 3) - { - uint8x8x3_t v_src = vld3_u8(src); - v_t0 = vmovl_u8(v_src.val[0]); - v_t1 = vmovl_u8(v_src.val[1]); - v_t2 = vmovl_u8(v_src.val[2]); - } - else - { - uint8x8x4_t v_src = vld4_u8(src); - v_t0 = vmovl_u8(v_src.val[0]); - v_t1 = vmovl_u8(v_src.val[1]); - v_t2 = vmovl_u8(v_src.val[2]); - } - - float32x4x3_t v_dst; - v_dst.val[0] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_t0))), v_scale_inv); - v_dst.val[1] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_t1))), v_scale_inv); - v_dst.val[2] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_t2))), v_scale_inv); - vst3q_f32(buf + j, v_dst); - - v_dst.val[0] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_t0))), v_scale_inv); - v_dst.val[1] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_t1))), v_scale_inv); - v_dst.val[2] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_t2))), v_scale_inv); - vst3q_f32(buf + j + 12, v_dst); - } - #elif CV_SSE2 - if (scn == 3 && haveSIMD) - { - for ( ; j <= (dn * 3 - 16); j += 16, src += 16) - { - __m128i v_src = _mm_loadu_si128((__m128i const *)src); - - __m128i v_src_p = _mm_unpacklo_epi8(v_src, v_zero); - _mm_store_ps(buf + j, 
_mm_mul_ps(_mm_cvtepi32_ps(_mm_unpacklo_epi16(v_src_p, v_zero)), v_scale_inv)); - _mm_store_ps(buf + j + 4, _mm_mul_ps(_mm_cvtepi32_ps(_mm_unpackhi_epi16(v_src_p, v_zero)), v_scale_inv)); - - v_src_p = _mm_unpackhi_epi8(v_src, v_zero); - _mm_store_ps(buf + j + 8, _mm_mul_ps(_mm_cvtepi32_ps(_mm_unpacklo_epi16(v_src_p, v_zero)), v_scale_inv)); - _mm_store_ps(buf + j + 12, _mm_mul_ps(_mm_cvtepi32_ps(_mm_unpackhi_epi16(v_src_p, v_zero)), v_scale_inv)); - } - - int jr = j % 3; - if (jr) - src -= jr, j -= jr; - } - else if (scn == 4 && haveSIMD) - { - for ( ; j <= (dn * 3 - 12); j += 12, src += 16) - { - __m128i v_src = _mm_loadu_si128((__m128i const *)src); - - __m128i v_src_lo = _mm_unpacklo_epi8(v_src, v_zero); - __m128i v_src_hi = _mm_unpackhi_epi8(v_src, v_zero); - _mm_storeu_ps(buf + j, _mm_mul_ps(_mm_cvtepi32_ps(_mm_unpacklo_epi16(v_src_lo, v_zero)), v_scale_inv)); - _mm_storeu_ps(buf + j + 3, _mm_mul_ps(_mm_cvtepi32_ps(_mm_unpackhi_epi16(v_src_lo, v_zero)), v_scale_inv)); - _mm_storeu_ps(buf + j + 6, _mm_mul_ps(_mm_cvtepi32_ps(_mm_unpacklo_epi16(v_src_hi, v_zero)), v_scale_inv)); - float tmp = buf[j + 8]; - _mm_storeu_ps(buf + j + 8, _mm_mul_ps(_mm_cvtepi32_ps(_mm_shuffle_epi32(_mm_unpackhi_epi16(v_src_hi, v_zero), 0x90)), v_scale_inv)); - buf[j + 8] = tmp; - } - - int jr = j % 3; - if (jr) - src -= jr, j -= jr; - } - #endif static const softfloat f255inv = softfloat::one()/f255; - for( ; j < dn*3; j += 3, src += scn ) +#if CV_SIMD + v_float32 v255inv = vx_setall_f32((float)f255inv); + if(scn == 4) + { + static const int nBlock = fsize*4; + for( ; j <= dn*bufChannels - nBlock*3; + j += nBlock*3, src += nBlock*4) + { + v_uint8 rgb[3], dummy; + v_load_deinterleave(src, rgb[0], rgb[1], rgb[2], dummy); + + v_uint16 d[3*2]; + for(int k = 0; k < 3; k++) + { + v_expand(rgb[k], d[k*2+0], d[k*2+1]); + } + v_int32 q[3*4]; + for(int k = 0; k < 3*2; k++) + { + v_expand(v_reinterpret_as_s16(d[k]), q[k*2+0], q[k*2+1]); + } + + v_float32 f[3*4]; + for(int k = 0; k < 3*4; k++) + 
{ + f[k] = v_cvt_f32(q[k])*v255inv; + } + + for(int k = 0; k < 4; k++) + { + v_store_interleave(buf + j + k*3*fsize, f[0*4+k], f[1*4+k], f[2*4+k]); + } + } + } + else // scn == 3 + { + static const int nBlock = fsize*2; + for( ; j <= dn*bufChannels - nBlock; + j += nBlock, src += nBlock) + { + v_uint16 d = vx_load_expand(src); + v_int32 q0, q1; + v_expand(v_reinterpret_as_s16(d), q0, q1); + + v_store_aligned(buf + j + 0*fsize, v_cvt_f32(q0)*v255inv); + v_store_aligned(buf + j + 1*fsize, v_cvt_f32(q1)*v255inv); + } + } +#endif + for( ; j < dn*bufChannels; j += bufChannels, src += scn ) { buf[j ] = (float)(src[0]*((float)f255inv)); buf[j+1] = (float)(src[1]*((float)f255inv)); @@ -3661,43 +3359,34 @@ struct RGB2Luv_b fcvt(buf, buf, dn); j = 0; - #if CV_NEON - for ( ; j <= (dn - 8) * 3; j += 24) - { - float32x4x3_t v_src0 = vld3q_f32(buf + j), v_src1 = vld3q_f32(buf + j + 12); - uint8x8x3_t v_dst; - v_dst.val[0] = vqmovn_u16(vcombine_u16(vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src0.val[0], v_scale))), - vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src1.val[0], v_scale))))); - v_dst.val[1] = vqmovn_u16(vcombine_u16(vqmovn_u32(cv_vrndq_u32_f32(vaddq_f32(vmulq_f32(v_src0.val[1], v_coeff1), v_coeff2))), - vqmovn_u32(cv_vrndq_u32_f32(vaddq_f32(vmulq_f32(v_src1.val[1], v_coeff1), v_coeff2))))); - v_dst.val[2] = vqmovn_u16(vcombine_u16(vqmovn_u32(cv_vrndq_u32_f32(vaddq_f32(vmulq_f32(v_src0.val[2], v_coeff3), v_coeff4))), - vqmovn_u32(cv_vrndq_u32_f32(vaddq_f32(vmulq_f32(v_src1.val[2], v_coeff3), v_coeff4))))); - - vst3_u8(dst + j, v_dst); - } - #elif CV_SSE2 - if (haveSIMD) +#if CV_SIMD + for( ; j <= dn*3 - fsize*3*4; j += fsize*3*4) { - for ( ; j <= (dn - 16) * 3; j += 48) + v_float32 f[3*4]; + for(int k = 0; k < 3*4; k++) + f[k] = vx_load_aligned(buf + j + k*fsize); + + for(int k = 0; k < 4; k++) { - process(buf + j, - v_coeffs, v_res, dst + j); + f[k*3+0] = v_fma(f[k*3+0], mluv[0], aluv[0]); + f[k*3+1] = v_fma(f[k*3+1], mluv[1], aluv[1]); + f[k*3+2] = v_fma(f[k*3+2], mluv[2], 
aluv[2]); + } - process(buf + j + 16, - v_coeffs, v_res, dst + j + 16); + v_int32 q[3*4]; + for(int k = 0; k < 3*4; k++) + { + q[k] = v_round(f[k]); + } - process(buf + j + 32, - v_coeffs, v_res, dst + j + 32); + for(int k = 0; k < 3; k++) + { + v_store(dst + j + k*fsize*4, v_pack_u(v_pack(q[k*4+0], q[k*4+1]), + v_pack(q[k*4+2], q[k*4+3]))); } } - #endif - - static const softfloat fL = f255/softfloat(100); - static const softfloat fu = f255/uRange; - static const softfloat fv = f255/vRange; - static const softfloat su = -uLow*f255/uRange; - static const softfloat sv = -vLow*f255/vRange; +#endif for( ; j < dn*3; j += 3 ) { dst[j] = saturate_cast(buf[j]*(float)fL); @@ -3711,14 +3400,6 @@ struct RGB2Luv_b RGB2Luvfloat fcvt; RGB2Luvinterpolate icvt; - #if CV_NEON - float32x4_t v_scale, v_scale_inv, v_coeff1, v_coeff2, v_coeff3, v_coeff4; - uint8x8_t v_alpha; - #elif CV_SSE2 - __m128 v_scale_inv; - __m128i v_zero; - bool haveSIMD; - #endif bool useInterpolation; }; @@ -3734,7 +3415,7 @@ struct Luv2RGBinteger // whitept is fixed for int calculations Luv2RGBinteger( int _dstcn, int blueIdx, const float* _coeffs, const float* /*_whitept*/, bool _srgb ) - : dstcn(_dstcn) + : dstcn(_dstcn), issRGB(_srgb) { initLabTabs(); @@ -3752,8 +3433,6 @@ struct Luv2RGBinteger coeffs[i+3] = cvRound(lshift*c[1]); coeffs[i+(blueIdx^2)*3] = cvRound(lshift*c[2]); } - - tab = _srgb ? 
sRGBInvGammaTab_b : linearInvGammaTab_b; } // L, u, v should be in their natural range @@ -3766,8 +3445,8 @@ struct Luv2RGBinteger // vp: +/- 0.25*BASE*1024 int up = LUVLUT.LuToUp_b[LL*256+uu]; int vp = LUVLUT.LvToVp_b[LL*256+vv]; - //X = y*3.f* up/((float)BASE/1024) *vp/((float)BASE*1024); - //Z = y*(((12.f*13.f)*((float)LL)*100.f/255.f - up/((float)BASE))*vp/((float)BASE*1024) - 5.f); + // X = y*3.f* up/((float)BASE/1024) *vp/((float)BASE*1024); + // Z = y*(((12.f*13.f)*((float)LL)*100.f/255.f - up/((float)BASE))*vp/((float)BASE*1024) - 5.f); long long int xv = ((int)up)*(long long)vp; int x = (int)(xv/BASE); @@ -3795,116 +3474,269 @@ struct Luv2RGBinteger go = max(0, min((int)INV_GAMMA_TAB_SIZE-1, go)); bo = max(0, min((int)INV_GAMMA_TAB_SIZE-1, bo)); - ro = tab[ro]; - go = tab[go]; - bo = tab[bo]; + if(issRGB) + { + ushort* tab = sRGBInvGammaTab_b; + ro = tab[ro]; + go = tab[go]; + bo = tab[bo]; + } + else + { + // rgb = (rgb*255) >> inv_gamma_shift + ro = ((ro << 8) - ro) >> inv_gamma_shift; + go = ((go << 8) - go) >> inv_gamma_shift; + bo = ((bo << 8) - bo) >> inv_gamma_shift; + } } - inline void processLuvToXYZ(const v_uint8x16& lv, const v_uint8x16& uv, const v_uint8x16& vv, - int32_t* xyz) const + inline void processLuvToXYZ(const v_uint8& lv, const v_uint8& uv, const v_uint8& vv, + v_int32 (&x)[4], v_int32 (&y)[4], v_int32 (&z)[4]) const { - uint8_t CV_DECL_ALIGNED(16) lvstore[16], uvstore[16], vvstore[16]; - v_store_aligned(lvstore, lv); v_store_aligned(uvstore, uv); v_store_aligned(vvstore, vv); + const int vsize = v_uint8::nlanes; - for(int i = 0; i < 16; i++) + v_uint16 lv0, lv1; + v_expand(lv, lv0, lv1); + v_uint32 lq[4]; + v_expand(lv0, lq[0], lq[1]); + v_expand(lv1, lq[2], lq[3]); + + // y = LabToYF_b[LL*2]; + // load int32 instead of int16 then cut unused part by masking + v_int32 mask16 = vx_setall_s32(0xFFFF); + for(int k = 0; k < 4; k++) { - int LL = lvstore[i]; - int u = uvstore[i]; - int v = vvstore[i]; - int y = LabToYF_b[LL*2]; + y[k] = 
v_lut((const int*)LabToYF_b, v_reinterpret_as_s32(lq[k])) & mask16; + } - int up = LUVLUT.LuToUp_b[LL*256+u]; - int vp = LUVLUT.LvToVp_b[LL*256+v]; + v_int32 up[4], vp[4]; + // int up = LUVLUT.LuToUp_b[LL*256+u]; + // int vp = LUVLUT.LvToVp_b[LL*256+v]; + v_uint16 uv0, uv1, vv0, vv1; + v_expand(uv, uv0, uv1); + v_expand(vv, vv0, vv1); + // LL*256 + v_uint16 ll0, ll1; + ll0 = lv0 << 8; ll1 = lv1 << 8; + v_uint16 upidx0, upidx1, vpidx0, vpidx1; + upidx0 = ll0 + uv0; upidx1 = ll1 + uv1; + vpidx0 = ll0 + vv0; vpidx1 = ll1 + vv1; + v_uint32 upidx[4], vpidx[4]; + v_expand(upidx0, upidx[0], upidx[1]); v_expand(upidx1, upidx[2], upidx[3]); + v_expand(vpidx0, vpidx[0], vpidx[1]); v_expand(vpidx1, vpidx[2], vpidx[3]); + for(int k = 0; k < 4; k++) + { + up[k] = v_lut(LUVLUT.LuToUp_b, v_reinterpret_as_s32(upidx[k])); + vp[k] = v_lut(LUVLUT.LvToVp_b, v_reinterpret_as_s32(vpidx[k])); + } - long long int xv = up*(long long int)vp; - long long int vpl = LUVLUT.LvToVpl_b[LL*256+v]; - long long int zp = vpl - xv*(255/3); + // long long int vpl = LUVLUT.LvToVpl_b[LL*256+v]; + v_int64 vpl[8]; + int32_t CV_DECL_ALIGNED(CV_SIMD_WIDTH) vpidxstore[vsize]; + for(int k = 0; k < 4; k++) + { + v_store_aligned(vpidxstore + k*vsize/4, v_reinterpret_as_s32(vpidx[k])); + } + for(int k = 0; k < 8; k++) + { + vpl[k] = vx_lut((const int64_t*)LUVLUT.LvToVpl_b, vpidxstore + k*vsize/8); + } + + // not all 64-bit arithmetic is available in univ. 
intrinsics + // need to handle it with scalar code + int64_t CV_DECL_ALIGNED(CV_SIMD_WIDTH) vvpl[vsize]; + for(int k = 0; k < 8; k++) + { + v_store_aligned(vvpl + k*vsize/8, vpl[k]); + } + int32_t CV_DECL_ALIGNED(CV_SIMD_WIDTH) vup[vsize], vvp[vsize], vx[vsize], vy[vsize], vzm[vsize]; + for(int k = 0; k < 4; k++) + { + v_store_aligned(vup + k*vsize/4, up[k]); + v_store_aligned(vvp + k*vsize/4, vp[k]); + v_store_aligned(vy + k*vsize/4, y[k]); + } + for(int i = 0; i < vsize; i++) + { + int32_t y_ = vy[i]; + int32_t up_ = vup[i]; + int32_t vp_ = vvp[i]; + + int64_t vpl_ = vvpl[i]; + int64_t xv = up_*(int64_t)vp_; + + int64_t zp = vpl_ - xv*(255/3); zp = zp >> base_shift; - long long int zq = zp - (5*255*BASE); - int zm = (int)((y*zq) >> base_shift); + int64_t zq = zp - (5*255*BASE); + int32_t zm = (int32_t)((y_*zq) >> base_shift); + vzm[i] = zm; - int x = (int)(xv >> base_shift); - x = (y*x) >> base_shift; + vx[i] = (int32_t)(xv >> base_shift); + } + v_int32 zm[4]; + for(int k = 0; k < 4; k++) + { + x[k] = vx_load_aligned(vx + k*vsize/4); + zm[k] = vx_load_aligned(vzm + k*vsize/4); + } - int z = zm/256 + zm/65536; - x = max(0, min(2*BASE, x)); z = max(0, min(2*BASE, z)); + for(int k = 0; k < 4; k++) + { + x[k] = (y[k]*x[k]) >> base_shift; + } - xyz[i] = x; xyz[i + 16] = y; xyz[i + 32] = z; + // z = zm/256 + zm/65536; + for (int k = 0; k < 4; k++) + { + z[k] = (zm[k] >> 8) + (zm[k] >> 16); + } + + // (x, z) = clip((x, z), min=0, max=2*BASE) + v_int32 zero = vx_setzero_s32(), base2 = vx_setall_s32(2*BASE); + for(int k = 0; k < 4; k++) + { + x[k] = v_max(zero, v_min(base2, x[k])); + z[k] = v_max(zero, v_min(base2, z[k])); } } void operator()(const uchar* src, uchar* dst, int n) const { + CV_INSTRUMENT_REGION(); + int i, dcn = dstcn; uchar alpha = ColorChannel::max(); i = 0; -#if CV_SIMD128 + +#if CV_SIMD if(enablePackedLuv2RGB) { - static const int nPixels = 16; - for (; i < n*3-3*nPixels; i += 3*nPixels, dst += dcn*nPixels) + ushort* tab = sRGBInvGammaTab_b; + bool srgb 
= issRGB; + static const int vsize = v_uint8::nlanes; + const int descaleShift = 1 << (shift-1); + v_int16 vdescale = vx_setall_s16(descaleShift); + v_int16 vc[9]; + for(int k = 0; k < 9; k++) { - v_uint8x16 u8l, u8u, u8v; - v_load_deinterleave(src + i, u8l, u8u, u8v); + vc[k] = vx_setall_s16((short)coeffs[k]); + } + v_int16 one = vx_setall_s16(1); + v_int16 cbxy, cbz1, cgxy, cgz1, crxy, crz1; + v_int16 dummy; + v_zip(vc[0], vc[1], crxy, dummy); + v_zip(vc[2], one, crz1, dummy); + v_zip(vc[3], vc[4], cgxy, dummy); + v_zip(vc[5], one, cgz1, dummy); + v_zip(vc[6], vc[7], cbxy, dummy); + v_zip(vc[8], one, cbz1, dummy); + // fixing 16bit signed multiplication + // by subtracting 2^(base_shift-1) and then adding result back + v_int32 dummy32, fm[3]; + v_expand(vc[0]+vc[1]+vc[2], fm[0], dummy32); + v_expand(vc[3]+vc[4]+vc[5], fm[1], dummy32); + v_expand(vc[6]+vc[7]+vc[8], fm[2], dummy32); + fm[0] = fm[0] << (base_shift-1); + fm[1] = fm[1] << (base_shift-1); + fm[2] = fm[2] << (base_shift-1); - int32_t CV_DECL_ALIGNED(16) xyz[48]; - processLuvToXYZ(u8l, u8u, u8v, xyz); + for (; i <= n-vsize; i += vsize, src += 3*vsize, dst += dcn*vsize) + { + v_uint8 u8l, u8u, u8v; + v_load_deinterleave(src, u8l, u8u, u8v); - v_int32x4 xiv[4], yiv[4], ziv[4]; - for(int k = 0; k < 4; k++) + v_int32 xiv[4], yiv[4], ziv[4]; + + processLuvToXYZ(u8l, u8u, u8v, xiv, yiv, ziv); + + // [xxyyzz] + v_uint16 xyz[6]; + xyz[0] = v_pack_u(xiv[0], xiv[1]); xyz[1] = v_pack_u(xiv[2], xiv[3]); + xyz[2] = v_pack_u(yiv[0], yiv[1]); xyz[3] = v_pack_u(yiv[2], yiv[3]); + xyz[4] = v_pack_u(ziv[0], ziv[1]); xyz[5] = v_pack_u(ziv[2], ziv[3]); + + // ro = CV_DESCALE(C0 * x + C1 * y + C2 * z, shift); + // go = CV_DESCALE(C3 * x + C4 * y + C5 * z, shift); + // bo = CV_DESCALE(C6 * x + C7 * y + C8 * z, shift); + + // fix 16bit multiplication: c_i*v = c_i*(v-fixmul) + c_i*fixmul + v_uint16 fixmul = vx_setall_u16(1 << (base_shift-1)); + v_int16 sxyz[6]; + for(int k = 0; k < 6; k++) { - xiv[k] = v_load_aligned(xyz + 
4*k); - yiv[k] = v_load_aligned(xyz + 4*k + 16); - ziv[k] = v_load_aligned(xyz + 4*k + 32); + sxyz[k] = v_reinterpret_as_s16(v_sub_wrap(xyz[k], fixmul)); } - /* - ro = CV_DESCALE(C0 * x + C1 * y + C2 * z, shift); - go = CV_DESCALE(C3 * x + C4 * y + C5 * z, shift); - bo = CV_DESCALE(C6 * x + C7 * y + C8 * z, shift); - */ - v_int32x4 C0 = v_setall_s32(coeffs[0]), C1 = v_setall_s32(coeffs[1]), C2 = v_setall_s32(coeffs[2]); - v_int32x4 C3 = v_setall_s32(coeffs[3]), C4 = v_setall_s32(coeffs[4]), C5 = v_setall_s32(coeffs[5]); - v_int32x4 C6 = v_setall_s32(coeffs[6]), C7 = v_setall_s32(coeffs[7]), C8 = v_setall_s32(coeffs[8]); - v_int32x4 descaleShift = v_setall_s32(1 << (shift-1)); - v_int32x4 tabsz = v_setall_s32((int)INV_GAMMA_TAB_SIZE-1); - v_uint32x4 r_vecs[4], g_vecs[4], b_vecs[4]; + v_int16 xy[4], zd[4]; + v_zip(sxyz[0], sxyz[2], xy[0], xy[1]); + v_zip(sxyz[4], vdescale, zd[0], zd[1]); + v_zip(sxyz[1], sxyz[3], xy[2], xy[3]); + v_zip(sxyz[5], vdescale, zd[2], zd[3]); + + // [rrrrggggbbbb] + v_int32 i_rgb[4*3]; + // a bit faster than one loop for all for(int k = 0; k < 4; k++) { - v_int32x4 i_r, i_g, i_b; - i_r = (xiv[k]*C0 + yiv[k]*C1 + ziv[k]*C2 + descaleShift) >> shift; - i_g = (xiv[k]*C3 + yiv[k]*C4 + ziv[k]*C5 + descaleShift) >> shift; - i_b = (xiv[k]*C6 + yiv[k]*C7 + ziv[k]*C8 + descaleShift) >> shift; - - //limit indices in table and then substitute - //ro = tab[ro]; go = tab[go]; bo = tab[bo]; - int32_t CV_DECL_ALIGNED(16) rshifts[4], gshifts[4], bshifts[4]; - v_int32x4 rs = v_max(v_setzero_s32(), v_min(tabsz, i_r)); - v_int32x4 gs = v_max(v_setzero_s32(), v_min(tabsz, i_g)); - v_int32x4 bs = v_max(v_setzero_s32(), v_min(tabsz, i_b)); - - v_store_aligned(rshifts, rs); - v_store_aligned(gshifts, gs); - v_store_aligned(bshifts, bs); - - r_vecs[k] = v_uint32x4(tab[rshifts[0]], tab[rshifts[1]], tab[rshifts[2]], tab[rshifts[3]]); - g_vecs[k] = v_uint32x4(tab[gshifts[0]], tab[gshifts[1]], tab[gshifts[2]], tab[gshifts[3]]); - b_vecs[k] = v_uint32x4(tab[bshifts[0]], 
tab[bshifts[1]], tab[bshifts[2]], tab[bshifts[3]]); + i_rgb[k+4*0] = (v_dotprod(xy[k], crxy) + v_dotprod(zd[k], crz1) + fm[0]) >> shift; + } + for(int k = 0; k < 4; k++) + { + i_rgb[k+4*1] = (v_dotprod(xy[k], cgxy) + v_dotprod(zd[k], cgz1) + fm[1]) >> shift; + } + for(int k = 0; k < 4; k++) + { + i_rgb[k+4*2] = (v_dotprod(xy[k], cbxy) + v_dotprod(zd[k], cbz1) + fm[2]) >> shift; } - v_uint16x8 u_rvec0 = v_pack(r_vecs[0], r_vecs[1]), u_rvec1 = v_pack(r_vecs[2], r_vecs[3]); - v_uint16x8 u_gvec0 = v_pack(g_vecs[0], g_vecs[1]), u_gvec1 = v_pack(g_vecs[2], g_vecs[3]); - v_uint16x8 u_bvec0 = v_pack(b_vecs[0], b_vecs[1]), u_bvec1 = v_pack(b_vecs[2], b_vecs[3]); + // [rrggbb] + v_uint16 u_rgbvec[6]; - v_uint8x16 u8_b, u8_g, u8_r; - u8_b = v_pack(u_bvec0, u_bvec1); - u8_g = v_pack(u_gvec0, u_gvec1); - u8_r = v_pack(u_rvec0, u_rvec1); + // limit indices in table and then substitute + v_int32 z32 = vx_setzero_s32(); + v_int32 tabsz = vx_setall_s32((int)INV_GAMMA_TAB_SIZE-1); + for(int k = 0; k < 12; k++) + { + i_rgb[k] = v_max(z32, v_min(tabsz, i_rgb[k])); + } + + // ro = tab[ro]; go = tab[go]; bo = tab[bo]; + if(srgb) + { + // [rr.., gg.., bb..] 
+ int32_t CV_DECL_ALIGNED(CV_SIMD_WIDTH) rgbshifts[3*vsize]; + for(int k = 0; k < 12; k++) + { + v_store_aligned(rgbshifts + k*vsize/4, i_rgb[k]); + } + for(int k = 0; k < 6; k++) + { + u_rgbvec[k] = vx_lut(tab, rgbshifts + k*vsize/2); + } + } + else + { + // rgb = (rgb*255) >> inv_gamma_shift + for(int k = 0; k < 12; k++) + { + i_rgb[k] = ((i_rgb[k] << 8) - i_rgb[k]) >> inv_gamma_shift; + } + + for(int k = 0; k < 6; k++) + { + u_rgbvec[k] = v_reinterpret_as_u16(v_pack(i_rgb[k*2+0], i_rgb[k*2+1])); + } + } + + v_uint8 u8_b, u8_g, u8_r; + u8_r = v_pack(u_rgbvec[0], u_rgbvec[1]); + u8_g = v_pack(u_rgbvec[2], u_rgbvec[3]); + u8_b = v_pack(u_rgbvec[4], u_rgbvec[5]); if(dcn == 4) { - v_store_interleave(dst, u8_b, u8_g, u8_r, v_setall_u8(alpha)); + v_store_interleave(dst, u8_b, u8_g, u8_r, vx_setall_u8(alpha)); } else { @@ -3914,10 +3746,10 @@ struct Luv2RGBinteger } #endif - for (; i < n*3; i += 3, dst += dcn) + for (; i < n; i++, src += 3, dst += dcn) { int ro, go, bo; - process(src[i + 0], src[i + 1], src[i + 2], ro, go, bo); + process(src[0], src[1], src[2], ro, go, bo); dst[0] = saturate_cast(bo); dst[1] = saturate_cast(go); @@ -3930,7 +3762,7 @@ struct Luv2RGBinteger int dstcn; int coeffs[9]; - ushort* tab; + bool issRGB; }; @@ -3941,7 +3773,7 @@ struct Luv2RGB_b Luv2RGB_b( int _dstcn, int blueIdx, const float* _coeffs, const float* _whitept, bool _srgb ) : dstcn(_dstcn), - fcvt(_dstcn, blueIdx, _coeffs, _whitept, _srgb), + fcvt(3, blueIdx, _coeffs, _whitept, _srgb), icvt(_dstcn, blueIdx, _coeffs, _whitept, _srgb) { // whitept is fixed for int calculations @@ -3950,6 +3782,8 @@ struct Luv2RGB_b void operator()(const uchar* src, uchar* dst, int n) const { + CV_INSTRUMENT_REGION(); + if(useBitExactness) { icvt(src, dst, n); @@ -3958,49 +3792,65 @@ struct Luv2RGB_b int i, j, dcn = dstcn; uchar alpha = ColorChannel::max(); +#if CV_SIMD + float CV_DECL_ALIGNED(CV_SIMD_WIDTH) buf[3*BLOCK_SIZE]; +#else float CV_DECL_ALIGNED(16) buf[3*BLOCK_SIZE]; +#endif static const 
softfloat fl = softfloat(100)/f255; static const softfloat fu = uRange/f255; static const softfloat fv = vRange/f255; - for( i = 0; i < n; i += BLOCK_SIZE, src += BLOCK_SIZE*3 ) +#if CV_SIMD + const int fsize = v_float32::nlanes; + v_float32 vl = vx_setall_f32((float)fl); + v_float32 vu = vx_setall_f32((float)fu); + v_float32 vv = vx_setall_f32((float)fv); + v_float32 vuLow = vx_setall_f32((float)uLow), vvLow = vx_setall_f32((float)vLow); + //TODO: fix that when v_interleave is available + float CV_DECL_ALIGNED(CV_SIMD_WIDTH) interTmpM[fsize*3], interTmpA[fsize*3]; + v_store_interleave(interTmpM, vl, vu, vv); + v_store_interleave(interTmpA, vx_setzero_f32(), vuLow, vvLow); + v_float32 mluv[3], aluv[3]; + for(int k = 0; k < 3; k++) + { + mluv[k] = vx_load_aligned(interTmpM + k*fsize); + aluv[k] = vx_load_aligned(interTmpA + k*fsize); + } +#endif + + i = 0; + for( ; i < n; i += BLOCK_SIZE, src += BLOCK_SIZE*3 ) { int dn = std::min(n - i, (int)BLOCK_SIZE); j = 0; - v_float32x4 luvlm(fl, fu, fv, fl), uvlum(fu, fv, fl, fu), vluvm(fv, fl, fu, fv); - v_float32x4 luvla(0, uLow, vLow, 0), uvlua(uLow, vLow, 0, uLow), vluva(vLow, 0, uLow, vLow); - - static const int nPixBlock = 16; - for( ; j < (dn-nPixBlock)*3; j += nPixBlock*3) +#if CV_SIMD + const int vsize = v_uint8::nlanes; + for( ; j <= (dn - vsize)*3; j += 3*vsize ) { - v_uint8x16 src8; - v_uint16x8 src16_0, src16_1; - v_int32x4 src32_00, src32_01, src32_10, src32_11; - v_float32x4 m00, m01, m10, m11, a00, a01, a10, a11; + v_uint8 s0, s1, s2; + s0 = vx_load(src + j + 0*vsize); + s1 = vx_load(src + j + 1*vsize); + s2 = vx_load(src + j + 2*vsize); - int bufp = 0, srcp = 0; + v_uint16 ss[6]; + v_expand(s0, ss[0], ss[1]); + v_expand(s1, ss[2], ss[3]); + v_expand(s2, ss[4], ss[5]); + v_int32 vs[12]; + for(int k = 0; k < 6; k++) + { + v_expand(v_reinterpret_as_s16(ss[k]), vs[k*2+0], vs[k*2+1]); + } - #define CVTSTORE(n) v_store_aligned(buf + j + (bufp++)*4, v_muladd(v_cvt_f32(src32_##n), m##n, a##n)) - #define 
LOADSTORE(seq1, seq2, seq3, seq4) \ - do{\ - m00 = seq1##m, m01 = seq2##m, m10 = seq3##m, m11 = seq4##m;\ - a00 = seq1##a, a01 = seq2##a, a10 = seq3##a, a11 = seq4##a;\ - src8 = v_load(src + j + (srcp++)*16);\ - v_expand(src8, src16_0, src16_1);\ - v_expand(v_reinterpret_as_s16(src16_0), src32_00, src32_01);\ - v_expand(v_reinterpret_as_s16(src16_1), src32_10, src32_11);\ - CVTSTORE(00); CVTSTORE(01); CVTSTORE(10); CVTSTORE(11);\ - }while(0) - - LOADSTORE(luvl, uvlu, vluv, luvl); - LOADSTORE(uvlu, vluv, luvl, uvlu); - LOADSTORE(vluv, luvl, uvlu, vluv); - - #undef CVTSTORE - #undef LOADSTORE + for(int bufp = 0; bufp < 12; bufp++) + { + v_store_aligned(buf + j + bufp, v_muladd(v_cvt_f32(vs[bufp]), mluv[bufp%3], aluv[bufp%3])); + } } +#endif for( ; j < dn*3; j += 3 ) { buf[j] = src[j]*((float)fl); @@ -4012,20 +3862,52 @@ struct Luv2RGB_b j = 0; - //assume that fcvt returns 1.f as alpha value in case of 4 channels - static const int nBlock = 16; - v_float32x4 m255(255.f, 255.f, 255.f, 255.f); - v_float32x4 f00, f01, f10, f11; - v_int32x4 i00, i01, i10, i11; - for(; j < dn*3 - nBlock; j += nBlock, dst += nBlock) +#if CV_SIMD + static const int nBlock = 4*fsize; + v_float32 v255 = vx_setall_f32(255.f); + if(dcn == 4) { - f00 = v_load_aligned(buf + j + 0); f01 = v_load_aligned(buf + j + 4); - f10 = v_load_aligned(buf + j + 8); f11 = v_load_aligned(buf + j + 12); - i00 = v_round(f00*m255); i01 = v_round(f01*m255); - i10 = v_round(f10*m255); i11 = v_round(f11*m255); - v_store(dst, v_pack(v_reinterpret_as_u16(v_pack(i00, i01)), - v_reinterpret_as_u16(v_pack(i10, i11)))); + v_uint8 valpha = vx_setall_u8(alpha); + for( ; j <= (dn-nBlock)*3; + j += nBlock*3, dst += nBlock) + { + v_float32 vf[4*3]; + for(int k = 0; k < 4; k++) + { + v_load_deinterleave(buf + j, vf[k*3+0], vf[k*3+1], vf[k*3+2]); + } + + v_int32 vi[4*3]; + for(int k = 0; k < 4*3; k++) + { + vi[k] = v_round(vf[k]*v255); + } + + v_uint8 rgb[3]; + for(int k = 0; k < 3; k++) + { + rgb[k] = v_pack_u(v_pack(vi[0*3+k], 
vi[1*3+k]), + v_pack(vi[2*3+k], vi[3*3+k])); + } + + v_store_interleave(dst, rgb[0], rgb[1], rgb[2], valpha); + } } + else // dcn == 3 + { + for(; j < dn*3 - nBlock; j += nBlock, dst += nBlock) + { + v_float32 vf[4]; + v_int32 vi[4]; + for(int k = 0; k < 4; k++) + { + vf[k] = vx_load_aligned(buf + j + k*fsize); + vi[k] = v_round(vf[k]*v255); + } + v_store(dst, v_pack_u(v_pack(vi[0], vi[1]), v_pack(vi[2], vi[3]))); + } + } +#endif for( ; j < dn*3; j += 3, dst += dcn ) { diff --git a/modules/imgproc/src/geometry.cpp b/modules/imgproc/src/geometry.cpp index 9e21caf796..332cc5d477 100644 --- a/modules/imgproc/src/geometry.cpp +++ b/modules/imgproc/src/geometry.cpp @@ -544,21 +544,41 @@ float cv::intersectConvexConvex( InputArray _p1, InputArray _p2, OutputArray _p1 return 0.f; } - if( pointPolygonTest(_InputArray(fp1, n), fp2[0], false) >= 0 ) + bool intersected = false; + + // check if all of fp2's vertices is inside/on the edge of fp1. + int nVertices = 0; + for (int i=0; i= 0; + + // if all of fp2's vertices is inside/on the edge of fp1. + if (nVertices == m) { + intersected = true; result = fp2; nr = m; } - else if( pointPolygonTest(_InputArray(fp2, m), fp1[0], false) >= 0 ) + else // otherwise check if fp2 is inside fp1. { - result = fp1; - nr = n; + nVertices = 0; + for (int i=0; i= 0; + + // // if all of fp1's vertices is inside/on the edge of fp2. 
+ if (nVertices == n) + { + intersected = true; + result = fp1; + nr = n; + } } - else + + if (!intersected) { _p12.release(); return 0.f; } + area = (float)contourArea(_InputArray(result, nr), false); } diff --git a/modules/imgproc/test/test_color.cpp b/modules/imgproc/test/test_color.cpp index 6ad51ad512..e1fb21bd40 100644 --- a/modules/imgproc/test/test_color.cpp +++ b/modules/imgproc/test/test_color.cpp @@ -2687,9 +2687,9 @@ TEST(Imgproc_ColorLab_Full, bitExactness) << "Iteration: " << iter << endl << "Hash vs Correct hash: " << h << ", " << goodHash << endl << "Error in: (" << x << ", " << y << ")" << endl - << "Reference value: " << gx[0] << " " << gx[1] << " " << gx[2] << endl - << "Actual value: " << rx[0] << " " << rx[1] << " " << rx[2] << endl - << "Src value: " << px[0] << " " << px[1] << " " << px[2] << endl + << "Reference value: " << int(gx[0]) << " " << int(gx[1]) << " " << int(gx[2]) << endl + << "Actual value: " << int(rx[0]) << " " << int(rx[1]) << " " << int(rx[2]) << endl + << "Src value: " << int(px[0]) << " " << int(px[1]) << " " << int(px[2]) << endl << "Size: (" << probe.rows << ", " << probe.cols << ")" << endl; break; @@ -2780,9 +2780,9 @@ TEST(Imgproc_ColorLuv_Full, bitExactness) << "Iteration: " << iter << endl << "Hash vs Correct hash: " << h << ", " << goodHash << endl << "Error in: (" << x << ", " << y << ")" << endl - << "Reference value: " << gx[0] << " " << gx[1] << " " << gx[2] << endl - << "Actual value: " << rx[0] << " " << rx[1] << " " << rx[2] << endl - << "Src value: " << px[0] << " " << px[1] << " " << px[2] << endl + << "Reference value: " << int(gx[0]) << " " << int(gx[1]) << " " << int(gx[2]) << endl + << "Actual value: " << int(rx[0]) << " " << int(rx[1]) << " " << int(rx[2]) << endl + << "Src value: " << int(px[0]) << " " << int(px[1]) << " " << int(px[2]) << endl << "Size: (" << probe.rows << ", " << probe.cols << ")" << endl; break; diff --git a/modules/imgproc/test/test_intersectconvexconvex.cpp 
b/modules/imgproc/test/test_intersectconvexconvex.cpp new file mode 100644 index 0000000000..fa25f3d531 --- /dev/null +++ b/modules/imgproc/test/test_intersectconvexconvex.cpp @@ -0,0 +1,260 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#include "test_precomp.hpp" + +namespace opencv_test { namespace { + + +TEST(Imgproc_IntersectConvexConvex, no_intersection) +{ + std::vector convex1; + convex1.push_back(cv::Point(290, 126)); + convex1.push_back(cv::Point(284, 132)); + convex1.push_back(cv::Point(281, 133)); + convex1.push_back(cv::Point(256, 124)); + convex1.push_back(cv::Point(249, 116)); + convex1.push_back(cv::Point(234, 91)); + convex1.push_back(cv::Point(232, 86)); + convex1.push_back(cv::Point(232, 79)); + convex1.push_back(cv::Point(251, 69)); + convex1.push_back(cv::Point(257, 68)); + convex1.push_back(cv::Point(297, 85)); + convex1.push_back(cv::Point(299, 87)); + + std::vector convex2; + convex2.push_back(cv::Point(192, 236)); + convex2.push_back(cv::Point(190, 245)); + convex2.push_back(cv::Point(177, 260)); + convex2.push_back(cv::Point(154, 271)); + convex2.push_back(cv::Point(142, 270)); + convex2.push_back(cv::Point(135, 263)); + convex2.push_back(cv::Point(131, 254)); + convex2.push_back(cv::Point(132, 240)); + convex2.push_back(cv::Point(172, 213)); + convex2.push_back(cv::Point(176, 216)); + + std::vector intersection; + float area = cv::intersectConvexConvex(convex1, convex2, intersection); + + EXPECT_TRUE(intersection.empty()); + EXPECT_NEAR(area, 0, std::numeric_limits::epsilon()); +} + + +TEST(Imgproc_IntersectConvexConvex, no_intersection_with_1_vertex_on_edge_1) +{ + std::vector convex1; + convex1.push_back(cv::Point(0,0)); + convex1.push_back(cv::Point(740, 0)); + convex1.push_back(cv::Point(740, 540)); + convex1.push_back(cv::Point(0, 540)); + + std::vector convex2; + 
convex2.push_back(cv::Point(0, 210)); + convex2.push_back(cv::Point(-30, 210)); + convex2.push_back(cv::Point(-37, 170)); + convex2.push_back(cv::Point(-7, 172)); + + std::vector intersection; + float area = cv::intersectConvexConvex(convex1, convex2, intersection); + + EXPECT_TRUE(intersection.empty()); + EXPECT_NEAR(area, 0, std::numeric_limits::epsilon()); +} + + +TEST(Imgproc_IntersectConvexConvex, no_intersection_with_1_vertex_on_edge_2) +{ + std::vector convex1; + convex1.push_back(cv::Point(0,0)); + convex1.push_back(cv::Point(740, 0)); + convex1.push_back(cv::Point(740, 540)); + convex1.push_back(cv::Point(0, 540)); + + std::vector convex2; + convex2.push_back(cv::Point(740, 210)); + convex2.push_back(cv::Point(750, 100)); + convex2.push_back(cv::Point(790, 250)); + convex2.push_back(cv::Point(800, 500)); + + std::vector intersection; + float area = cv::intersectConvexConvex(convex1, convex2, intersection); + + EXPECT_TRUE(intersection.empty()); + EXPECT_NEAR(area, 0, std::numeric_limits::epsilon()); +} + + +TEST(Imgproc_IntersectConvexConvex, intersection_with_1_vertex_on_edge) +{ + std::vector convex1; + convex1.push_back(cv::Point(0,0)); + convex1.push_back(cv::Point(740, 0)); + convex1.push_back(cv::Point(740, 540)); + convex1.push_back(cv::Point(0, 540)); + + std::vector convex2; + convex2.push_back(cv::Point(30, 210)); + convex2.push_back(cv::Point(0,210)); + convex2.push_back(cv::Point(7, 172)); + convex2.push_back(cv::Point(37, 170)); + + std::vector intersection; + float area = cv::intersectConvexConvex(convex1, convex2, intersection); + + std::vector expected_intersection; + expected_intersection.push_back(cv::Point(0, 210)); + expected_intersection.push_back(cv::Point(7, 172)); + expected_intersection.push_back(cv::Point(37, 170)); + expected_intersection.push_back(cv::Point(30, 210)); + + EXPECT_EQ(intersection, expected_intersection); + EXPECT_NEAR(area, 1163, std::numeric_limits::epsilon()); +} + + +TEST(Imgproc_IntersectConvexConvex, 
intersection_with_2_vertices_on_edge) +{ + std::vector convex1; + convex1.push_back(cv::Point(0,0)); + convex1.push_back(cv::Point(740, 0)); + convex1.push_back(cv::Point(740, 540)); + convex1.push_back(cv::Point(0, 540)); + + std::vector convex2; + convex2.push_back(cv::Point(30, 210)); + convex2.push_back(cv::Point(37, 170)); + convex2.push_back(cv::Point(0,210)); + convex2.push_back(cv::Point(0, 300)); + + std::vector intersection; + float area = cv::intersectConvexConvex(convex1, convex2, intersection); + + std::vector expected_intersection; + expected_intersection.push_back(cv::Point(0, 300)); + expected_intersection.push_back(cv::Point(0, 210)); + expected_intersection.push_back(cv::Point(37, 170)); + expected_intersection.push_back(cv::Point(30, 210)); + + EXPECT_EQ(intersection, expected_intersection); + EXPECT_NEAR(area, 1950, std::numeric_limits::epsilon()); +} + + +TEST(Imgproc_IntersectConvexConvex, intersection_1) +{ + std::vector convex1; + convex1.push_back(cv::Point(0,0)); + convex1.push_back(cv::Point(740, 0)); + convex1.push_back(cv::Point(740, 540)); + convex1.push_back(cv::Point(0, 540)); + + std::vector convex2; + convex2.push_back(cv::Point(20,210)); + convex2.push_back(cv::Point(30, 210)); + convex2.push_back(cv::Point(37, 170)); + convex2.push_back(cv::Point(7, 172)); + + std::vector intersection; + float area = cv::intersectConvexConvex(convex1, convex2, intersection); + + std::vector expected_intersection; + expected_intersection.push_back(cv::Point(7, 172)); + expected_intersection.push_back(cv::Point(37, 170)); + expected_intersection.push_back(cv::Point(30, 210)); + expected_intersection.push_back(cv::Point(20, 210)); + + EXPECT_EQ(intersection, expected_intersection); + EXPECT_NEAR(area, 783, std::numeric_limits::epsilon()); +} + + +TEST(Imgproc_IntersectConvexConvex, intersection_2) +{ + std::vector convex1; + convex1.push_back(cv::Point(0,0)); + convex1.push_back(cv::Point(740, 0)); + convex1.push_back(cv::Point(740, 540)); + 
convex1.push_back(cv::Point(0, 540)); + + std::vector convex2; + convex2.push_back(cv::Point(-2,210)); + convex2.push_back(cv::Point(-5, 300)); + convex2.push_back(cv::Point(37, 150)); + convex2.push_back(cv::Point(7, 172)); + + std::vector intersection; + float area = cv::intersectConvexConvex(convex1, convex2, intersection); + + std::vector expected_intersection; + expected_intersection.push_back(cv::Point(0, 202)); + expected_intersection.push_back(cv::Point(7, 172)); + expected_intersection.push_back(cv::Point(37, 150)); + expected_intersection.push_back(cv::Point(0, 282)); + + EXPECT_EQ(intersection, expected_intersection); + EXPECT_NEAR(area, 1857.19836425781, std::numeric_limits::epsilon()); +} + + +TEST(Imgproc_IntersectConvexConvex, intersection_3) +{ + std::vector convex1; + convex1.push_back(cv::Point(15, 0)); + convex1.push_back(cv::Point(740, 0)); + convex1.push_back(cv::Point(740, 540)); + convex1.push_back(cv::Point(15, 540)); + + std::vector convex2; + convex2.push_back(cv::Point(0,210)); + convex2.push_back(cv::Point(30, 210)); + convex2.push_back(cv::Point(37, 170)); + convex2.push_back(cv::Point(7, 172)); + + std::vector intersection; + float area = cv::intersectConvexConvex(convex1, convex2, intersection); + + std::vector expected_intersection; + expected_intersection.push_back(cv::Point(15, 171)); + expected_intersection.push_back(cv::Point(37, 170)); + expected_intersection.push_back(cv::Point(30, 210)); + expected_intersection.push_back(cv::Point(15, 210)); + + EXPECT_EQ(intersection, expected_intersection); + + EXPECT_NEAR(area, 723.866760253906, std::numeric_limits::epsilon()); +} + + +TEST(Imgproc_IntersectConvexConvex, intersection_4) +{ + std::vector convex1; + convex1.push_back(cv::Point(15, 0)); + convex1.push_back(cv::Point(740, 0)); + convex1.push_back(cv::Point(740, 540)); + convex1.push_back(cv::Point(15, 540)); + + std::vector convex2; + convex2.push_back(cv::Point(15, 0)); + convex2.push_back(cv::Point(740, 0)); + 
convex2.push_back(cv::Point(740, 540)); + convex2.push_back(cv::Point(15, 540)); + + std::vector intersection; + float area = cv::intersectConvexConvex(convex1, convex2, intersection); + + std::vector expected_intersection; + expected_intersection.push_back(cv::Point(15, 0)); + expected_intersection.push_back(cv::Point(740, 0)); + expected_intersection.push_back(cv::Point(740, 540)); + expected_intersection.push_back(cv::Point(15, 540)); + + EXPECT_EQ(intersection, expected_intersection); + EXPECT_NEAR(area, 391500, std::numeric_limits::epsilon()); +} + + +} // namespace +} // opencv_test diff --git a/modules/java/generator/android/java/org/opencv/android/Utils.java b/modules/java/generator/android/java/org/opencv/android/Utils.java index 404c986da8..eef4c45622 100644 --- a/modules/java/generator/android/java/org/opencv/android/Utils.java +++ b/modules/java/generator/android/java/org/opencv/android/Utils.java @@ -87,9 +87,9 @@ public class Utils { */ public static void bitmapToMat(Bitmap bmp, Mat mat, boolean unPremultiplyAlpha) { if (bmp == null) - throw new java.lang.IllegalArgumentException("bmp == null"); + throw new IllegalArgumentException("bmp == null"); if (mat == null) - throw new java.lang.IllegalArgumentException("mat == null"); + throw new IllegalArgumentException("mat == null"); nBitmapToMat2(bmp, mat.nativeObj, unPremultiplyAlpha); } @@ -117,9 +117,9 @@ public class Utils { */ public static void matToBitmap(Mat mat, Bitmap bmp, boolean premultiplyAlpha) { if (mat == null) - throw new java.lang.IllegalArgumentException("mat == null"); + throw new IllegalArgumentException("mat == null"); if (bmp == null) - throw new java.lang.IllegalArgumentException("bmp == null"); + throw new IllegalArgumentException("bmp == null"); nMatToBitmap2(mat.nativeObj, bmp, premultiplyAlpha); } diff --git a/modules/java/generator/src/java/org/opencv/utils/Converters.java b/modules/java/generator/src/java/org/opencv/utils/Converters.java index 9faf2ecee9..94675da183 100644 --- 
a/modules/java/generator/src/java/org/opencv/utils/Converters.java +++ b/modules/java/generator/src/java/org/opencv/utils/Converters.java @@ -159,11 +159,11 @@ public class Converters { public static void Mat_to_vector_Point(Mat m, List pts) { if (pts == null) - throw new java.lang.IllegalArgumentException("Output List can't be null"); + throw new IllegalArgumentException("Output List can't be null"); int count = m.rows(); int type = m.type(); if (m.cols() != 1) - throw new java.lang.IllegalArgumentException("Input Mat should have one column\n" + m); + throw new IllegalArgumentException("Input Mat should have one column\n" + m); pts.clear(); if (type == CvType.CV_32SC2) { @@ -185,7 +185,7 @@ public class Converters { pts.add(new Point(buff[i * 2], buff[i * 2 + 1])); } } else { - throw new java.lang.IllegalArgumentException( + throw new IllegalArgumentException( "Input Mat should be of CV_32SC2, CV_32FC2 or CV_64FC2 type\n" + m); } } @@ -204,11 +204,11 @@ public class Converters { public static void Mat_to_vector_Point3(Mat m, List pts) { if (pts == null) - throw new java.lang.IllegalArgumentException("Output List can't be null"); + throw new IllegalArgumentException("Output List can't be null"); int count = m.rows(); int type = m.type(); if (m.cols() != 1) - throw new java.lang.IllegalArgumentException("Input Mat should have one column\n" + m); + throw new IllegalArgumentException("Input Mat should have one column\n" + m); pts.clear(); if (type == CvType.CV_32SC3) { @@ -230,7 +230,7 @@ public class Converters { pts.add(new Point3(buff[i * 3], buff[i * 3 + 1], buff[i * 3 + 2])); } } else { - throw new java.lang.IllegalArgumentException( + throw new IllegalArgumentException( "Input Mat should be of CV_32SC3, CV_32FC3 or CV_64FC3 type\n" + m); } } @@ -255,10 +255,10 @@ public class Converters { public static void Mat_to_vector_Mat(Mat m, List mats) { if (mats == null) - throw new java.lang.IllegalArgumentException("mats == null"); + throw new 
IllegalArgumentException("mats == null"); int count = m.rows(); if (CvType.CV_32SC2 != m.type() || m.cols() != 1) - throw new java.lang.IllegalArgumentException( + throw new IllegalArgumentException( "CvType.CV_32SC2 != m.type() || m.cols()!=1\n" + m); mats.clear(); @@ -289,10 +289,10 @@ public class Converters { public static void Mat_to_vector_float(Mat m, List fs) { if (fs == null) - throw new java.lang.IllegalArgumentException("fs == null"); + throw new IllegalArgumentException("fs == null"); int count = m.rows(); if (CvType.CV_32FC1 != m.type() || m.cols() != 1) - throw new java.lang.IllegalArgumentException( + throw new IllegalArgumentException( "CvType.CV_32FC1 != m.type() || m.cols()!=1\n" + m); fs.clear(); @@ -322,10 +322,10 @@ public class Converters { public static void Mat_to_vector_uchar(Mat m, List us) { if (us == null) - throw new java.lang.IllegalArgumentException("Output List can't be null"); + throw new IllegalArgumentException("Output List can't be null"); int count = m.rows(); if (CvType.CV_8UC1 != m.type() || m.cols() != 1) - throw new java.lang.IllegalArgumentException( + throw new IllegalArgumentException( "CvType.CV_8UC1 != m.type() || m.cols()!=1\n" + m); us.clear(); @@ -372,10 +372,10 @@ public class Converters { public static void Mat_to_vector_int(Mat m, List is) { if (is == null) - throw new java.lang.IllegalArgumentException("is == null"); + throw new IllegalArgumentException("is == null"); int count = m.rows(); if (CvType.CV_32SC1 != m.type() || m.cols() != 1) - throw new java.lang.IllegalArgumentException( + throw new IllegalArgumentException( "CvType.CV_32SC1 != m.type() || m.cols()!=1\n" + m); is.clear(); @@ -388,10 +388,10 @@ public class Converters { public static void Mat_to_vector_char(Mat m, List bs) { if (bs == null) - throw new java.lang.IllegalArgumentException("Output List can't be null"); + throw new IllegalArgumentException("Output List can't be null"); int count = m.rows(); if (CvType.CV_8SC1 != m.type() || m.cols() != 
1) - throw new java.lang.IllegalArgumentException( + throw new IllegalArgumentException( "CvType.CV_8SC1 != m.type() || m.cols()!=1\n" + m); bs.clear(); @@ -424,10 +424,10 @@ public class Converters { public static void Mat_to_vector_Rect(Mat m, List rs) { if (rs == null) - throw new java.lang.IllegalArgumentException("rs == null"); + throw new IllegalArgumentException("rs == null"); int count = m.rows(); if (CvType.CV_32SC4 != m.type() || m.cols() != 1) - throw new java.lang.IllegalArgumentException( + throw new IllegalArgumentException( "CvType.CV_32SC4 != m.type() || m.rows()!=1\n" + m); rs.clear(); @@ -460,10 +460,10 @@ public class Converters { public static void Mat_to_vector_Rect2d(Mat m, List rs) { if (rs == null) - throw new java.lang.IllegalArgumentException("rs == null"); + throw new IllegalArgumentException("rs == null"); int count = m.rows(); if (CvType.CV_64FC4 != m.type() || m.cols() != 1) - throw new java.lang.IllegalArgumentException( + throw new IllegalArgumentException( "CvType.CV_64FC4 != m.type() || m.rows()!=1\n" + m); rs.clear(); @@ -499,10 +499,10 @@ public class Converters { public static void Mat_to_vector_KeyPoint(Mat m, List kps) { if (kps == null) - throw new java.lang.IllegalArgumentException("Output List can't be null"); + throw new IllegalArgumentException("Output List can't be null"); int count = m.rows(); if (CvType.CV_64FC(7) != m.type() || m.cols() != 1) - throw new java.lang.IllegalArgumentException( + throw new IllegalArgumentException( "CvType.CV_64FC(7) != m.type() || m.cols()!=1\n" + m); kps.clear(); @@ -530,10 +530,10 @@ public class Converters { public static void Mat_to_vector_vector_Point(Mat m, List pts) { if (pts == null) - throw new java.lang.IllegalArgumentException("Output List can't be null"); + throw new IllegalArgumentException("Output List can't be null"); if (m == null) - throw new java.lang.IllegalArgumentException("Input Mat can't be null"); + throw new IllegalArgumentException("Input Mat can't be null"); 
List mats = new ArrayList(m.rows()); Mat_to_vector_Mat(m, mats); @@ -548,10 +548,10 @@ public class Converters { // vector_vector_Point2f public static void Mat_to_vector_vector_Point2f(Mat m, List pts) { if (pts == null) - throw new java.lang.IllegalArgumentException("Output List can't be null"); + throw new IllegalArgumentException("Output List can't be null"); if (m == null) - throw new java.lang.IllegalArgumentException("Input Mat can't be null"); + throw new IllegalArgumentException("Input Mat can't be null"); List mats = new ArrayList(m.rows()); Mat_to_vector_Mat(m, mats); @@ -580,10 +580,10 @@ public class Converters { // vector_vector_Point3f public static void Mat_to_vector_vector_Point3f(Mat m, List pts) { if (pts == null) - throw new java.lang.IllegalArgumentException("Output List can't be null"); + throw new IllegalArgumentException("Output List can't be null"); if (m == null) - throw new java.lang.IllegalArgumentException("Input Mat can't be null"); + throw new IllegalArgumentException("Input Mat can't be null"); List mats = new ArrayList(m.rows()); Mat_to_vector_Mat(m, mats); @@ -625,10 +625,10 @@ public class Converters { public static void Mat_to_vector_vector_KeyPoint(Mat m, List kps) { if (kps == null) - throw new java.lang.IllegalArgumentException("Output List can't be null"); + throw new IllegalArgumentException("Output List can't be null"); if (m == null) - throw new java.lang.IllegalArgumentException("Input Mat can't be null"); + throw new IllegalArgumentException("Input Mat can't be null"); List mats = new ArrayList(m.rows()); Mat_to_vector_Mat(m, mats); @@ -659,10 +659,10 @@ public class Converters { public static void Mat_to_vector_double(Mat m, List ds) { if (ds == null) - throw new java.lang.IllegalArgumentException("ds == null"); + throw new IllegalArgumentException("ds == null"); int count = m.rows(); if (CvType.CV_64FC1 != m.type() || m.cols() != 1) - throw new java.lang.IllegalArgumentException( + throw new IllegalArgumentException( 
"CvType.CV_64FC1 != m.type() || m.cols()!=1\n" + m); ds.clear(); @@ -695,10 +695,10 @@ public class Converters { public static void Mat_to_vector_DMatch(Mat m, List matches) { if (matches == null) - throw new java.lang.IllegalArgumentException("Output List can't be null"); + throw new IllegalArgumentException("Output List can't be null"); int count = m.rows(); if (CvType.CV_64FC4 != m.type() || m.cols() != 1) - throw new java.lang.IllegalArgumentException( + throw new IllegalArgumentException( "CvType.CV_64FC4 != m.type() || m.cols()!=1\n" + m); matches.clear(); @@ -725,10 +725,10 @@ public class Converters { public static void Mat_to_vector_vector_DMatch(Mat m, List lvdm) { if (lvdm == null) - throw new java.lang.IllegalArgumentException("Output List can't be null"); + throw new IllegalArgumentException("Output List can't be null"); if (m == null) - throw new java.lang.IllegalArgumentException("Input Mat can't be null"); + throw new IllegalArgumentException("Input Mat can't be null"); List mats = new ArrayList(m.rows()); Mat_to_vector_Mat(m, mats); @@ -757,10 +757,10 @@ public class Converters { public static void Mat_to_vector_vector_char(Mat m, List> llb) { if (llb == null) - throw new java.lang.IllegalArgumentException("Output List can't be null"); + throw new IllegalArgumentException("Output List can't be null"); if (m == null) - throw new java.lang.IllegalArgumentException("Input Mat can't be null"); + throw new IllegalArgumentException("Input Mat can't be null"); List mats = new ArrayList(m.rows()); Mat_to_vector_Mat(m, mats); @@ -796,10 +796,10 @@ public class Converters { public static void Mat_to_vector_RotatedRect(Mat m, List rs) { if (rs == null) - throw new java.lang.IllegalArgumentException("rs == null"); + throw new IllegalArgumentException("rs == null"); int count = m.rows(); if (CvType.CV_32FC(5) != m.type() || m.cols() != 1) - throw new java.lang.IllegalArgumentException( + throw new IllegalArgumentException( "CvType.CV_32FC5 != m.type() || 
m.rows()!=1\n" + m); rs.clear(); diff --git a/samples/dnn/tf_text_graph_faster_rcnn.py b/samples/dnn/tf_text_graph_faster_rcnn.py index e1dfba9fee..8a88c7328a 100644 --- a/samples/dnn/tf_text_graph_faster_rcnn.py +++ b/samples/dnn/tf_text_graph_faster_rcnn.py @@ -31,7 +31,13 @@ def createFasterRCNNGraph(modelPath, configPath, outputPath): aspect_ratios = [float(ar) for ar in grid_anchor_generator['aspect_ratios']] width_stride = float(grid_anchor_generator['width_stride'][0]) height_stride = float(grid_anchor_generator['height_stride'][0]) - features_stride = float(config['feature_extractor'][0]['first_stage_features_stride'][0]) + + feature_extractor = config['feature_extractor'][0] + if 'type' in feature_extractor and feature_extractor['type'][0] == 'faster_rcnn_nas': + features_stride = 16.0 + else: + features_stride = float(feature_extractor['first_stage_features_stride'][0]) + first_stage_nms_iou_threshold = float(config['first_stage_nms_iou_threshold'][0]) first_stage_max_proposals = int(config['first_stage_max_proposals'][0])