Merge remote-tracking branch 'upstream/3.4' into merge-3.4

This commit is contained in:
Alexander Alekhin 2018-09-04 19:35:38 +03:00 committed by Alexander Alekhin
commit d74b98c3d9
117 changed files with 6913 additions and 3713 deletions

View File

@ -1,45 +0,0 @@
# Main variables:
# IPP_A_LIBRARIES and IPP_A_INCLUDE to use IPP Async
# HAVE_IPP_A for conditional compilation OpenCV with/without IPP Async
# IPP_ASYNC_ROOT - root of IPP Async installation
if(X86_64)
find_path(
IPP_A_INCLUDE_DIR
NAMES ipp_async_defs.h
PATHS $ENV{IPP_ASYNC_ROOT}
PATH_SUFFIXES include
DOC "Path to Intel IPP Async interface headers")
find_file(
IPP_A_LIBRARIES
NAMES ipp_async_preview.lib
PATHS $ENV{IPP_ASYNC_ROOT}
PATH_SUFFIXES lib/intel64
DOC "Path to Intel IPP Async interface libraries")
else()
find_path(
IPP_A_INCLUDE_DIR
NAMES ipp_async_defs.h
PATHS $ENV{IPP_ASYNC_ROOT}
PATH_SUFFIXES include
DOC "Path to Intel IPP Async interface headers")
find_file(
IPP_A_LIBRARIES
NAMES ipp_async_preview.lib
PATHS $ENV{IPP_ASYNC_ROOT}
PATH_SUFFIXES lib/ia32
DOC "Path to Intel IPP Async interface libraries")
endif()
if(IPP_A_INCLUDE_DIR AND IPP_A_LIBRARIES)
set(HAVE_IPP_A TRUE)
else()
set(HAVE_IPP_A FALSE)
message(WARNING "Intel IPP Async library directory (set by IPP_A_LIBRARIES_DIR variable) is not found or does not have Intel IPP Async libraries.")
endif()
mark_as_advanced(FORCE IPP_A_LIBRARIES IPP_A_INCLUDE_DIR)

View File

@ -35,17 +35,6 @@ if(WITH_IPP)
endif()
endif()
# --- IPP Async ---
if(WITH_IPP_A)
include("${OpenCV_SOURCE_DIR}/cmake/OpenCVFindIPPAsync.cmake")
if(IPP_A_INCLUDE_DIR AND IPP_A_LIBRARIES)
ocv_include_directories(${IPP_A_INCLUDE_DIR})
link_directories(${IPP_A_LIBRARIES})
set(OPENCV_LINKER_LIBS ${OPENCV_LINKER_LIBS} ${IPP_A_LIBRARIES})
endif()
endif(WITH_IPP_A)
# --- CUDA ---
if(WITH_CUDA)
include("${OpenCV_SOURCE_DIR}/cmake/OpenCVDetectCUDA.cmake")

View File

@ -103,9 +103,6 @@
#cmakedefine HAVE_IPP_ICV
#cmakedefine HAVE_IPP_IW
/* Intel IPP Async */
#cmakedefine HAVE_IPP_A
/* JPEG-2000 codec */
#cmakedefine HAVE_JASPER

View File

@ -227,7 +227,6 @@ SEARCH_INCLUDES = YES
INCLUDE_PATH =
INCLUDE_FILE_PATTERNS =
PREDEFINED = __cplusplus=1 \
HAVE_IPP_A=1 \
CVAPI(x)=x \
CV_DOXYGEN= \
CV_EXPORTS= \

View File

@ -1,7 +1,7 @@
How to use the OpenCV parallel_for_ to parallelize your code {#tutorial_how_to_use_OpenCV_parallel_for_}
==================================================================
@prev_tutorial{tutorial_how_to_use_ippa_conversion}
@prev_tutorial{tutorial_interoperability_with_OpenCV_1}
Goal
----

View File

@ -1,146 +0,0 @@
Intel® IPP Asynchronous C/C++ library in OpenCV {#tutorial_how_to_use_ippa_conversion}
===============================================
@prev_tutorial{tutorial_interoperability_with_OpenCV_1}
@next_tutorial{tutorial_how_to_use_OpenCV_parallel_for_}
Goal
----
The tutorial demonstrates the [Intel® IPP Asynchronous
C/C++](http://software.intel.com/en-us/intel-ipp-preview) library usage with OpenCV. The code
example below illustrates implementation of the Sobel operation, accelerated with Intel® IPP
Asynchronous C/C++ functions. In this code example, @ref cv::hpp::getMat and @ref cv::hpp::getHpp
functions are used for data conversion between
[hppiMatrix](http://software.intel.com/en-us/node/501660) and Mat matrices.
Code
----
You may also find the source code in the
`samples/cpp/tutorial_code/core/ippasync/ippasync_sample.cpp` file of the OpenCV source library or
download it from [here](https://github.com/opencv/opencv/tree/master/samples/cpp/tutorial_code/core/ippasync/ippasync_sample.cpp).
@include cpp/tutorial_code/core/ippasync/ippasync_sample.cpp
Explanation
-----------
-# Create parameters for OpenCV:
@code{.cpp}
VideoCapture cap;
Mat image, gray, result;
@endcode
and IPP Async:
@code{.cpp}
hppiMatrix* src,* dst;
hppAccel accel = 0;
hppAccelType accelType;
hppStatus sts;
hppiVirtualMatrix * virtMatrix;
@endcode
-# Load input image or video. How to open and read video stream you can see in the
@ref tutorial_video_input_psnr_ssim tutorial.
@code{.cpp}
if( useCamera )
{
printf("used camera\n");
cap.open(0);
}
else
{
printf("used image %s\n", file.c_str());
cap.open(file.c_str());
}
if( !cap.isOpened() )
{
printf("can not open camera or video file\n");
return -1;
}
@endcode
-# Create accelerator instance using
[hppCreateInstance](http://software.intel.com/en-us/node/501686):
@code{.cpp}
accelType = sAccel == "cpu" ? HPP_ACCEL_TYPE_CPU:
sAccel == "gpu" ? HPP_ACCEL_TYPE_GPU:
HPP_ACCEL_TYPE_ANY;
//Create accelerator instance
sts = hppCreateInstance(accelType, 0, &accel);
CHECK_STATUS(sts, "hppCreateInstance");
@endcode
-# Create an array of virtual matrices using
[hppiCreateVirtualMatrices](http://software.intel.com/en-us/node/501700) function.
@code{.cpp}
virtMatrix = hppiCreateVirtualMatrices(accel, 1);
@endcode
-# Prepare a matrix for input and output data:
@code{.cpp}
cap >> image;
if(image.empty())
break;
cvtColor( image, gray, COLOR_BGR2GRAY );
result.create( image.rows, image.cols, CV_8U);
@endcode
-# Convert Mat to [hppiMatrix](http://software.intel.com/en-us/node/501660) using @ref cv::hpp::getHpp
and call [hppiSobel](http://software.intel.com/en-us/node/474701) function.
@code{.cpp}
//convert Mat to hppiMatrix
src = getHpp(gray, accel);
dst = getHpp(result, accel);
sts = hppiSobel(accel,src, HPP_MASK_SIZE_3X3,HPP_NORM_L1,virtMatrix[0]);
CHECK_STATUS(sts,"hppiSobel");
sts = hppiConvert(accel, virtMatrix[0], 0, HPP_RND_MODE_NEAR, dst, HPP_DATA_TYPE_8U);
CHECK_STATUS(sts,"hppiConvert");
// Wait for tasks to complete
sts = hppWait(accel, HPP_TIME_OUT_INFINITE);
CHECK_STATUS(sts, "hppWait");
@endcode
We use [hppiConvert](http://software.intel.com/en-us/node/501746) because
[hppiSobel](http://software.intel.com/en-us/node/474701) returns destination matrix with
HPP_DATA_TYPE_16S data type for source matrix with HPP_DATA_TYPE_8U type. You should check
hppStatus after each call IPP Async function.
-# Create windows and show the images, the usual way.
@code{.cpp}
imshow("image", image);
imshow("rez", result);
waitKey(15);
@endcode
-# Delete hpp matrices.
@code{.cpp}
sts = hppiFreeMatrix(src);
CHECK_DEL_STATUS(sts,"hppiFreeMatrix");
sts = hppiFreeMatrix(dst);
CHECK_DEL_STATUS(sts,"hppiFreeMatrix");
@endcode
-# Delete virtual matrices and accelerator instance.
@code{.cpp}
if (virtMatrix)
{
sts = hppiDeleteVirtualMatrices(accel, virtMatrix);
CHECK_DEL_STATUS(sts,"hppiDeleteVirtualMatrices");
}
if (accel)
{
sts = hppDeleteInstance(accel);
CHECK_DEL_STATUS(sts, "hppDeleteInstance");
}
@endcode
Result
------
After compiling the code above we can execute it giving an image or video path and accelerator type
as an argument. For this tutorial we use baboon.png image as input. The result is below.
![](images/How_To_Use_IPPA_Result.jpg)

Binary file not shown.

Before

Width:  |  Height:  |  Size: 61 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 6.8 KiB

View File

@ -2,7 +2,7 @@ Interoperability with OpenCV 1 {#tutorial_interoperability_with_OpenCV_1}
==============================
@prev_tutorial{tutorial_file_input_output_with_xml_yml}
@next_tutorial{tutorial_how_to_use_ippa_conversion}
@next_tutorial{tutorial_how_to_use_OpenCV_parallel_for_}
Goal
----

View File

@ -93,15 +93,6 @@ understanding how to manipulate the images on a pixel level.
Look here to shed light on all this questions.
- @subpage tutorial_how_to_use_ippa_conversion
*Compatibility:* \> OpenCV 2.0
*Author:* Elena Gvozdeva
You will see how to use the IPP Async with OpenCV.
- @subpage tutorial_how_to_use_OpenCV_parallel_for_
*Compatibility:* \>= OpenCV 2.4.3

Binary file not shown.

After

Width:  |  Height:  |  Size: 33 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 51 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 556 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 20 KiB

View File

@ -0,0 +1,72 @@
Motion Deblur Filter {#tutorial_motion_deblur_filter}
==========================
Goal
----
In this tutorial you will learn:
- what the PSF of a motion blur image is
- how to restore a motion blur image
Theory
------
For the degradation image model theory and the Wiener filter theory you can refer to the tutorial @ref tutorial_out_of_focus_deblur_filter "Out-of-focus Deblur Filter".
On this page only a linear motion blur distortion is considered. The motion blur image on this page is a real world image. The blur was caused by a moving subject.
### What is the PSF of a motion blur image?
The point spread function (PSF) of a linear motion blur distortion is a line segment. Such a PSF is specified by two parameters: \f$LEN\f$ is the length of the blur and \f$THETA\f$ is the angle of motion.
![Point spread function of a linear motion blur distortion](images/motion_psf.png)
### How to restore a blurred image?
On this page the Wiener filter is used as the restoration filter, for details you can refer to the tutorial @ref tutorial_out_of_focus_deblur_filter "Out-of-focus Deblur Filter".
In order to synthesize the Wiener filter for a motion blur case, it needs to specify the signal-to-noise ratio (\f$SNR\f$), \f$LEN\f$ and \f$THETA\f$ of the PSF.
Source code
-----------
You can find source code in the `samples/cpp/tutorial_code/ImgProc/motion_deblur_filter/motion_deblur_filter.cpp` of the OpenCV source code library.
@include cpp/tutorial_code/ImgProc/motion_deblur_filter/motion_deblur_filter.cpp
Explanation
-----------
A motion blur image recovering algorithm consists of PSF generation, Wiener filter generation and filtering a blurred image in a frequency domain:
@snippet samples/cpp/tutorial_code/ImgProc/motion_deblur_filter/motion_deblur_filter.cpp main
A function calcPSF() forms a PSF according to input parameters \f$LEN\f$ and \f$THETA\f$ (in degrees):
@snippet samples/cpp/tutorial_code/ImgProc/motion_deblur_filter/motion_deblur_filter.cpp calcPSF
A function edgetaper() tapers the input images edges in order to reduce the ringing effect in a restored image:
@snippet samples/cpp/tutorial_code/ImgProc/motion_deblur_filter/motion_deblur_filter.cpp edgetaper
The functions calcWnrFilter(), fftshift() and filter2DFreq() realize an image filtration by a specified PSF in the frequency domain. The functions are copied from the tutorial
@ref tutorial_out_of_focus_deblur_filter "Out-of-focus Deblur Filter".
Result
------
Below you can see the real world image with motion blur distortion. The license plate is not readable on both cars. The red markers show the cars license plate location.
![Motion blur image. The license plates are not readable](images/motion_original.jpg)
Below you can see the restoration result for the black car license plate. The result has been computed with \f$LEN\f$ = 125, \f$THETA\f$ = 0, \f$SNR\f$ = 700.
![The restored image of the black car license plate](images/black_car.jpg)
Below you can see the restoration result for the white car license plate. The result has been computed with \f$LEN\f$ = 78, \f$THETA\f$ = 15, \f$SNR\f$ = 300.
![The restored image of the white car license plate](images/white_car.jpg)
The values of \f$SNR\f$, \f$LEN\f$ and \f$THETA\f$ were selected manually to give the best possible visual result. The \f$THETA\f$ parameter coincides with the cars moving direction, and the
\f$LEN\f$ parameter depends on the cars moving speed.
The result is not perfect, but at least it gives us a hint of the images content. With some effort, the car license plate is now readable.
@note The parameters \f$LEN\f$ and \f$THETA\f$ are the most important. You should adjust \f$LEN\f$ and \f$THETA\f$ first, then \f$SNR\f$.
You can also find a quick video demonstration of a license plate recovering method
[YouTube](https://youtu.be/xSrE0hdhb4o).
@youtube{xSrE0hdhb4o}

View File

@ -8,54 +8,54 @@ Goal
In this tutorial you will learn:
- what is a degradation image model
- what is PSF of out-of-focus image
- what a degradation image model is
- what the PSF of an out-of-focus image is
- how to restore a blurred image
- what is Wiener filter
- what is a Wiener filter
Theory
------
@note The explanation is based on the books @cite gonzalez and @cite gruzman. Also, you can refer to Matlab's tutorial [Image Deblurring in Matlab] and an article [SmartDeblur].
@note An out-of-focus image on this page is a real world image. An out-of-focus was done manually by camera optics.
@note The explanation is based on the books @cite gonzalez and @cite gruzman. Also, you can refer to Matlab's tutorial [Image Deblurring in Matlab] and the article [SmartDeblur].
@note The out-of-focus image on this page is a real world image. The out-of-focus was achieved manually by camera optics.
### What is a degradation image model?
A mathematical model of the image degradation in frequency domain representation is:
Here is a mathematical model of the image degradation in frequency domain representation:
\f[S = H\cdot U + N\f]
where
\f$S\f$ is a spectrum of blurred (degraded) image,
\f$U\f$ is a spectrum of original true (undegraded) image,
\f$H\f$ is frequency response of point spread function (PSF),
\f$H\f$ is a frequency response of point spread function (PSF),
\f$N\f$ is a spectrum of additive noise.
Circular PSF is a good approximation of out-of-focus distortion. Such PSF is specified by only one parameter - radius \f$R\f$. Circular PSF is used in this work.
The circular PSF is a good approximation of out-of-focus distortion. Such a PSF is specified by only one parameter - radius \f$R\f$. Circular PSF is used in this work.
![Circular point spread function](psf.png)
### How to restore an blurred image?
### How to restore a blurred image?
The objective of restoration (deblurring) is to obtain an estimate of the original image. Restoration formula in frequency domain is:
The objective of restoration (deblurring) is to obtain an estimate of the original image. The restoration formula in frequency domain is:
\f[U' = H_w\cdot S\f]
where
\f$U'\f$ is spectrum of estimation of original image \f$U\f$,
\f$H_w\f$ is restoration filter, for example, Wiener filter.
\f$U'\f$ is the spectrum of estimation of original image \f$U\f$, and
\f$H_w\f$ is the restoration filter, for example, the Wiener filter.
### What is Wiener filter?
### What is the Wiener filter?
Wiener filter is a way to restore a blurred image. Let's suppose that PSF is a real and symmetric signal, a power spectrum of the original true image and noise are not known,
then simplified Wiener formula is:
The Wiener filter is a way to restore a blurred image. Let's suppose that the PSF is a real and symmetric signal, a power spectrum of the original true image and noise are not known,
then a simplified Wiener formula is:
\f[H_w = \frac{H}{|H|^2+\frac{1}{SNR}} \f]
where
\f$SNR\f$ is signal-to-noise ratio.
So, in order to recover an out-of-focus image by Wiener filter, it needs to know \f$SNR\f$ and \f$R\f$ of circular PSF.
So, in order to recover an out-of-focus image by Wiener filter, it needs to know the \f$SNR\f$ and \f$R\f$ of the circular PSF.
Source code
@ -68,36 +68,36 @@ You can find source code in the `samples/cpp/tutorial_code/ImgProc/out_of_focus_
Explanation
-----------
An out-of-focus image recovering algorithm consists of PSF generation, Wiener filter generation and filtering an blurred image in frequency domain:
An out-of-focus image recovering algorithm consists of PSF generation, Wiener filter generation and filtering a blurred image in frequency domain:
@snippet samples/cpp/tutorial_code/ImgProc/out_of_focus_deblur_filter/out_of_focus_deblur_filter.cpp main
A function calcPSF() forms an circular PSF according to input parameter radius \f$R\f$:
A function calcPSF() forms a circular PSF according to input parameter radius \f$R\f$:
@snippet samples/cpp/tutorial_code/ImgProc/out_of_focus_deblur_filter/out_of_focus_deblur_filter.cpp calcPSF
A function calcWnrFilter() synthesizes simplified Wiener filter \f$H_w\f$ according to formula described above:
A function calcWnrFilter() synthesizes the simplified Wiener filter \f$H_w\f$ according to the formula described above:
@snippet samples/cpp/tutorial_code/ImgProc/out_of_focus_deblur_filter/out_of_focus_deblur_filter.cpp calcWnrFilter
A function fftshift() rearranges PSF. This code was just copied from tutorial @ref tutorial_discrete_fourier_transform "Discrete Fourier Transform":
A function fftshift() rearranges the PSF. This code was just copied from the tutorial @ref tutorial_discrete_fourier_transform "Discrete Fourier Transform":
@snippet samples/cpp/tutorial_code/ImgProc/out_of_focus_deblur_filter/out_of_focus_deblur_filter.cpp fftshift
A function filter2DFreq() filters an blurred image in frequency domain:
A function filter2DFreq() filters the blurred image in the frequency domain:
@snippet samples/cpp/tutorial_code/ImgProc/out_of_focus_deblur_filter/out_of_focus_deblur_filter.cpp filter2DFreq
Result
------
Below you can see real out-of-focus image:
Below you can see the real out-of-focus image:
![Out-of-focus image](images/original.jpg)
Below result was done by \f$R\f$ = 53 and \f$SNR\f$ = 5200 parameters:
And the following result has been computed with \f$R\f$ = 53 and \f$SNR\f$ = 5200 parameters:
![The restored (deblurred) image](images/recovered.jpg)
The Wiener filter was used, values of \f$R\f$ and \f$SNR\f$ were selected manually to give the best possible visual result.
We can see that the result is not perfect, but it gives us a hint to the image content. With some difficulty, the text is readable.
The Wiener filter was used, and values of \f$R\f$ and \f$SNR\f$ were selected manually to give the best possible visual result.
We can see that the result is not perfect, but it gives us a hint to the image's content. With some difficulty, the text is readable.
@note The parameter \f$R\f$ is the most important. So you should adjust \f$R\f$ first, then \f$SNR\f$.
@note Sometimes you can observe the ringing effect in an restored image. This effect can be reduced by several methods. For example, you can taper input image edges.
@note Sometimes you can observe the ringing effect in a restored image. This effect can be reduced with several methods. For example, you can taper input image edges.
You can also find a quick video demonstration of this on
[YouTube](https://youtu.be/0bEcE4B0XP4).

View File

@ -320,3 +320,13 @@ In this section you will learn about the image processing (manipulation) functio
*Author:* Karpushin Vladislav
You will learn how to recover an out-of-focus image by Wiener filter.
- @subpage tutorial_motion_deblur_filter
*Languages:* C++
*Compatibility:* \> OpenCV 2.0
*Author:* Karpushin Vladislav
You will learn how to recover an image with motion blur distortion using a Wiener filter.

View File

@ -142,8 +142,6 @@ of them, you need to download and install them on your system.
- [Intel Integrated Performance Primitives (*IPP*)](http://software.intel.com/en-us/articles/intel-ipp/) may be used to improve the performance
of color conversion, Haar training and DFT functions of the OpenCV library. Watch out, since
this is not a free service.
- [Intel IPP Asynchronous C/C++](http://software.intel.com/en-us/intel-ipp-preview) is currently focused delivering Intel Graphics
support for advanced image processing and computer vision functions.
- OpenCV offers a somewhat fancier and more useful graphical user interface, than the default one
by using the [Qt framework](http://qt.nokia.com/downloads). For a quick overview of what this has to offer, look into the
documentations *highgui* module, under the *Qt New Functions* section. Version 4.6 or later of
@ -204,10 +202,6 @@ libraries). If you do not need the support for some of these, you can just freel
![](images/IntelTBB.png)
-# For the [Intel IPP Asynchronous C/C++](http://software.intel.com/en-us/intel-ipp-preview) download the source files and set environment
variable **IPP_ASYNC_ROOT**. It should point to
`<your Program Files(x86) directory>/Intel/IPP Preview */ipp directory`. Here \* denotes the
particular preview name.
-# In case of the [Eigen](http://eigen.tuxfamily.org/index.php?title=Main_Page#Download) library it is again a case of download and extract to the
`D:/OpenCV/dep` directory.
-# Same as above with [OpenEXR](http://www.openexr.com/downloads.html).
@ -319,6 +313,7 @@ libraries). If you do not need the support for some of these, you can just freel
you are concerned about performance, build them and run.
- *BUILD_opencv_python* -\> Self-explanatory. Create the binaries to use OpenCV from the
Python language.
- *BUILD_opencv_world* -\> Generate a single "opencv_world" binary (a shared or static library, depending on *BUILD_SHARED_LIBS*) including all the modules instead of a collection of separate binaries, one binary per module.
Press again the *Configure* button and ensure no errors are reported. If this is the case, you
can tell CMake to create the project files by pushing the *Generate* button. Go to the build

View File

@ -1487,12 +1487,14 @@ static void computeDisparitySGBM_HH4( const Mat& img1, const Mat& img2,
size_t minLrSize = width1 , LrSize = minLrSize*D2;
int hsumBufNRows = SH2*2 + 2;
size_t totalBufSize = (LrSize + minLrSize)*NLR*sizeof(CostType) + // minLr[] and Lr[]
costBufSize*hsumBufNRows*sizeof(CostType) + // hsumBuf
CSBufSize*2*sizeof(CostType) + 1024; // C, S
costBufSize*hsumBufNRows*sizeof(CostType) + // hsumBuf
CSBufSize*2*sizeof(CostType) + 1024; // C, S
if( buffer.empty() || !buffer.isContinuous() ||
buffer.cols*buffer.rows*buffer.elemSize() < totalBufSize )
buffer.create(1, (int)totalBufSize, CV_8U);
{
buffer.reserveBuffer(totalBufSize);
}
// summary cost over different (nDirs) directions
CostType* Cbuf = (CostType*)alignPtr(buffer.ptr(), ALIGN);

View File

@ -664,6 +664,8 @@ inline void v_mul_expand(const v_uint32x8& a, const v_uint32x8& b,
v_zip(v_uint64x4(v0), v_uint64x4(v1), c, d);
}
inline v_int16x16 v_mul_hi(const v_int16x16& a, const v_int16x16& b) { return v_int16x16(_mm256_mulhi_epi16(a.val, b.val)); }
inline v_uint16x16 v_mul_hi(const v_uint16x16& a, const v_uint16x16& b) { return v_uint16x16(_mm256_mulhi_epu16(a.val, b.val)); }
/** Non-saturating arithmetics **/
#define OPENCV_HAL_IMPL_AVX_BIN_FUNC(func, _Tpvec, intrin) \

View File

@ -891,6 +891,20 @@ template<typename _Tp, int n> inline void v_mul_expand(const v_reg<_Tp, n>& a, c
}
}
/** @brief Multiply and extract high part
Multiply values two registers and store high part of the results.
Implemented only for 16-bit source types (v_int16x8, v_uint16x8). Returns \f$ a*b >> 16 \f$
*/
template<typename _Tp, int n> inline v_reg<_Tp, n> v_mul_hi(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
{
typedef typename V_TypeTraits<_Tp>::w_type w_type;
v_reg<_Tp, n> c;
for (int i = 0; i < n; i++)
c.s[i] = (_Tp)(((w_type)a.s[i] * b.s[i]) >> sizeof(_Tp)*8);
return c;
}
//! @cond IGNORED
template<typename _Tp, int n> inline void v_hsum(const v_reg<_Tp, n>& a,
v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>& c)

View File

@ -553,6 +553,21 @@ inline void v_mul_expand(const v_uint32x4& a, const v_uint32x4& b,
d.val = vmull_u32(vget_high_u32(a.val), vget_high_u32(b.val));
}
inline v_int16x8 v_mul_hi(const v_int16x8& a, const v_int16x8& b)
{
return v_int16x8(vcombine_s16(
vshrn_n_s32(vmull_s16( vget_low_s16(a.val), vget_low_s16(b.val)), 16),
vshrn_n_s32(vmull_s16(vget_high_s16(a.val), vget_high_s16(b.val)), 16)
));
}
inline v_uint16x8 v_mul_hi(const v_uint16x8& a, const v_uint16x8& b)
{
return v_uint16x8(vcombine_u16(
vshrn_n_u32(vmull_u16( vget_low_u16(a.val), vget_low_u16(b.val)), 16),
vshrn_n_u32(vmull_u16(vget_high_u16(a.val), vget_high_u16(b.val)), 16)
));
}
inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b)
{
int32x4_t c = vmull_s16(vget_low_s16(a.val), vget_low_s16(b.val));

View File

@ -737,6 +737,9 @@ inline void v_mul_expand(const v_uint32x4& a, const v_uint32x4& b,
d.val = _mm_unpackhi_epi64(c0, c1);
}
inline v_int16x8 v_mul_hi(const v_int16x8& a, const v_int16x8& b) { return v_int16x8(_mm_mulhi_epi16(a.val, b.val)); }
inline v_uint16x8 v_mul_hi(const v_uint16x8& a, const v_uint16x8& b) { return v_uint16x8(_mm_mulhi_epu16(a.val, b.val)); }
inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b)
{
return v_int32x4(_mm_madd_epi16(a.val, b.val));

View File

@ -457,6 +457,21 @@ inline void v_mul_expand(const v_uint32x4& a, const v_uint32x4& b, v_uint64x2& c
d.val = vec_mul(vec_unpacklu(a.val), vec_unpacklu(b.val));
}
inline v_int16x8 v_mul_hi(const v_int16x8& a, const v_int16x8& b)
{
return v_int16x8(vec_packs(
vec_sra(vec_mul(vec_unpackh(a.val), vec_unpackh(b.val)), vec_uint4_sp(16)),
vec_sra(vec_mul(vec_unpackl(a.val), vec_unpackl(b.val)), vec_uint4_sp(16))
));
}
inline v_uint16x8 v_mul_hi(const v_uint16x8& a, const v_uint16x8& b)
{
return v_uint16x8(vec_packs(
vec_sr(vec_mul(vec_unpackhu(a.val), vec_unpackhu(b.val)), vec_uint4_sp(16)),
vec_sr(vec_mul(vec_unpacklu(a.val), vec_unpacklu(b.val)), vec_uint4_sp(16))
));
}
/** Non-saturating arithmetics **/
#define OPENCV_HAL_IMPL_VSX_BIN_FUNC(func, intrin) \
template<typename _Tpvec> \

View File

@ -45,7 +45,7 @@
#ifndef OPENCV_CORE_IPPASYNC_HPP
#define OPENCV_CORE_IPPASYNC_HPP
#ifdef HAVE_IPP_A
#ifdef HAVE_IPP_A // this file will be removed in OpenCV 4.0
#include "opencv2/core.hpp"
#include <ipp_async_op.h>

View File

@ -146,6 +146,12 @@ synonym is needed to generate Python/Java etc. wrappers properly. At the functio
level their use is similar, but _InputArray::getMat(idx) should be used to get header for the
idx-th component of the outer vector and _InputArray::size().area() should be used to find the
number of components (vectors/matrices) of the outer vector.
In general, type support is limited to cv::Mat types. Other types are forbidden.
But in some cases we need to support passing of custom non-general Mat types, like arrays of cv::KeyPoint, cv::DMatch, etc.
This data is not intented to be interpreted as an image data, or processed somehow like regular cv::Mat.
To pass such custom type use rawIn() / rawOut() / rawInOut() wrappers.
Custom type is wrapped as Mat-compatible `CV_8UC<N>` values (N = sizeof(T), N <= CV_CN_MAX).
*/
class CV_EXPORTS _InputArray
{
@ -199,6 +205,9 @@ public:
template<typename _Tp, std::size_t _Nm> _InputArray(const std::array<_Tp, _Nm>& arr);
template<std::size_t _Nm> _InputArray(const std::array<Mat, _Nm>& arr);
template<typename _Tp> static _InputArray rawIn(const std::vector<_Tp>& vec);
template<typename _Tp, std::size_t _Nm> static _InputArray rawIn(const std::array<_Tp, _Nm>& arr);
Mat getMat(int idx=-1) const;
Mat getMat_(int idx=-1) const;
UMat getUMat(int idx=-1) const;
@ -328,12 +337,13 @@ public:
_OutputArray(const UMat& m);
_OutputArray(const std::vector<UMat>& vec);
#ifdef CV_CXX_STD_ARRAY
template<typename _Tp, std::size_t _Nm> _OutputArray(std::array<_Tp, _Nm>& arr);
template<typename _Tp, std::size_t _Nm> _OutputArray(const std::array<_Tp, _Nm>& arr);
template<std::size_t _Nm> _OutputArray(std::array<Mat, _Nm>& arr);
template<std::size_t _Nm> _OutputArray(const std::array<Mat, _Nm>& arr);
#endif
template<typename _Tp> static _OutputArray rawOut(std::vector<_Tp>& vec);
template<typename _Tp, std::size_t _Nm> static _OutputArray rawOut(std::array<_Tp, _Nm>& arr);
bool fixedSize() const;
bool fixedType() const;
@ -397,15 +407,23 @@ public:
_InputOutputArray(const UMat& m);
_InputOutputArray(const std::vector<UMat>& vec);
#ifdef CV_CXX_STD_ARRAY
template<typename _Tp, std::size_t _Nm> _InputOutputArray(std::array<_Tp, _Nm>& arr);
template<typename _Tp, std::size_t _Nm> _InputOutputArray(const std::array<_Tp, _Nm>& arr);
template<std::size_t _Nm> _InputOutputArray(std::array<Mat, _Nm>& arr);
template<std::size_t _Nm> _InputOutputArray(const std::array<Mat, _Nm>& arr);
#endif
template<typename _Tp> static _InputOutputArray rawInOut(std::vector<_Tp>& vec);
template<typename _Tp, std::size_t _Nm> _InputOutputArray rawInOut(std::array<_Tp, _Nm>& arr);
};
/** Helper to wrap custom types. @see InputArray */
template<typename _Tp> static inline _InputArray rawIn(_Tp& v);
/** Helper to wrap custom types. @see InputArray */
template<typename _Tp> static inline _OutputArray rawOut(_Tp& v);
/** Helper to wrap custom types. @see InputArray */
template<typename _Tp> static inline _InputOutputArray rawInOut(_Tp& v);
CV__DEBUG_NS_END
typedef const _InputArray& InputArray;
@ -991,11 +1009,9 @@ public:
*/
template<typename _Tp> explicit Mat(const std::initializer_list<int> sizes, const std::initializer_list<_Tp> list);
#ifdef CV_CXX_STD_ARRAY
/** @overload
*/
template<typename _Tp, size_t _Nm> explicit Mat(const std::array<_Tp, _Nm>& arr, bool copyData=false);
#endif
/** @overload
*/
@ -1630,9 +1646,7 @@ public:
template<typename _Tp, int n> operator Vec<_Tp, n>() const;
template<typename _Tp, int m, int n> operator Matx<_Tp, m, n>() const;
#ifdef CV_CXX_STD_ARRAY
template<typename _Tp, std::size_t _Nm> operator std::array<_Tp, _Nm>() const;
#endif
/** @brief Reports whether the matrix is continuous or not.
@ -2214,9 +2228,7 @@ public:
Mat_(std::initializer_list<_Tp> values);
explicit Mat_(const std::initializer_list<int> sizes, const std::initializer_list<_Tp> values);
#ifdef CV_CXX_STD_ARRAY
template <std::size_t _Nm> explicit Mat_(const std::array<_Tp, _Nm>& arr, bool copyData=false);
#endif
Mat_& operator = (const Mat& m);
Mat_& operator = (const Mat_& m);
@ -2314,10 +2326,8 @@ public:
//! conversion to vector.
operator std::vector<_Tp>() const;
#ifdef CV_CXX_STD_ARRAY
//! conversion to array.
template<std::size_t _Nm> operator std::array<_Tp, _Nm>() const;
#endif
//! conversion to Vec
template<int n> operator Vec<typename DataType<_Tp>::channel_type, n>() const;

View File

@ -61,6 +61,16 @@ CV__DEBUG_NS_BEGIN
//! @cond IGNORED
////////////////////////// Custom (raw) type wrapper //////////////////////////
template<typename _Tp> static inline
int rawType()
{
CV_StaticAssert(sizeof(_Tp) <= CV_CN_MAX, "sizeof(_Tp) is too large");
const int elemSize = sizeof(_Tp);
return (int)CV_MAKETYPE(CV_8U, elemSize);
}
//////////////////////// Input/Output Arrays ////////////////////////
inline void _InputArray::init(int _flags, const void* _obj)
@ -134,6 +144,25 @@ inline _InputArray::_InputArray(const ogl::Buffer& buf)
inline _InputArray::_InputArray(const cuda::HostMem& cuda_mem)
{ init(CUDA_HOST_MEM + ACCESS_READ, &cuda_mem); }
template<typename _Tp> inline
_InputArray _InputArray::rawIn(const std::vector<_Tp>& vec)
{
_InputArray v;
v.flags = _InputArray::FIXED_TYPE + _InputArray::STD_VECTOR + rawType<_Tp>() + ACCESS_READ;
v.obj = (void*)&vec;
return v;
}
template<typename _Tp, std::size_t _Nm> inline
_InputArray _InputArray::rawIn(const std::array<_Tp, _Nm>& arr)
{
_InputArray v;
v.flags = FIXED_TYPE + FIXED_SIZE + STD_ARRAY + traits::Type<_Tp>::value + ACCESS_READ;
v.obj = (void*)arr.data();
v.sz = Size(1, _Nm);
return v;
}
inline _InputArray::~_InputArray() {}
inline Mat _InputArray::getMat(int i) const
@ -261,6 +290,25 @@ inline _OutputArray::_OutputArray(const ogl::Buffer& buf)
inline _OutputArray::_OutputArray(const cuda::HostMem& cuda_mem)
{ init(FIXED_TYPE + FIXED_SIZE + CUDA_HOST_MEM + ACCESS_WRITE, &cuda_mem); }
template<typename _Tp> inline
_OutputArray _OutputArray::rawOut(std::vector<_Tp>& vec)
{
_OutputArray v;
v.flags = _InputArray::FIXED_TYPE + _InputArray::STD_VECTOR + rawType<_Tp>() + ACCESS_WRITE;
v.obj = (void*)&vec;
return v;
}
template<typename _Tp, std::size_t _Nm> inline
_OutputArray _OutputArray::rawOut(std::array<_Tp, _Nm>& arr)
{
_OutputArray v;
v.flags = FIXED_TYPE + FIXED_SIZE + STD_ARRAY + traits::Type<_Tp>::value + ACCESS_WRITE;
v.obj = (void*)arr.data();
v.sz = Size(1, _Nm);
return v;
}
///////////////////////////////////////////////////////////////////////////////////////////
inline _InputOutputArray::_InputOutputArray() { init(ACCESS_RW, 0); }
@ -370,6 +418,30 @@ inline _InputOutputArray::_InputOutputArray(const ogl::Buffer& buf)
inline _InputOutputArray::_InputOutputArray(const cuda::HostMem& cuda_mem)
{ init(FIXED_TYPE + FIXED_SIZE + CUDA_HOST_MEM + ACCESS_RW, &cuda_mem); }
template<typename _Tp> inline
_InputOutputArray _InputOutputArray::rawInOut(std::vector<_Tp>& vec)
{
_InputOutputArray v;
v.flags = _InputArray::FIXED_TYPE + _InputArray::STD_VECTOR + rawType<_Tp>() + ACCESS_RW;
v.obj = (void*)&vec;
return v;
}
template<typename _Tp, std::size_t _Nm> inline
_InputOutputArray _InputOutputArray::rawInOut(std::array<_Tp, _Nm>& arr)
{
_InputOutputArray v;
v.flags = FIXED_TYPE + FIXED_SIZE + STD_ARRAY + traits::Type<_Tp>::value + ACCESS_RW;
v.obj = (void*)arr.data();
v.sz = Size(1, _Nm);
return v;
}
template<typename _Tp> static inline _InputArray rawIn(_Tp& v) { return _InputArray::rawIn(v); }
template<typename _Tp> static inline _OutputArray rawOut(_Tp& v) { return _OutputArray::rawOut(v); }
template<typename _Tp> static inline _InputOutputArray rawInOut(_Tp& v) { return _InputOutputArray::rawInOut(v); }
CV__DEBUG_NS_END
//////////////////////////////////////////// Mat //////////////////////////////////////////

View File

@ -270,7 +270,7 @@ static void binary_op( InputArray _src1, InputArray _src2, OutputArray _dst,
if( !haveScalar )
{
const Mat* arrays[] = { &src1, &src2, &dst, &mask, 0 };
uchar* ptrs[4]{};
uchar* ptrs[4] = {};
NAryMatIterator it(arrays, ptrs);
size_t total = it.size, blocksize = total;
@ -306,7 +306,7 @@ static void binary_op( InputArray _src1, InputArray _src2, OutputArray _dst,
else
{
const Mat* arrays[] = { &src1, &dst, &mask, 0 };
uchar* ptrs[3]{};
uchar* ptrs[3] = {};
NAryMatIterator it(arrays, ptrs);
size_t total = it.size, blocksize = std::min(total, blocksize0);
@ -745,7 +745,7 @@ static void arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst,
if( !haveScalar )
{
const Mat* arrays[] = { &src1, &src2, &dst, &mask, 0 };
uchar* ptrs[4]{};
uchar* ptrs[4] = {};
NAryMatIterator it(arrays, ptrs);
size_t total = it.size, blocksize = total;
@ -812,7 +812,7 @@ static void arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst,
else
{
const Mat* arrays[] = { &src1, &dst, &mask, 0 };
uchar* ptrs[3]{};
uchar* ptrs[3] = {};
NAryMatIterator it(arrays, ptrs);
size_t total = it.size, blocksize = std::min(total, blocksize0);
@ -1240,7 +1240,7 @@ void cv::compare(InputArray _src1, InputArray _src2, OutputArray _dst, int op)
if( !haveScalar )
{
const Mat* arrays[] = { &src1, &src2, &dst, 0 };
uchar* ptrs[3]{};
uchar* ptrs[3] = {};
NAryMatIterator it(arrays, ptrs);
size_t total = it.size;
@ -1251,7 +1251,7 @@ void cv::compare(InputArray _src1, InputArray _src2, OutputArray _dst, int op)
else
{
const Mat* arrays[] = { &src1, &dst, 0 };
uchar* ptrs[2]{};
uchar* ptrs[2] = {};
NAryMatIterator it(arrays, ptrs);
size_t total = it.size, blocksize = std::min(total, blocksize0);
@ -1748,7 +1748,7 @@ void cv::inRange(InputArray _src, InputArray _lowerb,
const Mat* arrays_sc[] = { &src, &dst, 0 };
const Mat* arrays_nosc[] = { &src, &dst, &lb, &ub, 0 };
uchar* ptrs[4]{};
uchar* ptrs[4] = {};
NAryMatIterator it(lbScalar && ubScalar ? arrays_sc : arrays_nosc, ptrs);
size_t total = it.size, blocksize = std::min(total, blocksize0);

View File

@ -1347,7 +1347,7 @@ void cv::Mat::convertTo(OutputArray _dst, int _type, double alpha, double beta)
else
{
const Mat* arrays[] = {&src, &dst, 0};
uchar* ptrs[2]{};
uchar* ptrs[2] = {};
NAryMatIterator it(arrays, ptrs);
Size sz((int)(it.size*cn), 1);
@ -1496,7 +1496,7 @@ void cv::convertFp16( InputArray _src, OutputArray _dst)
else
{
const Mat* arrays[] = {&src, &dst, 0};
uchar* ptrs[2]{};
uchar* ptrs[2] = {};
NAryMatIterator it(arrays, ptrs);
Size sz((int)(it.size*cn), 1);

View File

@ -1775,7 +1775,7 @@ void cv::convertScaleAbs( InputArray _src, OutputArray _dst, double alpha, doubl
else
{
const Mat* arrays[] = {&src, &dst, 0};
uchar* ptrs[2]{};
uchar* ptrs[2] = {};
NAryMatIterator it(arrays, ptrs);
Size sz((int)it.size*cn, 1);

View File

@ -306,7 +306,7 @@ void Mat::copyTo( OutputArray _dst ) const
if( total() != 0 )
{
const Mat* arrays[] = { this, &dst };
uchar* ptrs[2]{};
uchar* ptrs[2] = {};
NAryMatIterator it(arrays, ptrs, 2);
size_t sz = it.size*elemSize();
@ -399,7 +399,7 @@ void Mat::copyTo( OutputArray _dst, InputArray _mask ) const
}
const Mat* arrays[] = { this, &dst, &mask, 0 };
uchar* ptrs[3]{};
uchar* ptrs[3] = {};
NAryMatIterator it(arrays, ptrs);
Size sz((int)(it.size*mcn), 1);

View File

@ -25,51 +25,34 @@ static int countNonZero_(const T* src, int len )
static int countNonZero8u( const uchar* src, int len )
{
int i=0, nz = 0;
#if CV_SSE2
if(USE_SSE2)//5x-6x
#if CV_SIMD
int len0 = len & -v_uint8::nlanes;
v_uint8 v_zero = vx_setzero_u8();
v_uint8 v_one = vx_setall_u8(1);
v_uint32 v_sum32 = vx_setzero_u32();
while (i < len0)
{
__m128i v_zero = _mm_setzero_si128();
__m128i sum = _mm_setzero_si128();
for (; i<=len-16; i+=16)
v_uint16 v_sum16 = vx_setzero_u16();
int j = i;
while (j < std::min(len0, i + 65280 * v_uint16::nlanes))
{
__m128i r0 = _mm_loadu_si128((const __m128i*)(src+i));
sum = _mm_add_epi32(sum, _mm_sad_epu8(_mm_sub_epi8(v_zero, _mm_cmpeq_epi8(r0, v_zero)), v_zero));
v_uint8 v_sum8 = vx_setzero_u8();
int k = j;
for (; k < std::min(len0, j + 255 * v_uint8::nlanes); k += v_uint8::nlanes)
v_sum8 += v_one & (vx_load(src + k) == v_zero);
v_uint16 part1, part2;
v_expand(v_sum8, part1, part2);
v_sum16 += part1 + part2;
j = k;
}
nz = i - _mm_cvtsi128_si32(_mm_add_epi32(sum, _mm_unpackhi_epi64(sum, sum)));
v_uint32 part1, part2;
v_expand(v_sum16, part1, part2);
v_sum32 += part1 + part2;
i = j;
}
#elif CV_NEON
int len0 = len & -16, blockSize1 = (1 << 8) - 16, blockSize0 = blockSize1 << 6;
uint32x4_t v_nz = vdupq_n_u32(0u);
uint8x16_t v_zero = vdupq_n_u8(0), v_1 = vdupq_n_u8(1);
const uchar * src0 = src;
while( i < len0 )
{
int blockSizei = std::min(len0 - i, blockSize0), j = 0;
while (j < blockSizei)
{
int blockSizej = std::min(blockSizei - j, blockSize1), k = 0;
uint8x16_t v_pz = v_zero;
for( ; k <= blockSizej - 16; k += 16 )
v_pz = vaddq_u8(v_pz, vandq_u8(vceqq_u8(vld1q_u8(src0 + k), v_zero), v_1));
uint16x8_t v_p1 = vmovl_u8(vget_low_u8(v_pz)), v_p2 = vmovl_u8(vget_high_u8(v_pz));
v_nz = vaddq_u32(vaddl_u16(vget_low_u16(v_p1), vget_high_u16(v_p1)), v_nz);
v_nz = vaddq_u32(vaddl_u16(vget_low_u16(v_p2), vget_high_u16(v_p2)), v_nz);
src0 += blockSizej;
j += blockSizej;
}
i += blockSizei;
}
CV_DECL_ALIGNED(16) unsigned int buf[4];
vst1q_u32(buf, v_nz);
nz += i - saturate_cast<int>(buf[0] + buf[1] + buf[2] + buf[3]);
nz = i - v_reduce_sum(v_sum32);
v_cleanup();
#endif
for( ; i < len; i++ )
nz += src[i] != 0;
@ -79,159 +62,112 @@ static int countNonZero8u( const uchar* src, int len )
static int countNonZero16u( const ushort* src, int len )
{
int i = 0, nz = 0;
#if CV_SSE2
if (USE_SSE2)
#if CV_SIMD
int len0 = len & -v_int8::nlanes;
v_uint16 v_zero = vx_setzero_u16();
v_int8 v_one = vx_setall_s8(1);
v_int32 v_sum32 = vx_setzero_s32();
while (i < len0)
{
__m128i v_zero = _mm_setzero_si128 ();
__m128i sum = _mm_setzero_si128();
for ( ; i <= len - 8; i += 8)
v_int16 v_sum16 = vx_setzero_s16();
int j = i;
while (j < std::min(len0, i + 32766 * v_int16::nlanes))
{
__m128i r0 = _mm_loadu_si128((const __m128i*)(src + i));
sum = _mm_add_epi32(sum, _mm_sad_epu8(_mm_sub_epi8(v_zero, _mm_cmpeq_epi16(r0, v_zero)), v_zero));
v_int8 v_sum8 = vx_setzero_s8();
int k = j;
for (; k < std::min(len0, j + 127 * v_int8::nlanes); k += v_int8::nlanes)
v_sum8 += v_one & v_pack(v_reinterpret_as_s16(vx_load(src + k) == v_zero), v_reinterpret_as_s16(vx_load(src + k + v_uint16::nlanes) == v_zero));
v_int16 part1, part2;
v_expand(v_sum8, part1, part2);
v_sum16 += part1 + part2;
j = k;
}
nz = i - (_mm_cvtsi128_si32(_mm_add_epi32(sum, _mm_unpackhi_epi64(sum, sum))) >> 1);
src += i;
v_int32 part1, part2;
v_expand(v_sum16, part1, part2);
v_sum32 += part1 + part2;
i = j;
}
#elif CV_NEON
int len0 = len & -8, blockSize1 = (1 << 15), blockSize0 = blockSize1 << 6;
uint32x4_t v_nz = vdupq_n_u32(0u);
uint16x8_t v_zero = vdupq_n_u16(0), v_1 = vdupq_n_u16(1);
while( i < len0 )
{
int blockSizei = std::min(len0 - i, blockSize0), j = 0;
while (j < blockSizei)
{
int blockSizej = std::min(blockSizei - j, blockSize1), k = 0;
uint16x8_t v_pz = v_zero;
for( ; k <= blockSizej - 8; k += 8 )
v_pz = vaddq_u16(v_pz, vandq_u16(vceqq_u16(vld1q_u16(src + k), v_zero), v_1));
v_nz = vaddq_u32(vaddl_u16(vget_low_u16(v_pz), vget_high_u16(v_pz)), v_nz);
src += blockSizej;
j += blockSizej;
}
i += blockSizei;
}
CV_DECL_ALIGNED(16) unsigned int buf[4];
vst1q_u32(buf, v_nz);
nz += i - saturate_cast<int>(buf[0] + buf[1] + buf[2] + buf[3]);
nz = i - v_reduce_sum(v_sum32);
v_cleanup();
#endif
return nz + countNonZero_(src, len - i);
return nz + countNonZero_(src + i, len - i);
}
static int countNonZero32s( const int* src, int len )
{
int i = 0, nz = 0;
#if CV_SSE2
if (USE_SSE2)
#if CV_SIMD
int len0 = len & -v_int8::nlanes;
v_int32 v_zero = vx_setzero_s32();
v_int8 v_one = vx_setall_s8(1);
v_int32 v_sum32 = vx_setzero_s32();
while (i < len0)
{
__m128i v_zero = _mm_setzero_si128 ();
__m128i sum = _mm_setzero_si128();
for ( ; i <= len - 4; i += 4)
v_int16 v_sum16 = vx_setzero_s16();
int j = i;
while (j < std::min(len0, i + 32766 * v_int16::nlanes))
{
__m128i r0 = _mm_loadu_si128((const __m128i*)(src + i));
sum = _mm_add_epi32(sum, _mm_sad_epu8(_mm_sub_epi8(v_zero, _mm_cmpeq_epi32(r0, v_zero)), v_zero));
v_int8 v_sum8 = vx_setzero_s8();
int k = j;
for (; k < std::min(len0, j + 127 * v_int8::nlanes); k += v_int8::nlanes)
v_sum8 += v_one & v_pack(
v_pack(vx_load(src + k ) == v_zero, vx_load(src + k + v_int32::nlanes) == v_zero),
v_pack(vx_load(src + k + 2*v_int32::nlanes) == v_zero, vx_load(src + k + 3*v_int32::nlanes) == v_zero)
);
v_int16 part1, part2;
v_expand(v_sum8, part1, part2);
v_sum16 += part1 + part2;
j = k;
}
nz = i - (_mm_cvtsi128_si32(_mm_add_epi32(sum, _mm_unpackhi_epi64(sum, sum))) >> 2);
src += i;
v_int32 part1, part2;
v_expand(v_sum16, part1, part2);
v_sum32 += part1 + part2;
i = j;
}
#elif CV_NEON
int len0 = len & -8, blockSize1 = (1 << 15), blockSize0 = blockSize1 << 6;
uint32x4_t v_nz = vdupq_n_u32(0u);
int32x4_t v_zero = vdupq_n_s32(0.0f);
uint16x8_t v_1 = vdupq_n_u16(1u), v_zerou = vdupq_n_u16(0u);
while( i < len0 )
{
int blockSizei = std::min(len0 - i, blockSize0), j = 0;
while (j < blockSizei)
{
int blockSizej = std::min(blockSizei - j, blockSize1), k = 0;
uint16x8_t v_pz = v_zerou;
for( ; k <= blockSizej - 8; k += 8 )
v_pz = vaddq_u16(v_pz, vandq_u16(vcombine_u16(vmovn_u32(vceqq_s32(vld1q_s32(src + k), v_zero)),
vmovn_u32(vceqq_s32(vld1q_s32(src + k + 4), v_zero))), v_1));
v_nz = vaddq_u32(vaddl_u16(vget_low_u16(v_pz), vget_high_u16(v_pz)), v_nz);
src += blockSizej;
j += blockSizej;
}
i += blockSizei;
}
CV_DECL_ALIGNED(16) unsigned int buf[4];
vst1q_u32(buf, v_nz);
nz += i - saturate_cast<int>(buf[0] + buf[1] + buf[2] + buf[3]);
nz = i - v_reduce_sum(v_sum32);
v_cleanup();
#endif
return nz + countNonZero_(src, len - i);
return nz + countNonZero_(src + i, len - i);
}
static int countNonZero32f( const float* src, int len )
{
int i = 0, nz = 0;
#if CV_SSE2
if (USE_SSE2)
#if CV_SIMD
int len0 = len & -v_int8::nlanes;
v_float32 v_zero = vx_setzero_f32();
v_int8 v_one = vx_setall_s8(1);
v_int32 v_sum32 = vx_setzero_s32();
while (i < len0)
{
__m128 v_zero_f = _mm_setzero_ps();
__m128i v_zero = _mm_setzero_si128 ();
__m128i sum = _mm_setzero_si128();
for ( ; i <= len - 4; i += 4)
v_int16 v_sum16 = vx_setzero_s16();
int j = i;
while (j < std::min(len0, i + 32766 * v_int16::nlanes))
{
__m128 r0 = _mm_loadu_ps(src + i);
sum = _mm_add_epi32(sum, _mm_sad_epu8(_mm_sub_epi8(v_zero, _mm_castps_si128(_mm_cmpeq_ps(r0, v_zero_f))), v_zero));
v_int8 v_sum8 = vx_setzero_s8();
int k = j;
for (; k < std::min(len0, j + 127 * v_int8::nlanes); k += v_int8::nlanes)
v_sum8 += v_one & v_pack(
v_pack(v_reinterpret_as_s32(vx_load(src + k ) == v_zero), v_reinterpret_as_s32(vx_load(src + k + v_float32::nlanes) == v_zero)),
v_pack(v_reinterpret_as_s32(vx_load(src + k + 2*v_float32::nlanes) == v_zero), v_reinterpret_as_s32(vx_load(src + k + 3*v_float32::nlanes) == v_zero))
);
v_int16 part1, part2;
v_expand(v_sum8, part1, part2);
v_sum16 += part1 + part2;
j = k;
}
nz = i - (_mm_cvtsi128_si32(_mm_add_epi32(sum, _mm_unpackhi_epi64(sum, sum))) >> 2);
src += i;
v_int32 part1, part2;
v_expand(v_sum16, part1, part2);
v_sum32 += part1 + part2;
i = j;
}
#elif CV_NEON
int len0 = len & -8, blockSize1 = (1 << 15), blockSize0 = blockSize1 << 6;
uint32x4_t v_nz = vdupq_n_u32(0u);
float32x4_t v_zero = vdupq_n_f32(0.0f);
uint16x8_t v_1 = vdupq_n_u16(1u), v_zerou = vdupq_n_u16(0u);
while( i < len0 )
{
int blockSizei = std::min(len0 - i, blockSize0), j = 0;
while (j < blockSizei)
{
int blockSizej = std::min(blockSizei - j, blockSize1), k = 0;
uint16x8_t v_pz = v_zerou;
for( ; k <= blockSizej - 8; k += 8 )
v_pz = vaddq_u16(v_pz, vandq_u16(vcombine_u16(vmovn_u32(vceqq_f32(vld1q_f32(src + k), v_zero)),
vmovn_u32(vceqq_f32(vld1q_f32(src + k + 4), v_zero))), v_1));
v_nz = vaddq_u32(vaddl_u16(vget_low_u16(v_pz), vget_high_u16(v_pz)), v_nz);
src += blockSizej;
j += blockSizej;
}
i += blockSizei;
}
CV_DECL_ALIGNED(16) unsigned int buf[4];
vst1q_u32(buf, v_nz);
nz += i - saturate_cast<int>(buf[0] + buf[1] + buf[2] + buf[3]);
nz = i - v_reduce_sum(v_sum32);
v_cleanup();
#endif
return nz + countNonZero_(src, len - i);
return nz + countNonZero_(src + i, len - i);
}
static int countNonZero64f( const double* src, int len )
@ -378,7 +314,7 @@ int cv::countNonZero( InputArray _src )
CV_Assert( func != 0 );
const Mat* arrays[] = {&src, 0};
uchar* ptrs[1]{};
uchar* ptrs[1] = {};
NAryMatIterator it(arrays, ptrs);
int total = (int)it.size, nz = 0;

View File

@ -98,6 +98,10 @@ int solveLP(const Mat& Func, const Mat& Constr, Mat& z){
CV_Assert(Constr.type()==CV_64FC1 || Constr.type()==CV_32FC1);
CV_Assert((Func.rows==1 && (Constr.cols-Func.cols==1))||
(Func.cols==1 && (Constr.cols-Func.rows==1)));
if (!z.empty())
CV_CheckTypeEQ(z.type(), CV_64FC1, "");
else
CV_CheckType(z.type(), z.type() == CV_64FC1 || z.type() == CV_8UC1/*empty cv::Mat*/, "");
//copy arguments for we will shall modify them
Mat_<double> bigC=Mat_<double>(1,(Func.rows==1?Func.cols:Func.rows)+1),

View File

@ -342,7 +342,7 @@ public:
int lutcn = lut_.channels();
const Mat* arrays[] = {&src, &dst, 0};
uchar* ptrs[2]{};
uchar* ptrs[2] = {};
NAryMatIterator it(arrays, ptrs);
int len = (int)it.size;
@ -408,7 +408,7 @@ void cv::LUT( InputArray _src, InputArray _lut, OutputArray _dst )
CV_Assert( func != 0 );
const Mat* arrays[] = {&src, &dst, 0};
uchar* ptrs[2]{};
uchar* ptrs[2] = {};
NAryMatIterator it(arrays, ptrs);
int len = (int)it.size;

View File

@ -158,7 +158,7 @@ void magnitude( InputArray src1, InputArray src2, OutputArray dst )
Mat Mag = dst.getMat();
const Mat* arrays[] = {&X, &Y, &Mag, 0};
uchar* ptrs[3]{};
uchar* ptrs[3] = {};
NAryMatIterator it(arrays, ptrs);
int len = (int)it.size*cn;
@ -194,7 +194,7 @@ void phase( InputArray src1, InputArray src2, OutputArray dst, bool angleInDegre
Mat Angle = dst.getMat();
const Mat* arrays[] = {&X, &Y, &Angle, 0};
uchar* ptrs[3]{};
uchar* ptrs[3] = {};
NAryMatIterator it(arrays, ptrs);
int j, total = (int)(it.size*cn), blockSize = total;
size_t esz1 = X.elemSize1();
@ -280,7 +280,7 @@ void cartToPolar( InputArray src1, InputArray src2,
Mat Mag = dst1.getMat(), Angle = dst2.getMat();
const Mat* arrays[] = {&X, &Y, &Mag, &Angle, 0};
uchar* ptrs[4]{};
uchar* ptrs[4] = {};
NAryMatIterator it(arrays, ptrs);
int j, total = (int)(it.size*cn), blockSize = std::min(total, ((BLOCK_SIZE+cn-1)/cn)*cn);
size_t esz1 = X.elemSize1();
@ -577,7 +577,7 @@ void polarToCart( InputArray src1, InputArray src2,
CV_IPP_RUN(!angleInDegrees, ipp_polarToCart(Mag, Angle, X, Y));
const Mat* arrays[] = {&Mag, &Angle, &X, &Y, 0};
uchar* ptrs[4]{};
uchar* ptrs[4] = {};
NAryMatIterator it(arrays, ptrs);
cv::AutoBuffer<float> _buf;
float* buf[2] = {0, 0};
@ -676,7 +676,7 @@ void exp( InputArray _src, OutputArray _dst )
Mat dst = _dst.getMat();
const Mat* arrays[] = {&src, &dst, 0};
uchar* ptrs[2]{};
uchar* ptrs[2] = {};
NAryMatIterator it(arrays, ptrs);
int len = (int)(it.size*cn);
@ -709,7 +709,7 @@ void log( InputArray _src, OutputArray _dst )
Mat dst = _dst.getMat();
const Mat* arrays[] = {&src, &dst, 0};
uchar* ptrs[2]{};
uchar* ptrs[2] = {};
NAryMatIterator it(arrays, ptrs);
int len = (int)(it.size*cn);
@ -1241,7 +1241,7 @@ void pow( InputArray _src, double power, OutputArray _dst )
Mat dst = _dst.getMat();
const Mat* arrays[] = {&src, &dst, 0};
uchar* ptrs[2]{};
uchar* ptrs[2] = {};
NAryMatIterator it(arrays, ptrs);
int len = (int)(it.size*cn);
@ -1588,7 +1588,7 @@ void patchNaNs( InputOutputArray _a, double _val )
Mat a = _a.getMat();
const Mat* arrays[] = {&a, 0};
int* ptrs[1]{};
int* ptrs[1] = {};
NAryMatIterator it(arrays, (uchar**)ptrs);
size_t len = it.size*a.channels();
Cv32suf val;

View File

@ -2144,7 +2144,7 @@ void cv::transform( InputArray _src, OutputArray _dst, InputArray _mtx )
CV_Assert( func != 0 );
const Mat* arrays[] = {&src, &dst, 0};
uchar* ptrs[2]{};
uchar* ptrs[2] = {};
NAryMatIterator it(arrays, ptrs);
size_t i, total = it.size;
@ -2290,7 +2290,7 @@ void cv::perspectiveTransform( InputArray _src, OutputArray _dst, InputArray _mt
CV_Assert( func != 0 );
const Mat* arrays[] = {&src, &dst, 0};
uchar* ptrs[2]{};
uchar* ptrs[2] = {};
NAryMatIterator it(arrays, ptrs);
size_t i, total = it.size;
@ -2441,7 +2441,7 @@ void cv::scaleAdd( InputArray _src1, double alpha, InputArray _src2, OutputArray
}
const Mat* arrays[] = {&src1, &src2, &dst, 0};
uchar* ptrs[3]{};
uchar* ptrs[3] = {};
NAryMatIterator it(arrays, ptrs);
size_t i, len = it.size*cn;
@ -3301,7 +3301,7 @@ double Mat::dot(InputArray _mat) const
}
const Mat* arrays[] = {this, &mat, 0};
uchar* ptrs[2]{};
uchar* ptrs[2] = {};
NAryMatIterator it(arrays, ptrs);
int len = (int)(it.size*cn);
double r = 0;

View File

@ -1413,18 +1413,39 @@ void _OutputArray::create(int d, const int* sizes, int mtype, int i,
case 16:
((std::vector<Vec4i>*)v)->resize(len);
break;
case 20:
((std::vector<Vec<int, 5> >*)v)->resize(len);
break;
case 24:
((std::vector<Vec6i>*)v)->resize(len);
break;
case 28:
((std::vector<Vec<int, 7> >*)v)->resize(len);
break;
case 32:
((std::vector<Vec8i>*)v)->resize(len);
break;
case 36:
((std::vector<Vec<int, 9> >*)v)->resize(len);
break;
case 40:
((std::vector<Vec<int, 10> >*)v)->resize(len);
break;
case 44:
((std::vector<Vec<int, 11> >*)v)->resize(len);
break;
case 48:
((std::vector<Vec<int, 12> >*)v)->resize(len);
break;
case 52:
((std::vector<Vec<int, 13> >*)v)->resize(len);
break;
case 56:
((std::vector<Vec<int, 14> >*)v)->resize(len);
break;
case 60:
((std::vector<Vec<int, 15> >*)v)->resize(len);
break;
case 64:
((std::vector<Vec<int, 16> >*)v)->resize(len);
break;

View File

@ -121,7 +121,7 @@ cv::Scalar cv::mean( InputArray _src, InputArray _mask )
CV_Assert( cn <= 4 && func != 0 );
const Mat* arrays[] = {&src, &mask, 0};
uchar* ptrs[2]{};
uchar* ptrs[2] = {};
NAryMatIterator it(arrays, ptrs);
int total = (int)it.size, blockSize = total, intSumBlockSize = 0;
int j, count = 0;
@ -786,7 +786,7 @@ void cv::meanStdDev( InputArray _src, OutputArray _mean, OutputArray _sdv, Input
CV_Assert( func != 0 );
const Mat* arrays[] = {&src, &mask, 0};
uchar* ptrs[2]{};
uchar* ptrs[2] = {};
NAryMatIterator it(arrays, ptrs);
int total = (int)it.size, blockSize = total, intSumBlockSize = 0;
int j, count = 0, nz0 = 0;

View File

@ -770,7 +770,7 @@ void cv::minMaxIdx(InputArray _src, double* minVal,
CV_Assert( func != 0 );
const Mat* arrays[] = {&src, &mask, 0};
uchar* ptrs[2]{};
uchar* ptrs[2] = {};
NAryMatIterator it(arrays, ptrs);
size_t minidx = 0, maxidx = 0;

View File

@ -710,7 +710,7 @@ double cv::norm( InputArray _src, int normType, InputArray _mask )
int cellSize = normType == NORM_HAMMING ? 1 : 2;
const Mat* arrays[] = {&src, 0};
uchar* ptrs[1]{};
uchar* ptrs[1] = {};
NAryMatIterator it(arrays, ptrs);
int total = (int)it.size;
int result = 0;
@ -727,7 +727,7 @@ double cv::norm( InputArray _src, int normType, InputArray _mask )
CV_Assert( func != 0 );
const Mat* arrays[] = {&src, &mask, 0};
uchar* ptrs[2]{};
uchar* ptrs[2] = {};
union
{
double d;
@ -1168,7 +1168,7 @@ double cv::norm( InputArray _src1, InputArray _src2, int normType, InputArray _m
int cellSize = normType == NORM_HAMMING ? 1 : 2;
const Mat* arrays[] = {&src1, &src2, 0};
uchar* ptrs[2]{};
uchar* ptrs[2] = {};
NAryMatIterator it(arrays, ptrs);
int total = (int)it.size;
int result = 0;
@ -1185,7 +1185,7 @@ double cv::norm( InputArray _src1, InputArray _src2, int normType, InputArray _m
CV_Assert( func != 0 );
const Mat* arrays[] = {&src1, &src2, &mask, 0};
uchar* ptrs[3]{};
uchar* ptrs[3] = {};
union
{
double d;

View File

@ -584,6 +584,11 @@ void RNG::fill( InputOutputArray _mat, int disttype,
}
ip[j][1] = cvCeil(a);
int idiff = ip[j][0] = cvFloor(b) - ip[j][1] - 1;
if (idiff < 0)
{
idiff = 0;
ip[j][0] = 0;
}
double diff = b - a;
fast_int_mode = fast_int_mode && diff <= 4294967296. && (idiff & (idiff+1)) == 0;

View File

@ -602,7 +602,7 @@ cv::Scalar cv::sum( InputArray _src )
CV_Assert( cn <= 4 && func != 0 );
const Mat* arrays[] = {&src, 0};
uchar* ptrs[1]{};
uchar* ptrs[1] = {};
NAryMatIterator it(arrays, ptrs);
Scalar s;
int total = (int)it.size, blockSize = total, intSumBlockSize = 0;

View File

@ -1,171 +0,0 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#include "test_precomp.hpp"
#include "opencv2/ts/ocl_test.hpp"
#ifdef HAVE_IPP_A
#include "opencv2/core/ippasync.hpp"
using namespace cv;
using namespace std;
using namespace opencv_test;
namespace opencv_test {
namespace ocl {
PARAM_TEST_CASE(IPPAsync, MatDepth, Channels, hppAccelType)
{
int type;
int cn;
int depth;
hppAccelType accelType;
Mat matrix, result;
hppiMatrix * hppMat;
hppAccel accel;
hppiVirtualMatrix * virtMatrix;
hppStatus sts;
virtual void SetUp()
{
type = CV_MAKE_TYPE(GET_PARAM(0), GET_PARAM(1));
depth = GET_PARAM(0);
cn = GET_PARAM(1);
accelType = GET_PARAM(2);
}
void generateTestData()
{
Size matrix_Size = randomSize(2, 100);
const double upValue = 100;
matrix = randomMat(matrix_Size, type, -upValue, upValue);
}
void Near(double threshold = 0.0)
{
EXPECT_MAT_NEAR(matrix, result, threshold);
}
};
TEST_P(IPPAsync, accuracy)
{
sts = hppCreateInstance(accelType, 0, &accel);
if (sts!=HPP_STATUS_NO_ERROR) printf("hppStatus = %d\n",sts);
CV_Assert(sts==HPP_STATUS_NO_ERROR);
virtMatrix = hppiCreateVirtualMatrices(accel, 2);
for (int j = 0; j < test_loop_times; j++)
{
generateTestData();
hppMat = hpp::getHpp(matrix,accel);
hppScalar a = 3;
sts = hppiAddC(accel, hppMat, a, 0, virtMatrix[0]);
CV_Assert(sts==HPP_STATUS_NO_ERROR);
sts = hppiSubC(accel, virtMatrix[0], a, 0, virtMatrix[1]);
CV_Assert(sts==HPP_STATUS_NO_ERROR);
sts = hppWait(accel, HPP_TIME_OUT_INFINITE);
CV_Assert(sts==HPP_STATUS_NO_ERROR);
result = hpp::getMat(virtMatrix[1], accel, cn);
Near(5.0e-6);
sts = hppiFreeMatrix(hppMat);
CV_Assert(sts==HPP_STATUS_NO_ERROR);
}
sts = hppiDeleteVirtualMatrices(accel, virtMatrix);
CV_Assert(sts==HPP_STATUS_NO_ERROR);
sts = hppDeleteInstance(accel);
CV_Assert(sts==HPP_STATUS_NO_ERROR);
}
PARAM_TEST_CASE(IPPAsyncShared, Channels, hppAccelType)
{
int cn;
int type;
hppAccelType accelType;
Mat matrix, result;
hppiMatrix* hppMat;
hppAccel accel;
hppiVirtualMatrix * virtMatrix;
hppStatus sts;
virtual void SetUp()
{
cn = GET_PARAM(0);
accelType = GET_PARAM(1);
type=CV_MAKE_TYPE(CV_8U, GET_PARAM(0));
}
void generateTestData()
{
Size matrix_Size = randomSize(2, 100);
hpp32u pitch, size;
const int upValue = 100;
sts = hppQueryMatrixAllocParams(accel, (hpp32u)(matrix_Size.width*cn), (hpp32u)matrix_Size.height, HPP_DATA_TYPE_8U, &pitch, &size);
matrix = randomMat(matrix_Size, type, 0, upValue);
}
void Near(double threshold = 0.0)
{
EXPECT_MAT_NEAR(matrix, result, threshold);
}
};
TEST_P(IPPAsyncShared, accuracy)
{
sts = hppCreateInstance(accelType, 0, &accel);
if (sts!=HPP_STATUS_NO_ERROR) printf("hppStatus = %d\n",sts);
CV_Assert(sts==HPP_STATUS_NO_ERROR);
virtMatrix = hppiCreateVirtualMatrices(accel, 2);
for (int j = 0; j < test_loop_times; j++)
{
generateTestData();
hppMat = hpp::getHpp(matrix,accel);
hppScalar a = 3;
sts = hppiAddC(accel, hppMat, a, 0, virtMatrix[0]);
CV_Assert(sts==HPP_STATUS_NO_ERROR);
sts = hppiSubC(accel, virtMatrix[0], a, 0, virtMatrix[1]);
CV_Assert(sts==HPP_STATUS_NO_ERROR);
sts = hppWait(accel, HPP_TIME_OUT_INFINITE);
CV_Assert(sts==HPP_STATUS_NO_ERROR);
result = hpp::getMat(virtMatrix[1], accel, cn);
Near(0);
sts = hppiFreeMatrix(hppMat);
CV_Assert(sts==HPP_STATUS_NO_ERROR);
}
sts = hppiDeleteVirtualMatrices(accel, virtMatrix);
CV_Assert(sts==HPP_STATUS_NO_ERROR);
sts = hppDeleteInstance(accel);
CV_Assert(sts==HPP_STATUS_NO_ERROR);
}
INSTANTIATE_TEST_CASE_P(IppATest, IPPAsyncShared, Combine(Values(1, 2, 3, 4),
Values( HPP_ACCEL_TYPE_CPU, HPP_ACCEL_TYPE_GPU)));
INSTANTIATE_TEST_CASE_P(IppATest, IPPAsync, Combine(Values(CV_8U, CV_16U, CV_16S, CV_32F),
Values(1, 2, 3, 4),
Values( HPP_ACCEL_TYPE_CPU, HPP_ACCEL_TYPE_GPU)));
}
}
#endif

View File

@ -141,4 +141,14 @@ TEST(Core_LPSolver, regression_cycling){
#endif
}
TEST(Core_LPSolver, issue_12337)
{
Mat A=(cv::Mat_<double>(3,1)<<3,1,2);
Mat B=(cv::Mat_<double>(3,4)<<1,1,3,30,2,2,5,24,4,1,2,36);
EXPECT_ANY_THROW(Mat1f z_float; cv::solveLP(A, B, z_float));
EXPECT_NO_THROW(Mat1d z_double; cv::solveLP(A, B, z_double));
EXPECT_ANY_THROW(Mat1i z_int; cv::solveLP(A, B, z_int));
//need to update interface: EXPECT_ANY_THROW(Mat1b z_8u; cv::solveLP(A, B, z_8u));
}
}} // namespace

View File

@ -1872,4 +1872,63 @@ TEST(Core_Split, crash_12171)
EXPECT_EQ(2, dst2.ptr<uchar>(1)[1]);
}
struct CustomType // like cv::Keypoint
{
Point2f pt;
float size;
float angle;
float response;
int octave;
int class_id;
};
static void test_CustomType(InputArray src_, OutputArray dst_)
{
Mat src = src_.getMat();
ASSERT_EQ(sizeof(CustomType), src.elemSize());
CV_CheckTypeEQ(src.type(), CV_MAKETYPE(CV_8U, sizeof(CustomType)), "");
CustomType* kpt = NULL;
{
Mat dst = dst_.getMat();
for (size_t i = 0; i < dst.total(); i++)
{
kpt = dst.ptr<CustomType>(0) + i;
kpt->octave = (int)i;
}
}
const int N = (int)src.total();
dst_.create(1, N * 2, rawType<CustomType>());
Mat dst = dst_.getMat();
for (size_t i = N; i < dst.total(); i++)
{
kpt = dst.ptr<CustomType>(0) + i;
kpt->octave = -(int)i;
}
#if 0 // Compilation error
CustomType& kpt = dst.at<CustomType>(0, 5);
#endif
}
TEST(Core_InputArray, support_CustomType)
{
std::vector<CustomType> kp1(5);
std::vector<CustomType> kp2(3);
test_CustomType(rawIn(kp1), rawOut(kp2));
ASSERT_EQ((size_t)10, kp2.size());
for (int i = 0; i < 3; i++)
{
EXPECT_EQ(i, kp2[i].octave);
}
for (int i = 3; i < 5; i++)
{
EXPECT_EQ(0, kp2[i].octave);
}
for (int i = 5; i < 10; i++)
{
EXPECT_EQ(-i, kp2[i].octave);
}
}
}} // namespace

View File

@ -222,7 +222,7 @@ CUDA_TEST_P(ORB, Accuracy)
{
std::vector<cv::KeyPoint> keypoints;
cv::cuda::GpuMat descriptors;
orb->detectAndComputeAsync(loadMat(image), loadMat(mask), keypoints, descriptors);
orb->detectAndComputeAsync(loadMat(image), loadMat(mask), rawOut(keypoints), descriptors);
}
catch (const cv::Exception& e)
{

View File

@ -95,7 +95,7 @@ ocv_glob_module_sources(${sources_options} SOURCES ${fw_srcs})
ocv_create_module(${libs} ${INF_ENGINE_TARGET})
ocv_add_samples()
ocv_add_accuracy_tests(${INF_ENGINE_TARGET})
ocv_add_perf_tests()
ocv_add_perf_tests(${INF_ENGINE_TARGET})
ocv_option(${the_module}_PERF_CAFFE "Add performance tests of Caffe framework" OFF)
ocv_option(${the_module}_PERF_CLCAFFE "Add performance tests of clCaffe framework" OFF)

View File

@ -878,6 +878,14 @@ CV__DNN_INLINE_NS_BEGIN
CV_EXPORTS_W void shrinkCaffeModel(const String& src, const String& dst,
const std::vector<String>& layersTypes = std::vector<String>());
/** @brief Create a text representation for a binary network stored in protocol buffer format.
* @param[in] model A path to binary network.
* @param[in] output A path to output text file to be created.
*
* @note To reduce output file size, trained weights are not included.
*/
CV_EXPORTS_W void writeTextGraph(const String& model, const String& output);
/** @brief Performs non maximum suppression given boxes and corresponding scores.
* @param bboxes a set of bounding boxes to apply NMS.

View File

@ -0,0 +1,119 @@
package org.opencv.test.dnn;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.opencv.core.Core;
import org.opencv.core.Mat;
import org.opencv.core.MatOfInt;
import org.opencv.core.MatOfFloat;
import org.opencv.core.MatOfByte;
import org.opencv.core.Scalar;
import org.opencv.core.Size;
import org.opencv.dnn.DictValue;
import org.opencv.dnn.Dnn;
import org.opencv.dnn.Layer;
import org.opencv.dnn.Net;
import org.opencv.imgcodecs.Imgcodecs;
import org.opencv.imgproc.Imgproc;
import org.opencv.test.OpenCVTestCase;
/*
* regression test for #12324,
* testing various java.util.List invocations,
* which use the LIST_GET macro
*/
public class DnnListRegressionTest extends OpenCVTestCase {
private final static String ENV_OPENCV_DNN_TEST_DATA_PATH = "OPENCV_DNN_TEST_DATA_PATH";
private final static String ENV_OPENCV_TEST_DATA_PATH = "OPENCV_TEST_DATA_PATH";
String modelFileName = "";
String sourceImageFile = "";
Net net;
@Override
protected void setUp() throws Exception {
super.setUp();
String envDnnTestDataPath = System.getenv(ENV_OPENCV_DNN_TEST_DATA_PATH);
if(envDnnTestDataPath == null){
isTestCaseEnabled = false;
return;
}
File dnnTestDataPath = new File(envDnnTestDataPath);
modelFileName = new File(dnnTestDataPath, "dnn/tensorflow_inception_graph.pb").toString();
String envTestDataPath = System.getenv(ENV_OPENCV_TEST_DATA_PATH);
if(envTestDataPath == null) throw new Exception(ENV_OPENCV_TEST_DATA_PATH + " has to be defined!");
File testDataPath = new File(envTestDataPath);
File f = new File(testDataPath, "dnn/grace_hopper_227.png");
sourceImageFile = f.toString();
if(!f.exists()) throw new Exception("Test image is missing: " + sourceImageFile);
net = Dnn.readNetFromTensorflow(modelFileName);
Mat image = Imgcodecs.imread(sourceImageFile);
assertNotNull("Loading image from file failed!", image);
Mat inputBlob = Dnn.blobFromImage(image, 1.0, new Size(224, 224), new Scalar(0), true, true);
assertNotNull("Converting image to blob failed!", inputBlob);
net.setInput(inputBlob, "input");
}
public void testSetInputsNames() {
List<String> inputs = new ArrayList();
inputs.add("input");
try {
net.setInputsNames(inputs);
} catch(Exception e) {
fail("Net setInputsNames failed: " + e.getMessage());
}
}
public void testForward() {
List<Mat> outs = new ArrayList();
List<String> outNames = new ArrayList();
outNames.add("softmax2");
try {
net.forward(outs,outNames);
} catch(Exception e) {
fail("Net forward failed: " + e.getMessage());
}
}
public void testGetMemoryConsumption() {
int layerId = 1;
List<MatOfInt> netInputShapes = new ArrayList();
netInputShapes.add(new MatOfInt(1, 3, 224, 224));
long[] weights=null;
long[] blobs=null;
try {
net.getMemoryConsumption(layerId, netInputShapes, weights, blobs);
} catch(Exception e) {
fail("Net getMemoryConsumption failed: " + e.getMessage());
}
}
public void testGetFLOPS() {
int layerId = 1;
List<MatOfInt> netInputShapes = new ArrayList();
netInputShapes.add(new MatOfInt(1, 3, 224, 224));
try {
net.getFLOPS(layerId, netInputShapes);
} catch(Exception e) {
fail("Net getFLOPS failed: " + e.getMessage());
}
}
}

View File

@ -1,107 +0,0 @@
#include "../perf_precomp.hpp"
#include "opencv2/ts/ocl_perf.hpp"
#include <opencv2/dnn/shape_utils.hpp>
#ifdef HAVE_OPENCL
namespace opencv_test { namespace ocl {
using namespace ::perf;
namespace {
enum {STRIDE_OFF = 1, STRIDE_ON = 2};
CV_ENUM(StrideSize, STRIDE_OFF, STRIDE_ON);
enum {GROUP_OFF = 1, GROUP_2 = 2};
CV_ENUM(GroupSize, GROUP_OFF, GROUP_2);
} // namespace
//Squared Size
#define SSZ(n) cv::Size(n, n)
typedef std::pair<MatShape, int> InpShapeNumOut;
typedef tuple<Size, InpShapeNumOut, GroupSize, StrideSize> ConvParam; //kernel_size, inp shape, groups, stride
typedef TestBaseWithParam<ConvParam> ConvolutionPerfTest;
static inline MatShape blobShape(int count, int nplanes, int height, int width)
{
int data[] = {count, nplanes, height, width};
return MatShape(data, data+4);
}
OCL_PERF_TEST_P( ConvolutionPerfTest, perf, Combine(
Values(Size(1, 1), Size(3, 3), Size(5, 5), Size(11, 11)),
Values(make_pair(blobShape(1, 4, 224, 224), 64),
make_pair(blobShape(1, 64, 112, 122), 128),
make_pair(blobShape(1, 256, 28, 28), 512)),
GroupSize::all(),
StrideSize::all())
)
{
RNG rng(0);
ConvParam params = GetParam();
int ksz = get<0>(params).width;
MatShape inpShape = get<1>(params).first;
int outCn = get<1>(params).second;
int groups = get<2>(params);
int stride = (ksz >= 11) ? 4 : (int)get<3>(params);
int inpCn = inpShape[1];
int wgtSize[] = { outCn, inpCn/groups, ksz, ksz };
int biasSize[] = { outCn, 1, 1, 1 };
const int wtype = CV_32F;
Mat wgtBlob(4, wgtSize, wtype), biasBlob(4, biasSize, wtype);
Mat inpBlob(4, &inpShape[0], wtype);
rng.fill(biasBlob, RNG::UNIFORM, -1, +1);
rng.fill(wgtBlob, RNG::UNIFORM, -1, +1);
rng.fill(inpBlob, RNG::UNIFORM, -1, +1);
LayerParams lp;
lp.set("num_output", outCn);
lp.set("group", groups);
lp.set("stride", stride);
lp.set("kernel_size", ksz);
lp.blobs.reserve(2);
lp.blobs.push_back(wgtBlob);
lp.blobs.push_back(biasBlob);
std::vector<Mat*> inpBlobs(1, &inpBlob);
std::vector<Mat> outBlobs, internalBlobs;
Ptr<Layer> layer = cv::dnn::LayerFactory::createLayerInstance("Convolution", lp);
std::vector<MatShape> inputShapes(1, shape(inpBlob)), outShapes, internals;
layer->getMemoryShapes(inputShapes, 0, outShapes, internals);
for (size_t i = 0; i < outShapes.size(); i++)
{
outBlobs.push_back(Mat(outShapes[i], CV_32F));
}
for (size_t i = 0; i < internals.size(); i++)
{
internalBlobs.push_back(Mat());
if (total(internals[i]))
internalBlobs.back().create(internals[i], CV_32F);
}
layer->finalize(inpBlobs, outBlobs);
layer->preferableTarget = DNN_TARGET_OPENCL;
Mat inpBlob2D = inpBlob.reshape(1, outCn);
Mat wgtBlob2D = wgtBlob.reshape(1, outCn*(inpCn/groups));
Mat outBlob2D = outBlobs[0].reshape(1, outBlobs[0].size[0]);
declare.in(inpBlob2D, wgtBlob2D, WARMUP_RNG).out(outBlob2D);
// warmup
layer->forward(inpBlobs, outBlobs, internalBlobs);
TEST_CYCLE()
{
layer->forward(inpBlobs, outBlobs, internalBlobs);
}
SANITY_CHECK_NOTHING();
}
}
}
#endif

View File

@ -1,92 +1,674 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#include "perf_precomp.hpp"
#include <opencv2/dnn/shape_utils.hpp>
namespace opencv_test {
enum {STRIDE_OFF = 1, STRIDE_ON = 2};
CV_ENUM(StrideSize, STRIDE_OFF, STRIDE_ON);
enum {GROUP_OFF = 1, GROUP_2 = 2};
CV_ENUM(GroupSize, GROUP_OFF, GROUP_2);
typedef std::pair<MatShape, int> InpShapeNumOut;
typedef tuple<Size, InpShapeNumOut, GroupSize, StrideSize> ConvParam; //kernel_size, inp shape, groups, stride
typedef TestBaseWithParam<ConvParam> ConvolutionPerfTest;
static inline MatShape blobShape(int count, int nplanes, int height, int width)
// Flops_Kernel_Input_OutCN_Group_Stride_Pad_Dilation_PadAdjust_PadMode_Bias
struct TestSize_ {
int width, height;
operator Size() const { return Size(width, height); }
};
struct ConvParam_t {
struct TestSize_ kernel;
struct BlobShape { int dims[4]; } shapeIn;
int outCN;
int groups;
struct TestSize_ stride;
struct TestSize_ dilation;
struct TestSize_ pad;
struct TestSize_ padAdjust;
const char* padMode;
bool hasBias;
double declared_flops;
};
// Details: #12142
static const ConvParam_t testConvolutionConfigs[] = {
/* GFLOPS 10.087 x 1 = 10.087 */ {{3, 3}, {{1, 576, 38, 50}}, 512, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 10086963200.},
/* GFLOPS 1.704 x 5 = 8.518 */ {{3, 3}, {{1, 512, 19, 19}}, 512, 512, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 1703596544.},
/* GFLOPS 1.704 x 5 = 8.518 */ {{3, 3}, {{1, 512, 19, 19}}, 512, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 1703596544.},
/* GFLOPS 6.641 x 1 = 6.641 */ {{3, 3}, {{1, 64, 150, 200}}, 192, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 6641280000.},
/* GFLOPS 1.659 x 3 = 4.977 */ {{3, 3}, {{1, 960, 10, 10}}, 960, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 1658976000.},
/* GFLOPS 2.156 x 2 = 4.312 */ {{3, 3}, {{1, 576, 19, 19}}, 576, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 2156088384.},
/* GFLOPS 0.958 x 4 = 3.833 */ {{3, 3}, {{1, 384, 19, 19}}, 384, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 958307712.},
/* GFLOPS 0.830 x 4 = 3.321 */ {{3, 3}, {{1, 64, 75, 100}}, 96, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 830160000.},
/* GFLOPS 1.245 x 2 = 2.490 */ {{3, 3}, {{1, 96, 75, 100}}, 96, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 1244880000.},
/* GFLOPS 2.100 x 1 = 2.100 */ {{3, 3}, {{1, 144, 75, 75}}, 144, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 2100330000.},
/* GFLOPS 1.022 x 2 = 2.044 */ {{3, 3}, {{1, 576, 19, 19}}, 273, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 1021896057.},
/* GFLOPS 0.958 x 2 = 1.917 */ {{3, 3}, {{1, 192, 38, 38}}, 192, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 958446336.},
/* GFLOPS 1.888 x 1 = 1.888 */ {{3, 3}, {{1, 1024, 10, 10}}, 1024, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 1887539200.},
/* GFLOPS 1.888 x 1 = 1.888 */ {{3, 3}, {{1, 1024, 10, 10}}, 1024, 1024, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 1887539200.},
/* GFLOPS 1.704 x 1 = 1.704 */ {{3, 3}, {{1, 256, 38, 38}}, 256, 256, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 1703781376.},
/* GFLOPS 1.704 x 1 = 1.704 */ {{3, 3}, {{1, 256, 38, 38}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 1703781376.},
/* GFLOPS 1.660 x 1 = 1.660 */ {{3, 3}, {{1, 128, 75, 75}}, 128, 128, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 1659600000.},
/* GFLOPS 1.660 x 1 = 1.660 */ {{3, 3}, {{1, 128, 75, 75}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 1659600000.},
/* GFLOPS 0.280 x 5 = 1.402 */ {{1, 1}, {{1, 576, 38, 50}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 280409600.},
/* GFLOPS 0.701 x 2 = 1.401 */ {{3, 3}, {{1, 128, 38, 50}}, 160, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 700720000.},
/* GFLOPS 0.231 x 6 = 1.388 */ {{3, 3}, {{1, 128, 56, 56}}, 32, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", false, 231311360.},
/* GFLOPS 0.231 x 6 = 1.388 */ {{3, 3}, {{1, 256, 14, 14}}, 256, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", false, 231261184.},
/* GFLOPS 0.210 x 6 = 1.262 */ {{1, 1}, {{1, 576, 38, 50}}, 96, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 210307200.},
/* GFLOPS 0.420 x 3 = 1.261 */ {{3, 3}, {{1, 96, 38, 50}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 420492800.},
/* GFLOPS 1.261 x 1 = 1.261 */ {{3, 3}, {{1, 192, 38, 50}}, 192, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 1261113600.},
/* GFLOPS 1.258 x 1 = 1.258 */ {{3, 3}, {{1, 1280, 10, 10}}, 546, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 1258038600.},
/* GFLOPS 1.245 x 1 = 1.245 */ {{3, 3}, {{1, 64, 75, 75}}, 192, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 1245240000.},
/* GFLOPS 0.561 x 2 = 1.121 */ {{3, 3}, {{1, 128, 38, 50}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 560576000.},
/* GFLOPS 1.051 x 1 = 1.051 */ {{3, 3}, {{1, 160, 38, 50}}, 192, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 1050988800.},
/* GFLOPS 1.006 x 1 = 1.006 */ {{3, 3}, {{1, 1024, 10, 10}}, 546, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 1006441800.},
/* GFLOPS 0.246 x 4 = 0.985 */ {{1, 1}, {{1, 256, 75, 100}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 246240000.},
/* GFLOPS 0.189 x 5 = 0.947 */ {{1, 1}, {{1, 512, 19, 19}}, 512, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 189452800.},
/* GFLOPS 0.189 x 5 = 0.947 */ {{1, 1}, {{1, 512, 19, 19}}, 512, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 189452800.},
/* GFLOPS 0.934 x 1 = 0.934 */ {{3, 3}, {{1, 96, 150, 150}}, 96, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 933660000.},
/* GFLOPS 0.231 x 4 = 0.925 */ {{3, 3}, {{1, 128, 28, 28}}, 128, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", false, 231311360.},
/* GFLOPS 0.896 x 1 = 0.896 */ {{5, 5}, {{1, 96, 27, 27}}, 256, 2, {1, 1}, {1, 1}, {2, 2}, {0, 0}, "", true, 895981824.},
/* GFLOPS 0.876 x 1 = 0.876 */ {{3, 3}, {{1, 160, 38, 50}}, 160, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 875824000.},
/* GFLOPS 0.850 x 1 = 0.850 */ {{7, 7}, {{1, 3, 600, 800}}, 24, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 849600000.},
/* GFLOPS 0.841 x 1 = 0.841 */ {{3, 3}, {{1, 128, 38, 50}}, 192, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 840864000.},
/* GFLOPS 0.415 x 2 = 0.831 */ {{3, 3}, {{1, 32, 150, 150}}, 32, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 415440000.},
/* GFLOPS 0.351 x 2 = 0.701 */ {{1, 1}, {{1, 576, 38, 50}}, 160, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 350512000.},
/* GFLOPS 0.701 x 1 = 0.701 */ {{3, 3}, {{1, 128, 75, 100}}, 160, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 700720000.},
/* GFLOPS 0.694 x 1 = 0.694 */ {{3, 3}, {{1, 64, 56, 56}}, 192, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 694235136.},
/* GFLOPS 0.694 x 1 = 0.694 */ {{3, 3}, {{1, 64, 56, 56}}, 192, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 694235136.},
/* GFLOPS 0.231 x 3 = 0.694 */ {{3, 3}, {{1, 64, 56, 56}}, 64, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", false, 231411712.},
/* GFLOPS 0.058 x 12 = 0.694 */ {{3, 3}, {{1, 128, 28, 28}}, 32, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", false, 57827840.},
/* GFLOPS 0.231 x 3 = 0.694 */ {{3, 3}, {{1, 512, 7, 7}}, 512, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", false, 231236096.},
/* GFLOPS 0.160 x 4 = 0.639 */ {{3, 3}, {{1, 64, 38, 38}}, 96, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 159833472.},
/* GFLOPS 0.103 x 6 = 0.618 */ {{1, 1}, {{1, 256, 14, 14}}, 1024, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 102961152.},
/* GFLOPS 0.615 x 1 = 0.615 */ {{1, 1}, {{1, 320, 75, 100}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 615360000.},
/* GFLOPS 0.597 x 1 = 0.597 */ {{3, 3}, {{1, 576, 19, 19}}, 576, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 597254400.},
/* GFLOPS 0.185 x 3 = 0.554 */ {{1, 1}, {{1, 192, 75, 100}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 184800000.},
/* GFLOPS 0.553 x 1 = 0.553 */ {{3, 3}, {{1, 64, 75, 100}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 553440000.},
/* GFLOPS 0.539 x 1 = 0.539 */ {{3, 3}, {{1, 144, 75, 75}}, 144, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 539178048.},
/* GFLOPS 0.103 x 5 = 0.514 */ {{1, 1}, {{1, 1024, 14, 14}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 102810624.},
/* GFLOPS 0.491 x 1 = 0.491 */ {{1, 1}, {{1, 576, 38, 50}}, 224, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 490716800.},
/* GFLOPS 0.240 x 2 = 0.479 */ {{3, 3}, {{1, 96, 38, 38}}, 96, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 239680896.},
/* GFLOPS 0.237 x 2 = 0.474 */ {{7, 7}, {{1, 3, 224, 224}}, 64, 1, {2, 2}, {1, 1}, {3, 3}, {0, 0}, "", true, 236830720.},
/* GFLOPS 0.472 x 1 = 0.472 */ {{3, 3}, {{1, 512, 19, 19}}, 512, 512, {2, 2}, {1, 1}, {1, 1}, {0, 0}, "", true, 471910400.},
/* GFLOPS 0.472 x 1 = 0.472 */ {{3, 3}, {{1, 512, 19, 19}}, 512, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 471910400.},
/* GFLOPS 0.449 x 1 = 0.449 */ {{3, 3}, {{1, 384, 13, 13}}, 384, 2, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 448626048.},
/* GFLOPS 0.426 x 1 = 0.426 */ {{3, 3}, {{1, 128, 75, 75}}, 128, 128, {2, 2}, {1, 1}, {1, 1}, {0, 0}, "", true, 426037760.},
/* GFLOPS 0.426 x 1 = 0.426 */ {{3, 3}, {{1, 128, 75, 75}}, 128, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 426037760.},
/* GFLOPS 0.426 x 1 = 0.426 */ {{3, 3}, {{1, 128, 38, 38}}, 128, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", false, 426037760.},
/* GFLOPS 0.426 x 1 = 0.426 */ {{3, 3}, {{1, 256, 38, 38}}, 256, 256, {2, 2}, {1, 1}, {1, 1}, {0, 0}, "", true, 425945344.},
/* GFLOPS 0.426 x 1 = 0.426 */ {{3, 3}, {{1, 256, 38, 38}}, 256, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 425945344.},
/* GFLOPS 0.426 x 1 = 0.426 */ {{3, 3}, {{1, 256, 19, 19}}, 256, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", false, 425945344.},
/* GFLOPS 0.421 x 1 = 0.421 */ {{1, 1}, {{1, 576, 38, 50}}, 192, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 420614400.},
/* GFLOPS 0.415 x 1 = 0.415 */ {{3, 3}, {{1, 32, 150, 150}}, 32, 32, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 415440000.},
/* GFLOPS 0.415 x 1 = 0.415 */ {{3, 3}, {{1, 64, 150, 150}}, 64, 64, {2, 2}, {1, 1}, {1, 1}, {0, 0}, "", true, 415080000.},
/* GFLOPS 0.415 x 1 = 0.415 */ {{3, 3}, {{1, 64, 150, 150}}, 64, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 415080000.},
/* GFLOPS 0.104 x 4 = 0.414 */ {{1, 1}, {{1, 64, 56, 56}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 103563264.},
/* GFLOPS 0.103 x 4 = 0.413 */ {{1, 1}, {{1, 128, 28, 28}}, 512, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 103161856.},
/* GFLOPS 0.376 x 1 = 0.376 */ {{1, 1}, {{1, 24, 300, 400}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "VALID", true, 376320000.},
/* GFLOPS 0.347 x 1 = 0.347 */ {{3, 3}, {{1, 128, 28, 28}}, 192, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 346967040.},
/* GFLOPS 0.347 x 1 = 0.347 */ {{3, 3}, {{1, 128, 28, 28}}, 192, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 346967040.},
/* GFLOPS 0.014 x 24 = 0.347 */ {{3, 3}, {{1, 128, 14, 14}}, 32, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", false, 14456960.},
/* GFLOPS 0.053 x 6 = 0.320 */ {{1, 1}, {{1, 576, 19, 19}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 53277824.},
/* GFLOPS 0.319 x 1 = 0.319 */ {{3, 3}, {{1, 192, 19, 19}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 319482112.},
/* GFLOPS 0.315 x 1 = 0.315 */ {{3, 3}, {{1, 96, 75, 100}}, 96, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 315369600.},
/* GFLOPS 0.103 x 3 = 0.309 */ {{1, 1}, {{1, 512, 7, 7}}, 2048, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 102860800.},
/* GFLOPS 0.103 x 3 = 0.309 */ {{1, 1}, {{1, 512, 28, 28}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 102860800.},
/* GFLOPS 0.308 x 1 = 0.308 */ {{1, 1}, {{1, 320, 75, 100}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 307680000.},
/* GFLOPS 0.299 x 1 = 0.299 */ {{3, 3}, {{1, 256, 13, 13}}, 384, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 299105664.},
/* GFLOPS 0.299 x 1 = 0.299 */ {{3, 3}, {{1, 384, 13, 13}}, 256, 2, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 299084032.},
/* GFLOPS 0.017 x 17 = 0.290 */ {{1, 1}, {{1, 32, 32, 64}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 17039360.},
/* GFLOPS 0.017 x 16 = 0.269 */ {{1, 1}, {{1, 128, 32, 64}}, 32, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 16842752.},
/* GFLOPS 0.133 x 2 = 0.266 */ {{3, 3}, {{1, 128, 19, 19}}, 160, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 133136800.},
/* GFLOPS 0.038 x 7 = 0.265 */ {{3, 3}, {{1, 16, 64, 128}}, 16, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 37879808.},
/* GFLOPS 0.126 x 2 = 0.252 */ {{3, 3}, {{1, 512, 5, 5}}, 546, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 125812050.},
/* GFLOPS 0.248 x 1 = 0.248 */ {{1, 1}, {{1, 64, 150, 200}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 247680000.},
/* GFLOPS 0.040 x 6 = 0.240 */ {{1, 1}, {{1, 576, 19, 19}}, 96, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 39958368.},
/* GFLOPS 0.080 x 3 = 0.240 */ {{3, 3}, {{1, 96, 19, 19}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 79893632.},
/* GFLOPS 0.240 x 1 = 0.240 */ {{3, 3}, {{1, 192, 38, 38}}, 192, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 239611584.},
/* GFLOPS 0.240 x 1 = 0.240 */ {{3, 3}, {{1, 192, 19, 19}}, 192, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 239611584.},
/* GFLOPS 0.237 x 1 = 0.237 */ {{7, 7}, {{1, 3, 224, 224}}, 64, 1, {2, 2}, {1, 1}, {3, 3}, {0, 0}, "", false, 236830720.},
/* GFLOPS 0.237 x 1 = 0.237 */ {{7, 7}, {{1, 3, 224, 224}}, 64, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 236830720.},
/* GFLOPS 0.111 x 2 = 0.221 */ {{3, 3}, {{1, 192, 10, 10}}, 320, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 110624000.},
/* GFLOPS 0.213 x 1 = 0.213 */ {{3, 3}, {{1, 128, 38, 38}}, 256, 1, {2, 2}, {1, 1}, {1, 1}, {0, 0}, "", false, 213018880.},
/* GFLOPS 0.213 x 1 = 0.213 */ {{3, 3}, {{1, 128, 19, 19}}, 256, 1, {1, 1}, {2, 2}, {2, 2}, {0, 0}, "", false, 213018880.},
/* GFLOPS 0.107 x 2 = 0.213 */ {{3, 3}, {{1, 128, 19, 19}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 106509440.},
/* GFLOPS 0.213 x 1 = 0.213 */ {{3, 3}, {{1, 256, 19, 19}}, 128, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", false, 212972672.},
/* GFLOPS 0.212 x 1 = 0.212 */ {{7, 7}, {{1, 3, 300, 300}}, 32, 1, {2, 2}, {1, 1}, {3, 3}, {0, 0}, "", true, 212400000.},
/* GFLOPS 0.211 x 1 = 0.211 */ {{11, 11}, {{1, 3, 227, 227}}, 96, 1, {4, 4}, {1, 1}, {0, 0}, {0, 0}, "", true, 211120800.},
/* GFLOPS 0.210 x 1 = 0.210 */ {{3, 3}, {{1, 64, 38, 50}}, 96, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 210307200.},
/* GFLOPS 0.210 x 1 = 0.210 */ {{1, 1}, {{1, 1024, 10, 10}}, 1024, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 209817600.},
/* GFLOPS 0.210 x 1 = 0.210 */ {{1, 1}, {{1, 1024, 10, 10}}, 1024, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 209817600.},
/* GFLOPS 0.104 x 2 = 0.208 */ {{3, 3}, {{1, 32, 75, 75}}, 32, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", false, 103860000.},
/* GFLOPS 0.206 x 1 = 0.206 */ {{1, 1}, {{1, 256, 56, 56}}, 512, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "", false, 205922304.},
/* GFLOPS 0.206 x 1 = 0.206 */ {{1, 1}, {{1, 256, 56, 56}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 205922304.},
/* GFLOPS 0.103 x 2 = 0.206 */ {{1, 1}, {{1, 256, 56, 56}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 102961152.},
/* GFLOPS 0.206 x 1 = 0.206 */ {{1, 1}, {{1, 512, 28, 28}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 205721600.},
/* GFLOPS 0.206 x 1 = 0.206 */ {{1, 1}, {{1, 512, 28, 28}}, 1024, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "", false, 205721600.},
/* GFLOPS 0.206 x 1 = 0.206 */ {{1, 1}, {{1, 1024, 14, 14}}, 512, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 205621248.},
/* GFLOPS 0.206 x 1 = 0.206 */ {{1, 1}, {{1, 1024, 14, 14}}, 2048, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "", false, 205621248.},
/* GFLOPS 0.103 x 2 = 0.206 */ {{1, 1}, {{1, 2048, 7, 7}}, 512, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 102785536.},
/* GFLOPS 0.201 x 1 = 0.201 */ {{1, 1}, {{1, 512, 14, 14}}, 1000, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 200900000.},
/* GFLOPS 0.200 x 1 = 0.200 */ {{3, 3}, {{1, 160, 19, 19}}, 192, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 199687872.},
/* GFLOPS 0.190 x 1 = 0.190 */ {{1, 1}, {{1, 256, 38, 38}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 189637632.},
/* GFLOPS 0.190 x 1 = 0.190 */ {{1, 1}, {{1, 256, 38, 38}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 189637632.},
/* GFLOPS 0.047 x 4 = 0.190 */ {{1, 1}, {{1, 256, 38, 38}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 47409408.},
/* GFLOPS 0.038 x 5 = 0.189 */ {{3, 3}, {{1, 32, 32, 64}}, 32, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 37814272.},
/* GFLOPS 0.185 x 1 = 0.185 */ {{1, 1}, {{1, 128, 75, 75}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 185040000.},
/* GFLOPS 0.185 x 1 = 0.185 */ {{1, 1}, {{1, 128, 75, 75}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 185040000.},
/* GFLOPS 0.181 x 1 = 0.181 */ {{3, 3}, {{1, 160, 14, 14}}, 320, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 180696320.},
/* GFLOPS 0.181 x 1 = 0.181 */ {{3, 3}, {{1, 160, 14, 14}}, 320, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 180696320.},
/* GFLOPS 0.090 x 2 = 0.181 */ {{3, 3}, {{1, 224, 10, 10}}, 224, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 90339200.},
/* GFLOPS 0.180 x 1 = 0.180 */ {{1, 1}, {{1, 224, 56, 56}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 180232192.},
/* GFLOPS 0.174 x 1 = 0.174 */ {{3, 3}, {{1, 96, 28, 28}}, 128, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 173508608.},
/* GFLOPS 0.174 x 1 = 0.174 */ {{3, 3}, {{1, 96, 28, 28}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 173508608.},
/* GFLOPS 0.166 x 1 = 0.166 */ {{3, 3}, {{1, 160, 19, 19}}, 160, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 166406560.},
/* GFLOPS 0.080 x 2 = 0.160 */ {{1, 1}, {{1, 576, 19, 19}}, 192, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 79916736.},
/* GFLOPS 0.160 x 1 = 0.160 */ {{3, 3}, {{1, 128, 19, 19}}, 192, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 159764160.},
/* GFLOPS 0.159 x 1 = 0.159 */ {{7, 7}, {{1, 3, 300, 300}}, 24, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 159300000.},
/* GFLOPS 0.155 x 1 = 0.155 */ {{1, 1}, {{1, 192, 56, 56}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 154542080.},
/* GFLOPS 0.146 x 1 = 0.146 */ {{3, 3}, {{1, 144, 14, 14}}, 288, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 146369664.},
/* GFLOPS 0.146 x 1 = 0.146 */ {{3, 3}, {{1, 144, 14, 14}}, 288, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 146369664.},
/* GFLOPS 0.072 x 2 = 0.144 */ {{1, 1}, {{1, 1024, 10, 10}}, 352, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 72124800.},
/* GFLOPS 0.140 x 1 = 0.140 */ {{1, 1}, {{1, 576, 38, 50}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 140204800.},
/* GFLOPS 0.017 x 8 = 0.138 */ {{1, 1}, {{1, 16, 64, 128}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 17301504.},
/* GFLOPS 0.067 x 2 = 0.133 */ {{1, 1}, {{1, 576, 19, 19}}, 160, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 66597280.},
/* GFLOPS 0.133 x 1 = 0.133 */ {{3, 3}, {{1, 128, 38, 38}}, 160, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 133136800.},
/* GFLOPS 0.129 x 1 = 0.129 */ {{1, 1}, {{1, 160, 56, 56}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 128851968.},
/* GFLOPS 0.128 x 1 = 0.128 */ {{3, 3}, {{1, 64, 24, 24}}, 192, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 127512576.},
/* GFLOPS 0.120 x 1 = 0.120 */ {{5, 5}, {{1, 32, 28, 28}}, 96, 1, {1, 1}, {1, 1}, {2, 2}, {0, 0}, "", true, 120497664.},
/* GFLOPS 0.120 x 1 = 0.120 */ {{5, 5}, {{1, 32, 28, 28}}, 96, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 120497664.},
/* GFLOPS 0.040 x 3 = 0.120 */ {{1, 1}, {{1, 96, 19, 19}}, 576, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 40131648.},
/* GFLOPS 0.118 x 1 = 0.118 */ {{1, 1}, {{1, 320, 38, 38}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 118477312.},
/* GFLOPS 0.017 x 7 = 0.118 */ {{1, 1}, {{1, 64, 64, 128}}, 16, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 16908288.},
/* GFLOPS 0.039 x 3 = 0.118 */ {{1, 1}, {{1, 1024, 10, 10}}, 192, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 39340800.},
/* GFLOPS 0.118 x 1 = 0.118 */ {{3, 3}, {{1, 256, 19, 19}}, 256, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 117990400.},
/* GFLOPS 0.058 x 2 = 0.116 */ {{3, 3}, {{1, 16, 56, 56}}, 64, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 58003456.},
/* GFLOPS 0.058 x 2 = 0.116 */ {{3, 3}, {{1, 32, 28, 28}}, 128, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 57903104.},
/* GFLOPS 0.058 x 2 = 0.116 */ {{3, 3}, {{1, 64, 14, 14}}, 256, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 57852928.},
/* GFLOPS 0.116 x 1 = 0.116 */ {{3, 3}, {{1, 128, 14, 14}}, 256, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 115655680.},
/* GFLOPS 0.116 x 1 = 0.116 */ {{3, 3}, {{1, 128, 14, 14}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 115655680.},
/* GFLOPS 0.112 x 1 = 0.112 */ {{1, 1}, {{1, 1024, 10, 10}}, 546, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 111875400.},
/* GFLOPS 0.036 x 3 = 0.107 */ {{1, 1}, {{1, 192, 38, 38}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 35580160.},
/* GFLOPS 0.107 x 1 = 0.107 */ {{3, 3}, {{1, 32, 75, 75}}, 128, 1, {2, 2}, {1, 1}, {1, 1}, {0, 0}, "", false, 106648064.},
/* GFLOPS 0.107 x 1 = 0.107 */ {{3, 3}, {{1, 64, 38, 38}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 106555648.},
/* GFLOPS 0.105 x 1 = 0.105 */ {{1, 1}, {{1, 512, 10, 10}}, 1024, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 104960000.},
/* GFLOPS 0.105 x 1 = 0.105 */ {{1, 1}, {{1, 512, 10, 10}}, 1024, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 104960000.},
/* GFLOPS 0.103 x 1 = 0.103 */ {{1, 1}, {{1, 128, 56, 56}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 103161856.},
/* GFLOPS 0.051 x 2 = 0.103 */ {{1, 1}, {{1, 256, 28, 28}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 51480576.},
/* GFLOPS 0.051 x 2 = 0.103 */ {{1, 1}, {{1, 256, 28, 28}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 51480576.},
/* GFLOPS 0.101 x 1 = 0.101 */ {{1, 1}, {{1, 512, 19, 19}}, 273, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 101016825.},
/* GFLOPS 0.096 x 1 = 0.096 */ {{1, 1}, {{1, 480, 28, 28}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 96438272.},
/* GFLOPS 0.095 x 1 = 0.095 */ {{1, 1}, {{1, 128, 38, 38}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 95003648.},
/* GFLOPS 0.095 x 1 = 0.095 */ {{1, 1}, {{1, 128, 38, 38}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 95003648.},
/* GFLOPS 0.095 x 1 = 0.095 */ {{1, 1}, {{1, 256, 19, 19}}, 512, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 94818816.},
/* GFLOPS 0.095 x 1 = 0.095 */ {{1, 1}, {{1, 256, 19, 19}}, 512, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 94818816.},
/* GFLOPS 0.094 x 1 = 0.094 */ {{1, 1}, {{1, 32, 150, 150}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 93600000.},
/* GFLOPS 0.094 x 1 = 0.094 */ {{1, 1}, {{1, 32, 150, 150}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 93600000.},
/* GFLOPS 0.093 x 1 = 0.093 */ {{1, 1}, {{1, 512, 38, 50}}, 48, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 93480000.},
/* GFLOPS 0.093 x 1 = 0.093 */ {{1, 1}, {{1, 576, 19, 19}}, 224, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 93236192.},
/* GFLOPS 0.093 x 1 = 0.093 */ {{1, 1}, {{1, 64, 75, 75}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 92880000.},
/* GFLOPS 0.093 x 1 = 0.093 */ {{1, 1}, {{1, 64, 75, 75}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 92880000.},
/* GFLOPS 0.031 x 3 = 0.092 */ {{1, 1}, {{1, 160, 10, 10}}, 960, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 30816000.},
/* GFLOPS 0.092 x 1 = 0.092 */ {{1, 1}, {{1, 192, 75, 100}}, 32, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 92400000.},
/* GFLOPS 0.090 x 1 = 0.090 */ {{1, 1}, {{1, 448, 28, 28}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 90015744.},
/* GFLOPS 0.045 x 2 = 0.090 */ {{3, 3}, {{1, 576, 19, 19}}, 12, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 44918508.},
/* GFLOPS 0.089 x 1 = 0.089 */ {{3, 3}, {{1, 112, 14, 14}}, 224, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 88554368.},
/* GFLOPS 0.089 x 1 = 0.089 */ {{3, 3}, {{1, 112, 14, 14}}, 224, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 88554368.},
/* GFLOPS 0.021 x 4 = 0.084 */ {{5, 1}, {{1, 32, 32, 64}}, 32, 1, {1, 1}, {1, 1}, {2, 0}, {0, 0}, "", false, 21037056.},
/* GFLOPS 0.021 x 4 = 0.084 */ {{1, 5}, {{1, 32, 32, 64}}, 32, 1, {1, 1}, {1, 1}, {0, 2}, {0, 0}, "", true, 21037056.},
/* GFLOPS 0.084 x 1 = 0.084 */ {{1, 1}, {{1, 416, 28, 28}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 83593216.},
/* GFLOPS 0.082 x 1 = 0.082 */ {{1, 1}, {{1, 320, 10, 10}}, 1280, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 82048000.},
/* GFLOPS 0.040 x 2 = 0.080 */ {{1, 1}, {{1, 576, 19, 19}}, 96, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 39958368.},
/* GFLOPS 0.040 x 2 = 0.079 */ {{1, 1}, {{1, 24, 75, 75}}, 144, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 39690000.},
/* GFLOPS 0.040 x 2 = 0.079 */ {{3, 3}, {{1, 3, 300, 300}}, 32, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 39600000.},
/* GFLOPS 0.077 x 1 = 0.077 */ {{1, 1}, {{1, 96, 56, 56}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 77471744.},
/* GFLOPS 0.077 x 1 = 0.077 */ {{3, 3}, {{1, 192, 10, 10}}, 224, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 77436800.},
/* GFLOPS 0.077 x 1 = 0.077 */ {{1, 1}, {{1, 384, 28, 28}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 77170688.},
/* GFLOPS 0.038 x 2 = 0.076 */ {{3, 3}, {{1, 32, 32, 64}}, 32, 1, {1, 1}, {8, 8}, {8, 8}, {0, 0}, "", true, 37814272.},
/* GFLOPS 0.038 x 2 = 0.076 */ {{3, 3}, {{1, 32, 32, 64}}, 32, 1, {1, 1}, {4, 4}, {4, 4}, {0, 0}, "", true, 37814272.},
/* GFLOPS 0.038 x 2 = 0.076 */ {{3, 3}, {{1, 32, 32, 64}}, 32, 1, {1, 1}, {2, 2}, {2, 2}, {0, 0}, "", true, 37814272.},
/* GFLOPS 0.038 x 2 = 0.076 */ {{3, 3}, {{1, 32, 32, 64}}, 32, 1, {1, 1}, {16, 16}, {16, 16}, {0, 0}, "", true, 37814272.},
/* GFLOPS 0.018 x 4 = 0.072 */ {{1, 1}, {{1, 64, 19, 19}}, 384, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 17882496.},
/* GFLOPS 0.071 x 1 = 0.071 */ {{1, 1}, {{1, 16, 150, 150}}, 96, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 71280000.},
/* GFLOPS 0.071 x 1 = 0.071 */ {{1, 1}, {{1, 352, 28, 28}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 70748160.},
/* GFLOPS 0.071 x 1 = 0.071 */ {{1, 1}, {{1, 24, 150, 150}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "VALID", true, 70560000.},
/* GFLOPS 0.070 x 1 = 0.070 */ {{3, 3}, {{1, 96, 14, 14}}, 208, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 70487872.},
/* GFLOPS 0.069 x 1 = 0.069 */ {{3, 3}, {{1, 96, 14, 14}}, 204, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 69132336.},
/* GFLOPS 0.066 x 1 = 0.066 */ {{1, 1}, {{1, 1280, 10, 10}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 65561600.},
/* GFLOPS 0.033 x 2 = 0.065 */ {{3, 3}, {{1, 48, 14, 14}}, 192, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 32551680.},
/* GFLOPS 0.065 x 1 = 0.065 */ {{3, 3}, {{1, 192, 7, 7}}, 384, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 65046912.},
/* GFLOPS 0.065 x 1 = 0.065 */ {{3, 3}, {{1, 192, 7, 7}}, 384, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 65046912.},
/* GFLOPS 0.065 x 1 = 0.065 */ {{3, 3}, {{1, 160, 10, 10}}, 224, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 64534400.},
/* GFLOPS 0.064 x 1 = 0.064 */ {{1, 1}, {{1, 320, 28, 28}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 64325632.},
/* GFLOPS 0.032 x 2 = 0.064 */ {{3, 3}, {{1, 96, 12, 12}}, 128, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 31868928.},
/* GFLOPS 0.061 x 1 = 0.061 */ {{1, 1}, {{1, 960, 10, 10}}, 320, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 61472000.},
/* GFLOPS 0.031 x 2 = 0.061 */ {{1, 1}, {{1, 960, 10, 10}}, 160, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 30736000.},
/* GFLOPS 0.060 x 1 = 0.060 */ {{3, 3}, {{1, 96, 38, 38}}, 96, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 59920224.},
/* GFLOPS 0.059 x 1 = 0.059 */ {{1, 1}, {{1, 320, 38, 38}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 59238656.},
/* GFLOPS 0.059 x 1 = 0.059 */ {{3, 3}, {{1, 128, 19, 19}}, 256, 1, {2, 2}, {1, 1}, {1, 1}, {0, 0}, "", true, 59008000.},
/* GFLOPS 0.059 x 1 = 0.059 */ {{3, 3}, {{1, 256, 10, 10}}, 512, 1, {2, 2}, {1, 1}, {1, 1}, {0, 0}, "", true, 58995200.},
/* GFLOPS 0.059 x 1 = 0.059 */ {{3, 3}, {{1, 256, 10, 10}}, 512, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 58995200.},
/* GFLOPS 0.059 x 1 = 0.059 */ {{3, 3}, {{1, 256, 10, 10}}, 512, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 58995200.},
/* GFLOPS 0.058 x 1 = 0.058 */ {{1, 1}, {{1, 288, 28, 28}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 57903104.},
/* GFLOPS 0.004 x 16 = 0.058 */ {{3, 3}, {{1, 128, 7, 7}}, 32, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", false, 3614240.},
/* GFLOPS 0.055 x 1 = 0.055 */ {{3, 3}, {{1, 1280, 10, 10}}, 24, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 55298400.},
/* GFLOPS 0.018 x 3 = 0.054 */ {{1, 1}, {{1, 32, 38, 38}}, 192, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 18021120.},
/* GFLOPS 0.018 x 3 = 0.053 */ {{1, 1}, {{1, 384, 19, 19}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 17766976.},
/* GFLOPS 0.053 x 1 = 0.053 */ {{3, 3}, {{1, 128, 38, 38}}, 16, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 53254720.},
/* GFLOPS 0.053 x 1 = 0.053 */ {{1, 1}, {{1, 528, 14, 14}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 53036032.},
/* GFLOPS 0.053 x 1 = 0.053 */ {{1, 1}, {{1, 528, 14, 14}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 53036032.},
/* GFLOPS 0.052 x 1 = 0.052 */ {{1, 1}, {{1, 1024, 10, 10}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 52454400.},
/* GFLOPS 0.052 x 1 = 0.052 */ {{1, 1}, {{1, 1024, 10, 10}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 52454400.},
/* GFLOPS 0.052 x 1 = 0.052 */ {{1, 1}, {{1, 1024, 10, 10}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 52454400.},
/* GFLOPS 0.026 x 2 = 0.052 */ {{1, 1}, {{1, 1024, 10, 10}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 26227200.},
/* GFLOPS 0.052 x 1 = 0.052 */ {{1, 1}, {{1, 64, 56, 56}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 51781632.},
/* GFLOPS 0.051 x 1 = 0.051 */ {{1, 1}, {{1, 256, 56, 56}}, 128, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "", false, 51480576.},
/* GFLOPS 0.051 x 1 = 0.051 */ {{1, 1}, {{1, 256, 28, 28}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 51480576.},
/* GFLOPS 0.051 x 1 = 0.051 */ {{1, 1}, {{1, 512, 28, 28}}, 256, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "", false, 51430400.},
/* GFLOPS 0.026 x 2 = 0.051 */ {{1, 1}, {{1, 512, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 25715200.},
/* GFLOPS 0.026 x 2 = 0.051 */ {{1, 1}, {{1, 512, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 25715200.},
/* GFLOPS 0.013 x 4 = 0.051 */ {{1, 1}, {{1, 512, 14, 14}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 12857600.},
/* GFLOPS 0.051 x 1 = 0.051 */ {{1, 1}, {{1, 1024, 14, 14}}, 512, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "", false, 51405312.},
/* GFLOPS 0.050 x 1 = 0.050 */ {{1, 1}, {{1, 992, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 49799680.},
/* GFLOPS 0.048 x 1 = 0.048 */ {{1, 1}, {{1, 960, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 48194048.},
/* GFLOPS 0.047 x 1 = 0.047 */ {{1, 1}, {{1, 256, 19, 19}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 47409408.},
/* GFLOPS 0.047 x 1 = 0.047 */ {{1, 1}, {{1, 512, 38, 50}}, 24, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 46740000.},
/* GFLOPS 0.047 x 1 = 0.047 */ {{1, 1}, {{1, 928, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 46588416.},
/* GFLOPS 0.046 x 1 = 0.046 */ {{1, 1}, {{1, 64, 75, 75}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 46440000.},
/* GFLOPS 0.023 x 2 = 0.045 */ {{3, 3}, {{1, 256, 3, 3}}, 546, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 22648626.},
/* GFLOPS 0.045 x 1 = 0.045 */ {{3, 3}, {{1, 160, 7, 7}}, 320, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 45174080.},
/* GFLOPS 0.045 x 1 = 0.045 */ {{3, 3}, {{1, 160, 7, 7}}, 320, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 45174080.},
/* GFLOPS 0.045 x 1 = 0.045 */ {{1, 1}, {{1, 224, 28, 28}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 45058048.},
/* GFLOPS 0.023 x 2 = 0.045 */ {{1, 1}, {{1, 512, 14, 14}}, 112, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 22500800.},
/* GFLOPS 0.045 x 1 = 0.045 */ {{1, 1}, {{1, 896, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 44982784.},
/* GFLOPS 0.045 x 1 = 0.045 */ {{3, 3}, {{1, 3, 227, 227}}, 64, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "", true, 44946880.},
/* GFLOPS 0.044 x 1 = 0.044 */ {{3, 3}, {{1, 128, 19, 19}}, 192, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 44256000.},
/* GFLOPS 0.044 x 1 = 0.044 */ {{3, 3}, {{1, 1024, 10, 10}}, 24, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 44239200.},
/* GFLOPS 0.043 x 1 = 0.043 */ {{7, 7}, {{1, 3, 96, 96}}, 64, 1, {2, 2}, {1, 1}, {3, 3}, {0, 0}, "", true, 43499520.},
/* GFLOPS 0.043 x 1 = 0.043 */ {{1, 1}, {{1, 864, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 43377152.},
/* GFLOPS 0.042 x 1 = 0.042 */ {{1, 1}, {{1, 832, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 41771520.},
/* GFLOPS 0.040 x 1 = 0.040 */ {{5, 5}, {{1, 32, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {2, 2}, {0, 0}, "", true, 40165888.},
/* GFLOPS 0.040 x 1 = 0.040 */ {{5, 5}, {{1, 32, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 40165888.},
/* GFLOPS 0.040 x 1 = 0.040 */ {{1, 1}, {{1, 800, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 40165888.},
/* GFLOPS 0.040 x 1 = 0.040 */ {{3, 3}, {{1, 64, 19, 19}}, 96, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 39958368.},
/* GFLOPS 0.040 x 1 = 0.040 */ {{3, 3}, {{1, 256, 19, 19}}, 24, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 39932376.},
/* GFLOPS 0.040 x 1 = 0.040 */ {{3, 3}, {{1, 3, 300, 300}}, 32, 1, {2, 2}, {1, 1}, {1, 1}, {0, 0}, "", true, 39600000.},
/* GFLOPS 0.039 x 1 = 0.039 */ {{1, 1}, {{1, 144, 75, 75}}, 24, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 39015000.},
/* GFLOPS 0.039 x 1 = 0.039 */ {{1, 1}, {{1, 192, 28, 28}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 38635520.},
/* GFLOPS 0.039 x 1 = 0.039 */ {{1, 1}, {{1, 768, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 38560256.},
/* GFLOPS 0.037 x 1 = 0.037 */ {{1, 1}, {{1, 736, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 36954624.},
/* GFLOPS 0.036 x 1 = 0.036 */ {{1, 1}, {{1, 480, 14, 14}}, 192, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 36164352.},
/* GFLOPS 0.036 x 1 = 0.036 */ {{1, 1}, {{1, 480, 14, 14}}, 192, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 36164352.},
/* GFLOPS 0.018 x 2 = 0.036 */ {{1, 1}, {{1, 192, 38, 38}}, 32, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 17790080.},
/* GFLOPS 0.035 x 1 = 0.035 */ {{1, 1}, {{1, 704, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 35348992.},
/* GFLOPS 0.034 x 1 = 0.034 */ {{1, 1}, {{1, 672, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 33743360.},
/* GFLOPS 0.034 x 1 = 0.034 */ {{1, 1}, {{1, 128, 32, 64}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 33685504.},
/* GFLOPS 0.034 x 1 = 0.034 */ {{2, 2}, {{1, 64, 64, 128}}, 32, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "", false, 33619968.},
/* GFLOPS 0.033 x 1 = 0.033 */ {{1, 1}, {{1, 528, 14, 14}}, 160, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 33147520.},
/* GFLOPS 0.033 x 1 = 0.033 */ {{1, 1}, {{1, 528, 14, 14}}, 160, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 33147520.},
/* GFLOPS 0.033 x 1 = 0.033 */ {{1, 1}, {{1, 1024, 10, 10}}, 160, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 32784000.},
/* GFLOPS 0.032 x 1 = 0.032 */ {{1, 1}, {{1, 160, 28, 28}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 32212992.},
/* GFLOPS 0.032 x 1 = 0.032 */ {{1, 1}, {{1, 512, 14, 14}}, 160, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 32144000.},
/* GFLOPS 0.032 x 1 = 0.032 */ {{1, 1}, {{1, 640, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 32137728.},
/* GFLOPS 0.032 x 1 = 0.032 */ {{1, 1}, {{1, 508, 14, 14}}, 160, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 31893120.},
/* GFLOPS 0.031 x 1 = 0.031 */ {{1, 1}, {{1, 832, 7, 7}}, 384, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 31328640.},
/* GFLOPS 0.031 x 1 = 0.031 */ {{1, 1}, {{1, 832, 7, 7}}, 384, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 31328640.},
/* GFLOPS 0.031 x 1 = 0.031 */ {{1, 1}, {{1, 608, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 30532096.},
/* GFLOPS 0.015 x 2 = 0.030 */ {{5, 5}, {{1, 24, 14, 14}}, 64, 1, {1, 1}, {1, 1}, {2, 2}, {0, 0}, "", true, 15065344.},
/* GFLOPS 0.015 x 2 = 0.030 */ {{5, 5}, {{1, 24, 14, 14}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 15065344.},
/* GFLOPS 0.015 x 2 = 0.030 */ {{5, 5}, {{1, 48, 7, 7}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 15059072.},
/* GFLOPS 0.029 x 1 = 0.029 */ {{3, 3}, {{1, 256, 10, 10}}, 256, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 29497600.},
/* GFLOPS 0.029 x 1 = 0.029 */ {{1, 1}, {{1, 192, 28, 28}}, 96, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 28976640.},
/* GFLOPS 0.029 x 1 = 0.029 */ {{1, 1}, {{1, 192, 28, 28}}, 96, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 28976640.},
/* GFLOPS 0.029 x 1 = 0.029 */ {{1, 1}, {{1, 512, 14, 14}}, 144, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 28929600.},
/* GFLOPS 0.029 x 1 = 0.029 */ {{1, 1}, {{1, 512, 14, 14}}, 144, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 28929600.},
/* GFLOPS 0.029 x 1 = 0.029 */ {{1, 1}, {{1, 576, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 28926464.},
/* GFLOPS 0.027 x 1 = 0.027 */ {{1, 1}, {{1, 544, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 27320832.},
/* GFLOPS 0.027 x 1 = 0.027 */ {{1, 1}, {{1, 384, 19, 19}}, 96, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 26650464.},
/* GFLOPS 0.027 x 1 = 0.027 */ {{1, 1}, {{1, 576, 19, 19}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 26638912.},
/* GFLOPS 0.027 x 1 = 0.027 */ {{3, 3}, {{1, 128, 38, 38}}, 8, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 26627360.},
/* GFLOPS 0.027 x 1 = 0.027 */ {{1, 1}, {{1, 528, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 26518016.},
/* GFLOPS 0.027 x 1 = 0.027 */ {{1, 1}, {{1, 528, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 26518016.},
/* GFLOPS 0.026 x 1 = 0.026 */ {{1, 1}, {{1, 96, 75, 75}}, 24, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 26055000.},
/* GFLOPS 0.026 x 1 = 0.026 */ {{1, 1}, {{1, 64, 56, 56}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "VALID", true, 25890816.},
/* GFLOPS 0.026 x 1 = 0.026 */ {{1, 1}, {{1, 64, 56, 56}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 25890816.},
/* GFLOPS 0.026 x 1 = 0.026 */ {{1, 1}, {{1, 64, 56, 56}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 25890816.},
/* GFLOPS 0.026 x 1 = 0.026 */ {{1, 1}, {{1, 1024, 10, 10}}, 126, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 25817400.},
/* GFLOPS 0.026 x 1 = 0.026 */ {{1, 1}, {{1, 128, 28, 28}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 25790464.},
/* GFLOPS 0.026 x 1 = 0.026 */ {{1, 1}, {{1, 256, 28, 28}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 25740288.},
/* GFLOPS 0.026 x 1 = 0.026 */ {{1, 1}, {{1, 256, 28, 28}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 25740288.},
/* GFLOPS 0.013 x 2 = 0.026 */ {{1, 1}, {{1, 256, 28, 28}}, 32, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 12870144.},
/* GFLOPS 0.026 x 1 = 0.026 */ {{1, 1}, {{1, 512, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 25715200.},
/* GFLOPS 0.013 x 2 = 0.026 */ {{1, 1}, {{1, 512, 14, 14}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 12857600.},
/* GFLOPS 0.024 x 1 = 0.024 */ {{1, 1}, {{1, 480, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 24109568.},
/* GFLOPS 0.024 x 1 = 0.024 */ {{1, 1}, {{1, 128, 38, 38}}, 256, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "", false, 23750912.},
/* GFLOPS 0.024 x 1 = 0.024 */ {{1, 1}, {{1, 256, 19, 19}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 23704704.},
/* GFLOPS 0.023 x 1 = 0.023 */ {{3, 3}, {{1, 3, 256, 512}}, 13, 1, {2, 2}, {1, 1}, {1, 1}, {0, 0}, "", true, 23429120.},
/* GFLOPS 0.023 x 1 = 0.023 */ {{1, 1}, {{1, 32, 150, 150}}, 16, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 23400000.},
/* GFLOPS 0.023 x 1 = 0.023 */ {{1, 1}, {{1, 512, 19, 19}}, 63, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 23311575.},
/* GFLOPS 0.023 x 1 = 0.023 */ {{1, 1}, {{1, 448, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 22503936.},
/* GFLOPS 0.023 x 1 = 0.023 */ {{1, 1}, {{1, 512, 14, 14}}, 112, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 22500800.},
/* GFLOPS 0.022 x 1 = 0.022 */ {{1, 1}, {{1, 508, 14, 14}}, 112, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 22325184.},
/* GFLOPS 0.021 x 1 = 0.021 */ {{3, 3}, {{1, 128, 12, 12}}, 256, 1, {2, 2}, {1, 1}, {1, 1}, {0, 0}, "", true, 21242880.},
/* GFLOPS 0.021 x 1 = 0.021 */ {{1, 1}, {{1, 416, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 20898304.},
/* GFLOPS 0.021 x 1 = 0.021 */ {{1, 1}, {{1, 832, 7, 7}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 20885760.},
/* GFLOPS 0.021 x 1 = 0.021 */ {{1, 1}, {{1, 832, 7, 7}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 20885760.},
/* GFLOPS 0.010 x 2 = 0.021 */ {{1, 1}, {{1, 832, 7, 7}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 10442880.},
/* GFLOPS 0.010 x 2 = 0.021 */ {{1, 1}, {{1, 832, 7, 7}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 10442880.},
/* GFLOPS 0.010 x 2 = 0.020 */ {{3, 3}, {{1, 256, 2, 2}}, 546, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 10066056.},
/* GFLOPS 0.020 x 1 = 0.020 */ {{5, 5}, {{1, 16, 28, 28}}, 32, 1, {1, 1}, {1, 1}, {2, 2}, {0, 0}, "", true, 20095488.},
/* GFLOPS 0.020 x 1 = 0.020 */ {{5, 5}, {{1, 16, 28, 28}}, 32, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 20095488.},
/* GFLOPS 0.020 x 1 = 0.020 */ {{5, 5}, {{1, 32, 14, 14}}, 64, 1, {1, 1}, {1, 1}, {2, 2}, {0, 0}, "", true, 20082944.},
/* GFLOPS 0.020 x 1 = 0.020 */ {{5, 5}, {{1, 32, 14, 14}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 20082944.},
/* GFLOPS 0.020 x 1 = 0.020 */ {{3, 3}, {{1, 256, 19, 19}}, 12, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 19966188.},
/* GFLOPS 0.019 x 1 = 0.019 */ {{1, 1}, {{1, 192, 28, 28}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 19317760.},
/* GFLOPS 0.019 x 1 = 0.019 */ {{1, 1}, {{1, 192, 28, 28}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 19317760.},
/* GFLOPS 0.019 x 1 = 0.019 */ {{1, 1}, {{1, 384, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 19292672.},
/* GFLOPS 0.018 x 1 = 0.018 */ {{1, 1}, {{1, 576, 10, 10}}, 160, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 18448000.},
/* GFLOPS 0.018 x 1 = 0.018 */ {{1, 1}, {{1, 480, 14, 14}}, 96, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 18082176.},
/* GFLOPS 0.018 x 1 = 0.018 */ {{1, 1}, {{1, 480, 14, 14}}, 96, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 18082176.},
/* GFLOPS 0.018 x 1 = 0.018 */ {{1, 1}, {{1, 192, 38, 38}}, 32, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 17790080.},
/* GFLOPS 0.018 x 1 = 0.018 */ {{1, 1}, {{1, 352, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 17687040.},
/* GFLOPS 0.017 x 1 = 0.017 */ {{2, 2}, {{1, 16, 128, 256}}, 16, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "", false, 16908288.},
/* GFLOPS 0.016 x 1 = 0.016 */ {{1, 1}, {{1, 320, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 16081408.},
/* GFLOPS 0.016 x 1 = 0.016 */ {{1, 1}, {{1, 832, 7, 7}}, 192, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 15664320.},
/* GFLOPS 0.016 x 1 = 0.016 */ {{1, 1}, {{1, 832, 7, 7}}, 192, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 15664320.},
/* GFLOPS 0.015 x 1 = 0.015 */ {{5, 5}, {{1, 48, 7, 7}}, 128, 1, {1, 1}, {1, 1}, {2, 2}, {0, 0}, "", true, 15059072.},
/* GFLOPS 0.015 x 1 = 0.015 */ {{5, 5}, {{1, 32, 12, 12}}, 64, 1, {1, 1}, {1, 1}, {2, 2}, {0, 0}, "", true, 14754816.},
/* GFLOPS 0.014 x 1 = 0.014 */ {{1, 1}, {{1, 288, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 14475776.},
/* GFLOPS 0.014 x 1 = 0.014 */ {{1, 1}, {{1, 512, 5, 5}}, 546, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 13991250.},
/* GFLOPS 0.013 x 1 = 0.013 */ {{1, 1}, {{1, 144, 38, 38}}, 32, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 13354112.},
/* GFLOPS 0.007 x 2 = 0.013 */ {{1, 1}, {{1, 16, 56, 56}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 6623232.},
/* GFLOPS 0.013 x 1 = 0.013 */ {{1, 1}, {{1, 832, 7, 7}}, 160, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 13053600.},
/* GFLOPS 0.013 x 1 = 0.013 */ {{1, 1}, {{1, 832, 7, 7}}, 160, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 13053600.},
/* GFLOPS 0.007 x 2 = 0.013 */ {{1, 1}, {{1, 32, 28, 28}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 6522880.},
/* GFLOPS 0.006 x 2 = 0.013 */ {{1, 1}, {{1, 64, 14, 14}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 6472704.},
/* GFLOPS 0.013 x 1 = 0.013 */ {{1, 1}, {{1, 128, 56, 56}}, 16, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 12895232.},
/* GFLOPS 0.013 x 1 = 0.013 */ {{1, 1}, {{1, 256, 28, 28}}, 32, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 12870144.},
/* GFLOPS 0.013 x 1 = 0.013 */ {{1, 1}, {{1, 256, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 12870144.},
/* GFLOPS 0.013 x 1 = 0.013 */ {{1, 1}, {{1, 508, 14, 14}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 12757248.},
/* GFLOPS 0.012 x 1 = 0.012 */ {{1, 1}, {{1, 992, 7, 7}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 12449920.},
/* GFLOPS 0.012 x 1 = 0.012 */ {{1, 1}, {{1, 480, 14, 14}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 12054784.},
/* GFLOPS 0.012 x 1 = 0.012 */ {{1, 1}, {{1, 480, 14, 14}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 12054784.},
/* GFLOPS 0.012 x 1 = 0.012 */ {{1, 1}, {{1, 960, 7, 7}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 12048512.},
/* GFLOPS 0.012 x 1 = 0.012 */ {{1, 1}, {{1, 32, 75, 75}}, 128, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "", false, 12014080.},
/* GFLOPS 0.012 x 1 = 0.012 */ {{3, 3}, {{1, 96, 6, 6}}, 192, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 11950848.},
/* GFLOPS 0.006 x 2 = 0.012 */ {{3, 3}, {{1, 96, 3, 3}}, 384, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 5975424.},
/* GFLOPS 0.012 x 1 = 0.012 */ {{1, 1}, {{1, 320, 12, 12}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 11814912.},
/* GFLOPS 0.012 x 1 = 0.012 */ {{1, 1}, {{1, 640, 6, 6}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 11805696.},
/* GFLOPS 0.012 x 1 = 0.012 */ {{1, 1}, {{1, 928, 7, 7}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 11647104.},
/* GFLOPS 0.011 x 1 = 0.011 */ {{1, 1}, {{1, 896, 7, 7}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 11245696.},
/* GFLOPS 0.011 x 1 = 0.011 */ {{3, 3}, {{1, 256, 10, 10}}, 24, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 11061600.},
/* GFLOPS 0.006 x 2 = 0.011 */ {{3, 3}, {{1, 512, 5, 5}}, 24, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 5530200.},
/* GFLOPS 0.011 x 1 = 0.011 */ {{1, 1}, {{1, 864, 7, 7}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 10844288.},
/* GFLOPS 0.010 x 1 = 0.010 */ {{1, 1}, {{1, 832, 7, 7}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 10442880.},
/* GFLOPS 0.010 x 1 = 0.010 */ {{5, 5}, {{1, 32, 7, 7}}, 128, 1, {1, 1}, {1, 1}, {2, 2}, {0, 0}, "", true, 10041472.},
/* GFLOPS 0.010 x 1 = 0.010 */ {{1, 1}, {{1, 800, 7, 7}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 10041472.},
/* GFLOPS 0.010 x 1 = 0.010 */ {{1, 1}, {{1, 192, 28, 28}}, 32, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 9658880.},
/* GFLOPS 0.010 x 1 = 0.010 */ {{1, 1}, {{1, 192, 28, 28}}, 32, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 9658880.},
/* GFLOPS 0.010 x 1 = 0.010 */ {{1, 1}, {{1, 384, 14, 14}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 9646336.},
/* GFLOPS 0.005 x 2 = 0.010 */ {{1, 1}, {{1, 512, 14, 14}}, 24, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 4821600.},
/* GFLOPS 0.010 x 1 = 0.010 */ {{1, 1}, {{1, 768, 7, 7}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 9640064.},
/* GFLOPS 0.010 x 1 = 0.010 */ {{3, 3}, {{1, 4, 128, 256}}, 4, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 9568256.},
/* GFLOPS 0.005 x 2 = 0.009 */ {{1, 1}, {{1, 4, 128, 256}}, 16, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 4718592.},
/* GFLOPS 0.009 x 1 = 0.009 */ {{1, 1}, {{1, 736, 7, 7}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 9238656.},
/* GFLOPS 0.009 x 1 = 0.009 */ {{1, 1}, {{1, 192, 19, 19}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 8895040.},
/* GFLOPS 0.009 x 1 = 0.009 */ {{1, 1}, {{1, 704, 7, 7}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 8837248.},
/* GFLOPS 0.008 x 1 = 0.008 */ {{1, 1}, {{1, 672, 7, 7}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 8435840.},
/* GFLOPS 0.008 x 1 = 0.008 */ {{1, 1}, {{1, 128, 32, 64}}, 16, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 8421376.},
/* GFLOPS 0.008 x 1 = 0.008 */ {{1, 1}, {{1, 640, 7, 7}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 8034432.},
/* GFLOPS 0.004 x 2 = 0.008 */ {{1, 1}, {{1, 832, 7, 7}}, 48, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 3916080.},
/* GFLOPS 0.008 x 1 = 0.008 */ {{1, 1}, {{1, 608, 7, 7}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 7633024.},
/* GFLOPS 0.008 x 1 = 0.008 */ {{5, 5}, {{1, 16, 14, 14}}, 48, 1, {1, 1}, {1, 1}, {2, 2}, {0, 0}, "", true, 7535808.},
/* GFLOPS 0.008 x 1 = 0.008 */ {{5, 5}, {{1, 16, 14, 14}}, 48, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 7535808.},
/* GFLOPS 0.004 x 2 = 0.007 */ {{3, 3}, {{1, 64, 5, 5}}, 128, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 3689600.},
/* GFLOPS 0.007 x 1 = 0.007 */ {{1, 1}, {{1, 640, 6, 6}}, 160, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 7378560.},
/* GFLOPS 0.004 x 2 = 0.007 */ {{1, 1}, {{1, 48, 14, 14}}, 192, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 3650304.},
/* GFLOPS 0.007 x 1 = 0.007 */ {{1, 1}, {{1, 384, 14, 14}}, 48, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 7234752.},
/* GFLOPS 0.007 x 1 = 0.007 */ {{1, 1}, {{1, 576, 7, 7}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 7231616.},
/* GFLOPS 0.007 x 1 = 0.007 */ {{1, 1}, {{1, 256, 12, 12}}, 96, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 7091712.},
/* GFLOPS 0.007 x 1 = 0.007 */ {{1, 1}, {{1, 544, 7, 7}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 6830208.},
/* GFLOPS 0.007 x 1 = 0.007 */ {{3, 3}, {{1, 160, 6, 6}}, 256, 1, {2, 2}, {1, 1}, {1, 1}, {0, 0}, "", true, 6637824.},
/* GFLOPS 0.007 x 1 = 0.007 */ {{1, 1}, {{1, 528, 14, 14}}, 32, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 6629504.},
/* GFLOPS 0.007 x 1 = 0.007 */ {{1, 1}, {{1, 528, 14, 14}}, 32, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 6629504.},
/* GFLOPS 0.007 x 1 = 0.007 */ {{1, 1}, {{1, 256, 5, 5}}, 512, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 6566400.},
/* GFLOPS 0.003 x 2 = 0.007 */ {{1, 1}, {{1, 512, 5, 5}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 3280000.},
/* GFLOPS 0.006 x 1 = 0.006 */ {{1, 1}, {{1, 64, 56, 56}}, 16, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 6472704.},
/* GFLOPS 0.006 x 1 = 0.006 */ {{1, 1}, {{1, 128, 28, 28}}, 32, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 6447616.},
/* GFLOPS 0.006 x 1 = 0.006 */ {{1, 1}, {{1, 512, 7, 7}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 6428800.},
/* GFLOPS 0.006 x 1 = 0.006 */ {{1, 1}, {{1, 512, 14, 14}}, 32, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 6428800.},
/* GFLOPS 0.006 x 1 = 0.006 */ {{1, 1}, {{1, 512, 14, 14}}, 32, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 6428800.},
/* GFLOPS 0.006 x 1 = 0.006 */ {{3, 3}, {{1, 256, 10, 10}}, 12, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 5530800.},
/* GFLOPS 0.005 x 1 = 0.005 */ {{1, 1}, {{1, 192, 12, 12}}, 96, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 5322240.},
/* GFLOPS 0.005 x 1 = 0.005 */ {{3, 3}, {{1, 128, 5, 5}}, 256, 1, {2, 2}, {1, 1}, {1, 1}, {0, 0}, "", true, 5310720.},
/* GFLOPS 0.005 x 1 = 0.005 */ {{3, 3}, {{1, 128, 5, 5}}, 256, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 5310720.},
/* GFLOPS 0.005 x 1 = 0.005 */ {{3, 3}, {{1, 128, 5, 5}}, 256, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 5310720.},
/* GFLOPS 0.005 x 1 = 0.005 */ {{1, 1}, {{1, 1024, 10, 10}}, 24, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 4917600.},
/* GFLOPS 0.005 x 1 = 0.005 */ {{1, 1}, {{1, 1024, 10, 10}}, 24, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 4917600.},
/* GFLOPS 0.005 x 1 = 0.005 */ {{1, 1}, {{1, 192, 28, 28}}, 16, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 4829440.},
/* GFLOPS 0.005 x 1 = 0.005 */ {{1, 1}, {{1, 192, 28, 28}}, 16, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 4829440.},
/* GFLOPS 0.005 x 1 = 0.005 */ {{1, 1}, {{1, 256, 14, 14}}, 48, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 4826304.},
/* GFLOPS 0.005 x 1 = 0.005 */ {{1, 1}, {{1, 512, 14, 14}}, 24, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 4821600.},
/* GFLOPS 0.005 x 1 = 0.005 */ {{1, 1}, {{1, 508, 14, 14}}, 24, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 4783968.},
/* GFLOPS 0.005 x 1 = 0.005 */ {{1, 1}, {{1, 64, 24, 24}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 4755456.},
/* GFLOPS 0.005 x 1 = 0.005 */ {{1, 1}, {{1, 256, 12, 12}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 4727808.},
/* GFLOPS 0.005 x 1 = 0.005 */ {{1, 1}, {{1, 1024, 3, 3}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 4720896.},
/* GFLOPS 0.004 x 1 = 0.004 */ {{1, 1}, {{1, 512, 19, 19}}, 12, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 4440300.},
/* GFLOPS 0.004 x 1 = 0.004 */ {{1, 1}, {{1, 512, 19, 19}}, 12, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 4440300.},
/* GFLOPS 0.004 x 1 = 0.004 */ {{1, 1}, {{1, 640, 6, 6}}, 96, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 4427136.},
/* GFLOPS 0.004 x 1 = 0.004 */ {{1, 1}, {{1, 16, 128, 256}}, 4, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 4325376.},
/* GFLOPS 0.004 x 1 = 0.004 */ {{1, 1}, {{1, 64, 64, 128}}, 4, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 4227072.},
/* GFLOPS 0.004 x 1 = 0.004 */ {{1, 1}, {{1, 832, 7, 7}}, 48, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 3916080.},
/* GFLOPS 0.004 x 1 = 0.004 */ {{5, 5}, {{1, 16, 12, 12}}, 32, 1, {1, 1}, {1, 1}, {2, 2}, {0, 0}, "", true, 3691008.},
/* GFLOPS 0.004 x 1 = 0.004 */ {{3, 3}, {{1, 64, 10, 10}}, 128, 1, {2, 2}, {1, 1}, {1, 1}, {0, 0}, "", true, 3689600.},
/* GFLOPS 0.004 x 1 = 0.004 */ {{5, 5}, {{1, 32, 6, 6}}, 64, 1, {1, 1}, {1, 1}, {2, 2}, {0, 0}, "", true, 3688704.},
/* GFLOPS 0.004 x 1 = 0.004 */ {{5, 5}, {{1, 32, 12, 12}}, 64, 1, {2, 2}, {1, 1}, {2, 2}, {0, 0}, "", true, 3688704.},
/* GFLOPS 0.004 x 1 = 0.004 */ {{5, 5}, {{1, 64, 6, 6}}, 128, 1, {2, 2}, {1, 1}, {2, 2}, {0, 0}, "", true, 3687552.},
/* GFLOPS 0.004 x 1 = 0.004 */ {{1, 1}, {{1, 192, 12, 12}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 3548160.},
/* GFLOPS 0.003 x 1 = 0.003 */ {{1, 1}, {{1, 736, 3, 3}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 3393792.},
/* GFLOPS 0.003 x 1 = 0.003 */ {{1, 1}, {{1, 256, 10, 10}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 3283200.},
/* GFLOPS 0.003 x 1 = 0.003 */ {{1, 1}, {{1, 512, 5, 5}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 3280000.},
/* GFLOPS 0.003 x 1 = 0.003 */ {{1, 1}, {{1, 512, 5, 5}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 3280000.},
/* GFLOPS 0.003 x 1 = 0.003 */ {{1, 1}, {{1, 512, 5, 5}}, 126, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 3228750.},
/* GFLOPS 0.003 x 1 = 0.003 */ {{1, 1}, {{1, 480, 14, 14}}, 16, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 3013696.},
/* GFLOPS 0.003 x 1 = 0.003 */ {{1, 1}, {{1, 480, 14, 14}}, 16, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 3013696.},
/* GFLOPS 0.003 x 1 = 0.003 */ {{1, 1}, {{1, 320, 12, 12}}, 32, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 2953728.},
/* GFLOPS 0.003 x 1 = 0.003 */ {{1, 1}, {{1, 640, 6, 6}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 2951424.},
/* GFLOPS 0.003 x 1 = 0.003 */ {{3, 3}, {{1, 128, 5, 5}}, 128, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 2655360.},
/* GFLOPS 0.003 x 1 = 0.003 */ {{1, 1}, {{1, 832, 7, 7}}, 32, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 2610720.},
/* GFLOPS 0.003 x 1 = 0.003 */ {{1, 1}, {{1, 256, 3, 3}}, 546, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 2520882.},
/* GFLOPS 0.001 x 2 = 0.003 */ {{3, 3}, {{1, 128, 1, 1}}, 546, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 1258530.},
/* GFLOPS 0.002 x 1 = 0.002 */ {{1, 1}, {{1, 256, 12, 12}}, 32, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 2363904.},
/* GFLOPS 0.002 x 1 = 0.002 */ {{3, 3}, {{1, 128, 3, 3}}, 256, 1, {2, 2}, {1, 1}, {1, 1}, {0, 0}, "", true, 2360320.},
/* GFLOPS 0.002 x 1 = 0.002 */ {{3, 3}, {{1, 128, 3, 3}}, 256, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 2360320.},
/* GFLOPS 0.002 x 1 = 0.002 */ {{3, 3}, {{1, 128, 3, 3}}, 256, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 2360320.},
/* GFLOPS 0.002 x 1 = 0.002 */ {{1, 1}, {{1, 528, 4, 4}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 2164736.},
/* GFLOPS 0.002 x 1 = 0.002 */ {{1, 1}, {{1, 508, 4, 4}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 2082816.},
/* GFLOPS 0.002 x 1 = 0.002 */ {{1, 1}, {{1, 1024, 1, 1}}, 1000, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 2049000.},
/* GFLOPS 0.001 x 2 = 0.002 */ {{3, 3}, {{1, 256, 3, 3}}, 24, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 995544.},
/* GFLOPS 0.001 x 2 = 0.002 */ {{3, 3}, {{1, 128, 5, 5}}, 16, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 922000.},
/* GFLOPS 0.002 x 1 = 0.002 */ {{1, 1}, {{1, 1024, 3, 3}}, 96, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 1770336.},
/* GFLOPS 0.001 x 1 = 0.001 */ {{1, 1}, {{1, 640, 6, 6}}, 32, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 1475712.},
/* GFLOPS 0.001 x 1 = 0.001 */ {{3, 3}, {{1, 128, 5, 5}}, 24, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 1383000.},
/* GFLOPS 0.001 x 1 = 0.001 */ {{1, 1}, {{1, 736, 3, 3}}, 96, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 1272672.},
/* GFLOPS 0.001 x 2 = 0.001 */ {{1, 1}, {{1, 256, 3, 3}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 590976.},
/* GFLOPS 0.001 x 1 = 0.001 */ {{3, 3}, {{1, 128, 3, 3}}, 128, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 1180160.},
/* GFLOPS 0.001 x 1 = 0.001 */ {{1, 1}, {{1, 256, 2, 2}}, 546, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 1120392.},
/* GFLOPS 0.000 x 2 = 0.001 */ {{3, 3}, {{1, 128, 5, 5}}, 8, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 461000.},
/* GFLOPS 0.001 x 1 = 0.001 */ {{1, 1}, {{1, 192, 12, 12}}, 16, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 887040.},
/* GFLOPS 0.000 x 2 = 0.001 */ {{3, 3}, {{1, 256, 2, 2}}, 24, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 442464.},
/* GFLOPS 0.000 x 2 = 0.001 */ {{1, 1}, {{1, 128, 5, 5}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 411200.},
/* GFLOPS 0.001 x 1 = 0.001 */ {{3, 3}, {{1, 128, 5, 5}}, 12, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 691500.},
/* GFLOPS 0.001 x 1 = 0.001 */ {{1, 1}, {{1, 640, 2, 2}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 655872.},
/* GFLOPS 0.001 x 1 = 0.001 */ {{1, 1}, {{1, 512, 5, 5}}, 24, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 615000.},
/* GFLOPS 0.001 x 1 = 0.001 */ {{1, 1}, {{1, 512, 5, 5}}, 24, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 615000.},
/* GFLOPS 0.001 x 1 = 0.001 */ {{1, 1}, {{1, 128, 3, 3}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 592128.},
/* GFLOPS 0.001 x 1 = 0.001 */ {{1, 1}, {{1, 256, 3, 3}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 590976.},
/* GFLOPS 0.001 x 1 = 0.001 */ {{1, 1}, {{1, 256, 3, 3}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 590976.},
/* GFLOPS 0.001 x 1 = 0.001 */ {{1, 1}, {{1, 256, 3, 3}}, 126, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 581742.},
/* GFLOPS 0.001 x 1 = 0.001 */ {{1, 1}, {{1, 256, 4, 4}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 525312.},
/* GFLOPS 0.000 x 1 = 0.000 */ {{1, 1}, {{1, 192, 5, 5}}, 32, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 308000.},
/* GFLOPS 0.000 x 1 = 0.000 */ {{1, 1}, {{1, 128, 2, 2}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 263168.},
/* GFLOPS 0.000 x 2 = 0.000 */ {{1, 1}, {{1, 256, 2, 2}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 131328.},
/* GFLOPS 0.000 x 1 = 0.000 */ {{1, 1}, {{1, 256, 2, 2}}, 126, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 258552.},
/* GFLOPS 0.000 x 1 = 0.000 */ {{1, 1}, {{1, 1024, 1, 1}}, 96, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 196704.},
/* GFLOPS 0.000 x 1 = 0.000 */ {{3, 3}, {{1, 64, 2, 2}}, 128, 1, {2, 2}, {1, 1}, {1, 1}, {0, 0}, "", true, 147584.},
/* GFLOPS 0.000 x 1 = 0.000 */ {{3, 3}, {{1, 64, 2, 2}}, 128, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 147584.},
/* GFLOPS 0.000 x 1 = 0.000 */ {{3, 3}, {{1, 64, 2, 2}}, 128, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 147584.},
/* GFLOPS 0.000 x 1 = 0.000 */ {{1, 1}, {{1, 736, 1, 1}}, 96, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 141408.},
/* GFLOPS 0.000 x 1 = 0.000 */ {{1, 1}, {{1, 128, 1, 1}}, 546, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 140322.},
/* GFLOPS 0.000 x 1 = 0.000 */ {{1, 1}, {{1, 256, 2, 2}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 131328.},
/* GFLOPS 0.000 x 1 = 0.000 */ {{1, 1}, {{1, 256, 2, 2}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 131328.},
/* GFLOPS 0.000 x 1 = 0.000 */ {{1, 1}, {{1, 256, 3, 3}}, 24, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 110808.},
/* GFLOPS 0.000 x 1 = 0.000 */ {{1, 1}, {{1, 256, 3, 3}}, 24, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 110808.},
/* GFLOPS 0.000 x 2 = 0.000 */ {{3, 3}, {{1, 128, 1, 1}}, 24, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 55320.},
/* GFLOPS 0.000 x 1 = 0.000 */ {{3, 3}, {{1, 64, 2, 2}}, 64, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 73792.},
/* GFLOPS 0.000 x 1 = 0.000 */ {{1, 1}, {{1, 256, 2, 2}}, 24, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 49248.},
/* GFLOPS 0.000 x 1 = 0.000 */ {{1, 1}, {{1, 256, 2, 2}}, 24, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 49248.},
/* GFLOPS 0.000 x 1 = 0.000 */ {{1, 1}, {{1, 128, 1, 1}}, 126, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 32382.},
/* GFLOPS 0.000 x 1 = 0.000 */ {{1, 1}, {{1, 64, 1, 1}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 16512.},
/* GFLOPS 0.000 x 1 = 0.000 */ {{1, 1}, {{1, 128, 1, 1}}, 24, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 6168.},
/* GFLOPS 0.000 x 1 = 0.000 */ {{1, 1}, {{1, 128, 1, 1}}, 24, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 6168.}
};
struct ConvParamID
{
int data[] = {count, nplanes, height, width};
return MatShape(data, data+4);
enum {
CONV_0 = 0,
CONV_100 = 100,
CONV_LAST = sizeof(testConvolutionConfigs) / sizeof(testConvolutionConfigs[0])
};
int val_; \
ConvParamID(int val = 0) : val_(val) {}
operator int() const { return val_; }
static ::testing::internal::ParamGenerator<ConvParamID> all()
{
#if 0
enum { NUM = (int)CONV_LAST };
#else
enum { NUM = (int)CONV_100 };
#endif
ConvParamID v_[NUM]; for (int i = 0; i < NUM; ++i) { v_[i] = ConvParamID(i); } // reduce generated code size
return ::testing::ValuesIn(v_, v_ + NUM);
}
}; \
static inline void PrintTo(const ConvParamID& v, std::ostream* os)
{
CV_Assert((int)v >= 0); CV_Assert((int)v < ConvParamID::CONV_LAST);
const ConvParam_t& p = testConvolutionConfigs[(int)v];
*os << "GFLOPS=" << cv::format("%.3f", p.declared_flops * 1e-9)
<< ", K=" << (Size)p.kernel
<< ", IN={" << p.shapeIn.dims[0] << ", " << p.shapeIn.dims[1] << ", " << p.shapeIn.dims[2] << ", " << p.shapeIn.dims[3] << "}"
<< ", OCN=" << p.outCN;
if (p.groups > 1)
*os << ", G=" << p.groups;
if (((Size)p.stride).area() != 1)
*os << ", S=" << ((Size)p.stride);
if (((Size)p.dilation).area() != 1)
*os << ", D=" << ((Size)p.dilation);
if (((Size)p.pad).area() != 0)
*os << ", P=" << ((Size)p.pad);
if (((Size)p.padAdjust).area() != 0)
*os << ", PAdj=" << ((Size)p.padAdjust);
if (!((std::string)p.padMode).empty())
*os << ", PM=" << ((std::string)p.padMode);
if (p.hasBias)
*os << ", BIAS";
}
PERF_TEST_P( ConvolutionPerfTest, perf, Combine(
Values(Size(1, 1), Size(3, 3), Size(5, 5), Size(11, 11)),
Values(make_pair(blobShape(1, 4, 224, 224), 64),
make_pair(blobShape(1, 64, 112, 122), 128),
make_pair(blobShape(1, 256, 28, 28), 512)),
GroupSize::all(),
StrideSize::all())
)
typedef tuple<ConvParamID, tuple<Backend, Target> > ConvTestParam_t;
typedef TestBaseWithParam<ConvTestParam_t> Conv;
PERF_TEST_P_(Conv, conv)
{
RNG rng(0);
int test_id = (int)get<0>(GetParam());
ASSERT_GE(test_id, 0); ASSERT_LT(test_id, ConvParamID::CONV_LAST);
const ConvParam_t& params = testConvolutionConfigs[test_id];
double declared_flops = params.declared_flops;
Size kernel = params.kernel;
MatShape inputShape = MatShape(params.shapeIn.dims, params.shapeIn.dims + 4);
int outChannels = params.outCN;
int groups = params.groups;
Size stride = params.stride;
Size dilation = params.dilation;
Size pad = params.pad;
Size padAdjust = params.padAdjust;
std::string padMode(params.padMode);
bool hasBias = params.hasBias;
Backend backendId = get<0>(get<1>(GetParam()));
Target targetId = get<1>(get<1>(GetParam()));
ConvParam params = GetParam();
int ksz = get<0>(params).width;
MatShape inpShape = get<1>(params).first;
int outCn = get<1>(params).second;
int groups = get<2>(params);
int stride = (ksz >= 11) ? 4 : (int)get<3>(params);
int inChannels = inputShape[1];
Size inSize(inputShape[3], inputShape[2]);
int inpCn = inpShape[1];
int wgtSize[] = { outCn, inpCn/groups, ksz, ksz };
int biasSize[] = { outCn, 1, 1, 1 };
const int wtype = CV_32F;
Mat wgtBlob(4, wgtSize, wtype), biasBlob(4, biasSize, wtype);
Mat inpBlob(4, &inpShape[0], wtype);
rng.fill(biasBlob, RNG::UNIFORM, -1, +1);
rng.fill(wgtBlob, RNG::UNIFORM, -1, +1);
rng.fill(inpBlob, RNG::UNIFORM, -1, +1);
int sz[] = {outChannels, inChannels / groups, kernel.height, kernel.width};
Mat weights(4, &sz[0], CV_32F);
randu(weights, -1.0f, 1.0f);
LayerParams lp;
lp.set("num_output", outCn);
lp.set("kernel_w", kernel.width);
lp.set("kernel_h", kernel.height);
lp.set("pad_w", pad.width);
lp.set("pad_h", pad.height);
if (padAdjust.width > 0 || padAdjust.height > 0)
{
lp.set("adj_w", padAdjust.width);
lp.set("adj_h", padAdjust.height);
}
if (!padMode.empty())
lp.set("pad_mode", padMode);
lp.set("stride_w", stride.width);
lp.set("stride_h", stride.height);
lp.set("dilation_w", dilation.width);
lp.set("dilation_h", dilation.height);
lp.set("num_output", outChannels);
lp.set("group", groups);
lp.set("stride", stride);
lp.set("kernel_size", ksz);
lp.blobs.reserve(2);
lp.blobs.push_back(wgtBlob);
lp.blobs.push_back(biasBlob);
std::vector<Mat*> inpBlobs(1, &inpBlob);
std::vector<Mat> outBlobs, internalBlobs;
Ptr<Layer> layer = cv::dnn::LayerFactory::createLayerInstance("Convolution", lp);
std::vector<MatShape> inputShapes(1, shape(inpBlob)), outShapes, internals;
layer->getMemoryShapes(inputShapes, 0, outShapes, internals);
for (size_t i = 0; i < outShapes.size(); i++)
lp.set("bias_term", hasBias);
lp.type = "Convolution";
lp.name = "testLayer";
lp.blobs.push_back(weights);
if (hasBias)
{
outBlobs.push_back(Mat(outShapes[i], CV_32F));
Mat bias(1, outChannels, CV_32F);
randu(bias, -1.0f, 1.0f);
lp.blobs.push_back(bias);
}
for (size_t i = 0; i < internals.size(); i++)
int inpSz[] = {1, inChannels, inSize.height, inSize.width};
Mat input(4, &inpSz[0], CV_32F);
randu(input, -1.0f, 1.0f);
Net net;
net.addLayerToPrev(lp.name, lp.type, lp);
net.setInput(input);
net.setPreferableBackend(backendId);
net.setPreferableTarget(targetId);
// warmup
Mat output = net.forward();
MatShape netInputShape = shape(input);
size_t weightsMemory = 0, blobsMemory = 0;
net.getMemoryConsumption(netInputShape, weightsMemory, blobsMemory);
int64 flops = net.getFLOPS(netInputShape);
CV_Assert(flops > 0);
std::cout
<< "IN=" << divUp(input.total() * input.elemSize(), 1u<<10) << " Kb " << netInputShape
<< " OUT=" << divUp(output.total() * output.elemSize(), 1u<<10) << " Kb " << shape(output)
<< " Weights(parameters): " << divUp(weightsMemory, 1u<<10) << " Kb"
<< " MFLOPS=" << flops * 1e-6 << std::endl;
TEST_CYCLE()
{
internalBlobs.push_back(Mat());
if (total(internals[i]))
internalBlobs.back().create(internals[i], CV_32F);
Mat res = net.forward();
}
layer->finalize(inpBlobs, outBlobs);
Mat inpBlob2D = inpBlob.reshape(1, outCn);
Mat wgtBlob2D = wgtBlob.reshape(1, outCn*(inpCn/groups));
Mat outBlob2D = outBlobs[0].reshape(1, outBlobs[0].size[0]);
declare.in(inpBlob2D, wgtBlob2D, WARMUP_RNG).out(outBlob2D);
layer->forward(inpBlobs, outBlobs, internalBlobs); /// warmup
PERF_SAMPLE_BEGIN()
layer->forward(inpBlobs, outBlobs, internalBlobs);
PERF_SAMPLE_END()
EXPECT_NEAR(flops, declared_flops, declared_flops * 1e-6);
SANITY_CHECK_NOTHING();
}
INSTANTIATE_TEST_CASE_P(/**/, Conv, Combine(
ConvParamID::all(),
dnnBackendsAndTargets(false, false) // defined in ../test/test_common.hpp
));
} // namespace

View File

@ -14,10 +14,7 @@
namespace opencv_test {
CV_ENUM(DNNBackend, DNN_BACKEND_DEFAULT, DNN_BACKEND_HALIDE, DNN_BACKEND_INFERENCE_ENGINE, DNN_BACKEND_OPENCV)
CV_ENUM(DNNTarget, DNN_TARGET_CPU, DNN_TARGET_OPENCL, DNN_TARGET_OPENCL_FP16, DNN_TARGET_MYRIAD)
class DNNTestNetwork : public ::perf::TestBaseWithParam< tuple<DNNBackend, DNNTarget> >
class DNNTestNetwork : public ::perf::TestBaseWithParam< tuple<Backend, Target> >
{
public:
dnn::Backend backend;
@ -269,22 +266,6 @@ PERF_TEST_P_(DNNTestNetwork, Inception_v2_Faster_RCNN)
Mat(cv::Size(800, 600), CV_32FC3));
}
const tuple<DNNBackend, DNNTarget> testCases[] = {
#ifdef HAVE_HALIDE
tuple<DNNBackend, DNNTarget>(DNN_BACKEND_HALIDE, DNN_TARGET_CPU),
tuple<DNNBackend, DNNTarget>(DNN_BACKEND_HALIDE, DNN_TARGET_OPENCL),
#endif
#ifdef HAVE_INF_ENGINE
tuple<DNNBackend, DNNTarget>(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_CPU),
tuple<DNNBackend, DNNTarget>(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_OPENCL),
tuple<DNNBackend, DNNTarget>(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_OPENCL_FP16),
tuple<DNNBackend, DNNTarget>(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_MYRIAD),
#endif
tuple<DNNBackend, DNNTarget>(DNN_BACKEND_OPENCV, DNN_TARGET_CPU),
tuple<DNNBackend, DNNTarget>(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL),
tuple<DNNBackend, DNNTarget>(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL_FP16)
};
INSTANTIATE_TEST_CASE_P(/*nothing*/, DNNTestNetwork, testing::ValuesIn(testCases));
INSTANTIATE_TEST_CASE_P(/*nothing*/, DNNTestNetwork, dnnBackendsAndTargets());
} // namespace

View File

@ -4,6 +4,8 @@
#include <opencv2/ts.hpp>
#include <opencv2/dnn.hpp>
#include "../test/test_common.hpp"
namespace opencv_test {
using namespace perf;
using namespace cv::dnn;

View File

@ -1676,14 +1676,6 @@ struct Net::Impl
// with the current layer if they follow it. Normally, the are fused with the convolution layer,
// but some of them (like activation) may be fused with fully-connected, elemwise (+) and
// some other layers.
// TODO: OpenCL target support more fusion styles.
if ( preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget) &&
(!cv::ocl::useOpenCL() || (ld.layerInstance->type != "Convolution" &&
ld.layerInstance->type != "MVN" && ld.layerInstance->type != "Pooling" &&
ld.layerInstance->type != "Concat")) )
continue;
Ptr<Layer>& currLayer = ld.layerInstance;
if( ld.consumers.size() == 1 && pinsToKeep.count(LayerPin(lid, 0)) == 0 )
{
@ -1717,6 +1709,13 @@ struct Net::Impl
if (preferableBackend != DNN_BACKEND_OPENCV)
continue; // Go to the next layer.
// TODO: OpenCL target support more fusion styles.
if ( preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget) &&
(!cv::ocl::useOpenCL() || (ld.layerInstance->type != "Convolution" &&
ld.layerInstance->type != "MVN" && ld.layerInstance->type != "Pooling" &&
ld.layerInstance->type != "Concat")) )
continue;
while (nextData)
{
// For now, OpenCL target support fusion with activation of ReLU/ChannelsPReLU/Power/Tanh
@ -2693,8 +2692,7 @@ void Net::setInput(InputArray blob, const String& name, double scalefactor, cons
Mat Net::getParam(LayerId layer, int numParam)
{
LayerData &ld = impl->getLayerData(layer);
std::vector<Mat> &layerBlobs = ld.layerInstance->blobs;
std::vector<Mat> &layerBlobs = ld.getLayerInstance()->blobs;
CV_Assert(numParam < (int)layerBlobs.size());
return layerBlobs[numParam];
}
@ -2703,7 +2701,7 @@ void Net::setParam(LayerId layer, int numParam, const Mat &blob)
{
LayerData &ld = impl->getLayerData(layer);
std::vector<Mat> &layerBlobs = ld.layerInstance->blobs;
std::vector<Mat> &layerBlobs = ld.getLayerInstance()->blobs;
CV_Assert(numParam < (int)layerBlobs.size());
//we don't make strong checks, use this function carefully
layerBlobs[numParam] = blob;

View File

@ -350,12 +350,14 @@ public:
return false;
}
void fuseWeights(const Mat& w, const Mat& b)
void fuseWeights(const Mat& w_, const Mat& b_)
{
// Convolution weights have OIHW data layout. Parameters fusion in case of
// (conv(I) + b1 ) * w + b2
// means to replace convolution's weights to [w*conv(I)] and bias to [b1 * w + b2]
const int outCn = weightsMat.size[0];
Mat w = w_.total() == 1 ? Mat(1, outCn, CV_32F, Scalar(w_.at<float>(0))) : w_;
Mat b = b_.total() == 1 ? Mat(1, outCn, CV_32F, Scalar(b_.at<float>(0))) : b_;
CV_Assert_N(!weightsMat.empty(), biasvec.size() == outCn + 2,
w.empty() || outCn == w.total(), b.empty() || outCn == b.total());

View File

@ -41,6 +41,7 @@
//M*/
#include "../precomp.hpp"
#include "../op_inf_engine.hpp"
#include "layers_common.hpp"
namespace cv
@ -64,6 +65,12 @@ public:
}
}
virtual bool supportBackend(int backendId) CV_OVERRIDE
{
return backendId == DNN_BACKEND_OPENCV ||
backendId == DNN_BACKEND_INFERENCE_ENGINE && crop_ranges.size() == 4;
}
bool getMemoryShapes(const std::vector<MatShape> &inputs,
const int requiredOutputs,
std::vector<MatShape> &outputs,
@ -109,7 +116,11 @@ public:
offset_final[i] = offset[i - start_axis];
}
crop_ranges.resize(dims, Range::all());
crop_ranges.resize(dims);
for (int i = 0; i < start_axis; i++)
{
crop_ranges[i] = Range(0, inpBlob.size[i]);
}
for (int i = start_axis; i < dims; i++)
{
if (offset_final[i] < 0 || offset_final[i] + inpSzBlob.size[i] > inpBlob.size[i])
@ -138,6 +149,38 @@ public:
input(&crop_ranges[0]).copyTo(output);
}
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
{
#ifdef HAVE_INF_ENGINE
InferenceEngine::LayerParams lp;
lp.name = name;
lp.type = "Crop";
lp.precision = InferenceEngine::Precision::FP32;
std::shared_ptr<InferenceEngine::CropLayer> ieLayer(new InferenceEngine::CropLayer(lp));
CV_Assert(crop_ranges.size() == 4);
ieLayer->axis.push_back(0); // batch
ieLayer->offset.push_back(crop_ranges[0].start);
ieLayer->dim.push_back(crop_ranges[0].end - crop_ranges[0].start);
ieLayer->axis.push_back(1); // channels
ieLayer->offset.push_back(crop_ranges[1].start);
ieLayer->dim.push_back(crop_ranges[1].end - crop_ranges[1].start);
ieLayer->axis.push_back(3); // height
ieLayer->offset.push_back(crop_ranges[2].start);
ieLayer->dim.push_back(crop_ranges[2].end - crop_ranges[2].start);
ieLayer->axis.push_back(2); // width
ieLayer->offset.push_back(crop_ranges[3].start);
ieLayer->dim.push_back(crop_ranges[3].end - crop_ranges[3].start);
return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
#endif // HAVE_INF_ENGINE
return Ptr<BackendNode>();
}
std::vector<Range> crop_ranges;
};

View File

@ -161,6 +161,16 @@ public:
return Ptr<BackendNode>();
}
virtual bool tryFuse(Ptr<dnn::Layer>& top) CV_OVERRIDE
{
return func.tryFuse(top);
}
void getScaleShift(Mat& scale_, Mat& shift_) const CV_OVERRIDE
{
func.getScaleShift(scale_, shift_);
}
bool getMemoryShapes(const std::vector<MatShape> &inputs,
const int requiredOutputs,
std::vector<MatShape> &outputs,
@ -343,6 +353,10 @@ struct ReLUFunctor
}
#endif // HAVE_INF_ENGINE
bool tryFuse(Ptr<dnn::Layer>&) { return false; }
void getScaleShift(Mat&, Mat&) const {}
int64 getFLOPSPerElement() const { return 1; }
};
@ -448,6 +462,10 @@ struct ReLU6Functor
}
#endif // HAVE_INF_ENGINE
bool tryFuse(Ptr<dnn::Layer>&) { return false; }
void getScaleShift(Mat&, Mat&) const {}
int64 getFLOPSPerElement() const { return 2; }
};
@ -518,6 +536,10 @@ struct TanHFunctor
}
#endif // HAVE_INF_ENGINE
bool tryFuse(Ptr<dnn::Layer>&) { return false; }
void getScaleShift(Mat&, Mat&) const {}
int64 getFLOPSPerElement() const { return 1; }
};
@ -588,6 +610,10 @@ struct SigmoidFunctor
}
#endif // HAVE_INF_ENGINE
bool tryFuse(Ptr<dnn::Layer>&) { return false; }
void getScaleShift(Mat&, Mat&) const {}
int64 getFLOPSPerElement() const { return 3; }
};
@ -659,6 +685,10 @@ struct ELUFunctor
}
#endif // HAVE_INF_ENGINE
bool tryFuse(Ptr<dnn::Layer>&) { return false; }
void getScaleShift(Mat&, Mat&) const {}
int64 getFLOPSPerElement() const { return 2; }
};
@ -727,6 +757,10 @@ struct AbsValFunctor
}
#endif // HAVE_INF_ENGINE
bool tryFuse(Ptr<dnn::Layer>&) { return false; }
void getScaleShift(Mat&, Mat&) const {}
int64 getFLOPSPerElement() const { return 1; }
};
@ -775,6 +809,10 @@ struct BNLLFunctor
}
#endif // HAVE_INF_ENGINE
bool tryFuse(Ptr<dnn::Layer>&) { return false; }
void getScaleShift(Mat&, Mat&) const {}
int64 getFLOPSPerElement() const { return 5; }
};
@ -875,15 +913,51 @@ struct PowerFunctor
#ifdef HAVE_INF_ENGINE
InferenceEngine::CNNLayerPtr initInfEngine(InferenceEngine::LayerParams& lp)
{
lp.type = "Power";
std::shared_ptr<InferenceEngine::PowerLayer> ieLayer(new InferenceEngine::PowerLayer(lp));
ieLayer->power = power;
ieLayer->scale = scale;
ieLayer->offset = shift;
return ieLayer;
if (power == 1.0f && scale == 1.0f && shift == 0.0f)
{
// It looks like there is a bug in Inference Engine for DNN_TARGET_OPENCL and DNN_TARGET_OPENCL_FP16
// if power layer do nothing so we replace it to Identity.
lp.type = "Split";
return std::shared_ptr<InferenceEngine::SplitLayer>(new InferenceEngine::SplitLayer(lp));
}
else
{
lp.type = "Power";
std::shared_ptr<InferenceEngine::PowerLayer> ieLayer(new InferenceEngine::PowerLayer(lp));
ieLayer->power = power;
ieLayer->scale = scale;
ieLayer->offset = shift;
return ieLayer;
}
}
#endif // HAVE_INF_ENGINE
bool tryFuse(Ptr<dnn::Layer>& top)
{
if (power != 1.0f && shift != 0.0f)
return false;
Mat w, b;
top->getScaleShift(w, b);
if ((w.empty() && b.empty()) || w.total() > 1 || b.total() > 1)
return false;
float nextScale = w.empty() ? 1.0f : w.at<float>(0);
float nextShift = b.empty() ? 0.0f : b.at<float>(0);
scale = std::pow(scale, power) * nextScale;
shift = nextScale * shift + nextShift;
return true;
}
void getScaleShift(Mat& _scale, Mat& _shift) const
{
if (power == 1.0f)
{
_scale = Mat(1, 1, CV_32F, Scalar(scale));
_shift = Mat(1, 1, CV_32F, Scalar(shift));
}
}
int64 getFLOPSPerElement() const { return power == 1 ? 2 : 10; }
};
@ -989,6 +1063,10 @@ struct ChannelsPReLUFunctor
}
#endif // HAVE_INF_ENGINE
bool tryFuse(Ptr<dnn::Layer>&) { return false; }
void getScaleShift(Mat&, Mat&) const {}
int64 getFLOPSPerElement() const { return 1; }
};

View File

@ -83,12 +83,6 @@ public:
int startAxis = clamp(_startAxis, numAxes);
int endAxis = clamp(_endAxis, numAxes);
for (size_t i = 1; i < inputs.size(); i++)
{
CV_Assert(inputs[i] == inputs[0]);
}
CV_Assert(startAxis >= 0);
CV_Assert(endAxis >= startAxis && endAxis < (int)numAxes);

View File

@ -350,17 +350,33 @@ public:
inshape = shape(outerSize, innerSize);
outshape = shape(outerSize, numOutput);
UMat srcMat, dstMat;
UMat srcMat, dstMat, srcMat_fp32, dstMat_fp32;
srcMat = inputs[i].reshape(1, inshape.size(), &inshape[0]);
dstMat = outputs[i].reshape(1, outshape.size(), &outshape[0]);
cv::gemm(srcMat, weights, 1, noArray(), 0, dstMat, GEMM_2_T);
if (use_half)
{
convertFp16(srcMat, srcMat_fp32);
convertFp16(dstMat, dstMat_fp32);
}
else
{
srcMat_fp32 = srcMat;
dstMat_fp32 = dstMat;
}
cv::gemm(srcMat_fp32, weights, 1, noArray(), 0, dstMat_fp32, GEMM_2_T);
if (bias)
{
UMat biasOnesMat = UMat::ones(outerSize, 1, umat_blobs[0].type());
UMat& biases = umat_blobs[1];
cv::gemm(biasOnesMat, biases, 1, dstMat, 1, dstMat, 0);
cv::gemm(biasOnesMat, biases, 1, dstMat_fp32, 1, dstMat_fp32, 0);
}
if (use_half)
{
convertFp16(srcMat_fp32, srcMat);
convertFp16(dstMat_fp32, dstMat);
}
}

View File

@ -453,8 +453,8 @@ public:
outputPtr = outputs[0].ptr<float>(0, 1);
if(_variance.size() == 1)
{
Mat secondChannel(outputs[0].size[2], outputs[0].size[3], CV_32F, outputPtr);
secondChannel.setTo(Scalar(_variance[0]));
Mat secondChannel(1, outputs[0].size[2], CV_32F, outputPtr);
secondChannel.setTo(Scalar::all(_variance[0]));
}
else
{

View File

@ -161,6 +161,7 @@ InfEngineBackendNet::InfEngineBackendNet(InferenceEngine::CNNNetwork& net)
inputs = net.getInputsInfo();
outputs = net.getOutputsInfo();
layers.resize(net.layerCount()); // A hack to execute InfEngineBackendNet::layerCount correctly.
netOwner = net;
}
void InfEngineBackendNet::Release() noexcept

View File

@ -131,6 +131,8 @@ private:
InferenceEngine::InferencePlugin plugin;
InferenceEngine::ExecutableNetwork netExec;
InferenceEngine::InferRequest infRequest;
// In case of models from Model Optimizer we need to manage their lifetime.
InferenceEngine::CNNNetwork netOwner;
std::string name;

View File

@ -782,6 +782,108 @@ void releaseTensor(tensorflow::TensorProto* tensor)
}
}
static void permute(google::protobuf::RepeatedPtrField<tensorflow::NodeDef>* data,
const std::vector<int>& indices)
{
const int num = data->size();
CV_Assert(num == indices.size());
std::vector<int> elemIdToPos(num);
std::vector<int> posToElemId(num);
for (int i = 0; i < num; ++i)
{
elemIdToPos[i] = i;
posToElemId[i] = i;
}
for (int i = 0; i < num; ++i)
{
int elemId = indices[i];
int pos = elemIdToPos[elemId];
if (pos != i)
{
data->SwapElements(i, pos);
const int swappedElemId = posToElemId[i];
elemIdToPos[elemId] = i;
elemIdToPos[swappedElemId] = pos;
posToElemId[i] = elemId;
posToElemId[pos] = swappedElemId;
}
}
}
// Is based on tensorflow::graph_transforms::SortByExecutionOrder
void sortByExecutionOrder(tensorflow::GraphDef& net)
{
// Maps node's name to index at net.node() list.
std::map<std::string, int> nodesMap;
std::map<std::string, int>::iterator nodesMapIt;
for (int i = 0; i < net.node_size(); ++i)
{
const tensorflow::NodeDef& node = net.node(i);
nodesMap.insert(std::make_pair(node.name(), i));
}
// Indices of nodes which use specific node as input.
std::vector<std::vector<int> > edges(nodesMap.size());
std::vector<int> numRefsToAdd(nodesMap.size(), 0);
std::vector<int> nodesToAdd;
for (int i = 0; i < net.node_size(); ++i)
{
const tensorflow::NodeDef& node = net.node(i);
for (int j = 0; j < node.input_size(); ++j)
{
std::string inpName = node.input(j);
inpName = inpName.substr(0, inpName.rfind(':'));
inpName = inpName.substr(inpName.find('^') + 1);
nodesMapIt = nodesMap.find(inpName);
CV_Assert(nodesMapIt != nodesMap.end());
edges[nodesMapIt->second].push_back(i);
}
if (node.input_size() == 0)
nodesToAdd.push_back(i);
else
{
if (node.op() == "Merge" || node.op() == "RefMerge")
{
int numControlEdges = 0;
for (int j = 0; j < node.input_size(); ++j)
numControlEdges += node.input(j)[0] == '^';
numRefsToAdd[i] = numControlEdges + 1;
}
else
numRefsToAdd[i] = node.input_size();
}
}
std::vector<int> permIds;
permIds.reserve(net.node_size());
while (!nodesToAdd.empty())
{
int nodeToAdd = nodesToAdd.back();
nodesToAdd.pop_back();
permIds.push_back(nodeToAdd);
// std::cout << net.node(nodeToAdd).name() << '\n';
for (int i = 0; i < edges[nodeToAdd].size(); ++i)
{
int consumerId = edges[nodeToAdd][i];
if (numRefsToAdd[consumerId] > 0)
{
if (numRefsToAdd[consumerId] == 1)
nodesToAdd.push_back(consumerId);
else
CV_Assert(numRefsToAdd[consumerId] >= 0);
numRefsToAdd[consumerId] -= 1;
}
}
}
CV_Assert(permIds.size() == net.node_size());
permute(net.mutable_node(), permIds);
}
CV__DNN_INLINE_NS_END
}} // namespace dnn, namespace cv

View File

@ -25,6 +25,8 @@ Mat getTensorContent(const tensorflow::TensorProto &tensor);
void releaseTensor(tensorflow::TensorProto* tensor);
void sortByExecutionOrder(tensorflow::GraphDef& net);
CV__DNN_INLINE_NS_END
}} // namespace dnn, namespace cv

View File

@ -1950,5 +1950,34 @@ Net readNetFromTensorflow(const std::vector<uchar>& bufferModel, const std::vect
bufferConfigPtr, bufferConfig.size());
}
void writeTextGraph(const String& _model, const String& output)
{
String model = _model;
const std::string modelExt = model.substr(model.rfind('.') + 1);
if (modelExt != "pb")
CV_Error(Error::StsNotImplemented, "Only TensorFlow models support export to text file");
tensorflow::GraphDef net;
ReadTFNetParamsFromBinaryFileOrDie(model.c_str(), &net);
sortByExecutionOrder(net);
RepeatedPtrField<tensorflow::NodeDef>::iterator it;
for (it = net.mutable_node()->begin(); it != net.mutable_node()->end(); ++it)
{
if (it->op() == "Const")
{
it->mutable_attr()->at("value").mutable_tensor()->clear_tensor_content();
}
}
std::string content;
google::protobuf::TextFormat::PrintToString(net, &content);
std::ofstream ofs(output.c_str());
ofs << content;
ofs.close();
}
CV__DNN_INLINE_NS_END
}} // namespace

View File

@ -161,7 +161,7 @@ TEST_P(DNNTestNetwork, MobileNet_SSD_v1_TensorFlow)
if (backend == DNN_BACKEND_HALIDE)
throw SkipTestException("");
Mat sample = imread(findDataFile("dnn/street.png", false));
Mat inp = blobFromImage(sample, 1.0f / 127.5, Size(300, 300), Scalar(127.5, 127.5, 127.5), false);
Mat inp = blobFromImage(sample, 1.0f, Size(300, 300), Scalar(), false);
float l1 = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.011 : 0.0;
float lInf = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.06 : 0.0;
processNet("dnn/ssd_mobilenet_v1_coco_2017_11_17.pb", "dnn/ssd_mobilenet_v1_coco_2017_11_17.pbtxt",
@ -173,7 +173,7 @@ TEST_P(DNNTestNetwork, MobileNet_SSD_v2_TensorFlow)
if (backend == DNN_BACKEND_HALIDE)
throw SkipTestException("");
Mat sample = imread(findDataFile("dnn/street.png", false));
Mat inp = blobFromImage(sample, 1.0f / 127.5, Size(300, 300), Scalar(127.5, 127.5, 127.5), false);
Mat inp = blobFromImage(sample, 1.0f, Size(300, 300), Scalar(), false);
float l1 = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.011 : 0.0;
float lInf = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.062 : 0.0;
processNet("dnn/ssd_mobilenet_v2_coco_2018_03_29.pb", "dnn/ssd_mobilenet_v2_coco_2018_03_29.pbtxt",
@ -247,8 +247,8 @@ TEST_P(DNNTestNetwork, Inception_v2_SSD_TensorFlow)
if (backend == DNN_BACKEND_HALIDE)
throw SkipTestException("");
Mat sample = imread(findDataFile("dnn/street.png", false));
Mat inp = blobFromImage(sample, 1.0f / 127.5, Size(300, 300), Scalar(127.5, 127.5, 127.5), false);
float l1 = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.008 : 0.0;
Mat inp = blobFromImage(sample, 1.0f, Size(300, 300), Scalar(), false);
float l1 = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.015 : 0.0;
float lInf = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.0731 : 0.0;
processNet("dnn/ssd_inception_v2_coco_2017_11_17.pb", "dnn/ssd_inception_v2_coco_2017_11_17.pbtxt",
inp, "detection_out", "", l1, lInf);
@ -285,21 +285,6 @@ TEST_P(DNNTestNetwork, FastNeuralStyle_eccv16)
processNet("dnn/fast_neural_style_eccv16_starry_night.t7", "", inp, "", "", l1, lInf);
}
const tuple<Backend, Target> testCases[] = {
#ifdef HAVE_HALIDE
tuple<Backend, Target>(DNN_BACKEND_HALIDE, DNN_TARGET_CPU),
tuple<Backend, Target>(DNN_BACKEND_HALIDE, DNN_TARGET_OPENCL),
#endif
#ifdef HAVE_INF_ENGINE
tuple<Backend, Target>(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_CPU),
tuple<Backend, Target>(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_OPENCL),
tuple<Backend, Target>(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_OPENCL_FP16),
tuple<Backend, Target>(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_MYRIAD),
#endif
tuple<Backend, Target>(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL),
tuple<Backend, Target>(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL_FP16)
};
INSTANTIATE_TEST_CASE_P(/*nothing*/, DNNTestNetwork, testing::ValuesIn(testCases));
INSTANTIATE_TEST_CASE_P(/*nothing*/, DNNTestNetwork, dnnBackendsAndTargets(true, true, false));
}} // namespace

View File

@ -417,7 +417,7 @@ TEST_P(Test_Caffe_nets, DenseNet_121)
float l1 = default_l1, lInf = default_lInf;
if (target == DNN_TARGET_OPENCL_FP16)
{
l1 = 0.017; lInf = 0.067;
l1 = 0.017; lInf = 0.0795;
}
else if (target == DNN_TARGET_MYRIAD)
{
@ -490,8 +490,7 @@ INSTANTIATE_TEST_CASE_P(Test_Caffe, opencv_face_detector,
TEST_P(Test_Caffe_nets, FasterRCNN_vgg16)
{
if ((backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD) ||
(backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16))
if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD)
throw SkipTestException("");
static Mat ref = (Mat_<float>(3, 7) << 0, 2, 0.949398, 99.2454, 210.141, 601.205, 462.849,
0, 7, 0.997022, 481.841, 92.3218, 722.685, 175.953,
@ -502,8 +501,7 @@ TEST_P(Test_Caffe_nets, FasterRCNN_vgg16)
TEST_P(Test_Caffe_nets, FasterRCNN_zf)
{
if ((backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_OPENCL_FP16) ||
(backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD) ||
(backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16))
(backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD))
throw SkipTestException("");
static Mat ref = (Mat_<float>(3, 7) << 0, 2, 0.90121, 120.407, 115.83, 570.586, 528.395,
0, 7, 0.988779, 469.849, 75.1756, 718.64, 186.762,
@ -514,12 +512,13 @@ TEST_P(Test_Caffe_nets, FasterRCNN_zf)
TEST_P(Test_Caffe_nets, RFCN)
{
if ((backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_OPENCL_FP16) ||
(backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD) ||
(backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16))
(backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD))
throw SkipTestException("");
double scoreDiff = (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16) ? 4e-3 : default_l1;
double iouDiff = (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16) ? 8e-2 : default_lInf;
static Mat ref = (Mat_<float>(2, 7) << 0, 7, 0.991359, 491.822, 81.1668, 702.573, 178.234,
0, 12, 0.94786, 132.093, 223.903, 338.077, 566.16);
testFaster("rfcn_pascal_voc_resnet50.prototxt", "resnet50_rfcn_final.caffemodel", ref);
testFaster("rfcn_pascal_voc_resnet50.prototxt", "resnet50_rfcn_final.caffemodel", ref, scoreDiff, iouDiff);
}
INSTANTIATE_TEST_CASE_P(/**/, Test_Caffe_nets, dnnBackendsAndTargets());

View File

@ -42,6 +42,47 @@
#ifndef __OPENCV_TEST_COMMON_HPP__
#define __OPENCV_TEST_COMMON_HPP__
#ifdef HAVE_OPENCL
#include "opencv2/core/ocl.hpp"
#endif
namespace cv { namespace dnn {
CV__DNN_INLINE_NS_BEGIN
static inline void PrintTo(const cv::dnn::Backend& v, std::ostream* os)
{
switch (v) {
case DNN_BACKEND_DEFAULT: *os << "DEFAULT"; return;
case DNN_BACKEND_HALIDE: *os << "HALIDE"; return;
case DNN_BACKEND_INFERENCE_ENGINE: *os << "DLIE"; return;
case DNN_BACKEND_OPENCV: *os << "OCV"; return;
} // don't use "default:" to emit compiler warnings
*os << "DNN_BACKEND_UNKNOWN(" << v << ")";
}
static inline void PrintTo(const cv::dnn::Target& v, std::ostream* os)
{
switch (v) {
case DNN_TARGET_CPU: *os << "CPU"; return;
case DNN_TARGET_OPENCL: *os << "OCL"; return;
case DNN_TARGET_OPENCL_FP16: *os << "OCL_FP16"; return;
case DNN_TARGET_MYRIAD: *os << "MYRIAD"; return;
} // don't use "default:" to emit compiler warnings
*os << "DNN_TARGET_UNKNOWN(" << v << ")";
}
using opencv_test::tuple;
using opencv_test::get;
static inline void PrintTo(const tuple<cv::dnn::Backend, cv::dnn::Target> v, std::ostream* os)
{
PrintTo(get<0>(v), os);
*os << "/";
PrintTo(get<1>(v), os);
}
CV__DNN_INLINE_NS_END
}} // namespace
static inline const std::string &getOpenCVExtraDir()
{
return cvtest::TS::ptr()->get_data_path();
@ -190,4 +231,54 @@ static inline bool readFileInMemory(const std::string& filename, std::string& co
return true;
}
namespace opencv_test {
using namespace cv::dnn;
static testing::internal::ParamGenerator<tuple<Backend, Target> > dnnBackendsAndTargets(
bool withInferenceEngine = true,
bool withHalide = false,
bool withCpuOCV = true
)
{
std::vector<tuple<Backend, Target> > targets;
#ifdef HAVE_HALIDE
if (withHalide)
{
targets.push_back(make_tuple(DNN_BACKEND_HALIDE, DNN_TARGET_CPU));
#ifdef HAVE_OPENCL
if (cv::ocl::useOpenCL())
targets.push_back(make_tuple(DNN_BACKEND_HALIDE, DNN_TARGET_OPENCL));
#endif
}
#endif
#ifdef HAVE_INF_ENGINE
if (withInferenceEngine)
{
targets.push_back(make_tuple(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_CPU));
#ifdef HAVE_OPENCL
if (cv::ocl::useOpenCL())
{
targets.push_back(make_tuple(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_OPENCL));
targets.push_back(make_tuple(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_OPENCL_FP16));
}
#endif
if (checkMyriadTarget())
targets.push_back(make_tuple(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_MYRIAD));
}
#endif
if (withCpuOCV)
targets.push_back(make_tuple(DNN_BACKEND_OPENCV, DNN_TARGET_CPU));
#ifdef HAVE_OPENCL
if (cv::ocl::useOpenCL())
{
targets.push_back(make_tuple(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL));
targets.push_back(make_tuple(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL_FP16));
}
#endif
return testing::ValuesIn(targets);
}
} // namespace
#endif

View File

@ -44,23 +44,9 @@ static void test(LayerParams& params, Mat& input, Backend backendId, Target targ
test(input, net, backendId, targetId, skipCheck);
}
static testing::internal::ParamGenerator<tuple<Backend, Target> > dnnBackendsAndTargetsWithHalide()
static inline testing::internal::ParamGenerator<tuple<Backend, Target> > dnnBackendsAndTargetsWithHalide()
{
static const tuple<Backend, Target> testCases[] = {
#ifdef HAVE_HALIDE
tuple<Backend, Target>(DNN_BACKEND_HALIDE, DNN_TARGET_CPU),
tuple<Backend, Target>(DNN_BACKEND_HALIDE, DNN_TARGET_OPENCL),
#endif
#ifdef HAVE_INF_ENGINE
tuple<Backend, Target>(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_CPU),
tuple<Backend, Target>(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_OPENCL),
tuple<Backend, Target>(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_OPENCL_FP16),
tuple<Backend, Target>(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_MYRIAD),
#endif
tuple<Backend, Target>(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL),
tuple<Backend, Target>(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL_FP16)
};
return testing::ValuesIn(testCases);
return dnnBackendsAndTargets(true, true, false); // OpenCV/CPU is used as reference
}
class Test_Halide_layers : public DNNTestLayer {};

View File

@ -177,10 +177,6 @@ TEST_P(DNNTestOpenVINO, models)
Target target = (dnn::Target)(int)get<0>(GetParam());
std::string modelName = get<1>(GetParam());
if ((modelName == "semantic-segmentation-adas-0001" && target == DNN_TARGET_OPENCL_FP16) ||
(modelName == "vehicle-license-plate-detection-barrier-0106"))
throw SkipTestException("");
std::string precision = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? "FP16" : "FP32";
std::string prefix = utils::fs::join("intel_models",
utils::fs::join(modelName,

View File

@ -49,35 +49,6 @@
#include "opencv2/dnn.hpp"
#include "test_common.hpp"
namespace cv {
namespace dnn {
CV__DNN_INLINE_NS_BEGIN
static inline void PrintTo(const cv::dnn::Backend& v, std::ostream* os)
{
switch (v) {
case DNN_BACKEND_DEFAULT: *os << "DNN_BACKEND_DEFAULT"; return;
case DNN_BACKEND_HALIDE: *os << "DNN_BACKEND_HALIDE"; return;
case DNN_BACKEND_INFERENCE_ENGINE: *os << "DNN_BACKEND_INFERENCE_ENGINE"; return;
case DNN_BACKEND_OPENCV: *os << "DNN_BACKEND_OPENCV"; return;
} // don't use "default:" to emit compiler warnings
*os << "DNN_BACKEND_UNKNOWN(" << v << ")";
}
static inline void PrintTo(const cv::dnn::Target& v, std::ostream* os)
{
switch (v) {
case DNN_TARGET_CPU: *os << "DNN_TARGET_CPU"; return;
case DNN_TARGET_OPENCL: *os << "DNN_TARGET_OPENCL"; return;
case DNN_TARGET_OPENCL_FP16: *os << "DNN_TARGET_OPENCL_FP16"; return;
case DNN_TARGET_MYRIAD: *os << "DNN_TARGET_MYRIAD"; return;
} // don't use "default:" to emit compiler warnings
*os << "DNN_TARGET_UNKNOWN(" << v << ")";
}
CV__DNN_INLINE_NS_END
}} // namespace
namespace opencv_test {
using namespace cv::dnn;
@ -95,22 +66,6 @@ static testing::internal::ParamGenerator<Target> availableDnnTargets()
return testing::ValuesIn(targets);
}
static testing::internal::ParamGenerator<tuple<Backend, Target> > dnnBackendsAndTargets()
{
static const tuple<Backend, Target> testCases[] = {
#ifdef HAVE_INF_ENGINE
tuple<Backend, Target>(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_CPU),
tuple<Backend, Target>(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_OPENCL),
tuple<Backend, Target>(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_OPENCL_FP16),
tuple<Backend, Target>(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_MYRIAD),
#endif
tuple<Backend, Target>(DNN_BACKEND_OPENCV, DNN_TARGET_CPU),
tuple<Backend, Target>(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL),
tuple<Backend, Target>(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL_FP16)
};
return testing::ValuesIn(testCases);
}
class DNNTestLayer : public TestWithParam<tuple<Backend, Target> >
{
public:

View File

@ -296,7 +296,7 @@ TEST_P(Test_TensorFlow_nets, Inception_v2_SSD)
Net net = readNetFromTensorflow(model, proto);
Mat img = imread(findDataFile("dnn/street.png", false));
Mat blob = blobFromImage(img, 1.0f / 127.5, Size(300, 300), Scalar(127.5, 127.5, 127.5), true, false);
Mat blob = blobFromImage(img, 1.0f, Size(300, 300), Scalar(), true, false);
net.setPreferableBackend(backend);
net.setPreferableTarget(target);
@ -310,32 +310,61 @@ TEST_P(Test_TensorFlow_nets, Inception_v2_SSD)
0, 3, 0.75838411, 0.44668293, 0.45907149, 0.49459291, 0.52197015,
0, 10, 0.95932811, 0.38349164, 0.32528657, 0.40387636, 0.39165527,
0, 10, 0.93973452, 0.66561931, 0.37841269, 0.68074018, 0.42907384);
double scoreDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 5e-3 : default_l1;
double scoreDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.0097 : default_l1;
double iouDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.09 : default_lInf;
normAssertDetections(ref, out, "", 0.5, scoreDiff, iouDiff);
}
TEST_P(Test_TensorFlow_nets, Inception_v2_Faster_RCNN)
TEST_P(Test_TensorFlow_nets, MobileNet_v1_SSD)
{
checkBackend();
std::string model = findDataFile("dnn/ssd_mobilenet_v1_coco_2017_11_17.pb", false);
std::string proto = findDataFile("dnn/ssd_mobilenet_v1_coco_2017_11_17.pbtxt", false);
Net net = readNetFromTensorflow(model, proto);
Mat img = imread(findDataFile("dnn/dog416.png", false));
Mat blob = blobFromImage(img, 1.0f, Size(300, 300), Scalar(), true, false);
net.setPreferableBackend(backend);
net.setPreferableTarget(target);
net.setInput(blob);
Mat out = net.forward();
Mat ref = blobFromNPY(findDataFile("dnn/tensorflow/ssd_mobilenet_v1_coco_2017_11_17.detection_out.npy"));
float scoreDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 7e-3 : 1e-5;
float iouDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.0098 : 1e-3;
normAssertDetections(ref, out, "", 0.3, scoreDiff, iouDiff);
}
TEST_P(Test_TensorFlow_nets, Faster_RCNN)
{
static std::string names[] = {"faster_rcnn_inception_v2_coco_2018_01_28",
"faster_rcnn_resnet50_coco_2018_01_28"};
checkBackend();
if ((backend == DNN_BACKEND_INFERENCE_ENGINE && target != DNN_TARGET_CPU) ||
(backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16))
throw SkipTestException("");
std::string proto = findDataFile("dnn/faster_rcnn_inception_v2_coco_2018_01_28.pbtxt", false);
std::string model = findDataFile("dnn/faster_rcnn_inception_v2_coco_2018_01_28.pb", false);
for (int i = 1; i < 2; ++i)
{
std::string proto = findDataFile("dnn/" + names[i] + ".pbtxt", false);
std::string model = findDataFile("dnn/" + names[i] + ".pb", false);
Net net = readNetFromTensorflow(model, proto);
net.setPreferableBackend(backend);
net.setPreferableTarget(target);
Mat img = imread(findDataFile("dnn/dog416.png", false));
Mat blob = blobFromImage(img, 1.0f / 127.5, Size(800, 600), Scalar(127.5, 127.5, 127.5), true, false);
Net net = readNetFromTensorflow(model, proto);
net.setPreferableBackend(backend);
net.setPreferableTarget(target);
Mat img = imread(findDataFile("dnn/dog416.png", false));
Mat blob = blobFromImage(img, 1.0f, Size(800, 600), Scalar(), true, false);
net.setInput(blob);
Mat out = net.forward();
net.setInput(blob);
Mat out = net.forward();
Mat ref = blobFromNPY(findDataFile("dnn/tensorflow/faster_rcnn_inception_v2_coco_2018_01_28.detection_out.npy"));
normAssertDetections(ref, out, "", 0.3);
Mat ref = blobFromNPY(findDataFile("dnn/tensorflow/" + names[i] + ".detection_out.npy"));
normAssertDetections(ref, out, names[i].c_str(), 0.3);
}
}
TEST_P(Test_TensorFlow_nets, MobileNet_v1_SSD_PPN)
@ -347,15 +376,17 @@ TEST_P(Test_TensorFlow_nets, MobileNet_v1_SSD_PPN)
Net net = readNetFromTensorflow(model, proto);
Mat img = imread(findDataFile("dnn/dog416.png", false));
Mat ref = blobFromNPY(findDataFile("dnn/tensorflow/ssd_mobilenet_v1_ppn_coco.detection_out.npy", false));
Mat blob = blobFromImage(img, 1.0f / 127.5, Size(300, 300), Scalar(127.5, 127.5, 127.5), true, false);
Mat blob = blobFromImage(img, 1.0f, Size(300, 300), Scalar(), true, false);
net.setPreferableBackend(backend);
net.setPreferableTarget(target);
net.setInput(blob);
Mat out = net.forward();
double scoreDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.006 : default_l1;
normAssertDetections(ref, out, "", 0.4, scoreDiff, default_lInf);
double scoreDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.011 : default_l1;
double iouDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.021 : default_lInf;
normAssertDetections(ref, out, "", 0.4, scoreDiff, iouDiff);
}
TEST_P(Test_TensorFlow_nets, opencv_face_detector_uint8)

View File

@ -301,14 +301,14 @@ TEST_P(Test_Torch_nets, ENet_accuracy)
// Due to numerical instability in Pooling-Unpooling layers (indexes jittering)
// thresholds for ENet must be changed. Accuracy of results was checked on
// Cityscapes dataset and difference in mIOU with Torch is 10E-4%
normAssert(ref, out, "", 0.00044, 0.44);
normAssert(ref, out, "", 0.00044, target == DNN_TARGET_CPU ? 0.453 : 0.44);
const int N = 3;
for (int i = 0; i < N; i++)
{
net.setInput(inputBlob, "");
Mat out = net.forward();
normAssert(ref, out, "", 0.00044, 0.44);
normAssert(ref, out, "", 0.00044, target == DNN_TARGET_CPU ? 0.453 : 0.44);
}
}

View File

@ -54,15 +54,21 @@
#include "opencv2/imgproc.hpp"
const size_t WEBP_HEADER_SIZE = 32;
#include <opencv2/core/utils/configuration.private.hpp>
namespace cv
{
// 64Mb limit to avoid memory DDOS
static size_t param_maxFileSize = utils::getConfigurationParameterSizeT("OPENCV_IMGCODECS_WEBP_MAX_FILE_SIZE", 64*1024*1024);
static const size_t WEBP_HEADER_SIZE = 32;
WebPDecoder::WebPDecoder()
{
m_buf_supported = true;
channels = 0;
fs_size = 0;
}
WebPDecoder::~WebPDecoder() {}
@ -96,48 +102,29 @@ ImageDecoder WebPDecoder::newDecoder() const
bool WebPDecoder::readHeader()
{
uint8_t header[WEBP_HEADER_SIZE] = { 0 };
if (m_buf.empty())
{
FILE * wfile = NULL;
fs.open(m_filename.c_str(), std::ios::binary);
fs.seekg(0, std::ios::end);
fs_size = safeCastToSizeT(fs.tellg(), "File is too large");
fs.seekg(0, std::ios::beg);
CV_Assert(fs && "File stream error");
CV_CheckGE(fs_size, WEBP_HEADER_SIZE, "File is too small");
CV_CheckLE(fs_size, param_maxFileSize, "File is too large. Increase OPENCV_IMGCODECS_WEBP_MAX_FILE_SIZE parameter if you want to process large files");
wfile = fopen(m_filename.c_str(), "rb");
if(wfile == NULL)
{
return false;
}
fseek(wfile, 0, SEEK_END);
long int wfile_size = ftell(wfile);
fseek(wfile, 0, SEEK_SET);
if(wfile_size > static_cast<long int>(INT_MAX))
{
fclose(wfile);
return false;
}
data.create(1, (int)wfile_size, CV_8U);
size_t data_size = fread(data.ptr(), 1, wfile_size, wfile);
if(wfile)
{
fclose(wfile);
}
if(static_cast<long int>(data_size) != wfile_size)
{
return false;
}
fs.read((char*)header, sizeof(header));
CV_Assert(fs && "Can't read WEBP_HEADER_SIZE bytes");
}
else
{
CV_CheckGE(m_buf.total(), WEBP_HEADER_SIZE, "Buffer is too small");
memcpy(header, m_buf.ptr(), sizeof(header));
data = m_buf;
}
WebPBitstreamFeatures features;
if(VP8_STATUS_OK == WebPGetFeatures(data.ptr(), WEBP_HEADER_SIZE, &features))
if (VP8_STATUS_OK == WebPGetFeatures(header, sizeof(header), &features))
{
m_width = features.width;
m_height = features.height;
@ -161,41 +148,75 @@ bool WebPDecoder::readHeader()
bool WebPDecoder::readData(Mat &img)
{
if( m_width > 0 && m_height > 0 )
{
bool convert_grayscale = (img.type() == CV_8UC1); // IMREAD_GRAYSCALE requested
CV_CheckGE(m_width, 0, ""); CV_CheckGE(m_height, 0, "");
if (img.cols != m_width || img.rows != m_height || img.type() != m_type)
CV_CheckEQ(img.cols, m_width, "");
CV_CheckEQ(img.rows, m_height, "");
if (m_buf.empty())
{
fs.seekg(0, std::ios::beg); CV_Assert(fs && "File stream error");
data.create(1, validateToInt(fs_size), CV_8UC1);
fs.read((char*)data.ptr(), fs_size);
CV_Assert(fs && "Can't read file data");
fs.close();
}
CV_Assert(data.type() == CV_8UC1); CV_Assert(data.rows == 1);
{
Mat read_img;
CV_CheckType(img.type(), img.type() == CV_8UC1 || img.type() == CV_8UC3 || img.type() == CV_8UC4, "");
if (img.type() != m_type)
{
img.create(m_height, m_width, m_type);
read_img.create(m_height, m_width, m_type);
}
else
{
read_img = img; // copy header
}
uchar* out_data = img.ptr();
size_t out_data_size = img.cols * img.rows * img.elemSize();
uchar* out_data = read_img.ptr();
size_t out_data_size = read_img.dataend - out_data;
uchar *res_ptr = 0;
uchar *res_ptr = NULL;
if (channels == 3)
{
CV_CheckTypeEQ(read_img.type(), CV_8UC3, "");
res_ptr = WebPDecodeBGRInto(data.ptr(), data.total(), out_data,
(int)out_data_size, (int)img.step);
(int)out_data_size, (int)read_img.step);
}
else if (channels == 4)
{
CV_CheckTypeEQ(read_img.type(), CV_8UC4, "");
res_ptr = WebPDecodeBGRAInto(data.ptr(), data.total(), out_data,
(int)out_data_size, (int)img.step);
(int)out_data_size, (int)read_img.step);
}
if(res_ptr == out_data)
if (res_ptr != out_data)
return false;
if (read_img.data == img.data && img.type() == m_type)
{
if (convert_grayscale)
{
cvtColor(img, img, COLOR_BGR2GRAY);
}
return true;
// nothing
}
else if (img.type() == CV_8UC1)
{
cvtColor(read_img, img, COLOR_BGR2GRAY);
}
else if (img.type() == CV_8UC3 && m_type == CV_8UC4)
{
cvtColor(read_img, img, COLOR_BGRA2BGR);
}
else if (img.type() == CV_8UC3 && m_type == CV_8UC4)
{
cvtColor(read_img, img, COLOR_BGRA2BGR);
}
else
{
CV_Error(Error::StsInternal, "");
}
}
return false;
return true;
}
WebPEncoder::WebPEncoder()
@ -213,12 +234,9 @@ ImageEncoder WebPEncoder::newEncoder() const
bool WebPEncoder::write(const Mat& img, const std::vector<int>& params)
{
int channels = img.channels(), depth = img.depth();
int width = img.cols, height = img.rows;
CV_CheckDepthEQ(img.depth(), CV_8U, "WebP codec supports 8U images only");
const Mat *image = &img;
Mat temp;
size_t size = 0;
const int width = img.cols, height = img.rows;
bool comp_lossless = true;
float quality = 100.0f;
@ -240,69 +258,64 @@ bool WebPEncoder::write(const Mat& img, const std::vector<int>& params)
}
}
uint8_t *out = NULL;
int channels = img.channels();
CV_Check(channels, channels == 1 || channels == 3 || channels == 4, "");
if(depth != CV_8U)
{
return false;
}
const Mat *image = &img;
Mat temp;
if(channels == 1)
if (channels == 1)
{
cvtColor(*image, temp, CV_GRAY2BGR);
image = &temp;
channels = 3;
}
else if (channels == 2)
{
return false;
}
uint8_t *out = NULL;
size_t size = 0;
if (comp_lossless)
{
if(channels == 3)
if (channels == 3)
{
size = WebPEncodeLosslessBGR(image->ptr(), width, height, (int)image->step, &out);
}
else if(channels == 4)
else if (channels == 4)
{
size = WebPEncodeLosslessBGRA(image->ptr(), width, height, (int)image->step, &out);
}
}
else
{
if(channels == 3)
if (channels == 3)
{
size = WebPEncodeBGR(image->ptr(), width, height, (int)image->step, quality, &out);
}
else if(channels == 4)
else if (channels == 4)
{
size = WebPEncodeBGRA(image->ptr(), width, height, (int)image->step, quality, &out);
}
}
#if WEBP_DECODER_ABI_VERSION >= 0x0206
Ptr<uint8_t> out_cleaner(out, WebPFree);
#else
Ptr<uint8_t> out_cleaner(out, free);
#endif
if(size > 0)
CV_Assert(size > 0);
if (m_buf)
{
if(m_buf)
{
m_buf->resize(size);
memcpy(&(*m_buf)[0], out, size);
}
else
{
FILE *fd = fopen(m_filename.c_str(), "wb");
if(fd != NULL)
{
fwrite(out, size, sizeof(uint8_t), fd);
fclose(fd); fd = NULL;
}
}
m_buf->resize(size);
memcpy(&(*m_buf)[0], out, size);
}
if (out != NULL)
else
{
free(out);
out = NULL;
FILE *fd = fopen(m_filename.c_str(), "wb");
if (fd != NULL)
{
fwrite(out, size, sizeof(uint8_t), fd);
fclose(fd); fd = NULL;
}
}
return size > 0;

View File

@ -47,7 +47,7 @@
#ifdef HAVE_WEBP
#include <fstream>
namespace cv
{
@ -61,7 +61,6 @@ public:
bool readData( Mat& img ) CV_OVERRIDE;
bool readHeader() CV_OVERRIDE;
void close();
size_t signatureLength() const CV_OVERRIDE;
bool checkSignature( const String& signature) const CV_OVERRIDE;
@ -69,6 +68,8 @@ public:
ImageDecoder newDecoder() const CV_OVERRIDE;
protected:
std::ifstream fs;
size_t fs_size;
Mat data;
int channels;
};

View File

@ -400,6 +400,8 @@ static void ApplyExifOrientation(const Mat& buf, Mat& img)
static void*
imread_( const String& filename, int flags, int hdrtype, Mat* mat=0 )
{
CV_Assert(mat || hdrtype != LOAD_MAT); // mat is required in LOAD_MAT case
IplImage* image = 0;
CvMat *matrix = 0;
Mat temp, *data = &temp;
@ -711,11 +713,22 @@ static bool imwrite_( const String& filename, const std::vector<Mat>& img_vec,
encoder->setDestination( filename );
CV_Assert(params.size() <= CV_IO_MAX_IMAGE_PARAMS*2);
bool code;
if (!isMultiImg)
code = encoder->write( write_vec[0], params );
else
code = encoder->writemulti( write_vec, params ); //to be implemented
bool code = false;
try
{
if (!isMultiImg)
code = encoder->write( write_vec[0], params );
else
code = encoder->writemulti( write_vec, params ); //to be implemented
}
catch (const cv::Exception& e)
{
std::cerr << "imwrite_('" << filename << "'): can't write data: " << e.what() << std::endl << std::flush;
}
catch (...)
{
std::cerr << "imwrite_('" << filename << "'): can't write data: unknown exception" << std::endl << std::flush;
}
// CV_Assert( code );
return code;

View File

@ -44,6 +44,15 @@
int validateToInt(size_t step);
template <typename _Tp> static inline
size_t safeCastToSizeT(const _Tp v_origin, const char* msg)
{
const size_t value_cast = (size_t)v_origin;
if ((_Tp)value_cast != v_origin)
CV_Error(cv::Error::StsError, msg ? msg : "Can't cast value into size_t");
return value_cast;
}
struct PaletteEntry
{
unsigned char b, g, r, a;

View File

@ -96,12 +96,17 @@ TEST(Imgcodecs_WebP, encode_decode_with_alpha_webp)
string output = cv::tempfile(".webp");
EXPECT_NO_THROW(cv::imwrite(output, img));
cv::Mat img_webp = cv::imread(output);
cv::Mat img_webp = cv::imread(output, IMREAD_UNCHANGED);
cv::Mat img_webp_bgr = cv::imread(output); // IMREAD_COLOR by default
EXPECT_EQ(0, remove(output.c_str()));
EXPECT_FALSE(img_webp.empty());
EXPECT_EQ(4, img_webp.channels());
EXPECT_EQ(512, img_webp.cols);
EXPECT_EQ(512, img_webp.rows);
EXPECT_FALSE(img_webp_bgr.empty());
EXPECT_EQ(3, img_webp_bgr.channels());
EXPECT_EQ(512, img_webp_bgr.cols);
EXPECT_EQ(512, img_webp_bgr.rows);
}
#endif // HAVE_WEBP

View File

@ -52,7 +52,7 @@ public class Subdiv2DTest extends OpenCVTestCase {
s2d.insert( new Point(10, 20) );
MatOfFloat6 triangles = new MatOfFloat6();
s2d.getTriangleList(triangles);
assertEquals(10, triangles.rows());
assertEquals(2, triangles.rows());
/*
int cnt = triangles.rows();
float buff[] = new float[cnt*6];

View File

@ -332,7 +332,7 @@ void cv::accumulate( InputArray _src, InputOutputArray _dst, InputArray _mask )
CV_Assert( func != 0 );
const Mat* arrays[] = {&src, &dst, &mask, 0};
uchar* ptrs[3]{};
uchar* ptrs[3] = {};
NAryMatIterator it(arrays, ptrs);
int len = (int)it.size;
@ -430,7 +430,7 @@ void cv::accumulateSquare( InputArray _src, InputOutputArray _dst, InputArray _m
CV_Assert( func != 0 );
const Mat* arrays[] = {&src, &dst, &mask, 0};
uchar* ptrs[3]{};
uchar* ptrs[3] = {};
NAryMatIterator it(arrays, ptrs);
int len = (int)it.size;
@ -533,7 +533,7 @@ void cv::accumulateProduct( InputArray _src1, InputArray _src2,
CV_Assert( func != 0 );
const Mat* arrays[] = {&src1, &src2, &dst, &mask, 0};
uchar* ptrs[4]{};
uchar* ptrs[4] = {};
NAryMatIterator it(arrays, ptrs);
int len = (int)it.size;
@ -635,7 +635,7 @@ void cv::accumulateWeighted( InputArray _src, InputOutputArray _dst,
CV_Assert( func != 0 );
const Mat* arrays[] = {&src, &dst, &mask, 0};
uchar* ptrs[3]{};
uchar* ptrs[3] = {};
NAryMatIterator it(arrays, ptrs);
int len = (int)it.size;

View File

@ -1123,7 +1123,6 @@ cvFindNextContour( CvContourScanner scanner )
#endif
{
_CvContourInfo *par_info = 0;
_CvContourInfo *l_cinfo = 0;
CvSeq *seq = 0;
int is_hole = 0;
CvPoint origin;
@ -1215,6 +1214,7 @@ cvFindNextContour( CvContourScanner scanner )
seq->flags |= is_hole ? CV_SEQ_FLAG_HOLE : 0;
/* initialize header */
_CvContourInfo *l_cinfo = 0;
if( mode <= 1 )
{
l_cinfo = &(scanner->cinfo_temp);
@ -1225,10 +1225,8 @@ cvFindNextContour( CvContourScanner scanner )
}
else
{
union { _CvContourInfo* ci; CvSetElem* se; } v;
v.ci = l_cinfo;
cvSetAdd( scanner->cinfo_set, 0, &v.se );
l_cinfo = v.ci;
cvSetAdd(scanner->cinfo_set, 0, (CvSetElem**)&l_cinfo);
CV_Assert(l_cinfo);
int lval;
if( img_i )
@ -1298,16 +1296,16 @@ cvFindNextContour( CvContourScanner scanner )
scanner->img = (schar *) img;
scanner->nbd = nbd;
return l_cinfo->contour;
resume_scan:
}
resume_scan:
{
prev = p;
/* update lnbd */
if( prev & -2 )
{
lnbd.x = x;
}
} /* end of prev != p */
}
} /* end of loop on x */
lnbd.x = 0;

View File

@ -45,7 +45,8 @@ namespace cv
{
static const int DIST_SHIFT = 16;
static const int INIT_DIST0 = (INT_MAX >> 2);
static const int INIT_DIST0 = INT_MAX;
static const int DIST_MAX = (INT_MAX >> 2);
#define CV_FLT_TO_FIX(x,n) cvRound((x)*(1<<(n)))
static void
@ -71,8 +72,8 @@ distanceTransform_3x3( const Mat& _src, Mat& _temp, Mat& _dist, const float* met
{
const int BORDER = 1;
int i, j;
const int HV_DIST = CV_FLT_TO_FIX( metrics[0], DIST_SHIFT );
const int DIAG_DIST = CV_FLT_TO_FIX( metrics[1], DIST_SHIFT );
const unsigned int HV_DIST = CV_FLT_TO_FIX( metrics[0], DIST_SHIFT );
const unsigned int DIAG_DIST = CV_FLT_TO_FIX( metrics[1], DIST_SHIFT );
const float scale = 1.f/(1 << DIST_SHIFT);
const uchar* src = _src.ptr();
@ -89,7 +90,7 @@ distanceTransform_3x3( const Mat& _src, Mat& _temp, Mat& _dist, const float* met
for( i = 0; i < size.height; i++ )
{
const uchar* s = src + i*srcstep;
int* tmp = (int*)(temp + (i+BORDER)*step) + BORDER;
unsigned int* tmp = (unsigned int*)(temp + (i+BORDER)*step) + BORDER;
for( j = 0; j < BORDER; j++ )
tmp[-j-1] = tmp[size.width + j] = INIT_DIST0;
@ -100,8 +101,8 @@ distanceTransform_3x3( const Mat& _src, Mat& _temp, Mat& _dist, const float* met
tmp[j] = 0;
else
{
int t0 = tmp[j-step-1] + DIAG_DIST;
int t = tmp[j-step] + HV_DIST;
unsigned int t0 = tmp[j-step-1] + DIAG_DIST;
unsigned int t = tmp[j-step] + HV_DIST;
if( t0 > t ) t0 = t;
t = tmp[j-step+1] + DIAG_DIST;
if( t0 > t ) t0 = t;
@ -116,14 +117,14 @@ distanceTransform_3x3( const Mat& _src, Mat& _temp, Mat& _dist, const float* met
for( i = size.height - 1; i >= 0; i-- )
{
float* d = (float*)(dist + i*dststep);
int* tmp = (int*)(temp + (i+BORDER)*step) + BORDER;
unsigned int* tmp = (unsigned int*)(temp + (i+BORDER)*step) + BORDER;
for( j = size.width - 1; j >= 0; j-- )
{
int t0 = tmp[j];
unsigned int t0 = tmp[j];
if( t0 > HV_DIST )
{
int t = tmp[j+step+1] + DIAG_DIST;
unsigned int t = tmp[j+step+1] + DIAG_DIST;
if( t0 > t ) t0 = t;
t = tmp[j+step] + HV_DIST;
if( t0 > t ) t0 = t;
@ -133,6 +134,7 @@ distanceTransform_3x3( const Mat& _src, Mat& _temp, Mat& _dist, const float* met
if( t0 > t ) t0 = t;
tmp[j] = t0;
}
t0 = (t0 > DIST_MAX) ? DIST_MAX : t0;
d[j] = (float)(t0 * scale);
}
}
@ -144,9 +146,9 @@ distanceTransform_5x5( const Mat& _src, Mat& _temp, Mat& _dist, const float* met
{
const int BORDER = 2;
int i, j;
const int HV_DIST = CV_FLT_TO_FIX( metrics[0], DIST_SHIFT );
const int DIAG_DIST = CV_FLT_TO_FIX( metrics[1], DIST_SHIFT );
const int LONG_DIST = CV_FLT_TO_FIX( metrics[2], DIST_SHIFT );
const unsigned int HV_DIST = CV_FLT_TO_FIX( metrics[0], DIST_SHIFT );
const unsigned int DIAG_DIST = CV_FLT_TO_FIX( metrics[1], DIST_SHIFT );
const unsigned int LONG_DIST = CV_FLT_TO_FIX( metrics[2], DIST_SHIFT );
const float scale = 1.f/(1 << DIST_SHIFT);
const uchar* src = _src.ptr();
@ -163,7 +165,7 @@ distanceTransform_5x5( const Mat& _src, Mat& _temp, Mat& _dist, const float* met
for( i = 0; i < size.height; i++ )
{
const uchar* s = src + i*srcstep;
int* tmp = (int*)(temp + (i+BORDER)*step) + BORDER;
unsigned int* tmp = (unsigned int*)(temp + (i+BORDER)*step) + BORDER;
for( j = 0; j < BORDER; j++ )
tmp[-j-1] = tmp[size.width + j] = INIT_DIST0;
@ -174,8 +176,8 @@ distanceTransform_5x5( const Mat& _src, Mat& _temp, Mat& _dist, const float* met
tmp[j] = 0;
else
{
int t0 = tmp[j-step*2-1] + LONG_DIST;
int t = tmp[j-step*2+1] + LONG_DIST;
unsigned int t0 = tmp[j-step*2-1] + LONG_DIST;
unsigned int t = tmp[j-step*2+1] + LONG_DIST;
if( t0 > t ) t0 = t;
t = tmp[j-step-2] + LONG_DIST;
if( t0 > t ) t0 = t;
@ -198,14 +200,14 @@ distanceTransform_5x5( const Mat& _src, Mat& _temp, Mat& _dist, const float* met
for( i = size.height - 1; i >= 0; i-- )
{
float* d = (float*)(dist + i*dststep);
int* tmp = (int*)(temp + (i+BORDER)*step) + BORDER;
unsigned int* tmp = (unsigned int*)(temp + (i+BORDER)*step) + BORDER;
for( j = size.width - 1; j >= 0; j-- )
{
int t0 = tmp[j];
unsigned int t0 = tmp[j];
if( t0 > HV_DIST )
{
int t = tmp[j+step*2+1] + LONG_DIST;
unsigned int t = tmp[j+step*2+1] + LONG_DIST;
if( t0 > t ) t0 = t;
t = tmp[j+step*2-1] + LONG_DIST;
if( t0 > t ) t0 = t;
@ -223,6 +225,7 @@ distanceTransform_5x5( const Mat& _src, Mat& _temp, Mat& _dist, const float* met
if( t0 > t ) t0 = t;
tmp[j] = t0;
}
t0 = (t0 > DIST_MAX) ? DIST_MAX : t0;
d[j] = (float)(t0 * scale);
}
}
@ -235,9 +238,9 @@ distanceTransformEx_5x5( const Mat& _src, Mat& _temp, Mat& _dist, Mat& _labels,
const int BORDER = 2;
int i, j;
const int HV_DIST = CV_FLT_TO_FIX( metrics[0], DIST_SHIFT );
const int DIAG_DIST = CV_FLT_TO_FIX( metrics[1], DIST_SHIFT );
const int LONG_DIST = CV_FLT_TO_FIX( metrics[2], DIST_SHIFT );
const unsigned int HV_DIST = CV_FLT_TO_FIX( metrics[0], DIST_SHIFT );
const unsigned int DIAG_DIST = CV_FLT_TO_FIX( metrics[1], DIST_SHIFT );
const unsigned int LONG_DIST = CV_FLT_TO_FIX( metrics[2], DIST_SHIFT );
const float scale = 1.f/(1 << DIST_SHIFT);
const uchar* src = _src.ptr();
@ -247,7 +250,7 @@ distanceTransformEx_5x5( const Mat& _src, Mat& _temp, Mat& _dist, Mat& _labels,
int srcstep = (int)(_src.step/sizeof(src[0]));
int step = (int)(_temp.step/sizeof(temp[0]));
int dststep = (int)(_dist.step/sizeof(dist[0]));
int lstep = (int)(_labels.step/sizeof(dist[0]));
int lstep = (int)(_labels.step/sizeof(labels[0]));
Size size = _src.size();
initTopBottom( _temp, BORDER );
@ -256,7 +259,7 @@ distanceTransformEx_5x5( const Mat& _src, Mat& _temp, Mat& _dist, Mat& _labels,
for( i = 0; i < size.height; i++ )
{
const uchar* s = src + i*srcstep;
int* tmp = (int*)(temp + (i+BORDER)*step) + BORDER;
unsigned int* tmp = (unsigned int*)(temp + (i+BORDER)*step) + BORDER;
int* lls = (int*)(labels + i*lstep);
for( j = 0; j < BORDER; j++ )
@ -271,7 +274,7 @@ distanceTransformEx_5x5( const Mat& _src, Mat& _temp, Mat& _dist, Mat& _labels,
}
else
{
int t0 = INIT_DIST0, t;
unsigned int t0 = INIT_DIST0, t;
int l0 = 0;
t = tmp[j-step*2-1] + LONG_DIST;
@ -333,16 +336,16 @@ distanceTransformEx_5x5( const Mat& _src, Mat& _temp, Mat& _dist, Mat& _labels,
for( i = size.height - 1; i >= 0; i-- )
{
float* d = (float*)(dist + i*dststep);
int* tmp = (int*)(temp + (i+BORDER)*step) + BORDER;
unsigned int* tmp = (unsigned int*)(temp + (i+BORDER)*step) + BORDER;
int* lls = (int*)(labels + i*lstep);
for( j = size.width - 1; j >= 0; j-- )
{
int t0 = tmp[j];
unsigned int t0 = tmp[j];
int l0 = lls[j];
if( t0 > HV_DIST )
{
int t = tmp[j+step*2+1] + LONG_DIST;
unsigned int t = tmp[j+step*2+1] + LONG_DIST;
if( t0 > t )
{
t0 = t;
@ -393,6 +396,7 @@ distanceTransformEx_5x5( const Mat& _src, Mat& _temp, Mat& _dist, Mat& _labels,
tmp[j] = t0;
lls[j] = l0;
}
t0 = (t0 > DIST_MAX) ? DIST_MAX : t0;
d[j] = (float)(t0 * scale);
}
}

View File

@ -340,51 +340,199 @@ static void hlineResizeCn(ET* src, int cn, int *ofst, FT* m, FT* dst, int dst_mi
hline<ET, FT, n, mulall, cncnt>::ResizeCn(src, cn, ofst, m, dst, dst_min, dst_max, dst_width);
};
#if CV_SIMD512
inline void v_load_indexed1(uint8_t* src, int *ofst, v_uint16 &v_src0, v_uint16 &v_src1)
{
v_expand(v_reinterpret_as_u8(v_uint16(
*((uint16_t*)(src + ofst[ 0])), *((uint16_t*)(src + ofst[ 1])), *((uint16_t*)(src + ofst[ 2])), *((uint16_t*)(src + ofst[ 3])),
*((uint16_t*)(src + ofst[ 4])), *((uint16_t*)(src + ofst[ 5])), *((uint16_t*)(src + ofst[ 6])), *((uint16_t*)(src + ofst[ 7])),
*((uint16_t*)(src + ofst[ 8])), *((uint16_t*)(src + ofst[ 9])), *((uint16_t*)(src + ofst[10])), *((uint16_t*)(src + ofst[11])),
*((uint16_t*)(src + ofst[12])), *((uint16_t*)(src + ofst[13])), *((uint16_t*)(src + ofst[14])), *((uint16_t*)(src + ofst[15])),
*((uint16_t*)(src + ofst[16])), *((uint16_t*)(src + ofst[17])), *((uint16_t*)(src + ofst[14])), *((uint16_t*)(src + ofst[15])),
*((uint16_t*)(src + ofst[20])), *((uint16_t*)(src + ofst[21])), *((uint16_t*)(src + ofst[14])), *((uint16_t*)(src + ofst[15])),
*((uint16_t*)(src + ofst[24])), *((uint16_t*)(src + ofst[25])), *((uint16_t*)(src + ofst[14])), *((uint16_t*)(src + ofst[15])),
*((uint16_t*)(src + ofst[28])), *((uint16_t*)(src + ofst[29])), *((uint16_t*)(src + ofst[14])), *((uint16_t*)(src + ofst[15])))),
v_src0, v_src1);
}
inline void v_load_indexed2(uint8_t* src, int *ofst, v_uint16 &v_src0, v_uint16 &v_src1)
{
v_expand(v_reinterpret_as_u8(v_uint32(
*((uint32_t*)(src + 2 * ofst[ 0])), *((uint32_t*)(src + 2 * ofst[ 1])), *((uint32_t*)(src + 2 * ofst[ 2])), *((uint32_t*)(src + 2 * ofst[ 3])),
*((uint32_t*)(src + 2 * ofst[ 4])), *((uint32_t*)(src + 2 * ofst[ 5])), *((uint32_t*)(src + 2 * ofst[ 6])), *((uint32_t*)(src + 2 * ofst[ 7])),
*((uint32_t*)(src + 2 * ofst[ 8])), *((uint32_t*)(src + 2 * ofst[ 9])), *((uint32_t*)(src + 2 * ofst[10])), *((uint32_t*)(src + 2 * ofst[11])),
*((uint32_t*)(src + 2 * ofst[12])), *((uint32_t*)(src + 2 * ofst[13])), *((uint32_t*)(src + 2 * ofst[14])), *((uint32_t*)(src + 2 * ofst[15])))),
v_src0, v_src1);
v_uint32 v_tmp0, v_tmp1, v_tmp2, v_tmp3;
v_zip(v_reinterpret_as_u32(v_src0), v_reinterpret_as_u32(v_src1), v_tmp2, v_tmp3);
v_zip(v_tmp2, v_tmp3, v_tmp0, v_tmp1);
v_zip(v_tmp0, v_tmp1, v_tmp2, v_tmp3);
v_zip(v_tmp2, v_tmp3, v_tmp0, v_tmp1);
v_zip(v_reinterpret_as_u16(v_tmp0), v_reinterpret_as_u16(v_tmp1), v_src0, v_src1);
}
inline void v_load_indexed4(uint8_t* src, int *ofst, v_uint16 &v_src0, v_uint16 &v_src1)
{
v_expand(v_reinterpret_as_u8(v_uint64(
*((uint64_t*)(src + 4 * ofst[0])), *((uint64_t*)(src + 4 * ofst[1])), *((uint64_t*)(src + 4 * ofst[2])), *((uint64_t*)(src + 4 * ofst[3])),
*((uint64_t*)(src + 4 * ofst[4])), *((uint64_t*)(src + 4 * ofst[5])), *((uint64_t*)(src + 4 * ofst[6])), *((uint64_t*)(src + 4 * ofst[7])))),
v_src0, v_src1);
v_uint64 v_tmp0, v_tmp1, v_tmp2, v_tmp3;
v_zip(v_reinterpret_as_u64(v_src0), v_reinterpret_as_u64(v_src1), v_tmp2, v_tmp3);
v_zip(v_tmp2, v_tmp3, v_tmp0, v_tmp1);
v_zip(v_tmp0, v_tmp1, v_tmp2, v_tmp3);
v_zip(v_reinterpret_as_u16(v_tmp2), v_reinterpret_as_u16(v_tmp3), v_src0, v_src1);
}
inline void v_load_indexed_deinterleave(uint16_t* src, int *ofst, v_uint32 &v_src0, v_uint32 &v_src1)
{
v_expand(v_reinterpret_as_u16(v_uint32(
*((uint32_t*)(src + ofst[ 0])), *((uint32_t*)(src + ofst[ 1])), *((uint32_t*)(src + ofst[ 2])), *((uint32_t*)(src + ofst[ 3])),
*((uint32_t*)(src + ofst[ 4])), *((uint32_t*)(src + ofst[ 5])), *((uint32_t*)(src + ofst[ 6])), *((uint32_t*)(src + ofst[ 7])),
*((uint32_t*)(src + ofst[ 8])), *((uint32_t*)(src + ofst[ 9])), *((uint32_t*)(src + ofst[10])), *((uint32_t*)(src + ofst[11])),
*((uint32_t*)(src + ofst[12])), *((uint32_t*)(src + ofst[13])), *((uint32_t*)(src + ofst[14])), *((uint32_t*)(src + ofst[15])))),
v_src0, v_src1);
v_uint32 v_tmp0, v_tmp1;
v_zip(v_src0, v_src1, v_tmp0, v_tmp1);
v_zip(v_tmp0, v_tmp1, v_src0, v_src1);
v_zip(v_src0, v_src1, v_tmp0, v_tmp1);
v_zip(v_tmp0, v_tmp1, v_src0, v_src1);
}
#elif CV_SIMD256
inline void v_load_indexed1(uint8_t* src, int *ofst, v_uint16 &v_src0, v_uint16 &v_src1)
{
v_expand(v_reinterpret_as_u8(v_uint16(
*((uint16_t*)(src + ofst[ 0])), *((uint16_t*)(src + ofst[ 1])), *((uint16_t*)(src + ofst[ 2])), *((uint16_t*)(src + ofst[ 3])),
*((uint16_t*)(src + ofst[ 4])), *((uint16_t*)(src + ofst[ 5])), *((uint16_t*)(src + ofst[ 6])), *((uint16_t*)(src + ofst[ 7])),
*((uint16_t*)(src + ofst[ 8])), *((uint16_t*)(src + ofst[ 9])), *((uint16_t*)(src + ofst[10])), *((uint16_t*)(src + ofst[11])),
*((uint16_t*)(src + ofst[12])), *((uint16_t*)(src + ofst[13])), *((uint16_t*)(src + ofst[14])), *((uint16_t*)(src + ofst[15])))),
v_src0, v_src1);
}
inline void v_load_indexed2(uint8_t* src, int *ofst, v_uint16 &v_src0, v_uint16 &v_src1)
{
v_expand(v_reinterpret_as_u8(v_uint32(
*((uint32_t*)(src + 2 * ofst[0])), *((uint32_t*)(src + 2 * ofst[1])), *((uint32_t*)(src + 2 * ofst[2])), *((uint32_t*)(src + 2 * ofst[3])),
*((uint32_t*)(src + 2 * ofst[4])), *((uint32_t*)(src + 2 * ofst[5])), *((uint32_t*)(src + 2 * ofst[6])), *((uint32_t*)(src + 2 * ofst[7])))),
v_src0, v_src1);
v_uint32 v_tmp0, v_tmp1, v_tmp2, v_tmp3;
v_zip(v_reinterpret_as_u32(v_src0), v_reinterpret_as_u32(v_src1), v_tmp2, v_tmp3);
v_zip(v_tmp2, v_tmp3, v_tmp0, v_tmp1);
v_zip(v_tmp0, v_tmp1, v_tmp2, v_tmp3);
v_zip(v_reinterpret_as_u16(v_tmp2), v_reinterpret_as_u16(v_tmp3), v_src0, v_src1);
}
inline void v_load_indexed4(uint8_t* src, int *ofst, v_uint16 &v_src0, v_uint16 &v_src1)
{
v_expand(v_reinterpret_as_u8(v_uint64(
*((uint64_t*)(src + 4 * ofst[0])), *((uint64_t*)(src + 4 * ofst[1])), *((uint64_t*)(src + 4 * ofst[2])), *((uint64_t*)(src + 4 * ofst[3])))),
v_src0, v_src1);
v_uint64 v_tmp0, v_tmp1, v_tmp2, v_tmp3;
v_zip(v_reinterpret_as_u64(v_src0), v_reinterpret_as_u64(v_src1), v_tmp2, v_tmp3);
v_zip(v_tmp2, v_tmp3, v_tmp0, v_tmp1);
v_zip(v_reinterpret_as_u16(v_tmp0), v_reinterpret_as_u16(v_tmp1), v_src0, v_src1);
}
inline void v_load_indexed_deinterleave(uint16_t* src, int *ofst, v_uint32 &v_src0, v_uint32 &v_src1)
{
v_uint32 v_tmp0, v_tmp1;
v_expand(v_reinterpret_as_u16(v_uint32(
*((uint32_t*)(src + ofst[0])), *((uint32_t*)(src + ofst[1])), *((uint32_t*)(src + ofst[2])), *((uint32_t*)(src + ofst[3])),
*((uint32_t*)(src + ofst[4])), *((uint32_t*)(src + ofst[5])), *((uint32_t*)(src + ofst[6])), *((uint32_t*)(src + ofst[7])))),
v_tmp0, v_tmp1);
v_zip(v_tmp0, v_tmp1, v_src0, v_src1);
v_zip(v_src0, v_src1, v_tmp0, v_tmp1);
v_zip(v_tmp0, v_tmp1, v_src0, v_src1);
}
#elif CV_SIMD128
inline void v_load_indexed1(uint8_t* src, int *ofst, v_uint16 &v_src0, v_uint16 &v_src1)
{
uint16_t buf[8];
buf[0] = *((uint16_t*)(src + ofst[0]));
buf[1] = *((uint16_t*)(src + ofst[1]));
buf[2] = *((uint16_t*)(src + ofst[2]));
buf[3] = *((uint16_t*)(src + ofst[3]));
buf[4] = *((uint16_t*)(src + ofst[4]));
buf[5] = *((uint16_t*)(src + ofst[5]));
buf[6] = *((uint16_t*)(src + ofst[6]));
buf[7] = *((uint16_t*)(src + ofst[7]));
v_src0 = vx_load_expand((uint8_t*)buf);
v_src1 = vx_load_expand((uint8_t*)buf + 8);
}
inline void v_load_indexed2(uint8_t* src, int *ofst, v_uint16 &v_src0, v_uint16 &v_src1)
{
uint32_t buf[4];
buf[0] = *((uint32_t*)(src + 2 * ofst[0]));
buf[1] = *((uint32_t*)(src + 2 * ofst[1]));
buf[2] = *((uint32_t*)(src + 2 * ofst[2]));
buf[3] = *((uint32_t*)(src + 2 * ofst[3]));
v_uint32 v_tmp0, v_tmp1, v_tmp2, v_tmp3;
v_tmp0 = v_reinterpret_as_u32(vx_load_expand((uint8_t*)buf));
v_tmp1 = v_reinterpret_as_u32(vx_load_expand((uint8_t*)buf + 8));
v_zip(v_tmp0, v_tmp1, v_tmp2, v_tmp3);
v_zip(v_tmp2, v_tmp3, v_tmp0, v_tmp1);
v_zip(v_reinterpret_as_u16(v_tmp0), v_reinterpret_as_u16(v_tmp1), v_src0, v_src1);
}
inline void v_load_indexed4(uint8_t* src, int *ofst, v_uint16 &v_src0, v_uint16 &v_src1)
{
v_uint16 v_tmp0, v_tmp1;
v_src0 = vx_load_expand(src + 4 * ofst[0]);
v_src1 = vx_load_expand(src + 4 * ofst[1]);
v_recombine(v_src0, v_src1, v_tmp0, v_tmp1);
v_zip(v_tmp0, v_tmp1, v_src0, v_src1);
}
inline void v_load_indexed_deinterleave(uint16_t* src, int *ofst, v_uint32 &v_src0, v_uint32 &v_src1)
{
uint32_t buf[4];
buf[0] = *((uint32_t*)(src + ofst[0]));
buf[1] = *((uint32_t*)(src + ofst[1]));
buf[2] = *((uint32_t*)(src + ofst[2]));
buf[3] = *((uint32_t*)(src + ofst[3]));
v_src0 = vx_load_expand((uint16_t*)buf);
v_src1 = vx_load_expand((uint16_t*)buf + 4);
v_uint32 v_tmp0, v_tmp1;
v_zip(v_src0, v_src1, v_tmp0, v_tmp1);
v_zip(v_tmp0, v_tmp1, v_src0, v_src1);
}
#endif
template <>
void hlineResizeCn<uint8_t, ufixedpoint16, 2, true, 1>(uint8_t* src, int, int *ofst, ufixedpoint16* m, ufixedpoint16* dst, int dst_min, int dst_max, int dst_width)
{
int i = 0;
ufixedpoint16 src_0(src[0]);
v_uint16x8 v_src_0 = v_setall_u16(*((uint16_t*)&src_0));
for (; i < dst_min - 7; i += 8, m += 16, dst += 8) // Points that fall left from src image so became equal to leftmost src point
#if CV_SIMD
const int VECSZ = v_uint16::nlanes;
v_uint16 v_src_0 = vx_setall_u16(*((uint16_t*)&src_0));
for (; i <= dst_min - VECSZ; i += VECSZ, m += 2*VECSZ, dst += VECSZ) // Points that fall left from src image so became equal to leftmost src point
{
v_store((uint16_t*)dst, v_src_0);
}
#endif
for (; i < dst_min; i++, m += 2)
{
*(dst++) = src_0;
}
for (; i < dst_max - 7 && ofst[i + 7] + 15 <= ofst[dst_width - 1]; i += 8, m += 16, dst += 8)
#if CV_SIMD
for (; i <= dst_max - VECSZ; i += VECSZ, m += 2*VECSZ, dst += VECSZ)
{
v_uint32x4 v_src01 = v_combine_low(v_reinterpret_as_u32(v_load_expand(src + ofst[i ])), v_reinterpret_as_u32(v_load_expand(src + ofst[i + 1])));
v_uint32x4 v_src23 = v_combine_low(v_reinterpret_as_u32(v_load_expand(src + ofst[i + 2])), v_reinterpret_as_u32(v_load_expand(src + ofst[i + 3])));
v_uint32x4 v_src45 = v_combine_low(v_reinterpret_as_u32(v_load_expand(src + ofst[i + 4])), v_reinterpret_as_u32(v_load_expand(src + ofst[i + 5])));
v_uint32x4 v_src67 = v_combine_low(v_reinterpret_as_u32(v_load_expand(src + ofst[i + 6])), v_reinterpret_as_u32(v_load_expand(src + ofst[i + 7])));
v_uint16 v_src0, v_src1;
v_load_indexed1(src, ofst + i, v_src0, v_src1);
v_uint32x4 v_zip02, v_zip13, v_zip46, v_zip57;
v_zip(v_src01, v_src23, v_zip02, v_zip13);
v_zip(v_src45, v_src67, v_zip46, v_zip57);
v_uint32x4 v_src0, v_src1;
v_zip(v_combine_low(v_zip02, v_zip46), v_combine_low(v_zip13, v_zip57), v_src0, v_src1);
v_int16x8 v_mul0 = v_load((int16_t*)m);
v_int16x8 v_mul1 = v_load((int16_t*)m + 8);
v_uint32x4 v_res0 = v_reinterpret_as_u32(v_dotprod(v_reinterpret_as_s16(v_src0), v_mul0));
v_uint32x4 v_res1 = v_reinterpret_as_u32(v_dotprod(v_reinterpret_as_s16(v_src1), v_mul1));
v_int16 v_mul0 = vx_load((int16_t*)m);
v_int16 v_mul1 = vx_load((int16_t*)m + VECSZ);
v_uint32 v_res0 = v_reinterpret_as_u32(v_dotprod(v_reinterpret_as_s16(v_src0), v_mul0));
v_uint32 v_res1 = v_reinterpret_as_u32(v_dotprod(v_reinterpret_as_s16(v_src1), v_mul1));
v_store((uint16_t*)dst, v_pack(v_res0, v_res1));
}
#endif
for (; i < dst_max; i += 1, m += 2)
{
uint8_t* px = src + ofst[i];
*(dst++) = m[0] * px[0] + m[1] * px[1];
}
src_0 = (src + ofst[dst_width - 1])[0];
v_src_0 = v_setall_u16(*((uint16_t*)&src_0));
for (; i < dst_width - 7; i += 8, dst += 8) // Points that fall left from src image so became equal to leftmost src point
#if CV_SIMD
v_src_0 = vx_setall_u16(*((uint16_t*)&src_0));
for (; i <= dst_width - VECSZ; i += VECSZ, dst += VECSZ) // Points that fall left from src image so became equal to leftmost src point
{
v_store((uint16_t*)dst, v_src_0);
}
vx_cleanup();
#endif
for (; i < dst_width; i++)
{
*(dst++) = src_0;
@ -394,87 +542,109 @@ template <>
void hlineResizeCn<uint8_t, ufixedpoint16, 2, true, 2>(uint8_t* src, int, int *ofst, ufixedpoint16* m, ufixedpoint16* dst, int dst_min, int dst_max, int dst_width)
{
int i = 0;
ufixedpoint16 srccn[8] = { src[0], src[1], src[0], src[1], src[0], src[1], src[0], src[1] };
v_uint16x8 v_srccn = v_load((uint16_t*)srccn);
for (; i < dst_min - 3; i += 4, m += 8, dst += 8) // Points that fall left from src image so became equal to leftmost src point
union {
uint32_t d;
uint16_t w[2];
} srccn;
((ufixedpoint16*)(srccn.w))[0] = src[0];
((ufixedpoint16*)(srccn.w))[1] = src[1];
#if CV_SIMD
const int VECSZ = v_uint16::nlanes;
v_uint16 v_srccn = v_reinterpret_as_u16(vx_setall_u32(srccn.d));
for (; i <= dst_min - VECSZ/2; i += VECSZ/2, m += VECSZ, dst += VECSZ) // Points that fall left from src image so became equal to leftmost src point
{
v_store((uint16_t*)dst, v_srccn);
}
#endif
for (; i < dst_min; i++, m += 2)
{
*(dst++) = srccn[0];
*(dst++) = srccn[1];
*(dst++) = ((ufixedpoint16*)(srccn.w))[0];
*(dst++) = ((ufixedpoint16*)(srccn.w))[1];
}
for (; i < dst_max - 3 && ofst[i + 3] + 7 <= ofst[dst_width - 1]; i += 4, m += 8, dst += 8)
#if CV_SIMD
for (; i <= dst_max - VECSZ/2; i += VECSZ/2, m += VECSZ, dst += VECSZ)
{
v_uint32x4 v_src0 = v_combine_low(v_reinterpret_as_u32(v_load_expand(src + 2 * ofst[i ])), v_reinterpret_as_u32(v_load_expand(src + 2 * ofst[i + 1])));
v_uint32x4 v_src1 = v_combine_low(v_reinterpret_as_u32(v_load_expand(src + 2 * ofst[i + 2])), v_reinterpret_as_u32(v_load_expand(src + 2 * ofst[i + 3])));
v_uint16 v_src0, v_src1;
v_load_indexed2(src, ofst + i, v_src0, v_src1);
v_uint32x4 v_zip0, v_zip1;
v_zip(v_src0, v_src1, v_zip0, v_zip1);
v_zip(v_zip0, v_zip1, v_src0, v_src1);
v_int16x8 v_src0123, v_src4567;
v_zip(v_reinterpret_as_s16(v_src0), v_reinterpret_as_s16(v_src1), v_src0123, v_src4567);
v_uint32x4 v_mul = v_load((uint32_t*)m);//AaBbCcDd
v_uint32 v_mul = vx_load((uint32_t*)m);//AaBbCcDd
v_uint32 v_zip0, v_zip1;
v_zip(v_mul, v_mul, v_zip0, v_zip1);//AaAaBbBb CcCcDdDd
v_uint32x4 v_res0 = v_reinterpret_as_u32(v_dotprod(v_src0123, v_reinterpret_as_s16(v_zip0)));
v_uint32x4 v_res1 = v_reinterpret_as_u32(v_dotprod(v_src4567, v_reinterpret_as_s16(v_zip1)));
v_uint32 v_res0 = v_reinterpret_as_u32(v_dotprod(v_reinterpret_as_s16(v_src0), v_reinterpret_as_s16(v_zip0)));
v_uint32 v_res1 = v_reinterpret_as_u32(v_dotprod(v_reinterpret_as_s16(v_src1), v_reinterpret_as_s16(v_zip1)));
v_store((uint16_t*)dst, v_pack(v_res0, v_res1));//AB1AB2CD1CD2
}
#endif
for (; i < dst_max; i += 1, m += 2)
{
uint8_t* px = src + 2 * ofst[i];
*(dst++) = m[0] * px[0] + m[1] * px[2];
*(dst++) = m[0] * px[1] + m[1] * px[3];
}
srccn[0] = (src + 2 * ofst[dst_width - 1])[0]; srccn[1] = (src + 2 * ofst[dst_width - 1])[1]; srccn[2] = (src + 2 * ofst[dst_width - 1])[0]; srccn[3] = (src + 2 * ofst[dst_width - 1])[1];
srccn[4] = (src + 2 * ofst[dst_width - 1])[0]; srccn[5] = (src + 2 * ofst[dst_width - 1])[1]; srccn[6] = (src + 2 * ofst[dst_width - 1])[0]; srccn[7] = (src + 2 * ofst[dst_width - 1])[1];
v_srccn = v_load((uint16_t*)srccn);
for (; i < dst_width - 3; i += 4, dst += 8) // Points that fall left from src image so became equal to leftmost src point
((ufixedpoint16*)(srccn.w))[0] = (src + 2 * ofst[dst_width - 1])[0]; ((ufixedpoint16*)(srccn.w))[1] = (src + 2 * ofst[dst_width - 1])[1];
#if CV_SIMD
v_srccn = v_reinterpret_as_u16(vx_setall_u32(srccn.d));
for (; i <= dst_width - VECSZ/2; i += VECSZ/2, dst += VECSZ) // Points that fall left from src image so became equal to leftmost src point
{
v_store((uint16_t*)dst, v_srccn);
}
vx_cleanup();
#endif
for (; i < dst_width; i++)
{
*(dst++) = srccn[0];
*(dst++) = srccn[1];
*(dst++) = ((ufixedpoint16*)(srccn.w))[0];
*(dst++) = ((ufixedpoint16*)(srccn.w))[1];
}
}
template <>
void hlineResizeCn<uint8_t, ufixedpoint16, 2, true, 4>(uint8_t* src, int, int *ofst, ufixedpoint16* m, ufixedpoint16* dst, int dst_min, int dst_max, int dst_width)
{
int i = 0;
ufixedpoint16 srccn[8] = { src[0], src[1], src[2], src[3], src[0], src[1], src[2], src[3] };
v_uint16x8 v_srccn = v_load((uint16_t*)srccn);
for (; i < dst_min - 1; i += 2, m += 4, dst += 8) // Points that fall left from src image so became equal to leftmost src point
union {
uint64_t q;
uint16_t w[4];
} srccn;
((ufixedpoint16*)(srccn.w))[0] = src[0];
((ufixedpoint16*)(srccn.w))[1] = src[1];
((ufixedpoint16*)(srccn.w))[2] = src[2];
((ufixedpoint16*)(srccn.w))[3] = src[3];
#if CV_SIMD
const int VECSZ = v_uint16::nlanes;
v_uint16 v_srccn = v_reinterpret_as_u16(vx_setall_u64(srccn.q));
for (; i <= dst_min - VECSZ/4; i += VECSZ/4, m += VECSZ/2, dst += VECSZ) // Points that fall left from src image so became equal to leftmost src point
{
v_store((uint16_t*)dst, v_srccn);
}
#endif
if (i < dst_min) // Points that fall left from src image so became equal to leftmost src point
{
*(dst++) = srccn[0];
*(dst++) = srccn[1];
*(dst++) = srccn[2];
*(dst++) = srccn[3];
*(dst++) = ((ufixedpoint16*)(srccn.w))[0];
*(dst++) = ((ufixedpoint16*)(srccn.w))[1];
*(dst++) = ((ufixedpoint16*)(srccn.w))[2];
*(dst++) = ((ufixedpoint16*)(srccn.w))[3];
i++; m += 2;
}
for (; i < dst_max - 1 && ofst[i + 1] + 3 <= ofst[dst_width - 1]; i += 2, m += 4, dst += 8)
#if CV_SIMD
for (; i <= dst_max - VECSZ/2; i += VECSZ/2, m += VECSZ, dst += 2*VECSZ)
{
v_int16x8 v_src01 = v_reinterpret_as_s16(v_load_expand(src + 4 * ofst[i ]));
v_int16x8 v_src23 = v_reinterpret_as_s16(v_load_expand(src + 4 * ofst[i + 1]));
v_uint16 v_src0, v_src1, v_src2, v_src3;
v_load_indexed4(src, ofst + i, v_src0, v_src1);
v_load_indexed4(src, ofst + i + VECSZ/4, v_src2, v_src3);
v_int16x8 v_tmp0, v_tmp1;
v_recombine(v_src01, v_src23, v_tmp0, v_tmp1);
v_zip(v_tmp0, v_tmp1, v_src01, v_src23);
v_uint32 v_mul0, v_mul1, v_mul2, v_mul3, v_tmp;
v_mul0 = vx_load((uint32_t*)m);//AaBbCcDd
v_zip(v_mul0, v_mul0, v_mul3, v_tmp );//AaAaBbBb CcCcDdDd
v_zip(v_mul3, v_mul3, v_mul0, v_mul1);//AaAaAaAa BbBbBbBb
v_zip(v_tmp , v_tmp , v_mul2, v_mul3);//CcCcCcCc DdDdDdDd
v_int16x8 v_mul01 = v_reinterpret_as_s16(v_setall_u32(((uint32_t*)m)[0]));//AaAaAaAa
v_int16x8 v_mul23 = v_reinterpret_as_s16(v_setall_u32(((uint32_t*)m)[1]));//BbBbBbBb
v_uint32x4 v_res0 = v_reinterpret_as_u32(v_dotprod(v_src01, v_mul01));
v_uint32x4 v_res1 = v_reinterpret_as_u32(v_dotprod(v_src23, v_mul23));
v_store((uint16_t*)dst, v_pack(v_res0, v_res1));//AB1AB2CD1CD2
v_uint32 v_res0 = v_reinterpret_as_u32(v_dotprod(v_reinterpret_as_s16(v_src0), v_reinterpret_as_s16(v_mul0)));
v_uint32 v_res1 = v_reinterpret_as_u32(v_dotprod(v_reinterpret_as_s16(v_src1), v_reinterpret_as_s16(v_mul1)));
v_uint32 v_res2 = v_reinterpret_as_u32(v_dotprod(v_reinterpret_as_s16(v_src2), v_reinterpret_as_s16(v_mul2)));
v_uint32 v_res3 = v_reinterpret_as_u32(v_dotprod(v_reinterpret_as_s16(v_src3), v_reinterpret_as_s16(v_mul3)));
v_store((uint16_t*)dst , v_pack(v_res0, v_res1));
v_store((uint16_t*)dst + VECSZ, v_pack(v_res2, v_res3));
}
#endif
for (; i < dst_max; i += 1, m += 2)
{
uint8_t* px = src + 4 * ofst[i];
@ -483,19 +653,22 @@ void hlineResizeCn<uint8_t, ufixedpoint16, 2, true, 4>(uint8_t* src, int, int *o
*(dst++) = m[0] * px[2] + m[1] * px[6];
*(dst++) = m[0] * px[3] + m[1] * px[7];
}
srccn[0] = (src + 4 * ofst[dst_width - 1])[0]; srccn[1] = (src + 4 * ofst[dst_width - 1])[1]; srccn[2] = (src + 4 * ofst[dst_width - 1])[2]; srccn[3] = (src + 4 * ofst[dst_width - 1])[3];
srccn[4] = (src + 4 * ofst[dst_width - 1])[0]; srccn[5] = (src + 4 * ofst[dst_width - 1])[1]; srccn[6] = (src + 4 * ofst[dst_width - 1])[2]; srccn[7] = (src + 4 * ofst[dst_width - 1])[3];
v_srccn = v_load((uint16_t*)srccn);
for (; i < dst_width - 1; i += 2, dst += 8) // Points that fall right from src image so became equal to rightmost src point
((ufixedpoint16*)(srccn.w))[0] = (src + 4 * ofst[dst_width - 1])[0]; ((ufixedpoint16*)(srccn.w))[1] = (src + 4 * ofst[dst_width - 1])[1];
((ufixedpoint16*)(srccn.w))[2] = (src + 4 * ofst[dst_width - 1])[2]; ((ufixedpoint16*)(srccn.w))[3] = (src + 4 * ofst[dst_width - 1])[3];
#if CV_SIMD
v_srccn = v_reinterpret_as_u16(vx_setall_u64(srccn.q));
for (; i <= dst_width - VECSZ/4; i += VECSZ/4, dst += VECSZ) // Points that fall right from src image so became equal to rightmost src point
{
v_store((uint16_t*)dst, v_srccn);
}
vx_cleanup();
#endif
if (i < dst_width)
{
*(dst++) = srccn[0];
*(dst++) = srccn[1];
*(dst++) = srccn[2];
*(dst++) = srccn[3];
*(dst++) = ((ufixedpoint16*)(srccn.w))[0];
*(dst++) = ((ufixedpoint16*)(srccn.w))[1];
*(dst++) = ((ufixedpoint16*)(srccn.w))[2];
*(dst++) = ((ufixedpoint16*)(srccn.w))[3];
}
}
template <>
@ -503,40 +676,42 @@ void hlineResizeCn<uint16_t, ufixedpoint32, 2, true, 1>(uint16_t* src, int, int
{
int i = 0;
ufixedpoint32 src_0(src[0]);
v_uint32x4 v_src_0 = v_setall_u32(*((uint32_t*)&src_0));
for (; i < dst_min - 3; i += 4, m += 8, dst += 4) // Points that fall left from src image so became equal to leftmost src point
#if CV_SIMD
const int VECSZ = v_uint32::nlanes;
v_uint32 v_src_0 = vx_setall_u32(*((uint32_t*)&src_0));
for (; i <= dst_min - VECSZ; i += VECSZ, m += 2*VECSZ, dst += VECSZ) // Points that fall left from src image so became equal to leftmost src point
{
v_store((uint32_t*)dst, v_src_0);
}
#endif
for (; i < dst_min; i++, m += 2)
{
*(dst++) = src_0;
}
for (; i < dst_max - 3 && ofst[i + 3] + 8 <= ofst[dst_width - 1]; i += 4, m += 8, dst += 4)
#if CV_SIMD
for (; i <= dst_max - VECSZ; i += VECSZ, m += 2*VECSZ, dst += VECSZ)
{
v_uint32x4 v_src0 = v_combine_low(v_load_expand(src + ofst[i]), v_load_expand(src + ofst[i + 1]));
v_uint32x4 v_mul0 = v_load((uint32_t*)m);
v_uint32x4 v_src1 = v_combine_low(v_load_expand(src + ofst[i + 2]), v_load_expand(src + ofst[i + 3]));
v_uint32x4 v_mul1 = v_load((uint32_t*)m + 4);
v_uint32x4 v_res0 = v_src0 * v_mul0;//a1a2b1b2
v_uint32x4 v_res1 = v_src1 * v_mul1;//c1c2d1d2
v_uint32x4 v_tmp0, v_tmp1;
v_recombine(v_res0, v_res1, v_tmp0, v_tmp1);//a1a2c1c2 b1b2d1d2
v_zip(v_tmp0, v_tmp1, v_res0, v_res1);//a1b1a2b2 c1d1c2d2
v_recombine(v_res0, v_res1, v_tmp0, v_tmp1);//a1b1c1d1 a2b2c2d2
v_store((uint32_t*)dst, v_tmp0 + v_tmp1);//abcd
v_uint32 v_src0, v_src1;
v_load_indexed_deinterleave(src, ofst + i, v_src0, v_src1);
v_uint32 v_mul0, v_mul1;
v_load_deinterleave((uint32_t*)m, v_mul0, v_mul1);
v_store((uint32_t*)dst, v_src0 * v_mul0 + v_src1 * v_mul1);//abcd
}
#endif
for (; i < dst_max; i += 1, m += 2)
{
uint16_t* px = src + ofst[i];
*(dst++) = m[0] * px[0] + m[1] * px[1];
}
src_0 = (src + ofst[dst_width - 1])[0];
v_src_0 = v_setall_u32(*((uint32_t*)&src_0));
for (; i < dst_width - 3; i += 4, dst += 4)
#if CV_SIMD
v_src_0 = vx_setall_u32(*((uint32_t*)&src_0));
for (; i <= dst_width - VECSZ; i += VECSZ, dst += VECSZ)
{
v_store((uint32_t*)dst, v_src_0);
}
vx_cleanup();
#endif
for (; i < dst_width; i++)
{
*(dst++) = src_0;
@ -552,18 +727,22 @@ void vlineSet(FT* src, ET* dst, int dst_width)
template <>
void vlineSet<uint8_t, ufixedpoint16>(ufixedpoint16* src, uint8_t* dst, int dst_width)
{
static const v_uint16x8 v_fixedRound = v_setall_u16((uint16_t)((1U << 8) >> 1));
int i = 0;
for (; i < dst_width - 15; i += 16, src += 16, dst += 16)
#if CV_SIMD
const int VECSZ = v_uint8::nlanes;
static const v_uint16 v_fixedRound = vx_setall_u16((uint16_t)((1U << 8) >> 1));
for (; i <= dst_width - VECSZ; i += VECSZ, src += VECSZ, dst += VECSZ)
{
v_uint16x8 v_src0 = v_load((uint16_t*)src);
v_uint16x8 v_src1 = v_load((uint16_t*)src + 8);
v_uint16 v_src0 = vx_load((uint16_t*)src);
v_uint16 v_src1 = vx_load((uint16_t*)src + VECSZ/2);
v_uint16x8 v_res0 = (v_src0 + v_fixedRound) >> 8;
v_uint16x8 v_res1 = (v_src1 + v_fixedRound) >> 8;
v_uint16 v_res0 = (v_src0 + v_fixedRound) >> 8;
v_uint16 v_res1 = (v_src1 + v_fixedRound) >> 8;
v_store(dst, v_pack(v_res0, v_res1));
}
vx_cleanup();
#endif
for (; i < dst_width; i++)
*(dst++) = *(src++);
}
@ -582,36 +761,40 @@ void vlineResize(FT* src, size_t src_step, FT* m, ET* dst, int dst_width)
template <>
void vlineResize<uint8_t, ufixedpoint16, 2>(ufixedpoint16* src, size_t src_step, ufixedpoint16* m, uint8_t* dst, int dst_width)
{
static const v_int32x4 v_fixedRound = v_setall_s32((int32_t)((1 << 16) >> 1));
static const v_int16x8 v_128 = v_reinterpret_as_s16(v_setall_u16((uint16_t)1<<15));
static const v_int8x16 v_128_16 = v_reinterpret_as_s8 (v_setall_u8 ((uint8_t) 1<<7));
int i = 0;
ufixedpoint16* src1 = src + src_step;
v_int16x8 v_mul = v_reinterpret_as_s16(v_setall_u32(((uint32_t*)m)[0]));
for (; i < dst_width - 15; i += 16, src += 16, src1 += 16, dst += 16)
#if CV_SIMD
const int VECSZ = v_uint8::nlanes;
static const v_int32 v_fixedRound = vx_setall_s32((int32_t)((1 << 16) >> 1));
static const v_int16 v_128 = v_reinterpret_as_s16(vx_setall_u16((uint16_t)1<<15));
static const v_int8 v_128_16 = v_reinterpret_as_s8 (vx_setall_u8 ((uint8_t) 1<<7));
v_int16 v_mul = v_reinterpret_as_s16(vx_setall_u32(((uint32_t*)m)[0]));
for (; i <= dst_width - VECSZ; i += VECSZ, src += VECSZ, src1 += VECSZ, dst += VECSZ)
{
v_int16x8 v_src00 = v_load((int16_t*)src);
v_int16x8 v_src10 = v_load((int16_t*)src1);
v_int16x8 v_tmp0, v_tmp1;
v_int16 v_src00 = vx_load((int16_t*)src);
v_int16 v_src10 = vx_load((int16_t*)src1);
v_int16 v_tmp0, v_tmp1;
v_zip(v_add_wrap(v_src00,v_128), v_add_wrap(v_src10,v_128), v_tmp0, v_tmp1);
v_int32x4 v_res0 = v_dotprod(v_tmp0, v_mul);
v_int32x4 v_res1 = v_dotprod(v_tmp1, v_mul);
v_int32 v_res0 = v_dotprod(v_tmp0, v_mul);
v_int32 v_res1 = v_dotprod(v_tmp1, v_mul);
v_int16x8 v_src01 = v_load((int16_t*)src + 8);
v_int16x8 v_src11 = v_load((int16_t*)src1 + 8);
v_int16 v_src01 = vx_load((int16_t*)src + VECSZ/2);
v_int16 v_src11 = vx_load((int16_t*)src1 + VECSZ/2);
v_zip(v_add_wrap(v_src01,v_128), v_add_wrap(v_src11,v_128), v_tmp0, v_tmp1);
v_int32x4 v_res2 = v_dotprod(v_tmp0, v_mul);
v_int32x4 v_res3 = v_dotprod(v_tmp1, v_mul);
v_int32 v_res2 = v_dotprod(v_tmp0, v_mul);
v_int32 v_res3 = v_dotprod(v_tmp1, v_mul);
v_int8x16 v_res = v_pack(v_pack((v_res0 + v_fixedRound) >> 16,
(v_res1 + v_fixedRound) >> 16),
v_pack((v_res2 + v_fixedRound) >> 16,
(v_res3 + v_fixedRound) >> 16));
v_int8 v_res = v_pack(v_pack((v_res0 + v_fixedRound) >> 16,
(v_res1 + v_fixedRound) >> 16),
v_pack((v_res2 + v_fixedRound) >> 16,
(v_res3 + v_fixedRound) >> 16));
v_store(dst, v_reinterpret_as_u8(v_sub_wrap(v_res, v_128_16)));
}
vx_cleanup();
#endif
for (; i < dst_width; i++)
{
*(dst++) = (uint8_t)(*(src++) * m[0] + *(src1++) * m[1]);

View File

@ -407,27 +407,25 @@ void cv::pyrMeanShiftFiltering( InputArray _src, OutputArray _dst,
cv::Size size = src.size();
const uchar* sptr = src.ptr();
int sstep = (int)src.step;
uchar* mask = 0;
int mstep = 0;
uchar* dptr;
int dstep;
float sp = (float)(sp0 / (1 << level));
sp = MAX( sp, 1 );
cv::Mat m;
if( level < max_level )
{
cv::Size size1 = dst_pyramid[level+1].size();
cv::Mat m( size.height, size.width, CV_8UC1, mask0.ptr() );
m = cv::Mat(size.height, size.width, CV_8UC1, mask0.ptr());
dstep = (int)dst_pyramid[level+1].step;
dptr = dst_pyramid[level+1].ptr() + dstep + cn;
mstep = (int)m.step;
mask = m.ptr() + mstep;
//cvResize( dst_pyramid[level+1], dst_pyramid[level], CV_INTER_CUBIC );
cv::pyrUp( dst_pyramid[level+1], dst_pyramid[level], dst_pyramid[level].size() );
m.setTo(cv::Scalar::all(0));
for( i = 1; i < size1.height-1; i++, dptr += dstep - (size1.width-2)*3, mask += mstep*2 )
for( i = 1; i < size1.height-1; i++, dptr += dstep - (size1.width-2)*3)
{
uchar* mask = m.ptr(1 + i * 2);
for( j = 1; j < size1.width-1; j++, dptr += cn )
{
int c0 = dptr[0], c1 = dptr[1], c2 = dptr[2];
@ -437,16 +435,16 @@ void cv::pyrMeanShiftFiltering( InputArray _src, OutputArray _dst,
}
cv::dilate( m, m, cv::Mat() );
mask = m.ptr();
}
dptr = dst_pyramid[level].ptr();
dstep = (int)dst_pyramid[level].step;
for( i = 0; i < size.height; i++, sptr += sstep - size.width*3,
dptr += dstep - size.width*3,
mask += mstep )
dptr += dstep - size.width*3
)
{
uchar* mask = m.empty() ? NULL : m.ptr(i);
for( j = 0; j < size.width; j++, sptr += 3, dptr += 3 )
{
int x0 = j, y0 = i, x1, y1, iter;

View File

@ -1820,22 +1820,13 @@ template <>
void hlineSmooth1N<uint8_t, ufixedpoint16>(const uint8_t* src, int cn, const ufixedpoint16* m, int, ufixedpoint16* dst, int len, int)
{
int lencn = len*cn;
v_uint16x8 v_mul = v_setall_u16(*((uint16_t*)m));
int i = 0;
for (; i <= lencn - 16; i += 16)
{
v_uint8x16 v_src = v_load(src + i);
v_uint16x8 v_tmp0, v_tmp1;
v_expand(v_src, v_tmp0, v_tmp1);
v_store((uint16_t*)dst + i, v_mul*v_tmp0);
v_store((uint16_t*)dst + i + 8, v_mul*v_tmp1);
}
if (i <= lencn - 8)
{
v_uint16x8 v_src = v_load_expand(src + i);
v_store((uint16_t*)dst + i, v_mul*v_src);
i += 8;
}
#if CV_SIMD
const int VECSZ = v_uint16::nlanes;
v_uint16 v_mul = vx_setall_u16(*((uint16_t*)m));
for (; i <= lencn - VECSZ; i += VECSZ)
v_store((uint16_t*)dst + i, v_mul*vx_load_expand(src + i));
#endif
for (; i < lencn; i++)
dst[i] = m[0] * src[i];
}
@ -1850,20 +1841,11 @@ void hlineSmooth1N1<uint8_t, ufixedpoint16>(const uint8_t* src, int cn, const uf
{
int lencn = len*cn;
int i = 0;
for (; i <= lencn - 16; i += 16)
{
v_uint8x16 v_src = v_load(src + i);
v_uint16x8 v_tmp0, v_tmp1;
v_expand(v_src, v_tmp0, v_tmp1);
v_store((uint16_t*)dst + i, v_shl<8>(v_tmp0));
v_store((uint16_t*)dst + i + 8, v_shl<8>(v_tmp1));
}
if (i <= lencn - 8)
{
v_uint16x8 v_src = v_load_expand(src + i);
v_store((uint16_t*)dst + i, v_shl<8>(v_src));
i += 8;
}
#if CV_SIMD
const int VECSZ = v_uint16::nlanes;
for (; i <= lencn - VECSZ; i += VECSZ)
v_store((uint16_t*)dst + i, v_shl<8>(vx_load_expand(src + i)));
#endif
for (; i < lencn; i++)
dst[i] = src[i];
}
@ -1926,18 +1908,15 @@ void hlineSmooth3N<uint8_t, ufixedpoint16>(const uint8_t* src, int cn, const ufi
src += cn; dst += cn;
int i = cn, lencn = (len - 1)*cn;
v_uint16x8 v_mul0 = v_setall_u16(*((uint16_t*)m));
v_uint16x8 v_mul1 = v_setall_u16(*((uint16_t*)(m + 1)));
v_uint16x8 v_mul2 = v_setall_u16(*((uint16_t*)(m + 2)));
for (; i <= lencn - 16; i += 16, src += 16, dst += 16)
{
v_uint16x8 v_src00, v_src01, v_src10, v_src11, v_src20, v_src21;
v_expand(v_load(src - cn), v_src00, v_src01);
v_expand(v_load(src), v_src10, v_src11);
v_expand(v_load(src + cn), v_src20, v_src21);
v_store((uint16_t*)dst, v_src00 * v_mul0 + v_src10 * v_mul1 + v_src20 * v_mul2);
v_store((uint16_t*)dst + 8, v_src01 * v_mul0 + v_src11 * v_mul1 + v_src21 * v_mul2);
}
#if CV_SIMD
const uint16_t* _m = (const uint16_t*)m;
const int VECSZ = v_uint16::nlanes;
v_uint16 v_mul0 = vx_setall_u16(_m[0]);
v_uint16 v_mul1 = vx_setall_u16(_m[1]);
v_uint16 v_mul2 = vx_setall_u16(_m[2]);
for (; i <= lencn - VECSZ; i += VECSZ, src += VECSZ, dst += VECSZ)
v_store((uint16_t*)dst, vx_load_expand(src - cn) * v_mul0 + vx_load_expand(src) * v_mul1 + vx_load_expand(src + cn) * v_mul2);
#endif
for (; i < lencn; i++, src++, dst++)
*dst = m[0] * src[-cn] + m[1] * src[0] + m[2] * src[cn];
@ -2017,15 +1996,11 @@ void hlineSmooth3N121<uint8_t, ufixedpoint16>(const uint8_t* src, int cn, const
src += cn; dst += cn;
int i = cn, lencn = (len - 1)*cn;
for (; i <= lencn - 16; i += 16, src += 16, dst += 16)
{
v_uint16x8 v_src00, v_src01, v_src10, v_src11, v_src20, v_src21;
v_expand(v_load(src - cn), v_src00, v_src01);
v_expand(v_load(src), v_src10, v_src11);
v_expand(v_load(src + cn), v_src20, v_src21);
v_store((uint16_t*)dst, (v_src00 + v_src20 + (v_src10 << 1)) << 6);
v_store((uint16_t*)dst + 8, (v_src01 + v_src21 + (v_src11 << 1)) << 6);
}
#if CV_SIMD
const int VECSZ = v_uint16::nlanes;
for (; i <= lencn - VECSZ; i += VECSZ, src += VECSZ, dst += VECSZ)
v_store((uint16_t*)dst, (vx_load_expand(src - cn) + vx_load_expand(src + cn) + (vx_load_expand(src) << 1)) << 6);
#endif
for (; i < lencn; i++, src++, dst++)
*((uint16_t*)dst) = (uint16_t(src[-cn]) + uint16_t(src[cn]) + (uint16_t(src[0]) << 1)) << 6;
@ -2108,17 +2083,14 @@ void hlineSmooth3Naba<uint8_t, ufixedpoint16>(const uint8_t* src, int cn, const
src += cn; dst += cn;
int i = cn, lencn = (len - 1)*cn;
v_uint16x8 v_mul0 = v_setall_u16(*((uint16_t*)m));
v_uint16x8 v_mul1 = v_setall_u16(*((uint16_t*)m+1));
for (; i <= lencn - 16; i += 16, src += 16, dst += 16)
{
v_uint16x8 v_src00, v_src01, v_src10, v_src11, v_src20, v_src21;
v_expand(v_load(src - cn), v_src00, v_src01);
v_expand(v_load(src), v_src10, v_src11);
v_expand(v_load(src + cn), v_src20, v_src21);
v_store((uint16_t*)dst, (v_src00 + v_src20) * v_mul0 + v_src10 * v_mul1);
v_store((uint16_t*)dst + 8, (v_src01 + v_src21) * v_mul0 + v_src11 * v_mul1);
}
#if CV_SIMD
const uint16_t* _m = (const uint16_t*)m;
const int VECSZ = v_uint16::nlanes;
v_uint16 v_mul0 = vx_setall_u16(_m[0]);
v_uint16 v_mul1 = vx_setall_u16(_m[1]);
for (; i <= lencn - VECSZ; i += VECSZ, src += VECSZ, dst += VECSZ)
v_store((uint16_t*)dst, (vx_load_expand(src - cn) + vx_load_expand(src + cn)) * v_mul0 + vx_load_expand(src) * v_mul1);
#endif
for (; i < lencn; i++, src++, dst++)
*((uint16_t*)dst) = ((uint16_t*)m)[1] * src[0] + ((uint16_t*)m)[0] * ((uint16_t)(src[-cn]) + (uint16_t)(src[cn]));
@ -2304,22 +2276,17 @@ void hlineSmooth5N<uint8_t, ufixedpoint16>(const uint8_t* src, int cn, const ufi
src += 2 * cn; dst += 2 * cn;
int i = 2*cn, lencn = (len - 2)*cn;
v_uint16x8 v_mul0 = v_setall_u16(*((uint16_t*)m));
v_uint16x8 v_mul1 = v_setall_u16(*((uint16_t*)(m + 1)));
v_uint16x8 v_mul2 = v_setall_u16(*((uint16_t*)(m + 2)));
v_uint16x8 v_mul3 = v_setall_u16(*((uint16_t*)(m + 3)));
v_uint16x8 v_mul4 = v_setall_u16(*((uint16_t*)(m + 4)));
for (; i <= lencn - 16; i += 16, src += 16, dst += 16)
{
v_uint16x8 v_src00, v_src01, v_src10, v_src11, v_src20, v_src21, v_src30, v_src31, v_src40, v_src41;
v_expand(v_load(src - 2*cn), v_src00, v_src01);
v_expand(v_load(src - cn), v_src10, v_src11);
v_expand(v_load(src), v_src20, v_src21);
v_expand(v_load(src + cn), v_src30, v_src31);
v_expand(v_load(src + 2*cn), v_src40, v_src41);
v_store((uint16_t*)dst, v_src00 * v_mul0 + v_src10 * v_mul1 + v_src20 * v_mul2 + v_src30 * v_mul3 + v_src40 * v_mul4);
v_store((uint16_t*)dst + 8, v_src01 * v_mul0 + v_src11 * v_mul1 + v_src21 * v_mul2 + v_src31 * v_mul3 + v_src41 * v_mul4);
}
#if CV_SIMD
const uint16_t* _m = (const uint16_t*)m;
const int VECSZ = v_uint16::nlanes;
v_uint16 v_mul0 = vx_setall_u16(_m[0]);
v_uint16 v_mul1 = vx_setall_u16(_m[1]);
v_uint16 v_mul2 = vx_setall_u16(_m[2]);
v_uint16 v_mul3 = vx_setall_u16(_m[3]);
v_uint16 v_mul4 = vx_setall_u16(_m[4]);
for (; i <= lencn - VECSZ; i += VECSZ, src += VECSZ, dst += VECSZ)
v_store((uint16_t*)dst, vx_load_expand(src - 2 * cn) * v_mul0 + vx_load_expand(src - cn) * v_mul1 + vx_load_expand(src) * v_mul2 + vx_load_expand(src + cn) * v_mul3 + vx_load_expand(src + 2 * cn) * v_mul4);
#endif
for (; i < lencn; i++, src++, dst++)
*dst = m[0] * src[-2*cn] + m[1] * src[-cn] + m[2] * src[0] + m[3] * src[cn] + m[4] * src[2*cn];
@ -2517,18 +2484,12 @@ void hlineSmooth5N14641<uint8_t, ufixedpoint16>(const uint8_t* src, int cn, cons
src += 2 * cn; dst += 2 * cn;
int i = 2 * cn, lencn = (len - 2)*cn;
v_uint16x8 v_6 = v_setall_u16(6);
for (; i <= lencn - 16; i += 16, src += 16, dst += 16)
{
v_uint16x8 v_src00, v_src01, v_src10, v_src11, v_src20, v_src21, v_src30, v_src31, v_src40, v_src41;
v_expand(v_load(src - 2*cn), v_src00, v_src01);
v_expand(v_load(src - cn), v_src10, v_src11);
v_expand(v_load(src), v_src20, v_src21);
v_expand(v_load(src + cn), v_src30, v_src31);
v_expand(v_load(src + 2*cn), v_src40, v_src41);
v_store((uint16_t*)dst, (v_src20 * v_6 + ((v_src10 + v_src30) << 2) + v_src00 + v_src40) << 4);
v_store((uint16_t*)dst + 8, (v_src21 * v_6 + ((v_src11 + v_src31) << 2) + v_src01 + v_src41) << 4);
}
#if CV_SIMD
const int VECSZ = v_uint16::nlanes;
v_uint16 v_6 = vx_setall_u16(6);
for (; i <= lencn - VECSZ; i += VECSZ, src += VECSZ, dst += VECSZ)
v_store((uint16_t*)dst, (vx_load_expand(src) * v_6 + ((vx_load_expand(src - cn) + vx_load_expand(src + cn)) << 2) + vx_load_expand(src - 2 * cn) + vx_load_expand(src + 2 * cn)) << 4);
#endif
for (; i < lencn; i++, src++, dst++)
*((uint16_t*)dst) = (uint16_t(src[0]) * 6 + ((uint16_t(src[-cn]) + uint16_t(src[cn])) << 2) + uint16_t(src[-2 * cn]) + uint16_t(src[2 * cn])) << 4;
@ -2721,20 +2682,15 @@ void hlineSmooth5Nabcba<uint8_t, ufixedpoint16>(const uint8_t* src, int cn, cons
src += 2 * cn; dst += 2 * cn;
int i = 2 * cn, lencn = (len - 2)*cn;
v_uint16x8 v_mul0 = v_setall_u16(*((uint16_t*)m));
v_uint16x8 v_mul1 = v_setall_u16(*((uint16_t*)(m + 1)));
v_uint16x8 v_mul2 = v_setall_u16(*((uint16_t*)(m + 2)));
for (; i <= lencn - 16; i += 16, src += 16, dst += 16)
{
v_uint16x8 v_src00, v_src01, v_src10, v_src11, v_src20, v_src21, v_src30, v_src31, v_src40, v_src41;
v_expand(v_load(src - 2 * cn), v_src00, v_src01);
v_expand(v_load(src - cn), v_src10, v_src11);
v_expand(v_load(src), v_src20, v_src21);
v_expand(v_load(src + cn), v_src30, v_src31);
v_expand(v_load(src + 2 * cn), v_src40, v_src41);
v_store((uint16_t*)dst, (v_src00 + v_src40) * v_mul0 + (v_src10 + v_src30)* v_mul1 + v_src20 * v_mul2);
v_store((uint16_t*)dst + 8, (v_src01 + v_src41) * v_mul0 + (v_src11 + v_src31) * v_mul1 + v_src21 * v_mul2);
}
#if CV_SIMD
const uint16_t* _m = (const uint16_t*)m;
const int VECSZ = v_uint16::nlanes;
v_uint16 v_mul0 = vx_setall_u16(_m[0]);
v_uint16 v_mul1 = vx_setall_u16(_m[1]);
v_uint16 v_mul2 = vx_setall_u16(_m[2]);
for (; i <= lencn - VECSZ; i += VECSZ, src += VECSZ, dst += VECSZ)
v_store((uint16_t*)dst, (vx_load_expand(src - 2 * cn) + vx_load_expand(src + 2 * cn)) * v_mul0 + (vx_load_expand(src - cn) + vx_load_expand(src + cn))* v_mul1 + vx_load_expand(src) * v_mul2);
#endif
for (; i < lencn; i++, src++, dst++)
*((uint16_t*)dst) = ((uint16_t*)m)[0] * ((uint16_t)(src[-2 * cn]) + (uint16_t)(src[2 * cn])) + ((uint16_t*)m)[1] * ((uint16_t)(src[-cn]) + (uint16_t)(src[cn])) + ((uint16_t*)m)[2] * src[0];
@ -2844,23 +2800,16 @@ void hlineSmooth<uint8_t, ufixedpoint16>(const uint8_t* src, int cn, const ufixe
}
i *= cn;
int lencn = (len - post_shift + 1)*cn;
for (; i <= lencn - 16; i+=16, src+=16, dst+=16)
#if CV_SIMD
const int VECSZ = v_uint16::nlanes;
for (; i <= lencn - VECSZ; i+=VECSZ, src+=VECSZ, dst+=VECSZ)
{
v_uint16x8 v_src0, v_src1;
v_uint16x8 v_mul = v_setall_u16(*((uint16_t*)m));
v_expand(v_load(src), v_src0, v_src1);
v_uint16x8 v_res0 = v_src0 * v_mul;
v_uint16x8 v_res1 = v_src1 * v_mul;
v_uint16 v_res0 = vx_load_expand(src) * vx_setall_u16(*((uint16_t*)m));
for (int j = 1; j < n; j++)
{
v_mul = v_setall_u16(*((uint16_t*)(m + j)));
v_expand(v_load(src + j * cn), v_src0, v_src1);
v_res0 += v_src0 * v_mul;
v_res1 += v_src1 * v_mul;
}
v_res0 += vx_load_expand(src + j * cn) * vx_setall_u16(*((uint16_t*)(m + j)));
v_store((uint16_t*)dst, v_res0);
v_store((uint16_t*)dst+8, v_res1);
}
#endif
for (; i < lencn; i++, src++, dst++)
{
*dst = m[0] * src[0];
@ -2970,26 +2919,16 @@ void hlineSmoothONa_yzy_a<uint8_t, ufixedpoint16>(const uint8_t* src, int cn, co
}
i *= cn;
int lencn = (len - post_shift + 1)*cn;
for (; i <= lencn - 16; i += 16, src += 16, dst += 16)
#if CV_SIMD
const int VECSZ = v_uint16::nlanes;
for (; i <= lencn - VECSZ; i += VECSZ, src += VECSZ, dst += VECSZ)
{
v_uint16x8 v_src00, v_src01, v_srcN00, v_srcN01;
v_uint16x8 v_mul = v_setall_u16(*((uint16_t*)(m + pre_shift)));
v_expand(v_load(src + pre_shift * cn), v_src00, v_src01);
v_uint16x8 v_res0 = v_src00 * v_mul;
v_uint16x8 v_res1 = v_src01 * v_mul;
v_uint16 v_res0 = vx_load_expand(src + pre_shift * cn) * vx_setall_u16(*((uint16_t*)(m + pre_shift)));
for (int j = 0; j < pre_shift; j ++)
{
v_mul = v_setall_u16(*((uint16_t*)(m + j)));
v_expand(v_load(src + j * cn), v_src00, v_src01);
v_expand(v_load(src + (n - 1 - j)*cn), v_srcN00, v_srcN01);
v_res0 += (v_src00 + v_srcN00) * v_mul;
v_res1 += (v_src01 + v_srcN01) * v_mul;
}
v_res0 += (vx_load_expand(src + j * cn) + vx_load_expand(src + (n - 1 - j)*cn)) * vx_setall_u16(*((uint16_t*)(m + j)));
v_store((uint16_t*)dst, v_res0);
v_store((uint16_t*)dst + 8, v_res1);
}
#endif
for (; i < lencn; i++, src++, dst++)
{
*dst = m[pre_shift] * src[pre_shift*cn];
@ -3025,28 +2964,13 @@ template <>
void vlineSmooth1N<uint8_t, ufixedpoint16>(const ufixedpoint16* const * src, const ufixedpoint16* m, int, uint8_t* dst, int len)
{
const ufixedpoint16* src0 = src[0];
v_uint16x8 v_mul = v_setall_u16(*((uint16_t*)m));
#if CV_SSE2
v_uint16x8 v_1 = v_setall_u16(1);
v_mul += v_mul;
#endif
int i = 0;
for (; i <= len - 16; i += 16)
{
v_uint16x8 v_src0 = v_load((uint16_t*)src0 + i);
v_uint16x8 v_src1 = v_load((uint16_t*)src0 + i + 8);
v_uint8x16 v_res;
#if CV_SSE2
v_res.val = _mm_packus_epi16(_mm_srli_epi16(_mm_add_epi16(v_1.val, _mm_mulhi_epu16(v_src0.val, v_mul.val)),1),
_mm_srli_epi16(_mm_add_epi16(v_1.val, _mm_mulhi_epu16(v_src1.val, v_mul.val)),1));
#else
v_uint32x4 v_res0, v_res1, v_res2, v_res3;
v_mul_expand(v_src0, v_mul, v_res0, v_res1);
v_mul_expand(v_src1, v_mul, v_res2, v_res3);
v_res = v_pack(v_rshr_pack<16>(v_res0, v_res1), v_rshr_pack<16>(v_res2, v_res3));
#if CV_SIMD
const int VECSZ = v_uint16::nlanes;
v_uint16 v_mul = vx_setall_u16(*((uint16_t*)m)<<1);
for (; i <= len - VECSZ; i += VECSZ)
v_rshr_pack_store<1>(dst + i, v_mul_hi(vx_load((uint16_t*)src0 + i), v_mul));
#endif
v_store(dst + i, v_res);
}
for (; i < len; i++)
dst[i] = m[0] * src0[i];
}
@ -3062,8 +2986,11 @@ void vlineSmooth1N1<uint8_t, ufixedpoint16>(const ufixedpoint16* const * src, co
{
const ufixedpoint16* src0 = src[0];
int i = 0;
for (; i <= len - 8; i += 8)
v_rshr_pack_store<8>(dst + i, v_load((uint16_t*)(src0 + i)));
#if CV_SIMD
const int VECSZ = v_uint16::nlanes;
for (; i <= len - VECSZ; i += VECSZ)
v_rshr_pack_store<8>(dst + i, vx_load((uint16_t*)(src0 + i)));
#endif
for (; i < len; i++)
dst[i] = src0[i];
}
@ -3077,46 +3004,51 @@ template <>
void vlineSmooth3N<uint8_t, ufixedpoint16>(const ufixedpoint16* const * src, const ufixedpoint16* m, int, uint8_t* dst, int len)
{
int i = 0;
static const v_int16x8 v_128 = v_reinterpret_as_s16(v_setall_u16((uint16_t)1 << 15));
v_int32x4 v_128_4 = v_setall_s32(128 << 16);
if (len > 7)
#if CV_SIMD
static const v_int16 v_128 = v_reinterpret_as_s16(vx_setall_u16((uint16_t)1 << 15));
v_int32 v_128_4 = vx_setall_s32(128 << 16);
const int VECSZ = v_uint16::nlanes;
if (len >= VECSZ)
{
ufixedpoint32 val[] = { (m[0] + m[1] + m[2]) * ufixedpoint16((uint8_t)128) };
v_128_4 = v_setall_s32(*((int32_t*)val));
v_128_4 = vx_setall_s32(*((int32_t*)val));
}
v_int16x8 v_mul01 = v_reinterpret_as_s16(v_setall_u32(*((uint32_t*)m)));
v_int16x8 v_mul2 = v_reinterpret_as_s16(v_setall_u16(*((uint16_t*)(m + 2))));
for (; i <= len - 32; i += 32)
v_int16 v_mul01 = v_reinterpret_as_s16(vx_setall_u32(*((uint32_t*)m)));
v_int16 v_mul2 = v_reinterpret_as_s16(vx_setall_u16(*((uint16_t*)(m + 2))));
for (; i <= len - 4*VECSZ; i += 4*VECSZ)
{
v_int16x8 v_src00, v_src10, v_src01, v_src11, v_src02, v_src12, v_src03, v_src13;
v_int16x8 v_tmp0, v_tmp1;
v_int16 v_src00, v_src10, v_src01, v_src11, v_src02, v_src12, v_src03, v_src13;
v_int16 v_tmp0, v_tmp1;
v_src00 = v_load((int16_t*)(src[0]) + i);
v_src01 = v_load((int16_t*)(src[0]) + i + 8);
v_src02 = v_load((int16_t*)(src[0]) + i + 16);
v_src03 = v_load((int16_t*)(src[0]) + i + 24);
v_src10 = v_load((int16_t*)(src[1]) + i);
v_src11 = v_load((int16_t*)(src[1]) + i + 8);
v_src12 = v_load((int16_t*)(src[1]) + i + 16);
v_src13 = v_load((int16_t*)(src[1]) + i + 24);
const int16_t* src0 = (const int16_t*)src[0] + i;
const int16_t* src1 = (const int16_t*)src[1] + i;
v_src00 = vx_load(src0);
v_src01 = vx_load(src0 + VECSZ);
v_src02 = vx_load(src0 + 2*VECSZ);
v_src03 = vx_load(src0 + 3*VECSZ);
v_src10 = vx_load(src1);
v_src11 = vx_load(src1 + VECSZ);
v_src12 = vx_load(src1 + 2*VECSZ);
v_src13 = vx_load(src1 + 3*VECSZ);
v_zip(v_add_wrap(v_src00, v_128), v_add_wrap(v_src10, v_128), v_tmp0, v_tmp1);
v_int32x4 v_res0 = v_dotprod(v_tmp0, v_mul01);
v_int32x4 v_res1 = v_dotprod(v_tmp1, v_mul01);
v_int32 v_res0 = v_dotprod(v_tmp0, v_mul01);
v_int32 v_res1 = v_dotprod(v_tmp1, v_mul01);
v_zip(v_add_wrap(v_src01, v_128), v_add_wrap(v_src11, v_128), v_tmp0, v_tmp1);
v_int32x4 v_res2 = v_dotprod(v_tmp0, v_mul01);
v_int32x4 v_res3 = v_dotprod(v_tmp1, v_mul01);
v_int32 v_res2 = v_dotprod(v_tmp0, v_mul01);
v_int32 v_res3 = v_dotprod(v_tmp1, v_mul01);
v_zip(v_add_wrap(v_src02, v_128), v_add_wrap(v_src12, v_128), v_tmp0, v_tmp1);
v_int32x4 v_res4 = v_dotprod(v_tmp0, v_mul01);
v_int32x4 v_res5 = v_dotprod(v_tmp1, v_mul01);
v_int32 v_res4 = v_dotprod(v_tmp0, v_mul01);
v_int32 v_res5 = v_dotprod(v_tmp1, v_mul01);
v_zip(v_add_wrap(v_src03, v_128), v_add_wrap(v_src13, v_128), v_tmp0, v_tmp1);
v_int32x4 v_res6 = v_dotprod(v_tmp0, v_mul01);
v_int32x4 v_res7 = v_dotprod(v_tmp1, v_mul01);
v_int32 v_res6 = v_dotprod(v_tmp0, v_mul01);
v_int32 v_res7 = v_dotprod(v_tmp1, v_mul01);
v_int32x4 v_resj0, v_resj1;
v_src00 = v_load((int16_t*)(src[2]) + i);
v_src01 = v_load((int16_t*)(src[2]) + i + 8);
v_src02 = v_load((int16_t*)(src[2]) + i + 16);
v_src03 = v_load((int16_t*)(src[2]) + i + 24);
v_int32 v_resj0, v_resj1;
const int16_t* src2 = (const int16_t*)src[2] + i;
v_src00 = vx_load(src2);
v_src01 = vx_load(src2 + VECSZ);
v_src02 = vx_load(src2 + 2*VECSZ);
v_src03 = vx_load(src2 + 3*VECSZ);
v_mul_expand(v_add_wrap(v_src00, v_128), v_mul2, v_resj0, v_resj1);
v_res0 += v_resj0;
v_res1 += v_resj1;
@ -3139,11 +3071,12 @@ void vlineSmooth3N<uint8_t, ufixedpoint16>(const ufixedpoint16* const * src, con
v_res6 += v_128_4;
v_res7 += v_128_4;
v_store(dst + i , v_pack(v_reinterpret_as_u16(v_rshr_pack<16>(v_res0, v_res1)),
v_reinterpret_as_u16(v_rshr_pack<16>(v_res2, v_res3))));
v_store(dst + i + 16, v_pack(v_reinterpret_as_u16(v_rshr_pack<16>(v_res4, v_res5)),
v_reinterpret_as_u16(v_rshr_pack<16>(v_res6, v_res7))));
v_store(dst + i , v_pack(v_reinterpret_as_u16(v_rshr_pack<16>(v_res0, v_res1)),
v_reinterpret_as_u16(v_rshr_pack<16>(v_res2, v_res3))));
v_store(dst + i + 2*VECSZ, v_pack(v_reinterpret_as_u16(v_rshr_pack<16>(v_res4, v_res5)),
v_reinterpret_as_u16(v_rshr_pack<16>(v_res6, v_res7))));
}
#endif
for (; i < len; i++)
dst[i] = m[0] * src[0][i] + m[1] * src[1][i] + m[2] * src[2][i];
}
@ -3157,18 +3090,21 @@ template <>
void vlineSmooth3N121<uint8_t, ufixedpoint16>(const ufixedpoint16* const * src, const ufixedpoint16*, int, uint8_t* dst, int len)
{
int i = 0;
for (; i <= len - 16; i += 16)
#if CV_SIMD
const int VECSZ = v_uint16::nlanes;
for (; i <= len - 2*VECSZ; i += 2*VECSZ)
{
v_uint32x4 v_src00, v_src01, v_src02, v_src03, v_src10, v_src11, v_src12, v_src13, v_src20, v_src21, v_src22, v_src23;
v_expand(v_load((uint16_t*)(src[0]) + i), v_src00, v_src01);
v_expand(v_load((uint16_t*)(src[0]) + i + 8), v_src02, v_src03);
v_expand(v_load((uint16_t*)(src[1]) + i), v_src10, v_src11);
v_expand(v_load((uint16_t*)(src[1]) + i + 8), v_src12, v_src13);
v_expand(v_load((uint16_t*)(src[2]) + i), v_src20, v_src21);
v_expand(v_load((uint16_t*)(src[2]) + i + 8), v_src22, v_src23);
v_uint32 v_src00, v_src01, v_src02, v_src03, v_src10, v_src11, v_src12, v_src13, v_src20, v_src21, v_src22, v_src23;
v_expand(vx_load((uint16_t*)(src[0]) + i), v_src00, v_src01);
v_expand(vx_load((uint16_t*)(src[0]) + i + VECSZ), v_src02, v_src03);
v_expand(vx_load((uint16_t*)(src[1]) + i), v_src10, v_src11);
v_expand(vx_load((uint16_t*)(src[1]) + i + VECSZ), v_src12, v_src13);
v_expand(vx_load((uint16_t*)(src[2]) + i), v_src20, v_src21);
v_expand(vx_load((uint16_t*)(src[2]) + i + VECSZ), v_src22, v_src23);
v_store(dst + i, v_pack(v_rshr_pack<10>(v_src00 + v_src20 + (v_src10 + v_src10), v_src01 + v_src21 + (v_src11 + v_src11)),
v_rshr_pack<10>(v_src02 + v_src22 + (v_src12 + v_src12), v_src03 + v_src23 + (v_src13 + v_src13))));
}
#endif
for (; i < len; i++)
dst[i] = (((uint32_t)(((uint16_t*)(src[0]))[i]) + (uint32_t)(((uint16_t*)(src[2]))[i]) + ((uint32_t)(((uint16_t*)(src[1]))[i]) << 1)) + (1 << 9)) >> 10;
}
@ -3182,95 +3118,102 @@ template <>
void vlineSmooth5N<uint8_t, ufixedpoint16>(const ufixedpoint16* const * src, const ufixedpoint16* m, int, uint8_t* dst, int len)
{
int i = 0;
static const v_int16x8 v_128 = v_reinterpret_as_s16(v_setall_u16((uint16_t)1 << 15));
v_int32x4 v_128_4 = v_setall_s32(128 << 16);
if (len > 7)
#if CV_SIMD
const int VECSZ = v_uint16::nlanes;
if (len >= 4 * VECSZ)
{
ufixedpoint32 val[] = { (m[0] + m[1] + m[2] + m[3] + m[4]) * ufixedpoint16((uint8_t)128) };
v_128_4 = v_setall_s32(*((int32_t*)val));
}
v_int16x8 v_mul01 = v_reinterpret_as_s16(v_setall_u32(*((uint32_t*)m)));
v_int16x8 v_mul23 = v_reinterpret_as_s16(v_setall_u32(*((uint32_t*)(m + 2))));
v_int16x8 v_mul4 = v_reinterpret_as_s16(v_setall_u16(*((uint16_t*)(m + 4))));
for (; i <= len - 32; i += 32)
{
v_int16x8 v_src00, v_src10, v_src01, v_src11, v_src02, v_src12, v_src03, v_src13;
v_int16x8 v_tmp0, v_tmp1;
v_src00 = v_load((int16_t*)(src[0]) + i);
v_src01 = v_load((int16_t*)(src[0]) + i + 8);
v_src02 = v_load((int16_t*)(src[0]) + i + 16);
v_src03 = v_load((int16_t*)(src[0]) + i + 24);
v_src10 = v_load((int16_t*)(src[1]) + i);
v_src11 = v_load((int16_t*)(src[1]) + i + 8);
v_src12 = v_load((int16_t*)(src[1]) + i + 16);
v_src13 = v_load((int16_t*)(src[1]) + i + 24);
v_zip(v_add_wrap(v_src00, v_128), v_add_wrap(v_src10, v_128), v_tmp0, v_tmp1);
v_int32x4 v_res0 = v_dotprod(v_tmp0, v_mul01);
v_int32x4 v_res1 = v_dotprod(v_tmp1, v_mul01);
v_zip(v_add_wrap(v_src01, v_128), v_add_wrap(v_src11, v_128), v_tmp0, v_tmp1);
v_int32x4 v_res2 = v_dotprod(v_tmp0, v_mul01);
v_int32x4 v_res3 = v_dotprod(v_tmp1, v_mul01);
v_zip(v_add_wrap(v_src02, v_128), v_add_wrap(v_src12, v_128), v_tmp0, v_tmp1);
v_int32x4 v_res4 = v_dotprod(v_tmp0, v_mul01);
v_int32x4 v_res5 = v_dotprod(v_tmp1, v_mul01);
v_zip(v_add_wrap(v_src03, v_128), v_add_wrap(v_src13, v_128), v_tmp0, v_tmp1);
v_int32x4 v_res6 = v_dotprod(v_tmp0, v_mul01);
v_int32x4 v_res7 = v_dotprod(v_tmp1, v_mul01);
v_src00 = v_load((int16_t*)(src[2]) + i);
v_src01 = v_load((int16_t*)(src[2]) + i + 8);
v_src02 = v_load((int16_t*)(src[2]) + i + 16);
v_src03 = v_load((int16_t*)(src[2]) + i + 24);
v_src10 = v_load((int16_t*)(src[3]) + i);
v_src11 = v_load((int16_t*)(src[3]) + i + 8);
v_src12 = v_load((int16_t*)(src[3]) + i + 16);
v_src13 = v_load((int16_t*)(src[3]) + i + 24);
v_zip(v_add_wrap(v_src00, v_128), v_add_wrap(v_src10, v_128), v_tmp0, v_tmp1);
v_res0 += v_dotprod(v_tmp0, v_mul23);
v_res1 += v_dotprod(v_tmp1, v_mul23);
v_zip(v_add_wrap(v_src01, v_128), v_add_wrap(v_src11, v_128), v_tmp0, v_tmp1);
v_res2 += v_dotprod(v_tmp0, v_mul23);
v_res3 += v_dotprod(v_tmp1, v_mul23);
v_zip(v_add_wrap(v_src02, v_128), v_add_wrap(v_src12, v_128), v_tmp0, v_tmp1);
v_res4 += v_dotprod(v_tmp0, v_mul23);
v_res5 += v_dotprod(v_tmp1, v_mul23);
v_zip(v_add_wrap(v_src03, v_128), v_add_wrap(v_src13, v_128), v_tmp0, v_tmp1);
v_res6 += v_dotprod(v_tmp0, v_mul23);
v_res7 += v_dotprod(v_tmp1, v_mul23);
v_int32x4 v_resj0, v_resj1;
v_src00 = v_load((int16_t*)(src[4]) + i);
v_src01 = v_load((int16_t*)(src[4]) + i + 8);
v_src02 = v_load((int16_t*)(src[4]) + i + 16);
v_src03 = v_load((int16_t*)(src[4]) + i + 24);
v_mul_expand(v_add_wrap(v_src00, v_128), v_mul4, v_resj0, v_resj1);
v_res0 += v_resj0;
v_res1 += v_resj1;
v_mul_expand(v_add_wrap(v_src01, v_128), v_mul4, v_resj0, v_resj1);
v_res2 += v_resj0;
v_res3 += v_resj1;
v_mul_expand(v_add_wrap(v_src02, v_128), v_mul4, v_resj0, v_resj1);
v_res4 += v_resj0;
v_res5 += v_resj1;
v_mul_expand(v_add_wrap(v_src03, v_128), v_mul4, v_resj0, v_resj1);
v_res6 += v_resj0;
v_res7 += v_resj1;
v_res0 += v_128_4;
v_res1 += v_128_4;
v_res2 += v_128_4;
v_res3 += v_128_4;
v_res4 += v_128_4;
v_res5 += v_128_4;
v_res6 += v_128_4;
v_res7 += v_128_4;
v_store(dst + i , v_pack(v_reinterpret_as_u16(v_rshr_pack<16>(v_res0, v_res1)),
v_reinterpret_as_u16(v_rshr_pack<16>(v_res2, v_res3))));
v_store(dst + i + 16, v_pack(v_reinterpret_as_u16(v_rshr_pack<16>(v_res4, v_res5)),
v_reinterpret_as_u16(v_rshr_pack<16>(v_res6, v_res7))));
v_int32 v_128_4 = vx_setall_s32(*((int32_t*)val));
static const v_int16 v_128 = v_reinterpret_as_s16(vx_setall_u16((uint16_t)1 << 15));
v_int16 v_mul01 = v_reinterpret_as_s16(vx_setall_u32(*((uint32_t*)m)));
v_int16 v_mul23 = v_reinterpret_as_s16(vx_setall_u32(*((uint32_t*)(m + 2))));
v_int16 v_mul4 = v_reinterpret_as_s16(vx_setall_u16(*((uint16_t*)(m + 4))));
for (; i <= len - 4*VECSZ; i += 4*VECSZ)
{
v_int16 v_src00, v_src10, v_src01, v_src11, v_src02, v_src12, v_src03, v_src13;
v_int16 v_tmp0, v_tmp1;
const int16_t* src0 = (const int16_t*)src[0] + i;
const int16_t* src1 = (const int16_t*)src[1] + i;
v_src00 = vx_load(src0);
v_src01 = vx_load(src0 + VECSZ);
v_src02 = vx_load(src0 + 2*VECSZ);
v_src03 = vx_load(src0 + 3*VECSZ);
v_src10 = vx_load(src1);
v_src11 = vx_load(src1 + VECSZ);
v_src12 = vx_load(src1 + 2*VECSZ);
v_src13 = vx_load(src1 + 3*VECSZ);
v_zip(v_add_wrap(v_src00, v_128), v_add_wrap(v_src10, v_128), v_tmp0, v_tmp1);
v_int32 v_res0 = v_dotprod(v_tmp0, v_mul01);
v_int32 v_res1 = v_dotprod(v_tmp1, v_mul01);
v_zip(v_add_wrap(v_src01, v_128), v_add_wrap(v_src11, v_128), v_tmp0, v_tmp1);
v_int32 v_res2 = v_dotprod(v_tmp0, v_mul01);
v_int32 v_res3 = v_dotprod(v_tmp1, v_mul01);
v_zip(v_add_wrap(v_src02, v_128), v_add_wrap(v_src12, v_128), v_tmp0, v_tmp1);
v_int32 v_res4 = v_dotprod(v_tmp0, v_mul01);
v_int32 v_res5 = v_dotprod(v_tmp1, v_mul01);
v_zip(v_add_wrap(v_src03, v_128), v_add_wrap(v_src13, v_128), v_tmp0, v_tmp1);
v_int32 v_res6 = v_dotprod(v_tmp0, v_mul01);
v_int32 v_res7 = v_dotprod(v_tmp1, v_mul01);
const int16_t* src2 = (const int16_t*)src[2] + i;
const int16_t* src3 = (const int16_t*)src[3] + i;
v_src00 = vx_load(src2);
v_src01 = vx_load(src2 + VECSZ);
v_src02 = vx_load(src2 + 2*VECSZ);
v_src03 = vx_load(src2 + 3*VECSZ);
v_src10 = vx_load(src3);
v_src11 = vx_load(src3 + VECSZ);
v_src12 = vx_load(src3 + 2*VECSZ);
v_src13 = vx_load(src3 + 3*VECSZ);
v_zip(v_add_wrap(v_src00, v_128), v_add_wrap(v_src10, v_128), v_tmp0, v_tmp1);
v_res0 += v_dotprod(v_tmp0, v_mul23);
v_res1 += v_dotprod(v_tmp1, v_mul23);
v_zip(v_add_wrap(v_src01, v_128), v_add_wrap(v_src11, v_128), v_tmp0, v_tmp1);
v_res2 += v_dotprod(v_tmp0, v_mul23);
v_res3 += v_dotprod(v_tmp1, v_mul23);
v_zip(v_add_wrap(v_src02, v_128), v_add_wrap(v_src12, v_128), v_tmp0, v_tmp1);
v_res4 += v_dotprod(v_tmp0, v_mul23);
v_res5 += v_dotprod(v_tmp1, v_mul23);
v_zip(v_add_wrap(v_src03, v_128), v_add_wrap(v_src13, v_128), v_tmp0, v_tmp1);
v_res6 += v_dotprod(v_tmp0, v_mul23);
v_res7 += v_dotprod(v_tmp1, v_mul23);
v_int32 v_resj0, v_resj1;
const int16_t* src4 = (const int16_t*)src[4] + i;
v_src00 = vx_load(src4);
v_src01 = vx_load(src4 + VECSZ);
v_src02 = vx_load(src4 + 2*VECSZ);
v_src03 = vx_load(src4 + 3*VECSZ);
v_mul_expand(v_add_wrap(v_src00, v_128), v_mul4, v_resj0, v_resj1);
v_res0 += v_resj0;
v_res1 += v_resj1;
v_mul_expand(v_add_wrap(v_src01, v_128), v_mul4, v_resj0, v_resj1);
v_res2 += v_resj0;
v_res3 += v_resj1;
v_mul_expand(v_add_wrap(v_src02, v_128), v_mul4, v_resj0, v_resj1);
v_res4 += v_resj0;
v_res5 += v_resj1;
v_mul_expand(v_add_wrap(v_src03, v_128), v_mul4, v_resj0, v_resj1);
v_res6 += v_resj0;
v_res7 += v_resj1;
v_res0 += v_128_4;
v_res1 += v_128_4;
v_res2 += v_128_4;
v_res3 += v_128_4;
v_res4 += v_128_4;
v_res5 += v_128_4;
v_res6 += v_128_4;
v_res7 += v_128_4;
v_store(dst + i , v_pack(v_reinterpret_as_u16(v_rshr_pack<16>(v_res0, v_res1)),
v_reinterpret_as_u16(v_rshr_pack<16>(v_res2, v_res3))));
v_store(dst + i + 2*VECSZ, v_pack(v_reinterpret_as_u16(v_rshr_pack<16>(v_res4, v_res5)),
v_reinterpret_as_u16(v_rshr_pack<16>(v_res6, v_res7))));
}
}
#endif
for (; i < len; i++)
dst[i] = m[0] * src[0][i] + m[1] * src[1][i] + m[2] * src[2][i] + m[3] * src[3][i] + m[4] * src[4][i];
}
@ -3284,28 +3227,31 @@ template <>
void vlineSmooth5N14641<uint8_t, ufixedpoint16>(const ufixedpoint16* const * src, const ufixedpoint16*, int, uint8_t* dst, int len)
{
int i = 0;
v_uint32x4 v_6 = v_setall_u32(6);
for (; i <= len - 16; i += 16)
#if CV_SIMD
v_uint32 v_6 = vx_setall_u32(6);
const int VECSZ = v_uint16::nlanes;
for (; i <= len - 2*VECSZ; i += 2*VECSZ)
{
v_uint32x4 v_src00, v_src10, v_src20, v_src30, v_src40;
v_uint32x4 v_src01, v_src11, v_src21, v_src31, v_src41;
v_uint32x4 v_src02, v_src12, v_src22, v_src32, v_src42;
v_uint32x4 v_src03, v_src13, v_src23, v_src33, v_src43;
v_expand(v_load((uint16_t*)(src[0]) + i), v_src00, v_src01);
v_expand(v_load((uint16_t*)(src[0]) + i + 8), v_src02, v_src03);
v_expand(v_load((uint16_t*)(src[1]) + i), v_src10, v_src11);
v_expand(v_load((uint16_t*)(src[1]) + i + 8), v_src12, v_src13);
v_expand(v_load((uint16_t*)(src[2]) + i), v_src20, v_src21);
v_expand(v_load((uint16_t*)(src[2]) + i + 8), v_src22, v_src23);
v_expand(v_load((uint16_t*)(src[3]) + i), v_src30, v_src31);
v_expand(v_load((uint16_t*)(src[3]) + i + 8), v_src32, v_src33);
v_expand(v_load((uint16_t*)(src[4]) + i), v_src40, v_src41);
v_expand(v_load((uint16_t*)(src[4]) + i + 8), v_src42, v_src43);
v_uint32 v_src00, v_src10, v_src20, v_src30, v_src40;
v_uint32 v_src01, v_src11, v_src21, v_src31, v_src41;
v_uint32 v_src02, v_src12, v_src22, v_src32, v_src42;
v_uint32 v_src03, v_src13, v_src23, v_src33, v_src43;
v_expand(vx_load((uint16_t*)(src[0]) + i), v_src00, v_src01);
v_expand(vx_load((uint16_t*)(src[0]) + i + VECSZ), v_src02, v_src03);
v_expand(vx_load((uint16_t*)(src[1]) + i), v_src10, v_src11);
v_expand(vx_load((uint16_t*)(src[1]) + i + VECSZ), v_src12, v_src13);
v_expand(vx_load((uint16_t*)(src[2]) + i), v_src20, v_src21);
v_expand(vx_load((uint16_t*)(src[2]) + i + VECSZ), v_src22, v_src23);
v_expand(vx_load((uint16_t*)(src[3]) + i), v_src30, v_src31);
v_expand(vx_load((uint16_t*)(src[3]) + i + VECSZ), v_src32, v_src33);
v_expand(vx_load((uint16_t*)(src[4]) + i), v_src40, v_src41);
v_expand(vx_load((uint16_t*)(src[4]) + i + VECSZ), v_src42, v_src43);
v_store(dst + i, v_pack(v_rshr_pack<12>(v_src20*v_6 + ((v_src10 + v_src30) << 2) + v_src00 + v_src40,
v_src21*v_6 + ((v_src11 + v_src31) << 2) + v_src01 + v_src41),
v_rshr_pack<12>(v_src22*v_6 + ((v_src12 + v_src32) << 2) + v_src02 + v_src42,
v_src23*v_6 + ((v_src13 + v_src33) << 2) + v_src03 + v_src43)));
}
#endif
for (; i < len; i++)
dst[i] = ((uint32_t)(((uint16_t*)(src[2]))[i]) * 6 +
(((uint32_t)(((uint16_t*)(src[1]))[i]) + (uint32_t)(((uint16_t*)(src[3]))[i])) << 2) +
@ -3326,57 +3272,63 @@ template <>
void vlineSmooth<uint8_t, ufixedpoint16>(const ufixedpoint16* const * src, const ufixedpoint16* m, int n, uint8_t* dst, int len)
{
int i = 0;
static const v_int16x8 v_128 = v_reinterpret_as_s16(v_setall_u16((uint16_t)1 << 15));
v_int32x4 v_128_4 = v_setall_s32(128 << 16);
if (len > 7)
#if CV_SIMD
static const v_int16 v_128 = v_reinterpret_as_s16(vx_setall_u16((uint16_t)1 << 15));
v_int32 v_128_4 = vx_setall_s32(128 << 16);
const int VECSZ = v_uint16::nlanes;
if (len >= VECSZ)
{
ufixedpoint16 msum = m[0] + m[1];
for (int j = 2; j < n; j++)
msum = msum + m[j];
ufixedpoint32 val[] = { msum * ufixedpoint16((uint8_t)128) };
v_128_4 = v_setall_s32(*((int32_t*)val));
v_128_4 = vx_setall_s32(*((int32_t*)val));
}
for (; i <= len - 32; i += 32)
for (; i <= len - 4*VECSZ; i += 4*VECSZ)
{
v_int16x8 v_src00, v_src10, v_src01, v_src11, v_src02, v_src12, v_src03, v_src13;
v_int16x8 v_tmp0, v_tmp1;
v_int16 v_src00, v_src10, v_src01, v_src11, v_src02, v_src12, v_src03, v_src13;
v_int16 v_tmp0, v_tmp1;
v_int16x8 v_mul = v_reinterpret_as_s16(v_setall_u32(*((uint32_t*)m)));
v_int16 v_mul = v_reinterpret_as_s16(vx_setall_u32(*((uint32_t*)m)));
v_src00 = v_load((int16_t*)(src[0]) + i);
v_src01 = v_load((int16_t*)(src[0]) + i + 8);
v_src02 = v_load((int16_t*)(src[0]) + i + 16);
v_src03 = v_load((int16_t*)(src[0]) + i + 24);
v_src10 = v_load((int16_t*)(src[1]) + i);
v_src11 = v_load((int16_t*)(src[1]) + i + 8);
v_src12 = v_load((int16_t*)(src[1]) + i + 16);
v_src13 = v_load((int16_t*)(src[1]) + i + 24);
const int16_t* src0 = (const int16_t*)src[0] + i;
const int16_t* src1 = (const int16_t*)src[1] + i;
v_src00 = vx_load(src0);
v_src01 = vx_load(src0 + VECSZ);
v_src02 = vx_load(src0 + 2*VECSZ);
v_src03 = vx_load(src0 + 3*VECSZ);
v_src10 = vx_load(src1);
v_src11 = vx_load(src1 + VECSZ);
v_src12 = vx_load(src1 + 2*VECSZ);
v_src13 = vx_load(src1 + 3*VECSZ);
v_zip(v_add_wrap(v_src00, v_128), v_add_wrap(v_src10, v_128), v_tmp0, v_tmp1);
v_int32x4 v_res0 = v_dotprod(v_tmp0, v_mul);
v_int32x4 v_res1 = v_dotprod(v_tmp1, v_mul);
v_int32 v_res0 = v_dotprod(v_tmp0, v_mul);
v_int32 v_res1 = v_dotprod(v_tmp1, v_mul);
v_zip(v_add_wrap(v_src01, v_128), v_add_wrap(v_src11, v_128), v_tmp0, v_tmp1);
v_int32x4 v_res2 = v_dotprod(v_tmp0, v_mul);
v_int32x4 v_res3 = v_dotprod(v_tmp1, v_mul);
v_int32 v_res2 = v_dotprod(v_tmp0, v_mul);
v_int32 v_res3 = v_dotprod(v_tmp1, v_mul);
v_zip(v_add_wrap(v_src02, v_128), v_add_wrap(v_src12, v_128), v_tmp0, v_tmp1);
v_int32x4 v_res4 = v_dotprod(v_tmp0, v_mul);
v_int32x4 v_res5 = v_dotprod(v_tmp1, v_mul);
v_int32 v_res4 = v_dotprod(v_tmp0, v_mul);
v_int32 v_res5 = v_dotprod(v_tmp1, v_mul);
v_zip(v_add_wrap(v_src03, v_128), v_add_wrap(v_src13, v_128), v_tmp0, v_tmp1);
v_int32x4 v_res6 = v_dotprod(v_tmp0, v_mul);
v_int32x4 v_res7 = v_dotprod(v_tmp1, v_mul);
v_int32 v_res6 = v_dotprod(v_tmp0, v_mul);
v_int32 v_res7 = v_dotprod(v_tmp1, v_mul);
int j = 2;
for (; j < n - 1; j+=2)
{
v_mul = v_reinterpret_as_s16(v_setall_u32(*((uint32_t*)(m+j))));
v_mul = v_reinterpret_as_s16(vx_setall_u32(*((uint32_t*)(m+j))));
v_src00 = v_load((int16_t*)(src[j]) + i);
v_src01 = v_load((int16_t*)(src[j]) + i + 8);
v_src02 = v_load((int16_t*)(src[j]) + i + 16);
v_src03 = v_load((int16_t*)(src[j]) + i + 24);
v_src10 = v_load((int16_t*)(src[j+1]) + i);
v_src11 = v_load((int16_t*)(src[j+1]) + i + 8);
v_src12 = v_load((int16_t*)(src[j+1]) + i + 16);
v_src13 = v_load((int16_t*)(src[j+1]) + i + 24);
const int16_t* srcj0 = (const int16_t*)src[j] + i;
const int16_t* srcj1 = (const int16_t*)src[j + 1] + i;
v_src00 = vx_load(srcj0);
v_src01 = vx_load(srcj0 + VECSZ);
v_src02 = vx_load(srcj0 + 2*VECSZ);
v_src03 = vx_load(srcj0 + 3*VECSZ);
v_src10 = vx_load(srcj1);
v_src11 = vx_load(srcj1 + VECSZ);
v_src12 = vx_load(srcj1 + 2*VECSZ);
v_src13 = vx_load(srcj1 + 3*VECSZ);
v_zip(v_add_wrap(v_src00, v_128), v_add_wrap(v_src10, v_128), v_tmp0, v_tmp1);
v_res0 += v_dotprod(v_tmp0, v_mul);
v_res1 += v_dotprod(v_tmp1, v_mul);
@ -3392,12 +3344,13 @@ void vlineSmooth<uint8_t, ufixedpoint16>(const ufixedpoint16* const * src, const
}
if(j < n)
{
v_int32x4 v_resj0, v_resj1;
v_mul = v_reinterpret_as_s16(v_setall_u16(*((uint16_t*)(m + j))));
v_src00 = v_load((int16_t*)(src[j]) + i);
v_src01 = v_load((int16_t*)(src[j]) + i + 8);
v_src02 = v_load((int16_t*)(src[j]) + i + 16);
v_src03 = v_load((int16_t*)(src[j]) + i + 24);
v_int32 v_resj0, v_resj1;
v_mul = v_reinterpret_as_s16(vx_setall_u16(*((uint16_t*)(m + j))));
const int16_t* srcj = (const int16_t*)src[j] + i;
v_src00 = vx_load(srcj);
v_src01 = vx_load(srcj + VECSZ);
v_src02 = vx_load(srcj + 2*VECSZ);
v_src03 = vx_load(srcj + 3*VECSZ);
v_mul_expand(v_add_wrap(v_src00, v_128), v_mul, v_resj0, v_resj1);
v_res0 += v_resj0;
v_res1 += v_resj1;
@ -3420,11 +3373,12 @@ void vlineSmooth<uint8_t, ufixedpoint16>(const ufixedpoint16* const * src, const
v_res6 += v_128_4;
v_res7 += v_128_4;
v_store(dst + i , v_pack(v_reinterpret_as_u16(v_rshr_pack<16>(v_res0, v_res1)),
v_reinterpret_as_u16(v_rshr_pack<16>(v_res2, v_res3))));
v_store(dst + i + 16, v_pack(v_reinterpret_as_u16(v_rshr_pack<16>(v_res4, v_res5)),
v_reinterpret_as_u16(v_rshr_pack<16>(v_res6, v_res7))));
v_store(dst + i , v_pack(v_reinterpret_as_u16(v_rshr_pack<16>(v_res0, v_res1)),
v_reinterpret_as_u16(v_rshr_pack<16>(v_res2, v_res3))));
v_store(dst + i + 2*VECSZ, v_pack(v_reinterpret_as_u16(v_rshr_pack<16>(v_res4, v_res5)),
v_reinterpret_as_u16(v_rshr_pack<16>(v_res6, v_res7))));
}
#endif
for (; i < len; i++)
{
ufixedpoint32 val = m[0] * src[0][i];
@ -3450,29 +3404,32 @@ void vlineSmoothONa_yzy_a(const FT* const * src, const FT* m, int n, ET* dst, in
template <>
void vlineSmoothONa_yzy_a<uint8_t, ufixedpoint16>(const ufixedpoint16* const * src, const ufixedpoint16* m, int n, uint8_t* dst, int len)
{
int pre_shift = n / 2;
int i = 0;
static const v_int16x8 v_128 = v_reinterpret_as_s16(v_setall_u16((uint16_t)1 << 15));
v_int32x4 v_128_4 = v_setall_s32(128 << 16);
if (len > 7)
#if CV_SIMD
int pre_shift = n / 2;
static const v_int16 v_128 = v_reinterpret_as_s16(vx_setall_u16((uint16_t)1 << 15));
v_int32 v_128_4 = vx_setall_s32(128 << 16);
const int VECSZ = v_uint16::nlanes;
if (len >= VECSZ)
{
ufixedpoint16 msum = m[0] + m[pre_shift] + m[n - 1];
for (int j = 1; j < pre_shift; j++)
msum = msum + m[j] + m[n - 1 - j];
ufixedpoint32 val[] = { msum * ufixedpoint16((uint8_t)128) };
v_128_4 = v_setall_s32(*((int32_t*)val));
v_128_4 = vx_setall_s32(*((int32_t*)val));
}
for (; i <= len - 32; i += 32)
for (; i <= len - 4*VECSZ; i += 4*VECSZ)
{
v_int16x8 v_src00, v_src10, v_src20, v_src30, v_src01, v_src11, v_src21, v_src31;
v_int32x4 v_res0, v_res1, v_res2, v_res3, v_res4, v_res5, v_res6, v_res7;
v_int16x8 v_tmp0, v_tmp1, v_tmp2, v_tmp3, v_tmp4, v_tmp5, v_tmp6, v_tmp7;
v_int16 v_src00, v_src10, v_src20, v_src30, v_src01, v_src11, v_src21, v_src31;
v_int32 v_res0, v_res1, v_res2, v_res3, v_res4, v_res5, v_res6, v_res7;
v_int16 v_tmp0, v_tmp1, v_tmp2, v_tmp3, v_tmp4, v_tmp5, v_tmp6, v_tmp7;
v_int16x8 v_mul = v_reinterpret_as_s16(v_setall_u16(*((uint16_t*)(m + pre_shift))));
v_src00 = v_load((int16_t*)(src[pre_shift]) + i);
v_src10 = v_load((int16_t*)(src[pre_shift]) + i + 8);
v_src20 = v_load((int16_t*)(src[pre_shift]) + i + 16);
v_src30 = v_load((int16_t*)(src[pre_shift]) + i + 24);
v_int16 v_mul = v_reinterpret_as_s16(vx_setall_u16(*((uint16_t*)(m + pre_shift))));
const int16_t* srcp = (const int16_t*)src[pre_shift] + i;
v_src00 = vx_load(srcp);
v_src10 = vx_load(srcp + VECSZ);
v_src20 = vx_load(srcp + 2*VECSZ);
v_src30 = vx_load(srcp + 3*VECSZ);
v_mul_expand(v_add_wrap(v_src00, v_128), v_mul, v_res0, v_res1);
v_mul_expand(v_add_wrap(v_src10, v_128), v_mul, v_res2, v_res3);
v_mul_expand(v_add_wrap(v_src20, v_128), v_mul, v_res4, v_res5);
@ -3481,16 +3438,18 @@ void vlineSmoothONa_yzy_a<uint8_t, ufixedpoint16>(const ufixedpoint16* const * s
int j = 0;
for (; j < pre_shift; j++)
{
v_mul = v_reinterpret_as_s16(v_setall_u16(*((uint16_t*)(m + j))));
v_mul = v_reinterpret_as_s16(vx_setall_u16(*((uint16_t*)(m + j))));
v_src00 = v_load((int16_t*)(src[j]) + i);
v_src10 = v_load((int16_t*)(src[j]) + i + 8);
v_src20 = v_load((int16_t*)(src[j]) + i + 16);
v_src30 = v_load((int16_t*)(src[j]) + i + 24);
v_src01 = v_load((int16_t*)(src[n - 1 - j]) + i);
v_src11 = v_load((int16_t*)(src[n - 1 - j]) + i + 8);
v_src21 = v_load((int16_t*)(src[n - 1 - j]) + i + 16);
v_src31 = v_load((int16_t*)(src[n - 1 - j]) + i + 24);
const int16_t* srcj0 = (const int16_t*)src[j] + i;
const int16_t* srcj1 = (const int16_t*)src[n - 1 - j] + i;
v_src00 = vx_load(srcj0);
v_src10 = vx_load(srcj0 + VECSZ);
v_src20 = vx_load(srcj0 + 2*VECSZ);
v_src30 = vx_load(srcj0 + 3*VECSZ);
v_src01 = vx_load(srcj1);
v_src11 = vx_load(srcj1 + VECSZ);
v_src21 = vx_load(srcj1 + 2*VECSZ);
v_src31 = vx_load(srcj1 + 3*VECSZ);
v_zip(v_add_wrap(v_src00, v_128), v_add_wrap(v_src01, v_128), v_tmp0, v_tmp1);
v_res0 += v_dotprod(v_tmp0, v_mul);
v_res1 += v_dotprod(v_tmp1, v_mul);
@ -3514,11 +3473,12 @@ void vlineSmoothONa_yzy_a<uint8_t, ufixedpoint16>(const ufixedpoint16* const * s
v_res6 += v_128_4;
v_res7 += v_128_4;
v_store(dst + i , v_pack(v_reinterpret_as_u16(v_rshr_pack<16>(v_res0, v_res1)),
v_reinterpret_as_u16(v_rshr_pack<16>(v_res2, v_res3))));
v_store(dst + i + 16, v_pack(v_reinterpret_as_u16(v_rshr_pack<16>(v_res4, v_res5)),
v_reinterpret_as_u16(v_rshr_pack<16>(v_res6, v_res7))));
v_store(dst + i , v_pack(v_reinterpret_as_u16(v_rshr_pack<16>(v_res0, v_res1)),
v_reinterpret_as_u16(v_rshr_pack<16>(v_res2, v_res3))));
v_store(dst + i + 2*VECSZ, v_pack(v_reinterpret_as_u16(v_rshr_pack<16>(v_res4, v_res5)),
v_reinterpret_as_u16(v_rshr_pack<16>(v_res6, v_res7))));
}
#endif
for (; i < len; i++)
{
ufixedpoint32 val = m[0] * src[0][i];
@ -3816,8 +3776,8 @@ static void createGaussianKernels( T & kx, T & ky, int type, Size &ksize,
if( ksize.height <= 0 && sigma2 > 0 )
ksize.height = cvRound(sigma2*(depth == CV_8U ? 3 : 4)*2 + 1)|1;
CV_Assert( ksize.width > 0 && ksize.width % 2 == 1 &&
ksize.height > 0 && ksize.height % 2 == 1 );
CV_Assert( ksize.width > 0 && ksize.width % 2 == 1 &&
ksize.height > 0 && ksize.height % 2 == 1 );
sigma1 = std::max( sigma1, 0. );
sigma2 = std::max( sigma2, 0. );
@ -4146,20 +4106,6 @@ void cv::GaussianBlur( InputArray _src, OutputArray _dst, Size ksize,
int sdepth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
if(sdepth == CV_8U && ((borderType & BORDER_ISOLATED) || !_src.getMat().isSubmatrix()))
{
std::vector<ufixedpoint16> fkx, fky;
createGaussianKernels(fkx, fky, type, ksize, sigma1, sigma2);
Mat src = _src.getMat();
Mat dst = _dst.getMat();
if (src.data == dst.data)
src = src.clone();
fixedSmoothInvoker<uint8_t, ufixedpoint16> invoker(src.ptr<uint8_t>(), src.step1(), dst.ptr<uint8_t>(), dst.step1(), dst.cols, dst.rows, dst.channels(), &fkx[0], (int)fkx.size(), &fky[0], (int)fky.size(), borderType & ~BORDER_ISOLATED);
parallel_for_(Range(0, dst.rows), invoker, std::max(1, std::min(getNumThreads(), getNumberOfCPUs())));
return;
}
Mat kx, ky;
createGaussianKernels(kx, ky, type, ksize, sigma1, sigma2);
@ -4185,6 +4131,17 @@ void cv::GaussianBlur( InputArray _src, OutputArray _dst, Size ksize,
CV_IPP_RUN_FAST(ipp_GaussianBlur(src, dst, ksize, sigma1, sigma2, borderType));
if(sdepth == CV_8U && ((borderType & BORDER_ISOLATED) || !_src.getMat().isSubmatrix()))
{
std::vector<ufixedpoint16> fkx, fky;
createGaussianKernels(fkx, fky, type, ksize, sigma1, sigma2);
if (src.data == dst.data)
src = src.clone();
fixedSmoothInvoker<uint8_t, ufixedpoint16> invoker(src.ptr<uint8_t>(), src.step1(), dst.ptr<uint8_t>(), dst.step1(), dst.cols, dst.rows, dst.channels(), &fkx[0], (int)fkx.size(), &fky[0], (int)fky.size(), borderType & ~BORDER_ISOLATED);
parallel_for_(Range(0, dst.rows), invoker, std::max(1, std::min(getNumThreads(), getNumberOfCPUs())));
return;
}
sepFilter2D(src, dst, sdepth, kx, ky, Point(-1, -1), 0, borderType);
}

View File

@ -758,6 +758,7 @@ void Subdiv2D::getTriangleList(std::vector<Vec6f>& triangleList) const
triangleList.clear();
int i, total = (int)(qedges.size()*4);
std::vector<bool> edgemask(total, false);
Rect2f rect(topLeft.x, topLeft.y, bottomRight.x, bottomRight.y);
for( i = 4; i < total; i += 2 )
{
@ -773,7 +774,8 @@ void Subdiv2D::getTriangleList(std::vector<Vec6f>& triangleList) const
edge = getEdge(edge, NEXT_AROUND_LEFT);
edgeOrg(edge, &c);
edgemask[edge] = true;
triangleList.push_back(Vec6f(a.x, a.y, b.x, b.y, c.x, c.y));
if( rect.contains(a) && rect.contains(b) && rect.contains(c) )
triangleList.push_back(Vec6f(a.x, a.y, b.x, b.y, c.x, c.y));
}
}

View File

@ -283,4 +283,23 @@ void CV_DisTransTest::prepare_to_validation( int /*test_case_idx*/ )
TEST(Imgproc_DistanceTransform, accuracy) { CV_DisTransTest test; test.safe_run(); }
BIGDATA_TEST(Imgproc_DistanceTransform, large_image_12218)
{
const int lls_maxcnt = 79992000; // labels's maximum count
const int lls_mincnt = 1; // labels's minimum count
int i, j, nz;
Mat src(8000, 20000, CV_8UC1), dst, labels;
for( i = 0; i < src.rows; i++ )
for( j = 0; j < src.cols; j++ )
src.at<uchar>(i, j) = (j > (src.cols / 2)) ? 0 : 255;
distanceTransform(src, dst, labels, cv::DIST_L2, cv::DIST_MASK_3, DIST_LABEL_PIXEL);
double scale = (double)lls_mincnt / (double)lls_maxcnt;
labels.convertTo(labels, CV_32SC1, scale);
Size size = labels.size();
nz = cv::countNonZero(labels);
EXPECT_EQ(nz, (size.height*size.width / 2));
}
}} // namespace

View File

@ -0,0 +1,53 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
//M*/
#include "test_precomp.hpp"
namespace opencv_test { namespace {
TEST(Imgproc_Subdiv2D_getTriangleList, regression_5788)
{
const float points[65][2] = {
{ 390, 802}, { 397, 883}, { 414, 963 }, { 439, 1042 }, { 472, 1113},
{ 521, 1181}, { 591, 1238}, { 678, 1284 }, { 771, 1292 }, { 853, 1281},
{ 921, 1243}, { 982, 1191}, {1030, 1121 }, {1059, 1038 }, {1072, 945},
{1081, 849}, {1082, 749}, { 459, 734 }, { 502, 704 }, { 554, 696},
{ 609, 698}, { 660, 707}, { 818, 688 }, { 874, 661 }, { 929, 646},
{ 982, 653}, {1026, 682}, { 740, 771 }, { 748, 834 }, { 756, 897},
{ 762, 960}, { 700, 998}, { 733, 1006 }, { 766, 1011 }, { 797, 999},
{ 825, 987}, { 528, 796}, { 566, 766 }, { 617, 763 }, { 659, 794},
{ 619, 808}, { 569, 812}, { 834, 777 }, { 870, 735 }, { 918, 729},
{ 958, 750}, { 929, 773}, { 882, 780 }, { 652, 1102 }, { 701, 1079},
{ 743, 1063}, { 774, 1068}, { 807, 1057 }, { 852, 1065 }, { 896, 1077},
{ 860, 1117}, { 820, 1135}, { 783, 1141 }, { 751, 1140 }, { 706, 1130},
{ 675, 1102}, { 743, 1094}, { 774, 1094 }, { 809, 1088 }, { 878, 1082}
};
std::vector<cv::Point2f> pts;
cv::Rect rect(0, 0, 1500, 2000);
cv::Subdiv2D subdiv(rect);
for( int i = 0; i < 65; i++ )
{
cv::Point2f pt(points[i][0], points[i][1]);
pts.push_back(pt);
}
subdiv.insert(pts);
std::vector<cv::Vec6f> triangles;
subdiv.getTriangleList(triangles);
int trig_cnt = 0;
for( std::vector<cv::Vec6f>::const_iterator it = triangles.begin(); it != triangles.end(); it++, trig_cnt++ )
{
EXPECT_TRUE( (0 <= triangles.at(trig_cnt).val[0] && triangles.at(trig_cnt).val[0] < 1500) &&
(0 <= triangles.at(trig_cnt).val[1] && triangles.at(trig_cnt).val[1] < 2000) &&
(0 <= triangles.at(trig_cnt).val[2] && triangles.at(trig_cnt).val[2] < 1500) &&
(0 <= triangles.at(trig_cnt).val[3] && triangles.at(trig_cnt).val[3] < 2000) &&
(0 <= triangles.at(trig_cnt).val[4] && triangles.at(trig_cnt).val[4] < 1500) &&
(0 <= triangles.at(trig_cnt).val[5] && triangles.at(trig_cnt).val[5] < 2000) );
}
EXPECT_EQ(trig_cnt, 105);
}
}};

View File

@ -29,7 +29,7 @@
#define ARRAYLIST(ENV) static_cast<jclass>(ENV->NewGlobalRef(ENV->FindClass("java/util/ArrayList")))
#define LIST_ADD(ENV, LIST) ENV->GetMethodID(LIST, "add", "(Ljava/lang/Object;)Z")
#define LIST_GET(ENV, LIST) ENV->GetMethodID(LIST, "get", "((I)Ljava/lang/Object;")
#define LIST_GET(ENV, LIST) ENV->GetMethodID(LIST, "get", "(I)Ljava/lang/Object;")
#define LIST_SIZE(ENV, LIST) ENV->GetMethodID(LIST, "size", "()I")
#define LIST_CLEAR(ENV, LIST) ENV->GetMethodID(LIST, "clear", "()V")

View File

@ -56,7 +56,7 @@ add_custom_command(
DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/src/embindgen.py
DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/src/templates.py
DEPENDS ${scripts_hdr_parser}
DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/headers.txt
#(not needed - generated by CMake) DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/headers.txt
DEPENDS ${opencv_hdrs}
DEPENDS ${JS_HELPER})

View File

@ -68,15 +68,10 @@
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//M*/
#include "opencv2/core.hpp"
#include "opencv2/imgproc.hpp"
#include "opencv2/video/tracking.hpp"
#include "opencv2/video/background_segm.hpp"
#include "opencv2/objdetect.hpp"
#include "opencv2/dnn.hpp"
#include <emscripten/bind.h>
@INCLUDES@
using namespace emscripten;
using namespace cv;
using namespace dnn;

View File

@ -733,12 +733,14 @@ class JSWrapperGenerator(object):
def gen(self, dst_file, src_files, core_bindings):
# step 1: scan the headers and extract classes, enums and functions
headers = []
for hdr in src_files:
decls = self.parser.parse(hdr)
# print(hdr);
# self.print_decls(decls);
if len(decls) == 0:
continue
headers.append(hdr[hdr.rindex('opencv2/'):])
for decl in decls:
name = decl[0]
type = name[:name.find(" ")]
@ -890,6 +892,9 @@ class JSWrapperGenerator(object):
with open(core_bindings) as f:
ret = f.read()
header_includes = '\n'.join(['#include "{}"'.format(hdr) for hdr in headers])
ret = ret.replace('@INCLUDES@', header_includes)
defis = '\n'.join(self.wrapper_funcs)
ret += wrapper_codes_template.substitute(ns=wrapper_namespace, defs=defis)
ret += emscripten_binding_template.substitute(binding_name='testBinding', bindings=''.join(self.bindings))

View File

@ -140,13 +140,12 @@ public:
String getModelName() const CV_OVERRIDE { return NAME_BRUTE_FORCE; }
int getType() const CV_OVERRIDE { return ml::KNearest::BRUTE_FORCE; }
void findNearestCore( const Mat& _samples, int k0, const Range& range,
void findNearestCore( const Mat& _samples, int k, const Range& range,
Mat* results, Mat* neighbor_responses,
Mat* dists, float* presult ) const
{
int testidx, baseidx, i, j, d = samples.cols, nsamples = samples.rows;
int testcount = range.end - range.start;
int k = std::min(k0, nsamples);
AutoBuffer<float> buf(testcount*k*2);
float* dbuf = buf.data();
@ -215,7 +214,7 @@ public:
float* nr = neighbor_responses->ptr<float>(testidx + range.start);
for( j = 0; j < k; j++ )
nr[j] = rbuf[testidx*k + j];
for( ; j < k0; j++ )
for( ; j < k; j++ )
nr[j] = 0.f;
}
@ -224,7 +223,7 @@ public:
float* dptr = dists->ptr<float>(testidx + range.start);
for( j = 0; j < k; j++ )
dptr[j] = dbuf[testidx*k + j];
for( ; j < k0; j++ )
for( ; j < k; j++ )
dptr[j] = 0.f;
}
@ -307,6 +306,7 @@ public:
{
float result = 0.f;
CV_Assert( 0 < k );
k = std::min(k, samples.rows);
Mat test_samples = _samples.getMat();
CV_Assert( test_samples.type() == CV_32F && test_samples.cols == samples.cols );
@ -363,6 +363,7 @@ public:
{
float result = 0.f;
CV_Assert( 0 < k );
k = std::min(k, samples.rows);
Mat test_samples = _samples.getMat();
CV_Assert( test_samples.type() == CV_32F && test_samples.cols == samples.cols );

View File

@ -702,4 +702,26 @@ TEST(ML_EM, accuracy) { CV_EMTest test; test.safe_run(); }
TEST(ML_EM, save_load) { CV_EMTest_SaveLoad test; test.safe_run(); }
TEST(ML_EM, classification) { CV_EMTest_Classification test; test.safe_run(); }
TEST(ML_KNearest, regression_12347)
{
Mat xTrainData = (Mat_<float>(5,2) << 1, 1.1, 1.1, 1, 2, 2, 2.1, 2, 2.1, 2.1);
Mat yTrainLabels = (Mat_<float>(5,1) << 1, 1, 2, 2, 2);
Ptr<KNearest> knn = KNearest::create();
knn->train(xTrainData, ml::ROW_SAMPLE, yTrainLabels);
Mat xTestData = (Mat_<float>(2,2) << 1.1, 1.1, 2, 2.2);
Mat zBestLabels, neighbours, dist;
// check output shapes:
int K = 16, Kexp = std::min(K, xTrainData.rows);
knn->findNearest(xTestData, K, zBestLabels, neighbours, dist);
EXPECT_EQ(xTestData.rows, zBestLabels.rows);
EXPECT_EQ(neighbours.cols, Kexp);
EXPECT_EQ(dist.cols, Kexp);
// see if the result is still correct:
K = 2;
knn->findNearest(xTestData, K, zBestLabels, neighbours, dist);
EXPECT_EQ(1, zBestLabels.at<float>(0,0));
EXPECT_EQ(2, zBestLabels.at<float>(1,0));
}
}} // namespace

File diff suppressed because it is too large Load Diff

View File

@ -40,6 +40,9 @@ if __name__ == "__main__":
parser.add_argument("--valgrind_supp", metavar="FILE", action='append', help="Path to valgrind suppression file (example: --valgrind_supp opencv/platforms/scripts/valgrind.supp)")
parser.add_argument("--valgrind_opt", metavar="OPT", action="append", default=[], help="Add command line option to valgrind (example: --valgrind_opt=--leak-check=full)")
# QEMU
parser.add_argument("--qemu", default="", help="Specify qemu binary and base parameters")
# Android
parser.add_argument("--android", action="store_true", default=False, help="Android: force all tests to run on device")
parser.add_argument("--android_sdk", metavar="PATH", help="Android: path to SDK to use adb and aapt tools")

View File

@ -77,7 +77,7 @@ class TestSuite(object):
return False
return os.access(fullpath, os.X_OK)
def wrapInValgrind(self, cmd=[]):
def wrapCommand(self, cmd, env):
if self.options.valgrind:
res = ['valgrind']
supp = self.options.valgrind_supp or []
@ -89,6 +89,14 @@ class TestSuite(object):
res.extend(self.options.valgrind_opt)
has_gtest_filter = next((True for x in cmd if x.startswith('--gtest_filter=')), False)
return res + cmd + ([longTestFilter(LONG_TESTS_DEBUG_VALGRIND)] if not has_gtest_filter else [])
elif self.options.qemu:
import shlex
res = shlex.split(self.options.qemu)
for (name, value) in [entry for entry in os.environ.items() if entry[0].startswith('OPENCV') and not entry[0] in env]:
res += ['-E', '"{}={}"'.format(name, value)]
for (name, value) in env.items():
res += ['-E', '"{}={}"'.format(name, value)]
return res + ['--'] + cmd
return cmd
def tryCommand(self, cmd, workingDir):
@ -125,7 +133,6 @@ class TestSuite(object):
else:
if isColorEnabled(args):
args.append("--gtest_color=yes")
cmd = self.wrapInValgrind([exe] + args)
env = {}
if not self.options.valgrind and self.options.trace:
env['OPENCV_TRACE'] = '1'
@ -133,6 +140,7 @@ class TestSuite(object):
env['OPENCV_TRACE_SYNC_OPENCL'] = '1'
tempDir = TempEnvDir('OPENCV_TEMP_PATH', "__opencv_temp.")
tempDir.init()
cmd = self.wrapCommand([exe] + args, env)
log.warning("Run: %s" % " ".join(cmd))
ret = execute(cmd, cwd=workingDir, env=env)
try:

View File

@ -721,6 +721,7 @@ void checkIppStatus()
}
}
static bool checkTestData = false;
bool skipUnstableTests = false;
bool runBigDataTests = false;
int testThreads = 0;
@ -733,6 +734,7 @@ void parseCustomOptions(int argc, char **argv)
"{ test_threads |-1 |the number of worker threads, if parallel execution is enabled}"
"{ skip_unstable |false |skip unstable tests }"
"{ test_bigdata |false |run BigData tests (>=2Gb) }"
"{ test_require_data |false |fail on missing non-required test data instead of skip}"
"{ h help |false |print help info }";
cv::CommandLineParser parser(argc, argv, command_line_keys);
@ -756,6 +758,7 @@ void parseCustomOptions(int argc, char **argv)
skipUnstableTests = parser.get<bool>("skip_unstable");
runBigDataTests = parser.get<bool>("test_bigdata");
checkTestData = parser.get<bool>("test_require_data");
}
@ -870,7 +873,7 @@ static std::string findData(const std::string& relative_path, bool required, boo
#endif
#endif
const char* type = findDirectory ? "directory" : "data file";
if (required)
if (required || checkTestData)
CV_Error(cv::Error::StsError, cv::format("OpenCV tests: Can't find required %s: %s", type, relative_path.c_str()));
throw SkipTestException(cv::format("OpenCV tests: Can't find %s: %s", type, relative_path.c_str()));
}

File diff suppressed because it is too large Load Diff

View File

@ -616,7 +616,7 @@ class SourceReaderCB : public IMFSourceReaderCallback
{
public:
SourceReaderCB() :
m_nRefCount(1), m_hEvent(CreateEvent(NULL, FALSE, FALSE, NULL)), m_bEOS(FALSE), m_hrStatus(S_OK), m_dwStreamIndex(0)
m_nRefCount(0), m_hEvent(CreateEvent(NULL, FALSE, FALSE, NULL)), m_bEOS(FALSE), m_hrStatus(S_OK), m_reader(NULL), m_dwStreamIndex(0)
{
}
@ -677,7 +677,7 @@ public:
BOOL m_bEOS;
HRESULT m_hrStatus;
_ComPtr<IMFSourceReader> m_reader;
IMFSourceReader *m_reader;
DWORD m_dwStreamIndex;
_ComPtr<IMFSample> m_lastSample;
};
@ -1140,7 +1140,7 @@ bool CvCapture_MSMF::grabFrame()
if (!reader->m_reader)
{
// Initiate capturing with async callback
reader->m_reader = videoFileSource;
reader->m_reader = videoFileSource.Get();
reader->m_dwStreamIndex = dwStreamIndex;
if (FAILED(hr = videoFileSource->ReadSample(dwStreamIndex, 0, NULL, NULL, NULL, NULL)))
{

Some files were not shown because too many files have changed in this diff Show More