mirror of
https://github.com/opencv/opencv.git
synced 2025-01-18 14:13:15 +08:00
Merge remote-tracking branch 'upstream/3.4' into merge-3.4
This commit is contained in:
commit
d74b98c3d9
@ -1,45 +0,0 @@
|
||||
# Main variables:
|
||||
# IPP_A_LIBRARIES and IPP_A_INCLUDE to use IPP Async
|
||||
# HAVE_IPP_A for conditional compilation OpenCV with/without IPP Async
|
||||
|
||||
# IPP_ASYNC_ROOT - root of IPP Async installation
|
||||
|
||||
# Locate the Intel IPP Async headers and library below $ENV{IPP_ASYNC_ROOT}.
# Only the library sub-directory depends on the target architecture, so it is
# selected once and the lookups themselves are written a single time.
if(X86_64)
  set(_ipp_a_lib_suffix lib/intel64)
else()
  set(_ipp_a_lib_suffix lib/ia32)
endif()

find_path(
  IPP_A_INCLUDE_DIR
  NAMES ipp_async_defs.h
  PATHS $ENV{IPP_ASYNC_ROOT}
  PATH_SUFFIXES include
  DOC "Path to Intel IPP Async interface headers")

# find_file() is used on purpose: the result is the full path of the library file.
find_file(
  IPP_A_LIBRARIES
  NAMES ipp_async_preview.lib
  PATHS $ENV{IPP_ASYNC_ROOT}
  PATH_SUFFIXES ${_ipp_a_lib_suffix}
  DOC "Path to Intel IPP Async interface libraries")

unset(_ipp_a_lib_suffix)

# HAVE_IPP_A is TRUE only when both the headers and the library were found.
if(IPP_A_INCLUDE_DIR AND IPP_A_LIBRARIES)
  set(HAVE_IPP_A TRUE)
else()
  set(HAVE_IPP_A FALSE)
  # The search is driven by the IPP_ASYNC_ROOT environment variable, so that is
  # what the user has to fix when the lookup fails.
  message(WARNING "Intel IPP Async headers or libraries were not found. Set the IPP_ASYNC_ROOT environment variable to the root of the Intel IPP Async installation.")
endif()

mark_as_advanced(FORCE IPP_A_LIBRARIES IPP_A_INCLUDE_DIR)
|
@ -35,17 +35,6 @@ if(WITH_IPP)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# --- IPP Async ---
|
||||
|
||||
# Optional Intel IPP Async support: run the finder and, when both the headers
# and the library were located, expose them to the OpenCV build.
if(WITH_IPP_A)
  include("${OpenCV_SOURCE_DIR}/cmake/OpenCVFindIPPAsync.cmake")
  if(IPP_A_INCLUDE_DIR AND IPP_A_LIBRARIES)
    ocv_include_directories(${IPP_A_INCLUDE_DIR})
    # IPP_A_LIBRARIES is the path of the library file itself; link_directories()
    # expects a directory, so pass the folder that contains the file.
    get_filename_component(_ipp_a_lib_dir "${IPP_A_LIBRARIES}" PATH)
    link_directories("${_ipp_a_lib_dir}")
    unset(_ipp_a_lib_dir)
    set(OPENCV_LINKER_LIBS ${OPENCV_LINKER_LIBS} ${IPP_A_LIBRARIES})
  endif()
endif(WITH_IPP_A)
|
||||
|
||||
# --- CUDA ---
|
||||
if(WITH_CUDA)
|
||||
include("${OpenCV_SOURCE_DIR}/cmake/OpenCVDetectCUDA.cmake")
|
||||
|
@ -103,9 +103,6 @@
|
||||
#cmakedefine HAVE_IPP_ICV
|
||||
#cmakedefine HAVE_IPP_IW
|
||||
|
||||
/* Intel IPP Async */
|
||||
#cmakedefine HAVE_IPP_A
|
||||
|
||||
/* JPEG-2000 codec */
|
||||
#cmakedefine HAVE_JASPER
|
||||
|
||||
|
@ -227,7 +227,6 @@ SEARCH_INCLUDES = YES
|
||||
INCLUDE_PATH =
|
||||
INCLUDE_FILE_PATTERNS =
|
||||
PREDEFINED = __cplusplus=1 \
|
||||
HAVE_IPP_A=1 \
|
||||
CVAPI(x)=x \
|
||||
CV_DOXYGEN= \
|
||||
CV_EXPORTS= \
|
||||
|
@ -1,7 +1,7 @@
|
||||
How to use the OpenCV parallel_for_ to parallelize your code {#tutorial_how_to_use_OpenCV_parallel_for_}
|
||||
==================================================================
|
||||
|
||||
@prev_tutorial{tutorial_how_to_use_ippa_conversion}
|
||||
@prev_tutorial{tutorial_interoperability_with_OpenCV_1}
|
||||
|
||||
Goal
|
||||
----
|
||||
|
@ -1,146 +0,0 @@
|
||||
Intel® IPP Asynchronous C/C++ library in OpenCV {#tutorial_how_to_use_ippa_conversion}
|
||||
===============================================
|
||||
|
||||
@prev_tutorial{tutorial_interoperability_with_OpenCV_1}
|
||||
@next_tutorial{tutorial_how_to_use_OpenCV_parallel_for_}
|
||||
|
||||
Goal
|
||||
----
|
||||
|
||||
The tutorial demonstrates the [Intel® IPP Asynchronous
|
||||
C/C++](http://software.intel.com/en-us/intel-ipp-preview) library usage with OpenCV. The code
|
||||
example below illustrates implementation of the Sobel operation, accelerated with Intel® IPP
|
||||
Asynchronous C/C++ functions. In this code example, @ref cv::hpp::getMat and @ref cv::hpp::getHpp
|
||||
functions are used for data conversion between
|
||||
[hppiMatrix](http://software.intel.com/en-us/node/501660) and Mat matrices.
|
||||
|
||||
Code
|
||||
----
|
||||
|
||||
You may also find the source code in the
|
||||
`samples/cpp/tutorial_code/core/ippasync/ippasync_sample.cpp` file of the OpenCV source library or
|
||||
download it from [here](https://github.com/opencv/opencv/tree/master/samples/cpp/tutorial_code/core/ippasync/ippasync_sample.cpp).
|
||||
|
||||
@include cpp/tutorial_code/core/ippasync/ippasync_sample.cpp
|
||||
|
||||
Explanation
|
||||
-----------
|
||||
|
||||
-# Create parameters for OpenCV:
|
||||
@code{.cpp}
|
||||
VideoCapture cap;
|
||||
Mat image, gray, result;
|
||||
@endcode
|
||||
and IPP Async:
|
||||
@code{.cpp}
|
||||
hppiMatrix* src,* dst;
|
||||
hppAccel accel = 0;
|
||||
hppAccelType accelType;
|
||||
hppStatus sts;
|
||||
hppiVirtualMatrix * virtMatrix;
|
||||
@endcode
|
||||
-# Load input image or video. How to open and read video stream you can see in the
|
||||
@ref tutorial_video_input_psnr_ssim tutorial.
|
||||
@code{.cpp}
|
||||
if( useCamera )
|
||||
{
|
||||
printf("used camera\n");
|
||||
cap.open(0);
|
||||
}
|
||||
else
|
||||
{
|
||||
printf("used image %s\n", file.c_str());
|
||||
cap.open(file.c_str());
|
||||
}
|
||||
|
||||
if( !cap.isOpened() )
|
||||
{
|
||||
printf("can not open camera or video file\n");
|
||||
return -1;
|
||||
}
|
||||
@endcode
|
||||
-# Create accelerator instance using
|
||||
[hppCreateInstance](http://software.intel.com/en-us/node/501686):
|
||||
@code{.cpp}
|
||||
accelType = sAccel == "cpu" ? HPP_ACCEL_TYPE_CPU:
|
||||
sAccel == "gpu" ? HPP_ACCEL_TYPE_GPU:
|
||||
HPP_ACCEL_TYPE_ANY;
|
||||
|
||||
//Create accelerator instance
|
||||
sts = hppCreateInstance(accelType, 0, &accel);
|
||||
CHECK_STATUS(sts, "hppCreateInstance");
|
||||
@endcode
|
||||
-# Create an array of virtual matrices using
|
||||
[hppiCreateVirtualMatrices](http://software.intel.com/en-us/node/501700) function.
|
||||
@code{.cpp}
|
||||
virtMatrix = hppiCreateVirtualMatrices(accel, 1);
|
||||
@endcode
|
||||
-# Prepare a matrix for input and output data:
|
||||
@code{.cpp}
|
||||
cap >> image;
|
||||
if(image.empty())
|
||||
break;
|
||||
|
||||
cvtColor( image, gray, COLOR_BGR2GRAY );
|
||||
|
||||
result.create( image.rows, image.cols, CV_8U);
|
||||
@endcode
|
||||
-# Convert Mat to [hppiMatrix](http://software.intel.com/en-us/node/501660) using @ref cv::hpp::getHpp
|
||||
and call [hppiSobel](http://software.intel.com/en-us/node/474701) function.
|
||||
@code{.cpp}
|
||||
//convert Mat to hppiMatrix
|
||||
src = getHpp(gray, accel);
|
||||
dst = getHpp(result, accel);
|
||||
|
||||
sts = hppiSobel(accel,src, HPP_MASK_SIZE_3X3,HPP_NORM_L1,virtMatrix[0]);
|
||||
CHECK_STATUS(sts,"hppiSobel");
|
||||
|
||||
sts = hppiConvert(accel, virtMatrix[0], 0, HPP_RND_MODE_NEAR, dst, HPP_DATA_TYPE_8U);
|
||||
CHECK_STATUS(sts,"hppiConvert");
|
||||
|
||||
// Wait for tasks to complete
|
||||
sts = hppWait(accel, HPP_TIME_OUT_INFINITE);
|
||||
CHECK_STATUS(sts, "hppWait");
|
||||
@endcode
|
||||
We use [hppiConvert](http://software.intel.com/en-us/node/501746) because
|
||||
[hppiSobel](http://software.intel.com/en-us/node/474701) returns destination matrix with
|
||||
HPP_DATA_TYPE_16S data type for source matrix with HPP_DATA_TYPE_8U type. You should check
|
||||
hppStatus after each call IPP Async function.
|
||||
|
||||
-# Create windows and show the images, the usual way.
|
||||
@code{.cpp}
|
||||
imshow("image", image);
|
||||
imshow("rez", result);
|
||||
|
||||
waitKey(15);
|
||||
@endcode
|
||||
-# Delete hpp matrices.
|
||||
@code{.cpp}
|
||||
sts = hppiFreeMatrix(src);
|
||||
CHECK_DEL_STATUS(sts,"hppiFreeMatrix");
|
||||
|
||||
sts = hppiFreeMatrix(dst);
|
||||
CHECK_DEL_STATUS(sts,"hppiFreeMatrix");
|
||||
@endcode
|
||||
-# Delete virtual matrices and accelerator instance.
|
||||
@code{.cpp}
|
||||
if (virtMatrix)
|
||||
{
|
||||
sts = hppiDeleteVirtualMatrices(accel, virtMatrix);
|
||||
CHECK_DEL_STATUS(sts,"hppiDeleteVirtualMatrices");
|
||||
}
|
||||
|
||||
if (accel)
|
||||
{
|
||||
sts = hppDeleteInstance(accel);
|
||||
CHECK_DEL_STATUS(sts, "hppDeleteInstance");
|
||||
}
|
||||
@endcode
|
||||
|
||||
Result
|
||||
------
|
||||
|
||||
After compiling the code above we can execute it giving an image or video path and accelerator type
|
||||
as an argument. For this tutorial we use baboon.png image as input. The result is below.
|
||||
|
||||
![](images/How_To_Use_IPPA_Result.jpg)
|
Binary file not shown.
Before Width: | Height: | Size: 61 KiB |
Binary file not shown.
Before Width: | Height: | Size: 6.8 KiB |
@ -2,7 +2,7 @@ Interoperability with OpenCV 1 {#tutorial_interoperability_with_OpenCV_1}
|
||||
==============================
|
||||
|
||||
@prev_tutorial{tutorial_file_input_output_with_xml_yml}
|
||||
@next_tutorial{tutorial_how_to_use_ippa_conversion}
|
||||
@next_tutorial{tutorial_how_to_use_OpenCV_parallel_for_}
|
||||
|
||||
Goal
|
||||
----
|
||||
|
@ -93,15 +93,6 @@ understanding how to manipulate the images on a pixel level.
|
||||
Look here to shed light on all these questions.
|
||||
|
||||
|
||||
- @subpage tutorial_how_to_use_ippa_conversion
|
||||
|
||||
*Compatibility:* \> OpenCV 2.0
|
||||
|
||||
*Author:* Elena Gvozdeva
|
||||
|
||||
You will see how to use the IPP Async with OpenCV.
|
||||
|
||||
|
||||
- @subpage tutorial_how_to_use_OpenCV_parallel_for_
|
||||
|
||||
*Compatibility:* \>= OpenCV 2.4.3
|
||||
|
BIN
doc/tutorials/imgproc/motion_deblur_filter/images/black_car.jpg
Executable file
BIN
doc/tutorials/imgproc/motion_deblur_filter/images/black_car.jpg
Executable file
Binary file not shown.
After Width: | Height: | Size: 33 KiB |
BIN
doc/tutorials/imgproc/motion_deblur_filter/images/motion_original.jpg
Executable file
BIN
doc/tutorials/imgproc/motion_deblur_filter/images/motion_original.jpg
Executable file
Binary file not shown.
After Width: | Height: | Size: 51 KiB |
BIN
doc/tutorials/imgproc/motion_deblur_filter/images/motion_psf.png
Executable file
BIN
doc/tutorials/imgproc/motion_deblur_filter/images/motion_psf.png
Executable file
Binary file not shown.
After Width: | Height: | Size: 556 B |
BIN
doc/tutorials/imgproc/motion_deblur_filter/images/white_car.jpg
Executable file
BIN
doc/tutorials/imgproc/motion_deblur_filter/images/white_car.jpg
Executable file
Binary file not shown.
After Width: | Height: | Size: 20 KiB |
72
doc/tutorials/imgproc/motion_deblur_filter/motion_deblur_filter.markdown
Executable file
72
doc/tutorials/imgproc/motion_deblur_filter/motion_deblur_filter.markdown
Executable file
@ -0,0 +1,72 @@
|
||||
Motion Deblur Filter {#tutorial_motion_deblur_filter}
|
||||
==========================
|
||||
|
||||
Goal
|
||||
----
|
||||
|
||||
In this tutorial you will learn:
|
||||
|
||||
- what the PSF of a motion blur image is
|
||||
- how to restore a motion blur image
|
||||
|
||||
Theory
|
||||
------
|
||||
|
||||
For the degradation image model theory and the Wiener filter theory you can refer to the tutorial @ref tutorial_out_of_focus_deblur_filter "Out-of-focus Deblur Filter".
|
||||
On this page only a linear motion blur distortion is considered. The motion blur image on this page is a real world image. The blur was caused by a moving subject.
|
||||
|
||||
### What is the PSF of a motion blur image?
|
||||
|
||||
The point spread function (PSF) of a linear motion blur distortion is a line segment. Such a PSF is specified by two parameters: \f$LEN\f$ is the length of the blur and \f$THETA\f$ is the angle of motion.
|
||||
|
||||
![Point spread function of a linear motion blur distortion](images/motion_psf.png)
|
||||
|
||||
### How to restore a blurred image?
|
||||
|
||||
On this page the Wiener filter is used as the restoration filter, for details you can refer to the tutorial @ref tutorial_out_of_focus_deblur_filter "Out-of-focus Deblur Filter".
|
||||
In order to synthesize the Wiener filter for a motion blur case, you need to specify the signal-to-noise ratio (\f$SNR\f$), \f$LEN\f$ and \f$THETA\f$ of the PSF.
|
||||
|
||||
Source code
|
||||
-----------
|
||||
|
||||
You can find source code in the `samples/cpp/tutorial_code/ImgProc/motion_deblur_filter/motion_deblur_filter.cpp` of the OpenCV source code library.
|
||||
|
||||
@include cpp/tutorial_code/ImgProc/motion_deblur_filter/motion_deblur_filter.cpp
|
||||
|
||||
Explanation
|
||||
-----------
|
||||
|
||||
A motion blur image recovering algorithm consists of PSF generation, Wiener filter generation and filtering a blurred image in a frequency domain:
|
||||
@snippet samples/cpp/tutorial_code/ImgProc/motion_deblur_filter/motion_deblur_filter.cpp main
|
||||
|
||||
A function calcPSF() forms a PSF according to input parameters \f$LEN\f$ and \f$THETA\f$ (in degrees):
|
||||
@snippet samples/cpp/tutorial_code/ImgProc/motion_deblur_filter/motion_deblur_filter.cpp calcPSF
|
||||
|
||||
A function edgetaper() tapers the input image’s edges in order to reduce the ringing effect in a restored image:
|
||||
@snippet samples/cpp/tutorial_code/ImgProc/motion_deblur_filter/motion_deblur_filter.cpp edgetaper
|
||||
|
||||
The functions calcWnrFilter(), fftshift() and filter2DFreq() realize an image filtration by a specified PSF in the frequency domain. The functions are copied from the tutorial
|
||||
@ref tutorial_out_of_focus_deblur_filter "Out-of-focus Deblur Filter".
|
||||
|
||||
Result
|
||||
------
|
||||
|
||||
Below you can see the real world image with motion blur distortion. The license plates are not readable on either car. The red markers show the cars’ license plate locations.
|
||||
![Motion blur image. The license plates are not readable](images/motion_original.jpg)
|
||||
|
||||
|
||||
Below you can see the restoration result for the black car license plate. The result has been computed with \f$LEN\f$ = 125, \f$THETA\f$ = 0, \f$SNR\f$ = 700.
|
||||
![The restored image of the black car license plate](images/black_car.jpg)
|
||||
|
||||
Below you can see the restoration result for the white car license plate. The result has been computed with \f$LEN\f$ = 78, \f$THETA\f$ = 15, \f$SNR\f$ = 300.
|
||||
![The restored image of the white car license plate](images/white_car.jpg)
|
||||
|
||||
The values of \f$SNR\f$, \f$LEN\f$ and \f$THETA\f$ were selected manually to give the best possible visual result. The \f$THETA\f$ parameter coincides with the car’s moving direction, and the
|
||||
\f$LEN\f$ parameter depends on the car’s moving speed.
|
||||
The result is not perfect, but at least it gives us a hint of the image’s content. With some effort, the car license plate is now readable.
|
||||
|
||||
@note The parameters \f$LEN\f$ and \f$THETA\f$ are the most important. You should adjust \f$LEN\f$ and \f$THETA\f$ first, then \f$SNR\f$.
|
||||
|
||||
You can also find a quick video demonstration of a license plate recovering method on
|
||||
[YouTube](https://youtu.be/xSrE0hdhb4o).
|
||||
@youtube{xSrE0hdhb4o}
|
@ -8,54 +8,54 @@ Goal
|
||||
|
||||
In this tutorial you will learn:
|
||||
|
||||
- what is a degradation image model
|
||||
- what is PSF of out-of-focus image
|
||||
- what a degradation image model is
|
||||
- what the PSF of an out-of-focus image is
|
||||
- how to restore a blurred image
|
||||
- what is Wiener filter
|
||||
- what is a Wiener filter
|
||||
|
||||
Theory
|
||||
------
|
||||
|
||||
@note The explanation is based on the books @cite gonzalez and @cite gruzman. Also, you can refer to Matlab's tutorial [Image Deblurring in Matlab] and an article [SmartDeblur].
|
||||
@note An out-of-focus image on this page is a real world image. An out-of-focus was done manually by camera optics.
|
||||
@note The explanation is based on the books @cite gonzalez and @cite gruzman. Also, you can refer to Matlab's tutorial [Image Deblurring in Matlab] and the article [SmartDeblur].
|
||||
@note The out-of-focus image on this page is a real world image. The out-of-focus effect was achieved manually with the camera optics.
|
||||
|
||||
### What is a degradation image model?
|
||||
|
||||
A mathematical model of the image degradation in frequency domain representation is:
|
||||
Here is a mathematical model of the image degradation in frequency domain representation:
|
||||
|
||||
\f[S = H\cdot U + N\f]
|
||||
|
||||
where
|
||||
\f$S\f$ is a spectrum of blurred (degraded) image,
|
||||
\f$U\f$ is a spectrum of original true (undegraded) image,
|
||||
\f$H\f$ is frequency response of point spread function (PSF),
|
||||
\f$H\f$ is a frequency response of point spread function (PSF),
|
||||
\f$N\f$ is a spectrum of additive noise.
|
||||
|
||||
Circular PSF is a good approximation of out-of-focus distortion. Such PSF is specified by only one parameter - radius \f$R\f$. Circular PSF is used in this work.
|
||||
The circular PSF is a good approximation of out-of-focus distortion. Such a PSF is specified by only one parameter - radius \f$R\f$. Circular PSF is used in this work.
|
||||
|
||||
![Circular point spread function](psf.png)
|
||||
|
||||
### How to restore an blurred image?
|
||||
### How to restore a blurred image?
|
||||
|
||||
The objective of restoration (deblurring) is to obtain an estimate of the original image. Restoration formula in frequency domain is:
|
||||
The objective of restoration (deblurring) is to obtain an estimate of the original image. The restoration formula in frequency domain is:
|
||||
|
||||
\f[U' = H_w\cdot S\f]
|
||||
|
||||
where
|
||||
\f$U'\f$ is spectrum of estimation of original image \f$U\f$,
|
||||
\f$H_w\f$ is restoration filter, for example, Wiener filter.
|
||||
\f$U'\f$ is the spectrum of estimation of original image \f$U\f$, and
|
||||
\f$H_w\f$ is the restoration filter, for example, the Wiener filter.
|
||||
|
||||
### What is Wiener filter?
|
||||
### What is the Wiener filter?
|
||||
|
||||
Wiener filter is a way to restore a blurred image. Let's suppose that PSF is a real and symmetric signal, a power spectrum of the original true image and noise are not known,
|
||||
then simplified Wiener formula is:
|
||||
The Wiener filter is a way to restore a blurred image. Let's suppose that the PSF is a real and symmetric signal, a power spectrum of the original true image and noise are not known,
|
||||
then a simplified Wiener formula is:
|
||||
|
||||
\f[H_w = \frac{H}{|H|^2+\frac{1}{SNR}} \f]
|
||||
|
||||
where
|
||||
\f$SNR\f$ is signal-to-noise ratio.
|
||||
|
||||
So, in order to recover an out-of-focus image by Wiener filter, it needs to know \f$SNR\f$ and \f$R\f$ of circular PSF.
|
||||
So, in order to recover an out-of-focus image with the Wiener filter, you need to know the \f$SNR\f$ and \f$R\f$ of the circular PSF.
|
||||
|
||||
|
||||
Source code
|
||||
@ -68,36 +68,36 @@ You can find source code in the `samples/cpp/tutorial_code/ImgProc/out_of_focus_
|
||||
Explanation
|
||||
-----------
|
||||
|
||||
An out-of-focus image recovering algorithm consists of PSF generation, Wiener filter generation and filtering an blurred image in frequency domain:
|
||||
An out-of-focus image recovering algorithm consists of PSF generation, Wiener filter generation and filtering a blurred image in frequency domain:
|
||||
@snippet samples/cpp/tutorial_code/ImgProc/out_of_focus_deblur_filter/out_of_focus_deblur_filter.cpp main
|
||||
|
||||
A function calcPSF() forms an circular PSF according to input parameter radius \f$R\f$:
|
||||
A function calcPSF() forms a circular PSF according to input parameter radius \f$R\f$:
|
||||
@snippet samples/cpp/tutorial_code/ImgProc/out_of_focus_deblur_filter/out_of_focus_deblur_filter.cpp calcPSF
|
||||
|
||||
A function calcWnrFilter() synthesizes simplified Wiener filter \f$H_w\f$ according to formula described above:
|
||||
A function calcWnrFilter() synthesizes the simplified Wiener filter \f$H_w\f$ according to the formula described above:
|
||||
@snippet samples/cpp/tutorial_code/ImgProc/out_of_focus_deblur_filter/out_of_focus_deblur_filter.cpp calcWnrFilter
|
||||
|
||||
A function fftshift() rearranges PSF. This code was just copied from tutorial @ref tutorial_discrete_fourier_transform "Discrete Fourier Transform":
|
||||
A function fftshift() rearranges the PSF. This code was just copied from the tutorial @ref tutorial_discrete_fourier_transform "Discrete Fourier Transform":
|
||||
@snippet samples/cpp/tutorial_code/ImgProc/out_of_focus_deblur_filter/out_of_focus_deblur_filter.cpp fftshift
|
||||
|
||||
A function filter2DFreq() filters an blurred image in frequency domain:
|
||||
A function filter2DFreq() filters the blurred image in the frequency domain:
|
||||
@snippet samples/cpp/tutorial_code/ImgProc/out_of_focus_deblur_filter/out_of_focus_deblur_filter.cpp filter2DFreq
|
||||
|
||||
Result
|
||||
------
|
||||
|
||||
Below you can see real out-of-focus image:
|
||||
Below you can see the real out-of-focus image:
|
||||
![Out-of-focus image](images/original.jpg)
|
||||
|
||||
|
||||
Below result was done by \f$R\f$ = 53 and \f$SNR\f$ = 5200 parameters:
|
||||
And the following result has been computed with \f$R\f$ = 53 and \f$SNR\f$ = 5200 parameters:
|
||||
![The restored (deblurred) image](images/recovered.jpg)
|
||||
|
||||
The Wiener filter was used, values of \f$R\f$ and \f$SNR\f$ were selected manually to give the best possible visual result.
|
||||
We can see that the result is not perfect, but it gives us a hint to the image content. With some difficulty, the text is readable.
|
||||
The Wiener filter was used, and values of \f$R\f$ and \f$SNR\f$ were selected manually to give the best possible visual result.
|
||||
We can see that the result is not perfect, but it gives us a hint of the image's content. With some difficulty, the text is readable.
|
||||
|
||||
@note The parameter \f$R\f$ is the most important. So you should adjust \f$R\f$ first, then \f$SNR\f$.
|
||||
@note Sometimes you can observe the ringing effect in an restored image. This effect can be reduced by several methods. For example, you can taper input image edges.
|
||||
@note Sometimes you can observe the ringing effect in a restored image. This effect can be reduced with several methods. For example, you can taper input image edges.
|
||||
|
||||
You can also find a quick video demonstration of this on
|
||||
[YouTube](https://youtu.be/0bEcE4B0XP4).
|
||||
|
@ -320,3 +320,13 @@ In this section you will learn about the image processing (manipulation) functio
|
||||
*Author:* Karpushin Vladislav
|
||||
|
||||
You will learn how to recover an out-of-focus image by Wiener filter.
|
||||
|
||||
- @subpage tutorial_motion_deblur_filter
|
||||
|
||||
*Languages:* C++
|
||||
|
||||
*Compatibility:* \> OpenCV 2.0
|
||||
|
||||
*Author:* Karpushin Vladislav
|
||||
|
||||
You will learn how to recover an image with motion blur distortion using a Wiener filter.
|
||||
|
@ -142,8 +142,6 @@ of them, you need to download and install them on your system.
|
||||
- [Intel Integrated Performance Primitives (*IPP*)](http://software.intel.com/en-us/articles/intel-ipp/) may be used to improve the performance
|
||||
of color conversion, Haar training and DFT functions of the OpenCV library. Watch out, since
|
||||
this is not a free service.
|
||||
- [Intel IPP Asynchronous C/C++](http://software.intel.com/en-us/intel-ipp-preview) is currently focused delivering Intel Graphics
|
||||
support for advanced image processing and computer vision functions.
|
||||
- OpenCV offers a somewhat fancier and more useful graphical user interface, than the default one
|
||||
by using the [Qt framework](http://qt.nokia.com/downloads). For a quick overview of what this has to offer, look into the
|
||||
documentation's *highgui* module, under the *Qt New Functions* section. Version 4.6 or later of
|
||||
@ -204,10 +202,6 @@ libraries). If you do not need the support for some of these, you can just freel
|
||||
|
||||
![](images/IntelTBB.png)
|
||||
|
||||
-# For the [Intel IPP Asynchronous C/C++](http://software.intel.com/en-us/intel-ipp-preview) download the source files and set environment
|
||||
variable **IPP_ASYNC_ROOT**. It should point to
|
||||
`<your Program Files(x86) directory>/Intel/IPP Preview */ipp directory`. Here \* denotes the
|
||||
particular preview name.
|
||||
-# In case of the [Eigen](http://eigen.tuxfamily.org/index.php?title=Main_Page#Download) library it is again a case of download and extract to the
|
||||
`D:/OpenCV/dep` directory.
|
||||
-# Same as above with [OpenEXR](http://www.openexr.com/downloads.html).
|
||||
@ -319,6 +313,7 @@ libraries). If you do not need the support for some of these, you can just freel
|
||||
you are concerned about performance, build them and run.
|
||||
- *BUILD_opencv_python* -\> Self-explanatory. Create the binaries to use OpenCV from the
|
||||
Python language.
|
||||
- *BUILD_opencv_world* -\> Generate a single "opencv_world" binary (a shared or static library, depending on *BUILD_SHARED_LIBS*) including all the modules instead of a collection of separate binaries, one binary per module.
|
||||
|
||||
Press the *Configure* button again and ensure no errors are reported. If this is the case, you
|
||||
can tell CMake to create the project files by pushing the *Generate* button. Go to the build
|
||||
|
@ -1487,12 +1487,14 @@ static void computeDisparitySGBM_HH4( const Mat& img1, const Mat& img2,
|
||||
size_t minLrSize = width1 , LrSize = minLrSize*D2;
|
||||
int hsumBufNRows = SH2*2 + 2;
|
||||
size_t totalBufSize = (LrSize + minLrSize)*NLR*sizeof(CostType) + // minLr[] and Lr[]
|
||||
costBufSize*hsumBufNRows*sizeof(CostType) + // hsumBuf
|
||||
CSBufSize*2*sizeof(CostType) + 1024; // C, S
|
||||
costBufSize*hsumBufNRows*sizeof(CostType) + // hsumBuf
|
||||
CSBufSize*2*sizeof(CostType) + 1024; // C, S
|
||||
|
||||
if( buffer.empty() || !buffer.isContinuous() ||
|
||||
buffer.cols*buffer.rows*buffer.elemSize() < totalBufSize )
|
||||
buffer.create(1, (int)totalBufSize, CV_8U);
|
||||
{
|
||||
buffer.reserveBuffer(totalBufSize);
|
||||
}
|
||||
|
||||
// summary cost over different (nDirs) directions
|
||||
CostType* Cbuf = (CostType*)alignPtr(buffer.ptr(), ALIGN);
|
||||
|
@ -664,6 +664,8 @@ inline void v_mul_expand(const v_uint32x8& a, const v_uint32x8& b,
|
||||
v_zip(v_uint64x4(v0), v_uint64x4(v1), c, d);
|
||||
}
|
||||
|
||||
// Signed 16-bit lanes: multiplies lane-wise and keeps the upper 16 bits of each product (wraps _mm256_mulhi_epi16).
inline v_int16x16 v_mul_hi(const v_int16x16& a, const v_int16x16& b) { return v_int16x16(_mm256_mulhi_epi16(a.val, b.val)); }
|
||||
// Unsigned 16-bit lanes: multiplies lane-wise and keeps the upper 16 bits of each product (wraps _mm256_mulhi_epu16).
inline v_uint16x16 v_mul_hi(const v_uint16x16& a, const v_uint16x16& b) { return v_uint16x16(_mm256_mulhi_epu16(a.val, b.val)); }
|
||||
|
||||
/** Non-saturating arithmetics **/
|
||||
#define OPENCV_HAL_IMPL_AVX_BIN_FUNC(func, _Tpvec, intrin) \
|
||||
|
@ -891,6 +891,20 @@ template<typename _Tp, int n> inline void v_mul_expand(const v_reg<_Tp, n>& a, c
|
||||
}
|
||||
}
|
||||
|
||||
/** @brief Multiply and extract high part
|
||||
|
||||
Multiply values two registers and store high part of the results.
|
||||
Implemented only for 16-bit source types (v_int16x8, v_uint16x8). Returns \f$ a*b >> 16 \f$
|
||||
*/
|
||||
template<typename _Tp, int n> inline v_reg<_Tp, n> v_mul_hi(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
|
||||
{
|
||||
typedef typename V_TypeTraits<_Tp>::w_type w_type;
|
||||
v_reg<_Tp, n> c;
|
||||
for (int i = 0; i < n; i++)
|
||||
c.s[i] = (_Tp)(((w_type)a.s[i] * b.s[i]) >> sizeof(_Tp)*8);
|
||||
return c;
|
||||
}
|
||||
|
||||
//! @cond IGNORED
|
||||
template<typename _Tp, int n> inline void v_hsum(const v_reg<_Tp, n>& a,
|
||||
v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>& c)
|
||||
|
@ -553,6 +553,21 @@ inline void v_mul_expand(const v_uint32x4& a, const v_uint32x4& b,
|
||||
d.val = vmull_u32(vget_high_u32(a.val), vget_high_u32(b.val));
|
||||
}
|
||||
|
||||
// Signed 16-bit v_mul_hi for NEON: each half of the inputs is multiplied into
// 32-bit lanes (vmull_s16), shifted right by 16 while narrowing back to 16 bits
// (vshrn_n_s32), and the low and high halves are recombined (vcombine_s16).
inline v_int16x8 v_mul_hi(const v_int16x8& a, const v_int16x8& b)
|
||||
{
|
||||
return v_int16x8(vcombine_s16(
|
||||
vshrn_n_s32(vmull_s16( vget_low_s16(a.val), vget_low_s16(b.val)), 16),
|
||||
vshrn_n_s32(vmull_s16(vget_high_s16(a.val), vget_high_s16(b.val)), 16)
|
||||
));
|
||||
}
|
||||
// Unsigned 16-bit v_mul_hi for NEON: each half of the inputs is multiplied into
// 32-bit lanes (vmull_u16), shifted right by 16 while narrowing back to 16 bits
// (vshrn_n_u32), and the low and high halves are recombined (vcombine_u16).
inline v_uint16x8 v_mul_hi(const v_uint16x8& a, const v_uint16x8& b)
|
||||
{
|
||||
return v_uint16x8(vcombine_u16(
|
||||
vshrn_n_u32(vmull_u16( vget_low_u16(a.val), vget_low_u16(b.val)), 16),
|
||||
vshrn_n_u32(vmull_u16(vget_high_u16(a.val), vget_high_u16(b.val)), 16)
|
||||
));
|
||||
}
|
||||
|
||||
inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b)
|
||||
{
|
||||
int32x4_t c = vmull_s16(vget_low_s16(a.val), vget_low_s16(b.val));
|
||||
|
@ -737,6 +737,9 @@ inline void v_mul_expand(const v_uint32x4& a, const v_uint32x4& b,
|
||||
d.val = _mm_unpackhi_epi64(c0, c1);
|
||||
}
|
||||
|
||||
// Signed 16-bit lanes: multiplies lane-wise and keeps the upper 16 bits of each product (wraps _mm_mulhi_epi16).
inline v_int16x8 v_mul_hi(const v_int16x8& a, const v_int16x8& b) { return v_int16x8(_mm_mulhi_epi16(a.val, b.val)); }
|
||||
// Unsigned 16-bit lanes: multiplies lane-wise and keeps the upper 16 bits of each product (wraps _mm_mulhi_epu16).
inline v_uint16x8 v_mul_hi(const v_uint16x8& a, const v_uint16x8& b) { return v_uint16x8(_mm_mulhi_epu16(a.val, b.val)); }
|
||||
|
||||
inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b)
|
||||
{
|
||||
return v_int32x4(_mm_madd_epi16(a.val, b.val));
|
||||
|
@ -457,6 +457,21 @@ inline void v_mul_expand(const v_uint32x4& a, const v_uint32x4& b, v_uint64x2& c
|
||||
d.val = vec_mul(vec_unpacklu(a.val), vec_unpacklu(b.val));
|
||||
}
|
||||
|
||||
// Signed 16-bit v_mul_hi for VSX: both halves of the inputs are unpacked
// (vec_unpackh / vec_unpackl), multiplied, shifted right by 16 with vec_sra,
// and packed back to 16-bit lanes with vec_packs.
// NOTE(review): vec_packs is a saturating pack; a 16x16-bit product shifted
// right by 16 always fits in 16 bits, so saturation should never trigger — confirm.
inline v_int16x8 v_mul_hi(const v_int16x8& a, const v_int16x8& b)
|
||||
{
|
||||
return v_int16x8(vec_packs(
|
||||
vec_sra(vec_mul(vec_unpackh(a.val), vec_unpackh(b.val)), vec_uint4_sp(16)),
|
||||
vec_sra(vec_mul(vec_unpackl(a.val), vec_unpackl(b.val)), vec_uint4_sp(16))
|
||||
));
|
||||
}
|
||||
// Unsigned 16-bit v_mul_hi for VSX: both halves of the inputs are unpacked
// (vec_unpackhu / vec_unpacklu), multiplied, shifted right by 16 with vec_sr,
// and packed back to 16-bit lanes with vec_packs.
// NOTE(review): vec_packs is a saturating pack; a 16x16-bit product shifted
// right by 16 always fits in 16 bits, so saturation should never trigger — confirm.
inline v_uint16x8 v_mul_hi(const v_uint16x8& a, const v_uint16x8& b)
|
||||
{
|
||||
return v_uint16x8(vec_packs(
|
||||
vec_sr(vec_mul(vec_unpackhu(a.val), vec_unpackhu(b.val)), vec_uint4_sp(16)),
|
||||
vec_sr(vec_mul(vec_unpacklu(a.val), vec_unpacklu(b.val)), vec_uint4_sp(16))
|
||||
));
|
||||
}
|
||||
|
||||
/** Non-saturating arithmetics **/
|
||||
#define OPENCV_HAL_IMPL_VSX_BIN_FUNC(func, intrin) \
|
||||
template<typename _Tpvec> \
|
||||
|
@ -45,7 +45,7 @@
|
||||
#ifndef OPENCV_CORE_IPPASYNC_HPP
|
||||
#define OPENCV_CORE_IPPASYNC_HPP
|
||||
|
||||
#ifdef HAVE_IPP_A
|
||||
#ifdef HAVE_IPP_A // this file will be removed in OpenCV 4.0
|
||||
|
||||
#include "opencv2/core.hpp"
|
||||
#include <ipp_async_op.h>
|
||||
|
@ -146,6 +146,12 @@ synonym is needed to generate Python/Java etc. wrappers properly. At the functio
|
||||
level their use is similar, but _InputArray::getMat(idx) should be used to get header for the
|
||||
idx-th component of the outer vector and _InputArray::size().area() should be used to find the
|
||||
number of components (vectors/matrices) of the outer vector.
|
||||
|
||||
In general, type support is limited to cv::Mat types. Other types are forbidden.
|
||||
But in some cases we need to support passing of custom non-general Mat types, like arrays of cv::KeyPoint, cv::DMatch, etc.
|
||||
This data is not intended to be interpreted as image data, or processed somehow like a regular cv::Mat.
|
||||
To pass such a custom type, use the rawIn() / rawOut() / rawInOut() wrappers.
|
||||
Custom type is wrapped as Mat-compatible `CV_8UC<N>` values (N = sizeof(T), N <= CV_CN_MAX).
|
||||
*/
|
||||
class CV_EXPORTS _InputArray
|
||||
{
|
||||
@ -199,6 +205,9 @@ public:
|
||||
template<typename _Tp, std::size_t _Nm> _InputArray(const std::array<_Tp, _Nm>& arr);
|
||||
template<std::size_t _Nm> _InputArray(const std::array<Mat, _Nm>& arr);
|
||||
|
||||
template<typename _Tp> static _InputArray rawIn(const std::vector<_Tp>& vec);
|
||||
template<typename _Tp, std::size_t _Nm> static _InputArray rawIn(const std::array<_Tp, _Nm>& arr);
|
||||
|
||||
Mat getMat(int idx=-1) const;
|
||||
Mat getMat_(int idx=-1) const;
|
||||
UMat getUMat(int idx=-1) const;
|
||||
@ -328,12 +337,13 @@ public:
|
||||
_OutputArray(const UMat& m);
|
||||
_OutputArray(const std::vector<UMat>& vec);
|
||||
|
||||
#ifdef CV_CXX_STD_ARRAY
|
||||
template<typename _Tp, std::size_t _Nm> _OutputArray(std::array<_Tp, _Nm>& arr);
|
||||
template<typename _Tp, std::size_t _Nm> _OutputArray(const std::array<_Tp, _Nm>& arr);
|
||||
template<std::size_t _Nm> _OutputArray(std::array<Mat, _Nm>& arr);
|
||||
template<std::size_t _Nm> _OutputArray(const std::array<Mat, _Nm>& arr);
|
||||
#endif
|
||||
|
||||
template<typename _Tp> static _OutputArray rawOut(std::vector<_Tp>& vec);
|
||||
template<typename _Tp, std::size_t _Nm> static _OutputArray rawOut(std::array<_Tp, _Nm>& arr);
|
||||
|
||||
bool fixedSize() const;
|
||||
bool fixedType() const;
|
||||
@ -397,15 +407,23 @@ public:
|
||||
_InputOutputArray(const UMat& m);
|
||||
_InputOutputArray(const std::vector<UMat>& vec);
|
||||
|
||||
#ifdef CV_CXX_STD_ARRAY
|
||||
template<typename _Tp, std::size_t _Nm> _InputOutputArray(std::array<_Tp, _Nm>& arr);
|
||||
template<typename _Tp, std::size_t _Nm> _InputOutputArray(const std::array<_Tp, _Nm>& arr);
|
||||
template<std::size_t _Nm> _InputOutputArray(std::array<Mat, _Nm>& arr);
|
||||
template<std::size_t _Nm> _InputOutputArray(const std::array<Mat, _Nm>& arr);
|
||||
#endif
|
||||
|
||||
template<typename _Tp> static _InputOutputArray rawInOut(std::vector<_Tp>& vec);
|
||||
// Declared static like the other raw* wrappers of these classes: the definition builds and
// returns a fresh _InputOutputArray without touching *this, and the free rawInOut() helper
// invokes it as _InputOutputArray::rawInOut(v), i.e. without an object.
template<typename _Tp, std::size_t _Nm> static _InputOutputArray rawInOut(std::array<_Tp, _Nm>& arr);
|
||||
|
||||
};
|
||||
|
||||
/** Helper to wrap custom types. @see InputArray */
|
||||
template<typename _Tp> static inline _InputArray rawIn(_Tp& v);
|
||||
/** Helper to wrap custom types. @see InputArray */
|
||||
template<typename _Tp> static inline _OutputArray rawOut(_Tp& v);
|
||||
/** Helper to wrap custom types. @see InputArray */
|
||||
template<typename _Tp> static inline _InputOutputArray rawInOut(_Tp& v);
|
||||
|
||||
CV__DEBUG_NS_END
|
||||
|
||||
typedef const _InputArray& InputArray;
|
||||
@ -991,11 +1009,9 @@ public:
|
||||
*/
|
||||
template<typename _Tp> explicit Mat(const std::initializer_list<int> sizes, const std::initializer_list<_Tp> list);
|
||||
|
||||
#ifdef CV_CXX_STD_ARRAY
|
||||
/** @overload
|
||||
*/
|
||||
template<typename _Tp, size_t _Nm> explicit Mat(const std::array<_Tp, _Nm>& arr, bool copyData=false);
|
||||
#endif
|
||||
|
||||
/** @overload
|
||||
*/
|
||||
@ -1630,9 +1646,7 @@ public:
|
||||
template<typename _Tp, int n> operator Vec<_Tp, n>() const;
|
||||
template<typename _Tp, int m, int n> operator Matx<_Tp, m, n>() const;
|
||||
|
||||
#ifdef CV_CXX_STD_ARRAY
|
||||
template<typename _Tp, std::size_t _Nm> operator std::array<_Tp, _Nm>() const;
|
||||
#endif
|
||||
|
||||
/** @brief Reports whether the matrix is continuous or not.
|
||||
|
||||
@ -2214,9 +2228,7 @@ public:
|
||||
Mat_(std::initializer_list<_Tp> values);
|
||||
explicit Mat_(const std::initializer_list<int> sizes, const std::initializer_list<_Tp> values);
|
||||
|
||||
#ifdef CV_CXX_STD_ARRAY
|
||||
template <std::size_t _Nm> explicit Mat_(const std::array<_Tp, _Nm>& arr, bool copyData=false);
|
||||
#endif
|
||||
|
||||
Mat_& operator = (const Mat& m);
|
||||
Mat_& operator = (const Mat_& m);
|
||||
@ -2314,10 +2326,8 @@ public:
|
||||
//! conversion to vector.
|
||||
operator std::vector<_Tp>() const;
|
||||
|
||||
#ifdef CV_CXX_STD_ARRAY
|
||||
//! conversion to array.
|
||||
template<std::size_t _Nm> operator std::array<_Tp, _Nm>() const;
|
||||
#endif
|
||||
|
||||
//! conversion to Vec
|
||||
template<int n> operator Vec<typename DataType<_Tp>::channel_type, n>() const;
|
||||
|
@ -61,6 +61,16 @@ CV__DEBUG_NS_BEGIN
|
||||
|
||||
//! @cond IGNORED
|
||||
|
||||
////////////////////////// Custom (raw) type wrapper //////////////////////////
|
||||
|
||||
template<typename _Tp> static inline
|
||||
int rawType()
|
||||
{
|
||||
CV_StaticAssert(sizeof(_Tp) <= CV_CN_MAX, "sizeof(_Tp) is too large");
|
||||
const int elemSize = sizeof(_Tp);
|
||||
return (int)CV_MAKETYPE(CV_8U, elemSize);
|
||||
}
|
||||
|
||||
//////////////////////// Input/Output Arrays ////////////////////////
|
||||
|
||||
inline void _InputArray::init(int _flags, const void* _obj)
|
||||
@ -134,6 +144,25 @@ inline _InputArray::_InputArray(const ogl::Buffer& buf)
|
||||
inline _InputArray::_InputArray(const cuda::HostMem& cuda_mem)
|
||||
{ init(CUDA_HOST_MEM + ACCESS_READ, &cuda_mem); }
|
||||
|
||||
template<typename _Tp> inline
|
||||
_InputArray _InputArray::rawIn(const std::vector<_Tp>& vec)
|
||||
{
|
||||
_InputArray v;
|
||||
v.flags = _InputArray::FIXED_TYPE + _InputArray::STD_VECTOR + rawType<_Tp>() + ACCESS_READ;
|
||||
v.obj = (void*)&vec;
|
||||
return v;
|
||||
}
|
||||
|
||||
template<typename _Tp, std::size_t _Nm> inline
|
||||
_InputArray _InputArray::rawIn(const std::array<_Tp, _Nm>& arr)
|
||||
{
|
||||
_InputArray v;
|
||||
v.flags = FIXED_TYPE + FIXED_SIZE + STD_ARRAY + traits::Type<_Tp>::value + ACCESS_READ;
|
||||
v.obj = (void*)arr.data();
|
||||
v.sz = Size(1, _Nm);
|
||||
return v;
|
||||
}
|
||||
|
||||
inline _InputArray::~_InputArray() {}
|
||||
|
||||
inline Mat _InputArray::getMat(int i) const
|
||||
@ -261,6 +290,25 @@ inline _OutputArray::_OutputArray(const ogl::Buffer& buf)
|
||||
inline _OutputArray::_OutputArray(const cuda::HostMem& cuda_mem)
|
||||
{ init(FIXED_TYPE + FIXED_SIZE + CUDA_HOST_MEM + ACCESS_WRITE, &cuda_mem); }
|
||||
|
||||
template<typename _Tp> inline
|
||||
_OutputArray _OutputArray::rawOut(std::vector<_Tp>& vec)
|
||||
{
|
||||
_OutputArray v;
|
||||
v.flags = _InputArray::FIXED_TYPE + _InputArray::STD_VECTOR + rawType<_Tp>() + ACCESS_WRITE;
|
||||
v.obj = (void*)&vec;
|
||||
return v;
|
||||
}
|
||||
|
||||
template<typename _Tp, std::size_t _Nm> inline
|
||||
_OutputArray _OutputArray::rawOut(std::array<_Tp, _Nm>& arr)
|
||||
{
|
||||
_OutputArray v;
|
||||
v.flags = FIXED_TYPE + FIXED_SIZE + STD_ARRAY + traits::Type<_Tp>::value + ACCESS_WRITE;
|
||||
v.obj = (void*)arr.data();
|
||||
v.sz = Size(1, _Nm);
|
||||
return v;
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
inline _InputOutputArray::_InputOutputArray() { init(ACCESS_RW, 0); }
|
||||
@ -370,6 +418,30 @@ inline _InputOutputArray::_InputOutputArray(const ogl::Buffer& buf)
|
||||
inline _InputOutputArray::_InputOutputArray(const cuda::HostMem& cuda_mem)
|
||||
{ init(FIXED_TYPE + FIXED_SIZE + CUDA_HOST_MEM + ACCESS_RW, &cuda_mem); }
|
||||
|
||||
template<typename _Tp> inline
|
||||
_InputOutputArray _InputOutputArray::rawInOut(std::vector<_Tp>& vec)
|
||||
{
|
||||
_InputOutputArray v;
|
||||
v.flags = _InputArray::FIXED_TYPE + _InputArray::STD_VECTOR + rawType<_Tp>() + ACCESS_RW;
|
||||
v.obj = (void*)&vec;
|
||||
return v;
|
||||
}
|
||||
|
||||
template<typename _Tp, std::size_t _Nm> inline
|
||||
_InputOutputArray _InputOutputArray::rawInOut(std::array<_Tp, _Nm>& arr)
|
||||
{
|
||||
_InputOutputArray v;
|
||||
v.flags = FIXED_TYPE + FIXED_SIZE + STD_ARRAY + traits::Type<_Tp>::value + ACCESS_RW;
|
||||
v.obj = (void*)arr.data();
|
||||
v.sz = Size(1, _Nm);
|
||||
return v;
|
||||
}
|
||||
|
||||
|
||||
// Free-function shorthand: forwards v to the matching _InputArray::rawIn overload.
template<typename _Tp> static inline
_InputArray rawIn(_Tp& v)
{
    return _InputArray::rawIn(v);
}
|
||||
// Free-function shorthand: forwards v to the matching _OutputArray::rawOut overload.
template<typename _Tp> static inline
_OutputArray rawOut(_Tp& v)
{
    return _OutputArray::rawOut(v);
}
|
||||
// Free-function shorthand: forwards v to the matching _InputOutputArray::rawInOut overload.
template<typename _Tp> static inline
_InputOutputArray rawInOut(_Tp& v)
{
    return _InputOutputArray::rawInOut(v);
}
|
||||
|
||||
CV__DEBUG_NS_END
|
||||
|
||||
//////////////////////////////////////////// Mat //////////////////////////////////////////
|
||||
|
@ -270,7 +270,7 @@ static void binary_op( InputArray _src1, InputArray _src2, OutputArray _dst,
|
||||
if( !haveScalar )
|
||||
{
|
||||
const Mat* arrays[] = { &src1, &src2, &dst, &mask, 0 };
|
||||
uchar* ptrs[4]{};
|
||||
uchar* ptrs[4] = {};
|
||||
|
||||
NAryMatIterator it(arrays, ptrs);
|
||||
size_t total = it.size, blocksize = total;
|
||||
@ -306,7 +306,7 @@ static void binary_op( InputArray _src1, InputArray _src2, OutputArray _dst,
|
||||
else
|
||||
{
|
||||
const Mat* arrays[] = { &src1, &dst, &mask, 0 };
|
||||
uchar* ptrs[3]{};
|
||||
uchar* ptrs[3] = {};
|
||||
|
||||
NAryMatIterator it(arrays, ptrs);
|
||||
size_t total = it.size, blocksize = std::min(total, blocksize0);
|
||||
@ -745,7 +745,7 @@ static void arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst,
|
||||
if( !haveScalar )
|
||||
{
|
||||
const Mat* arrays[] = { &src1, &src2, &dst, &mask, 0 };
|
||||
uchar* ptrs[4]{};
|
||||
uchar* ptrs[4] = {};
|
||||
|
||||
NAryMatIterator it(arrays, ptrs);
|
||||
size_t total = it.size, blocksize = total;
|
||||
@ -812,7 +812,7 @@ static void arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst,
|
||||
else
|
||||
{
|
||||
const Mat* arrays[] = { &src1, &dst, &mask, 0 };
|
||||
uchar* ptrs[3]{};
|
||||
uchar* ptrs[3] = {};
|
||||
|
||||
NAryMatIterator it(arrays, ptrs);
|
||||
size_t total = it.size, blocksize = std::min(total, blocksize0);
|
||||
@ -1240,7 +1240,7 @@ void cv::compare(InputArray _src1, InputArray _src2, OutputArray _dst, int op)
|
||||
if( !haveScalar )
|
||||
{
|
||||
const Mat* arrays[] = { &src1, &src2, &dst, 0 };
|
||||
uchar* ptrs[3]{};
|
||||
uchar* ptrs[3] = {};
|
||||
|
||||
NAryMatIterator it(arrays, ptrs);
|
||||
size_t total = it.size;
|
||||
@ -1251,7 +1251,7 @@ void cv::compare(InputArray _src1, InputArray _src2, OutputArray _dst, int op)
|
||||
else
|
||||
{
|
||||
const Mat* arrays[] = { &src1, &dst, 0 };
|
||||
uchar* ptrs[2]{};
|
||||
uchar* ptrs[2] = {};
|
||||
|
||||
NAryMatIterator it(arrays, ptrs);
|
||||
size_t total = it.size, blocksize = std::min(total, blocksize0);
|
||||
@ -1748,7 +1748,7 @@ void cv::inRange(InputArray _src, InputArray _lowerb,
|
||||
|
||||
const Mat* arrays_sc[] = { &src, &dst, 0 };
|
||||
const Mat* arrays_nosc[] = { &src, &dst, &lb, &ub, 0 };
|
||||
uchar* ptrs[4]{};
|
||||
uchar* ptrs[4] = {};
|
||||
|
||||
NAryMatIterator it(lbScalar && ubScalar ? arrays_sc : arrays_nosc, ptrs);
|
||||
size_t total = it.size, blocksize = std::min(total, blocksize0);
|
||||
|
@ -1347,7 +1347,7 @@ void cv::Mat::convertTo(OutputArray _dst, int _type, double alpha, double beta)
|
||||
else
|
||||
{
|
||||
const Mat* arrays[] = {&src, &dst, 0};
|
||||
uchar* ptrs[2]{};
|
||||
uchar* ptrs[2] = {};
|
||||
NAryMatIterator it(arrays, ptrs);
|
||||
Size sz((int)(it.size*cn), 1);
|
||||
|
||||
@ -1496,7 +1496,7 @@ void cv::convertFp16( InputArray _src, OutputArray _dst)
|
||||
else
|
||||
{
|
||||
const Mat* arrays[] = {&src, &dst, 0};
|
||||
uchar* ptrs[2]{};
|
||||
uchar* ptrs[2] = {};
|
||||
NAryMatIterator it(arrays, ptrs);
|
||||
Size sz((int)(it.size*cn), 1);
|
||||
|
||||
|
@ -1775,7 +1775,7 @@ void cv::convertScaleAbs( InputArray _src, OutputArray _dst, double alpha, doubl
|
||||
else
|
||||
{
|
||||
const Mat* arrays[] = {&src, &dst, 0};
|
||||
uchar* ptrs[2]{};
|
||||
uchar* ptrs[2] = {};
|
||||
NAryMatIterator it(arrays, ptrs);
|
||||
Size sz((int)it.size*cn, 1);
|
||||
|
||||
|
@ -306,7 +306,7 @@ void Mat::copyTo( OutputArray _dst ) const
|
||||
if( total() != 0 )
|
||||
{
|
||||
const Mat* arrays[] = { this, &dst };
|
||||
uchar* ptrs[2]{};
|
||||
uchar* ptrs[2] = {};
|
||||
NAryMatIterator it(arrays, ptrs, 2);
|
||||
size_t sz = it.size*elemSize();
|
||||
|
||||
@ -399,7 +399,7 @@ void Mat::copyTo( OutputArray _dst, InputArray _mask ) const
|
||||
}
|
||||
|
||||
const Mat* arrays[] = { this, &dst, &mask, 0 };
|
||||
uchar* ptrs[3]{};
|
||||
uchar* ptrs[3] = {};
|
||||
NAryMatIterator it(arrays, ptrs);
|
||||
Size sz((int)(it.size*mcn), 1);
|
||||
|
||||
|
@ -25,51 +25,34 @@ static int countNonZero_(const T* src, int len )
|
||||
static int countNonZero8u( const uchar* src, int len )
|
||||
{
|
||||
int i=0, nz = 0;
|
||||
#if CV_SSE2
|
||||
if(USE_SSE2)//5x-6x
|
||||
#if CV_SIMD
|
||||
int len0 = len & -v_uint8::nlanes;
|
||||
v_uint8 v_zero = vx_setzero_u8();
|
||||
v_uint8 v_one = vx_setall_u8(1);
|
||||
|
||||
v_uint32 v_sum32 = vx_setzero_u32();
|
||||
while (i < len0)
|
||||
{
|
||||
__m128i v_zero = _mm_setzero_si128();
|
||||
__m128i sum = _mm_setzero_si128();
|
||||
|
||||
for (; i<=len-16; i+=16)
|
||||
v_uint16 v_sum16 = vx_setzero_u16();
|
||||
int j = i;
|
||||
while (j < std::min(len0, i + 65280 * v_uint16::nlanes))
|
||||
{
|
||||
__m128i r0 = _mm_loadu_si128((const __m128i*)(src+i));
|
||||
sum = _mm_add_epi32(sum, _mm_sad_epu8(_mm_sub_epi8(v_zero, _mm_cmpeq_epi8(r0, v_zero)), v_zero));
|
||||
v_uint8 v_sum8 = vx_setzero_u8();
|
||||
int k = j;
|
||||
for (; k < std::min(len0, j + 255 * v_uint8::nlanes); k += v_uint8::nlanes)
|
||||
v_sum8 += v_one & (vx_load(src + k) == v_zero);
|
||||
v_uint16 part1, part2;
|
||||
v_expand(v_sum8, part1, part2);
|
||||
v_sum16 += part1 + part2;
|
||||
j = k;
|
||||
}
|
||||
nz = i - _mm_cvtsi128_si32(_mm_add_epi32(sum, _mm_unpackhi_epi64(sum, sum)));
|
||||
v_uint32 part1, part2;
|
||||
v_expand(v_sum16, part1, part2);
|
||||
v_sum32 += part1 + part2;
|
||||
i = j;
|
||||
}
|
||||
#elif CV_NEON
|
||||
int len0 = len & -16, blockSize1 = (1 << 8) - 16, blockSize0 = blockSize1 << 6;
|
||||
uint32x4_t v_nz = vdupq_n_u32(0u);
|
||||
uint8x16_t v_zero = vdupq_n_u8(0), v_1 = vdupq_n_u8(1);
|
||||
const uchar * src0 = src;
|
||||
|
||||
while( i < len0 )
|
||||
{
|
||||
int blockSizei = std::min(len0 - i, blockSize0), j = 0;
|
||||
|
||||
while (j < blockSizei)
|
||||
{
|
||||
int blockSizej = std::min(blockSizei - j, blockSize1), k = 0;
|
||||
uint8x16_t v_pz = v_zero;
|
||||
|
||||
for( ; k <= blockSizej - 16; k += 16 )
|
||||
v_pz = vaddq_u8(v_pz, vandq_u8(vceqq_u8(vld1q_u8(src0 + k), v_zero), v_1));
|
||||
|
||||
uint16x8_t v_p1 = vmovl_u8(vget_low_u8(v_pz)), v_p2 = vmovl_u8(vget_high_u8(v_pz));
|
||||
v_nz = vaddq_u32(vaddl_u16(vget_low_u16(v_p1), vget_high_u16(v_p1)), v_nz);
|
||||
v_nz = vaddq_u32(vaddl_u16(vget_low_u16(v_p2), vget_high_u16(v_p2)), v_nz);
|
||||
|
||||
src0 += blockSizej;
|
||||
j += blockSizej;
|
||||
}
|
||||
|
||||
i += blockSizei;
|
||||
}
|
||||
|
||||
CV_DECL_ALIGNED(16) unsigned int buf[4];
|
||||
vst1q_u32(buf, v_nz);
|
||||
nz += i - saturate_cast<int>(buf[0] + buf[1] + buf[2] + buf[3]);
|
||||
nz = i - v_reduce_sum(v_sum32);
|
||||
v_cleanup();
|
||||
#endif
|
||||
for( ; i < len; i++ )
|
||||
nz += src[i] != 0;
|
||||
@ -79,159 +62,112 @@ static int countNonZero8u( const uchar* src, int len )
|
||||
static int countNonZero16u( const ushort* src, int len )
|
||||
{
|
||||
int i = 0, nz = 0;
|
||||
#if CV_SSE2
|
||||
if (USE_SSE2)
|
||||
#if CV_SIMD
|
||||
int len0 = len & -v_int8::nlanes;
|
||||
v_uint16 v_zero = vx_setzero_u16();
|
||||
v_int8 v_one = vx_setall_s8(1);
|
||||
|
||||
v_int32 v_sum32 = vx_setzero_s32();
|
||||
while (i < len0)
|
||||
{
|
||||
__m128i v_zero = _mm_setzero_si128 ();
|
||||
__m128i sum = _mm_setzero_si128();
|
||||
|
||||
for ( ; i <= len - 8; i += 8)
|
||||
v_int16 v_sum16 = vx_setzero_s16();
|
||||
int j = i;
|
||||
while (j < std::min(len0, i + 32766 * v_int16::nlanes))
|
||||
{
|
||||
__m128i r0 = _mm_loadu_si128((const __m128i*)(src + i));
|
||||
sum = _mm_add_epi32(sum, _mm_sad_epu8(_mm_sub_epi8(v_zero, _mm_cmpeq_epi16(r0, v_zero)), v_zero));
|
||||
v_int8 v_sum8 = vx_setzero_s8();
|
||||
int k = j;
|
||||
for (; k < std::min(len0, j + 127 * v_int8::nlanes); k += v_int8::nlanes)
|
||||
v_sum8 += v_one & v_pack(v_reinterpret_as_s16(vx_load(src + k) == v_zero), v_reinterpret_as_s16(vx_load(src + k + v_uint16::nlanes) == v_zero));
|
||||
v_int16 part1, part2;
|
||||
v_expand(v_sum8, part1, part2);
|
||||
v_sum16 += part1 + part2;
|
||||
j = k;
|
||||
}
|
||||
|
||||
nz = i - (_mm_cvtsi128_si32(_mm_add_epi32(sum, _mm_unpackhi_epi64(sum, sum))) >> 1);
|
||||
src += i;
|
||||
v_int32 part1, part2;
|
||||
v_expand(v_sum16, part1, part2);
|
||||
v_sum32 += part1 + part2;
|
||||
i = j;
|
||||
}
|
||||
#elif CV_NEON
|
||||
int len0 = len & -8, blockSize1 = (1 << 15), blockSize0 = blockSize1 << 6;
|
||||
uint32x4_t v_nz = vdupq_n_u32(0u);
|
||||
uint16x8_t v_zero = vdupq_n_u16(0), v_1 = vdupq_n_u16(1);
|
||||
|
||||
while( i < len0 )
|
||||
{
|
||||
int blockSizei = std::min(len0 - i, blockSize0), j = 0;
|
||||
|
||||
while (j < blockSizei)
|
||||
{
|
||||
int blockSizej = std::min(blockSizei - j, blockSize1), k = 0;
|
||||
uint16x8_t v_pz = v_zero;
|
||||
|
||||
for( ; k <= blockSizej - 8; k += 8 )
|
||||
v_pz = vaddq_u16(v_pz, vandq_u16(vceqq_u16(vld1q_u16(src + k), v_zero), v_1));
|
||||
|
||||
v_nz = vaddq_u32(vaddl_u16(vget_low_u16(v_pz), vget_high_u16(v_pz)), v_nz);
|
||||
|
||||
src += blockSizej;
|
||||
j += blockSizej;
|
||||
}
|
||||
|
||||
i += blockSizei;
|
||||
}
|
||||
|
||||
CV_DECL_ALIGNED(16) unsigned int buf[4];
|
||||
vst1q_u32(buf, v_nz);
|
||||
nz += i - saturate_cast<int>(buf[0] + buf[1] + buf[2] + buf[3]);
|
||||
nz = i - v_reduce_sum(v_sum32);
|
||||
v_cleanup();
|
||||
#endif
|
||||
return nz + countNonZero_(src, len - i);
|
||||
return nz + countNonZero_(src + i, len - i);
|
||||
}
|
||||
|
||||
static int countNonZero32s( const int* src, int len )
|
||||
{
|
||||
int i = 0, nz = 0;
|
||||
#if CV_SSE2
|
||||
if (USE_SSE2)
|
||||
#if CV_SIMD
|
||||
int len0 = len & -v_int8::nlanes;
|
||||
v_int32 v_zero = vx_setzero_s32();
|
||||
v_int8 v_one = vx_setall_s8(1);
|
||||
|
||||
v_int32 v_sum32 = vx_setzero_s32();
|
||||
while (i < len0)
|
||||
{
|
||||
__m128i v_zero = _mm_setzero_si128 ();
|
||||
__m128i sum = _mm_setzero_si128();
|
||||
|
||||
for ( ; i <= len - 4; i += 4)
|
||||
v_int16 v_sum16 = vx_setzero_s16();
|
||||
int j = i;
|
||||
while (j < std::min(len0, i + 32766 * v_int16::nlanes))
|
||||
{
|
||||
__m128i r0 = _mm_loadu_si128((const __m128i*)(src + i));
|
||||
sum = _mm_add_epi32(sum, _mm_sad_epu8(_mm_sub_epi8(v_zero, _mm_cmpeq_epi32(r0, v_zero)), v_zero));
|
||||
v_int8 v_sum8 = vx_setzero_s8();
|
||||
int k = j;
|
||||
for (; k < std::min(len0, j + 127 * v_int8::nlanes); k += v_int8::nlanes)
|
||||
v_sum8 += v_one & v_pack(
|
||||
v_pack(vx_load(src + k ) == v_zero, vx_load(src + k + v_int32::nlanes) == v_zero),
|
||||
v_pack(vx_load(src + k + 2*v_int32::nlanes) == v_zero, vx_load(src + k + 3*v_int32::nlanes) == v_zero)
|
||||
);
|
||||
v_int16 part1, part2;
|
||||
v_expand(v_sum8, part1, part2);
|
||||
v_sum16 += part1 + part2;
|
||||
j = k;
|
||||
}
|
||||
|
||||
nz = i - (_mm_cvtsi128_si32(_mm_add_epi32(sum, _mm_unpackhi_epi64(sum, sum))) >> 2);
|
||||
src += i;
|
||||
v_int32 part1, part2;
|
||||
v_expand(v_sum16, part1, part2);
|
||||
v_sum32 += part1 + part2;
|
||||
i = j;
|
||||
}
|
||||
#elif CV_NEON
|
||||
int len0 = len & -8, blockSize1 = (1 << 15), blockSize0 = blockSize1 << 6;
|
||||
uint32x4_t v_nz = vdupq_n_u32(0u);
|
||||
int32x4_t v_zero = vdupq_n_s32(0.0f);
|
||||
uint16x8_t v_1 = vdupq_n_u16(1u), v_zerou = vdupq_n_u16(0u);
|
||||
|
||||
while( i < len0 )
|
||||
{
|
||||
int blockSizei = std::min(len0 - i, blockSize0), j = 0;
|
||||
|
||||
while (j < blockSizei)
|
||||
{
|
||||
int blockSizej = std::min(blockSizei - j, blockSize1), k = 0;
|
||||
uint16x8_t v_pz = v_zerou;
|
||||
|
||||
for( ; k <= blockSizej - 8; k += 8 )
|
||||
v_pz = vaddq_u16(v_pz, vandq_u16(vcombine_u16(vmovn_u32(vceqq_s32(vld1q_s32(src + k), v_zero)),
|
||||
vmovn_u32(vceqq_s32(vld1q_s32(src + k + 4), v_zero))), v_1));
|
||||
|
||||
v_nz = vaddq_u32(vaddl_u16(vget_low_u16(v_pz), vget_high_u16(v_pz)), v_nz);
|
||||
|
||||
src += blockSizej;
|
||||
j += blockSizej;
|
||||
}
|
||||
|
||||
i += blockSizei;
|
||||
}
|
||||
|
||||
CV_DECL_ALIGNED(16) unsigned int buf[4];
|
||||
vst1q_u32(buf, v_nz);
|
||||
nz += i - saturate_cast<int>(buf[0] + buf[1] + buf[2] + buf[3]);
|
||||
nz = i - v_reduce_sum(v_sum32);
|
||||
v_cleanup();
|
||||
#endif
|
||||
return nz + countNonZero_(src, len - i);
|
||||
return nz + countNonZero_(src + i, len - i);
|
||||
}
|
||||
|
||||
static int countNonZero32f( const float* src, int len )
|
||||
{
|
||||
int i = 0, nz = 0;
|
||||
#if CV_SSE2
|
||||
if (USE_SSE2)
|
||||
#if CV_SIMD
|
||||
int len0 = len & -v_int8::nlanes;
|
||||
v_float32 v_zero = vx_setzero_f32();
|
||||
v_int8 v_one = vx_setall_s8(1);
|
||||
|
||||
v_int32 v_sum32 = vx_setzero_s32();
|
||||
while (i < len0)
|
||||
{
|
||||
__m128 v_zero_f = _mm_setzero_ps();
|
||||
__m128i v_zero = _mm_setzero_si128 ();
|
||||
__m128i sum = _mm_setzero_si128();
|
||||
|
||||
for ( ; i <= len - 4; i += 4)
|
||||
v_int16 v_sum16 = vx_setzero_s16();
|
||||
int j = i;
|
||||
while (j < std::min(len0, i + 32766 * v_int16::nlanes))
|
||||
{
|
||||
__m128 r0 = _mm_loadu_ps(src + i);
|
||||
sum = _mm_add_epi32(sum, _mm_sad_epu8(_mm_sub_epi8(v_zero, _mm_castps_si128(_mm_cmpeq_ps(r0, v_zero_f))), v_zero));
|
||||
v_int8 v_sum8 = vx_setzero_s8();
|
||||
int k = j;
|
||||
for (; k < std::min(len0, j + 127 * v_int8::nlanes); k += v_int8::nlanes)
|
||||
v_sum8 += v_one & v_pack(
|
||||
v_pack(v_reinterpret_as_s32(vx_load(src + k ) == v_zero), v_reinterpret_as_s32(vx_load(src + k + v_float32::nlanes) == v_zero)),
|
||||
v_pack(v_reinterpret_as_s32(vx_load(src + k + 2*v_float32::nlanes) == v_zero), v_reinterpret_as_s32(vx_load(src + k + 3*v_float32::nlanes) == v_zero))
|
||||
);
|
||||
v_int16 part1, part2;
|
||||
v_expand(v_sum8, part1, part2);
|
||||
v_sum16 += part1 + part2;
|
||||
j = k;
|
||||
}
|
||||
|
||||
nz = i - (_mm_cvtsi128_si32(_mm_add_epi32(sum, _mm_unpackhi_epi64(sum, sum))) >> 2);
|
||||
src += i;
|
||||
v_int32 part1, part2;
|
||||
v_expand(v_sum16, part1, part2);
|
||||
v_sum32 += part1 + part2;
|
||||
i = j;
|
||||
}
|
||||
#elif CV_NEON
|
||||
int len0 = len & -8, blockSize1 = (1 << 15), blockSize0 = blockSize1 << 6;
|
||||
uint32x4_t v_nz = vdupq_n_u32(0u);
|
||||
float32x4_t v_zero = vdupq_n_f32(0.0f);
|
||||
uint16x8_t v_1 = vdupq_n_u16(1u), v_zerou = vdupq_n_u16(0u);
|
||||
|
||||
while( i < len0 )
|
||||
{
|
||||
int blockSizei = std::min(len0 - i, blockSize0), j = 0;
|
||||
|
||||
while (j < blockSizei)
|
||||
{
|
||||
int blockSizej = std::min(blockSizei - j, blockSize1), k = 0;
|
||||
uint16x8_t v_pz = v_zerou;
|
||||
|
||||
for( ; k <= blockSizej - 8; k += 8 )
|
||||
v_pz = vaddq_u16(v_pz, vandq_u16(vcombine_u16(vmovn_u32(vceqq_f32(vld1q_f32(src + k), v_zero)),
|
||||
vmovn_u32(vceqq_f32(vld1q_f32(src + k + 4), v_zero))), v_1));
|
||||
|
||||
v_nz = vaddq_u32(vaddl_u16(vget_low_u16(v_pz), vget_high_u16(v_pz)), v_nz);
|
||||
|
||||
src += blockSizej;
|
||||
j += blockSizej;
|
||||
}
|
||||
|
||||
i += blockSizei;
|
||||
}
|
||||
|
||||
CV_DECL_ALIGNED(16) unsigned int buf[4];
|
||||
vst1q_u32(buf, v_nz);
|
||||
nz += i - saturate_cast<int>(buf[0] + buf[1] + buf[2] + buf[3]);
|
||||
nz = i - v_reduce_sum(v_sum32);
|
||||
v_cleanup();
|
||||
#endif
|
||||
return nz + countNonZero_(src, len - i);
|
||||
return nz + countNonZero_(src + i, len - i);
|
||||
}
|
||||
|
||||
static int countNonZero64f( const double* src, int len )
|
||||
@ -378,7 +314,7 @@ int cv::countNonZero( InputArray _src )
|
||||
CV_Assert( func != 0 );
|
||||
|
||||
const Mat* arrays[] = {&src, 0};
|
||||
uchar* ptrs[1]{};
|
||||
uchar* ptrs[1] = {};
|
||||
NAryMatIterator it(arrays, ptrs);
|
||||
int total = (int)it.size, nz = 0;
|
||||
|
||||
|
@ -98,6 +98,10 @@ int solveLP(const Mat& Func, const Mat& Constr, Mat& z){
|
||||
CV_Assert(Constr.type()==CV_64FC1 || Constr.type()==CV_32FC1);
|
||||
CV_Assert((Func.rows==1 && (Constr.cols-Func.cols==1))||
|
||||
(Func.cols==1 && (Constr.cols-Func.rows==1)));
|
||||
if (!z.empty())
|
||||
CV_CheckTypeEQ(z.type(), CV_64FC1, "");
|
||||
else
|
||||
CV_CheckType(z.type(), z.type() == CV_64FC1 || z.type() == CV_8UC1/*empty cv::Mat*/, "");
|
||||
|
||||
//copy arguments because we will modify them
|
||||
Mat_<double> bigC=Mat_<double>(1,(Func.rows==1?Func.cols:Func.rows)+1),
|
||||
|
@ -342,7 +342,7 @@ public:
|
||||
int lutcn = lut_.channels();
|
||||
|
||||
const Mat* arrays[] = {&src, &dst, 0};
|
||||
uchar* ptrs[2]{};
|
||||
uchar* ptrs[2] = {};
|
||||
NAryMatIterator it(arrays, ptrs);
|
||||
int len = (int)it.size;
|
||||
|
||||
@ -408,7 +408,7 @@ void cv::LUT( InputArray _src, InputArray _lut, OutputArray _dst )
|
||||
CV_Assert( func != 0 );
|
||||
|
||||
const Mat* arrays[] = {&src, &dst, 0};
|
||||
uchar* ptrs[2]{};
|
||||
uchar* ptrs[2] = {};
|
||||
NAryMatIterator it(arrays, ptrs);
|
||||
int len = (int)it.size;
|
||||
|
||||
|
@ -158,7 +158,7 @@ void magnitude( InputArray src1, InputArray src2, OutputArray dst )
|
||||
Mat Mag = dst.getMat();
|
||||
|
||||
const Mat* arrays[] = {&X, &Y, &Mag, 0};
|
||||
uchar* ptrs[3]{};
|
||||
uchar* ptrs[3] = {};
|
||||
NAryMatIterator it(arrays, ptrs);
|
||||
int len = (int)it.size*cn;
|
||||
|
||||
@ -194,7 +194,7 @@ void phase( InputArray src1, InputArray src2, OutputArray dst, bool angleInDegre
|
||||
Mat Angle = dst.getMat();
|
||||
|
||||
const Mat* arrays[] = {&X, &Y, &Angle, 0};
|
||||
uchar* ptrs[3]{};
|
||||
uchar* ptrs[3] = {};
|
||||
NAryMatIterator it(arrays, ptrs);
|
||||
int j, total = (int)(it.size*cn), blockSize = total;
|
||||
size_t esz1 = X.elemSize1();
|
||||
@ -280,7 +280,7 @@ void cartToPolar( InputArray src1, InputArray src2,
|
||||
Mat Mag = dst1.getMat(), Angle = dst2.getMat();
|
||||
|
||||
const Mat* arrays[] = {&X, &Y, &Mag, &Angle, 0};
|
||||
uchar* ptrs[4]{};
|
||||
uchar* ptrs[4] = {};
|
||||
NAryMatIterator it(arrays, ptrs);
|
||||
int j, total = (int)(it.size*cn), blockSize = std::min(total, ((BLOCK_SIZE+cn-1)/cn)*cn);
|
||||
size_t esz1 = X.elemSize1();
|
||||
@ -577,7 +577,7 @@ void polarToCart( InputArray src1, InputArray src2,
|
||||
CV_IPP_RUN(!angleInDegrees, ipp_polarToCart(Mag, Angle, X, Y));
|
||||
|
||||
const Mat* arrays[] = {&Mag, &Angle, &X, &Y, 0};
|
||||
uchar* ptrs[4]{};
|
||||
uchar* ptrs[4] = {};
|
||||
NAryMatIterator it(arrays, ptrs);
|
||||
cv::AutoBuffer<float> _buf;
|
||||
float* buf[2] = {0, 0};
|
||||
@ -676,7 +676,7 @@ void exp( InputArray _src, OutputArray _dst )
|
||||
Mat dst = _dst.getMat();
|
||||
|
||||
const Mat* arrays[] = {&src, &dst, 0};
|
||||
uchar* ptrs[2]{};
|
||||
uchar* ptrs[2] = {};
|
||||
NAryMatIterator it(arrays, ptrs);
|
||||
int len = (int)(it.size*cn);
|
||||
|
||||
@ -709,7 +709,7 @@ void log( InputArray _src, OutputArray _dst )
|
||||
Mat dst = _dst.getMat();
|
||||
|
||||
const Mat* arrays[] = {&src, &dst, 0};
|
||||
uchar* ptrs[2]{};
|
||||
uchar* ptrs[2] = {};
|
||||
NAryMatIterator it(arrays, ptrs);
|
||||
int len = (int)(it.size*cn);
|
||||
|
||||
@ -1241,7 +1241,7 @@ void pow( InputArray _src, double power, OutputArray _dst )
|
||||
Mat dst = _dst.getMat();
|
||||
|
||||
const Mat* arrays[] = {&src, &dst, 0};
|
||||
uchar* ptrs[2]{};
|
||||
uchar* ptrs[2] = {};
|
||||
NAryMatIterator it(arrays, ptrs);
|
||||
int len = (int)(it.size*cn);
|
||||
|
||||
@ -1588,7 +1588,7 @@ void patchNaNs( InputOutputArray _a, double _val )
|
||||
|
||||
Mat a = _a.getMat();
|
||||
const Mat* arrays[] = {&a, 0};
|
||||
int* ptrs[1]{};
|
||||
int* ptrs[1] = {};
|
||||
NAryMatIterator it(arrays, (uchar**)ptrs);
|
||||
size_t len = it.size*a.channels();
|
||||
Cv32suf val;
|
||||
|
@ -2144,7 +2144,7 @@ void cv::transform( InputArray _src, OutputArray _dst, InputArray _mtx )
|
||||
CV_Assert( func != 0 );
|
||||
|
||||
const Mat* arrays[] = {&src, &dst, 0};
|
||||
uchar* ptrs[2]{};
|
||||
uchar* ptrs[2] = {};
|
||||
NAryMatIterator it(arrays, ptrs);
|
||||
size_t i, total = it.size;
|
||||
|
||||
@ -2290,7 +2290,7 @@ void cv::perspectiveTransform( InputArray _src, OutputArray _dst, InputArray _mt
|
||||
CV_Assert( func != 0 );
|
||||
|
||||
const Mat* arrays[] = {&src, &dst, 0};
|
||||
uchar* ptrs[2]{};
|
||||
uchar* ptrs[2] = {};
|
||||
NAryMatIterator it(arrays, ptrs);
|
||||
size_t i, total = it.size;
|
||||
|
||||
@ -2441,7 +2441,7 @@ void cv::scaleAdd( InputArray _src1, double alpha, InputArray _src2, OutputArray
|
||||
}
|
||||
|
||||
const Mat* arrays[] = {&src1, &src2, &dst, 0};
|
||||
uchar* ptrs[3]{};
|
||||
uchar* ptrs[3] = {};
|
||||
NAryMatIterator it(arrays, ptrs);
|
||||
size_t i, len = it.size*cn;
|
||||
|
||||
@ -3301,7 +3301,7 @@ double Mat::dot(InputArray _mat) const
|
||||
}
|
||||
|
||||
const Mat* arrays[] = {this, &mat, 0};
|
||||
uchar* ptrs[2]{};
|
||||
uchar* ptrs[2] = {};
|
||||
NAryMatIterator it(arrays, ptrs);
|
||||
int len = (int)(it.size*cn);
|
||||
double r = 0;
|
||||
|
@ -1413,18 +1413,39 @@ void _OutputArray::create(int d, const int* sizes, int mtype, int i,
|
||||
case 16:
|
||||
((std::vector<Vec4i>*)v)->resize(len);
|
||||
break;
|
||||
case 20:
|
||||
((std::vector<Vec<int, 5> >*)v)->resize(len);
|
||||
break;
|
||||
case 24:
|
||||
((std::vector<Vec6i>*)v)->resize(len);
|
||||
break;
|
||||
case 28:
|
||||
((std::vector<Vec<int, 7> >*)v)->resize(len);
|
||||
break;
|
||||
case 32:
|
||||
((std::vector<Vec8i>*)v)->resize(len);
|
||||
break;
|
||||
case 36:
|
||||
((std::vector<Vec<int, 9> >*)v)->resize(len);
|
||||
break;
|
||||
case 40:
|
||||
((std::vector<Vec<int, 10> >*)v)->resize(len);
|
||||
break;
|
||||
case 44:
|
||||
((std::vector<Vec<int, 11> >*)v)->resize(len);
|
||||
break;
|
||||
case 48:
|
||||
((std::vector<Vec<int, 12> >*)v)->resize(len);
|
||||
break;
|
||||
case 52:
|
||||
((std::vector<Vec<int, 13> >*)v)->resize(len);
|
||||
break;
|
||||
case 56:
|
||||
((std::vector<Vec<int, 14> >*)v)->resize(len);
|
||||
break;
|
||||
case 60:
|
||||
((std::vector<Vec<int, 15> >*)v)->resize(len);
|
||||
break;
|
||||
case 64:
|
||||
((std::vector<Vec<int, 16> >*)v)->resize(len);
|
||||
break;
|
||||
|
@ -121,7 +121,7 @@ cv::Scalar cv::mean( InputArray _src, InputArray _mask )
|
||||
CV_Assert( cn <= 4 && func != 0 );
|
||||
|
||||
const Mat* arrays[] = {&src, &mask, 0};
|
||||
uchar* ptrs[2]{};
|
||||
uchar* ptrs[2] = {};
|
||||
NAryMatIterator it(arrays, ptrs);
|
||||
int total = (int)it.size, blockSize = total, intSumBlockSize = 0;
|
||||
int j, count = 0;
|
||||
@ -786,7 +786,7 @@ void cv::meanStdDev( InputArray _src, OutputArray _mean, OutputArray _sdv, Input
|
||||
CV_Assert( func != 0 );
|
||||
|
||||
const Mat* arrays[] = {&src, &mask, 0};
|
||||
uchar* ptrs[2]{};
|
||||
uchar* ptrs[2] = {};
|
||||
NAryMatIterator it(arrays, ptrs);
|
||||
int total = (int)it.size, blockSize = total, intSumBlockSize = 0;
|
||||
int j, count = 0, nz0 = 0;
|
||||
|
@ -770,7 +770,7 @@ void cv::minMaxIdx(InputArray _src, double* minVal,
|
||||
CV_Assert( func != 0 );
|
||||
|
||||
const Mat* arrays[] = {&src, &mask, 0};
|
||||
uchar* ptrs[2]{};
|
||||
uchar* ptrs[2] = {};
|
||||
NAryMatIterator it(arrays, ptrs);
|
||||
|
||||
size_t minidx = 0, maxidx = 0;
|
||||
|
@ -710,7 +710,7 @@ double cv::norm( InputArray _src, int normType, InputArray _mask )
|
||||
int cellSize = normType == NORM_HAMMING ? 1 : 2;
|
||||
|
||||
const Mat* arrays[] = {&src, 0};
|
||||
uchar* ptrs[1]{};
|
||||
uchar* ptrs[1] = {};
|
||||
NAryMatIterator it(arrays, ptrs);
|
||||
int total = (int)it.size;
|
||||
int result = 0;
|
||||
@ -727,7 +727,7 @@ double cv::norm( InputArray _src, int normType, InputArray _mask )
|
||||
CV_Assert( func != 0 );
|
||||
|
||||
const Mat* arrays[] = {&src, &mask, 0};
|
||||
uchar* ptrs[2]{};
|
||||
uchar* ptrs[2] = {};
|
||||
union
|
||||
{
|
||||
double d;
|
||||
@ -1168,7 +1168,7 @@ double cv::norm( InputArray _src1, InputArray _src2, int normType, InputArray _m
|
||||
int cellSize = normType == NORM_HAMMING ? 1 : 2;
|
||||
|
||||
const Mat* arrays[] = {&src1, &src2, 0};
|
||||
uchar* ptrs[2]{};
|
||||
uchar* ptrs[2] = {};
|
||||
NAryMatIterator it(arrays, ptrs);
|
||||
int total = (int)it.size;
|
||||
int result = 0;
|
||||
@ -1185,7 +1185,7 @@ double cv::norm( InputArray _src1, InputArray _src2, int normType, InputArray _m
|
||||
CV_Assert( func != 0 );
|
||||
|
||||
const Mat* arrays[] = {&src1, &src2, &mask, 0};
|
||||
uchar* ptrs[3]{};
|
||||
uchar* ptrs[3] = {};
|
||||
union
|
||||
{
|
||||
double d;
|
||||
|
@ -584,6 +584,11 @@ void RNG::fill( InputOutputArray _mat, int disttype,
|
||||
}
|
||||
ip[j][1] = cvCeil(a);
|
||||
int idiff = ip[j][0] = cvFloor(b) - ip[j][1] - 1;
|
||||
if (idiff < 0)
|
||||
{
|
||||
idiff = 0;
|
||||
ip[j][0] = 0;
|
||||
}
|
||||
double diff = b - a;
|
||||
|
||||
fast_int_mode = fast_int_mode && diff <= 4294967296. && (idiff & (idiff+1)) == 0;
|
||||
|
@ -602,7 +602,7 @@ cv::Scalar cv::sum( InputArray _src )
|
||||
CV_Assert( cn <= 4 && func != 0 );
|
||||
|
||||
const Mat* arrays[] = {&src, 0};
|
||||
uchar* ptrs[1]{};
|
||||
uchar* ptrs[1] = {};
|
||||
NAryMatIterator it(arrays, ptrs);
|
||||
Scalar s;
|
||||
int total = (int)it.size, blockSize = total, intSumBlockSize = 0;
|
||||
|
@ -1,171 +0,0 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
#include "test_precomp.hpp"
|
||||
#include "opencv2/ts/ocl_test.hpp"
|
||||
|
||||
#ifdef HAVE_IPP_A
|
||||
#include "opencv2/core/ippasync.hpp"
|
||||
|
||||
using namespace cv;
|
||||
using namespace std;
|
||||
using namespace opencv_test;
|
||||
|
||||
namespace opencv_test {
|
||||
namespace ocl {
|
||||
|
||||
// Value-parameterized fixture for the IPP Async round-trip test.
// Parameters: (matrix depth, channel count, HPP accelerator type).
PARAM_TEST_CASE(IPPAsync, MatDepth, Channels, hppAccelType)
{
    int type;                        // full OpenCV type built from depth and cn
    int cn;                          // channel count (parameter 1)
    int depth;                       // element depth (parameter 0)
    hppAccelType accelType;          // accelerator to instantiate (parameter 2)

    Mat matrix, result;              // input data and data read back from HPP
    hppiMatrix * hppMat;
    hppAccel accel;
    hppiVirtualMatrix * virtMatrix;
    hppStatus sts;                   // status of the most recent HPP call

    // Decode the test parameters into the members above.
    virtual void SetUp()
    {
        depth = GET_PARAM(0);
        cn = GET_PARAM(1);
        accelType = GET_PARAM(2);
        type = CV_MAKE_TYPE(depth, cn);
    }

    // Fill `matrix` with random values in [-100, 100) at a random size.
    void generateTestData()
    {
        const double upValue = 100;
        matrix = randomMat(randomSize(2, 100), type, -upValue, upValue);
    }

    // Compare the input with the read-back result within `threshold`.
    void Near(double threshold = 0.0)
    {
        EXPECT_MAT_NEAR(matrix, result, threshold);
    }
};
|
||||
|
||||
// Round-trip check: wrap a random Mat as an HPP matrix, apply AddC followed by
// SubC with the same constant, and compare the read-back result with the input.
TEST_P(IPPAsync, accuracy)
{
    sts = hppCreateInstance(accelType, 0, &accel);
    if (sts!=HPP_STATUS_NO_ERROR)
    {
        printf("hppStatus = %d\n",sts);
    }
    CV_Assert(sts==HPP_STATUS_NO_ERROR);

    // Two virtual matrices: [0] holds the AddC output, [1] the SubC output.
    virtMatrix = hppiCreateVirtualMatrices(accel, 2);

    for (int iteration = 0; iteration < test_loop_times; ++iteration)
    {
        generateTestData();
        hppMat = hpp::getHpp(matrix,accel);

        hppScalar offset = 3;

        sts = hppiAddC(accel, hppMat, offset, 0, virtMatrix[0]);
        CV_Assert(sts==HPP_STATUS_NO_ERROR);
        sts = hppiSubC(accel, virtMatrix[0], offset, 0, virtMatrix[1]);
        CV_Assert(sts==HPP_STATUS_NO_ERROR);

        // Block until the queued operations have completed.
        sts = hppWait(accel, HPP_TIME_OUT_INFINITE);
        CV_Assert(sts==HPP_STATUS_NO_ERROR);

        result = hpp::getMat(virtMatrix[1], accel, cn);

        Near(5.0e-6);

        sts = hppiFreeMatrix(hppMat);
        CV_Assert(sts==HPP_STATUS_NO_ERROR);
    }

    // Tear down in reverse order of creation.
    sts = hppiDeleteVirtualMatrices(accel, virtMatrix);
    CV_Assert(sts==HPP_STATUS_NO_ERROR);
    sts = hppDeleteInstance(accel);
    CV_Assert(sts==HPP_STATUS_NO_ERROR);
}
|
||||
|
||||
// Value-parameterized fixture for the 8-bit IPP Async test.
// Parameters: (channel count, HPP accelerator type); depth is fixed to CV_8U.
PARAM_TEST_CASE(IPPAsyncShared, Channels, hppAccelType)
{
    int cn;                          // channel count (parameter 0)
    int type;                        // CV_8U with `cn` channels
    hppAccelType accelType;          // accelerator to instantiate (parameter 1)

    Mat matrix, result;              // input data and data read back from HPP
    hppiMatrix* hppMat;
    hppAccel accel;
    hppiVirtualMatrix * virtMatrix;
    hppStatus sts;                   // status of the most recent HPP call

    // Decode the test parameters into the members above.
    virtual void SetUp()
    {
        cn = GET_PARAM(0);
        accelType = GET_PARAM(1);
        type = CV_MAKE_TYPE(CV_8U, cn);
    }

    // Pick a random size, query the HPP allocation parameters for it and fill
    // `matrix` with random values in [0, 100).
    void generateTestData()
    {
        Size matrix_Size = randomSize(2, 100);
        const int upValue = 100;

        // The queried pitch/size are not used afterwards; only `sts` is stored.
        hpp32u pitch, size;
        sts = hppQueryMatrixAllocParams(accel,
                                        static_cast<hpp32u>(matrix_Size.width*cn),
                                        static_cast<hpp32u>(matrix_Size.height),
                                        HPP_DATA_TYPE_8U, &pitch, &size);

        matrix = randomMat(matrix_Size, type, 0, upValue);
    }

    // Compare the input with the read-back result within `threshold`.
    void Near(double threshold = 0.0)
    {
        EXPECT_MAT_NEAR(matrix, result, threshold);
    }
};
|
||||
|
||||
// Same AddC/SubC round trip as the generic test, but on 8-bit data and with an
// exact (zero-tolerance) comparison.
TEST_P(IPPAsyncShared, accuracy)
{
    sts = hppCreateInstance(accelType, 0, &accel);
    if (sts!=HPP_STATUS_NO_ERROR)
    {
        printf("hppStatus = %d\n",sts);
    }
    CV_Assert(sts==HPP_STATUS_NO_ERROR);

    // Two virtual matrices: [0] holds the AddC output, [1] the SubC output.
    virtMatrix = hppiCreateVirtualMatrices(accel, 2);

    for (int iteration = 0; iteration < test_loop_times; ++iteration)
    {
        generateTestData();
        hppMat = hpp::getHpp(matrix,accel);

        hppScalar offset = 3;

        sts = hppiAddC(accel, hppMat, offset, 0, virtMatrix[0]);
        CV_Assert(sts==HPP_STATUS_NO_ERROR);
        sts = hppiSubC(accel, virtMatrix[0], offset, 0, virtMatrix[1]);
        CV_Assert(sts==HPP_STATUS_NO_ERROR);

        // Block until the queued operations have completed.
        sts = hppWait(accel, HPP_TIME_OUT_INFINITE);
        CV_Assert(sts==HPP_STATUS_NO_ERROR);

        result = hpp::getMat(virtMatrix[1], accel, cn);

        Near(0);

        sts = hppiFreeMatrix(hppMat);
        CV_Assert(sts==HPP_STATUS_NO_ERROR);
    }

    // Tear down in reverse order of creation.
    sts = hppiDeleteVirtualMatrices(accel, virtMatrix);
    CV_Assert(sts==HPP_STATUS_NO_ERROR);
    sts = hppDeleteInstance(accel);
    CV_Assert(sts==HPP_STATUS_NO_ERROR);
}
|
||||
|
||||
// 8-bit fixture: 1..4 channels on both the CPU and GPU accelerators.
INSTANTIATE_TEST_CASE_P(IppATest, IPPAsyncShared,
                        Combine(Values(1, 2, 3, 4),
                                Values( HPP_ACCEL_TYPE_CPU, HPP_ACCEL_TYPE_GPU)));

// Generic fixture: four depths x 1..4 channels x both accelerators.
INSTANTIATE_TEST_CASE_P(IppATest, IPPAsync,
                        Combine(Values(CV_8U, CV_16U, CV_16S, CV_32F),
                                Values(1, 2, 3, 4),
                                Values( HPP_ACCEL_TYPE_CPU, HPP_ACCEL_TYPE_GPU)));
|
||||
|
||||
}
|
||||
}
|
||||
#endif
|
@ -141,4 +141,14 @@ TEST(Core_LPSolver, regression_cycling){
|
||||
#endif
|
||||
}
|
||||
|
||||
// Checks which output element types cv::solveLP accepts: a double output must
// work, while float and int outputs are expected to throw.
TEST(Core_LPSolver, issue_12337)
{
    // 3x1 objective and 3x4 constraint matrix (last column is the right-hand side
    // in solveLP's convention -- NOTE(review): confirm against solveLP docs).
    Mat A = (cv::Mat_<double>(3, 1) << 3, 1, 2);
    Mat B = (cv::Mat_<double>(3, 4) << 1, 1, 3, 30,
                                       2, 2, 5, 24,
                                       4, 1, 2, 36);

    EXPECT_ANY_THROW(Mat1f z_float; cv::solveLP(A, B, z_float));
    EXPECT_NO_THROW(Mat1d z_double; cv::solveLP(A, B, z_double));
    EXPECT_ANY_THROW(Mat1i z_int; cv::solveLP(A, B, z_int));
    //need to update interface: EXPECT_ANY_THROW(Mat1b z_8u; cv::solveLP(A, B, z_8u));
}
|
||||
|
||||
}} // namespace
|
||||
|
@ -1872,4 +1872,63 @@ TEST(Core_Split, crash_12171)
|
||||
EXPECT_EQ(2, dst2.ptr<uchar>(1)[1]);
|
||||
}
|
||||
|
||||
struct CustomType // like cv::Keypoint
|
||||
{
|
||||
Point2f pt;
|
||||
float size;
|
||||
float angle;
|
||||
float response;
|
||||
int octave;
|
||||
int class_id;
|
||||
};
|
||||
|
||||
// Helper that consumes a raw input array of CustomType and writes through a raw
// output array:
//   1. verifies the input element size/type matches CustomType;
//   2. stamps octave = i on every element currently held by dst_;
//   3. re-creates dst_ as a 1 x 2N row (N = number of input elements);
//   4. stamps octave = -i on elements i in [N, 2N).
static void test_CustomType(InputArray src_, OutputArray dst_)
{
    Mat src = src_.getMat();
    ASSERT_EQ(sizeof(CustomType), src.elemSize());
    CV_CheckTypeEQ(src.type(), CV_MAKETYPE(CV_8U, sizeof(CustomType)), "");

    {
        // Scoped so this header is released before dst_ is re-created below.
        Mat dst = dst_.getMat();
        for (size_t i = 0; i < dst.total(); ++i)
        {
            CustomType* const item = dst.ptr<CustomType>(0) + i;
            item->octave = static_cast<int>(i);
        }
    }

    const int N = (int)src.total();
    dst_.create(1, N * 2, rawType<CustomType>());

    Mat dst = dst_.getMat();
    for (size_t i = static_cast<size_t>(N); i < dst.total(); ++i)
    {
        CustomType* const item = dst.ptr<CustomType>(0) + i;
        item->octave = -static_cast<int>(i);
    }
#if 0 // Compilation error
    CustomType& kpt = dst.at<CustomType>(0, 5);
#endif
}
|
||||
|
||||
// Passes vectors of a user-defined struct through rawIn()/rawOut() and checks
// that the output vector is resized and carries the values written by the
// helper.
TEST(Core_InputArray, support_CustomType)
{
    std::vector<CustomType> kp1(5);   // input: 5 elements
    std::vector<CustomType> kp2(3);   // output: starts with 3 elements

    test_CustomType(rawIn(kp1), rawOut(kp2));

    // Output must have grown to twice the input length.
    ASSERT_EQ((size_t)10, kp2.size());

    // Original elements keep the index written before the resize.
    for (int i = 0; i < 3; ++i)
    {
        EXPECT_EQ(i, kp2[i].octave);
    }

    // Elements added by the resize but not written by the helper stay zero.
    for (int i = 3; i < 5; ++i)
    {
        EXPECT_EQ(0, kp2[i].octave);
    }

    // Second half carries the negated index written after the resize.
    for (int i = 5; i < 10; ++i)
    {
        EXPECT_EQ(-i, kp2[i].octave);
    }
}
|
||||
|
||||
|
||||
}} // namespace
|
||||
|
@ -222,7 +222,7 @@ CUDA_TEST_P(ORB, Accuracy)
|
||||
{
|
||||
std::vector<cv::KeyPoint> keypoints;
|
||||
cv::cuda::GpuMat descriptors;
|
||||
orb->detectAndComputeAsync(loadMat(image), loadMat(mask), keypoints, descriptors);
|
||||
orb->detectAndComputeAsync(loadMat(image), loadMat(mask), rawOut(keypoints), descriptors);
|
||||
}
|
||||
catch (const cv::Exception& e)
|
||||
{
|
||||
|
@ -95,7 +95,7 @@ ocv_glob_module_sources(${sources_options} SOURCES ${fw_srcs})
|
||||
ocv_create_module(${libs} ${INF_ENGINE_TARGET})
|
||||
ocv_add_samples()
|
||||
ocv_add_accuracy_tests(${INF_ENGINE_TARGET})
|
||||
ocv_add_perf_tests()
|
||||
ocv_add_perf_tests(${INF_ENGINE_TARGET})
|
||||
|
||||
ocv_option(${the_module}_PERF_CAFFE "Add performance tests of Caffe framework" OFF)
|
||||
ocv_option(${the_module}_PERF_CLCAFFE "Add performance tests of clCaffe framework" OFF)
|
||||
|
@ -878,6 +878,14 @@ CV__DNN_INLINE_NS_BEGIN
|
||||
CV_EXPORTS_W void shrinkCaffeModel(const String& src, const String& dst,
|
||||
const std::vector<String>& layersTypes = std::vector<String>());
|
||||
|
||||
/** @brief Create a text representation for a binary network stored in protocol buffer format.
|
||||
* @param[in] model A path to binary network.
|
||||
* @param[in] output A path to output text file to be created.
|
||||
*
|
||||
* @note To reduce output file size, trained weights are not included.
|
||||
*/
|
||||
CV_EXPORTS_W void writeTextGraph(const String& model, const String& output);
|
||||
|
||||
/** @brief Performs non maximum suppression given boxes and corresponding scores.
|
||||
|
||||
* @param bboxes a set of bounding boxes to apply NMS.
|
||||
|
119
modules/dnn/misc/java/test/DnnListRegressionTest.java
Normal file
119
modules/dnn/misc/java/test/DnnListRegressionTest.java
Normal file
@ -0,0 +1,119 @@
|
||||
package org.opencv.test.dnn;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import org.opencv.core.Core;
|
||||
import org.opencv.core.Mat;
|
||||
import org.opencv.core.MatOfInt;
|
||||
import org.opencv.core.MatOfFloat;
|
||||
import org.opencv.core.MatOfByte;
|
||||
import org.opencv.core.Scalar;
|
||||
import org.opencv.core.Size;
|
||||
import org.opencv.dnn.DictValue;
|
||||
import org.opencv.dnn.Dnn;
|
||||
import org.opencv.dnn.Layer;
|
||||
import org.opencv.dnn.Net;
|
||||
import org.opencv.imgcodecs.Imgcodecs;
|
||||
import org.opencv.imgproc.Imgproc;
|
||||
import org.opencv.test.OpenCVTestCase;
|
||||
|
||||
/*
|
||||
* regression test for #12324,
|
||||
* testing various java.util.List invocations,
|
||||
* which use the LIST_GET macro
|
||||
*/
|
||||
|
||||
public class DnnListRegressionTest extends OpenCVTestCase {
|
||||
|
||||
private final static String ENV_OPENCV_DNN_TEST_DATA_PATH = "OPENCV_DNN_TEST_DATA_PATH";
|
||||
|
||||
private final static String ENV_OPENCV_TEST_DATA_PATH = "OPENCV_TEST_DATA_PATH";
|
||||
|
||||
String modelFileName = "";
|
||||
String sourceImageFile = "";
|
||||
|
||||
Net net;
|
||||
|
||||
@Override
|
||||
protected void setUp() throws Exception {
|
||||
super.setUp();
|
||||
|
||||
String envDnnTestDataPath = System.getenv(ENV_OPENCV_DNN_TEST_DATA_PATH);
|
||||
|
||||
if(envDnnTestDataPath == null){
|
||||
isTestCaseEnabled = false;
|
||||
return;
|
||||
}
|
||||
|
||||
File dnnTestDataPath = new File(envDnnTestDataPath);
|
||||
modelFileName = new File(dnnTestDataPath, "dnn/tensorflow_inception_graph.pb").toString();
|
||||
|
||||
String envTestDataPath = System.getenv(ENV_OPENCV_TEST_DATA_PATH);
|
||||
|
||||
if(envTestDataPath == null) throw new Exception(ENV_OPENCV_TEST_DATA_PATH + " has to be defined!");
|
||||
|
||||
File testDataPath = new File(envTestDataPath);
|
||||
|
||||
File f = new File(testDataPath, "dnn/grace_hopper_227.png");
|
||||
sourceImageFile = f.toString();
|
||||
if(!f.exists()) throw new Exception("Test image is missing: " + sourceImageFile);
|
||||
|
||||
net = Dnn.readNetFromTensorflow(modelFileName);
|
||||
|
||||
Mat image = Imgcodecs.imread(sourceImageFile);
|
||||
assertNotNull("Loading image from file failed!", image);
|
||||
|
||||
Mat inputBlob = Dnn.blobFromImage(image, 1.0, new Size(224, 224), new Scalar(0), true, true);
|
||||
assertNotNull("Converting image to blob failed!", inputBlob);
|
||||
|
||||
net.setInput(inputBlob, "input");
|
||||
}
|
||||
|
||||
public void testSetInputsNames() {
|
||||
List<String> inputs = new ArrayList();
|
||||
inputs.add("input");
|
||||
try {
|
||||
net.setInputsNames(inputs);
|
||||
} catch(Exception e) {
|
||||
fail("Net setInputsNames failed: " + e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
public void testForward() {
|
||||
List<Mat> outs = new ArrayList();
|
||||
List<String> outNames = new ArrayList();
|
||||
outNames.add("softmax2");
|
||||
try {
|
||||
net.forward(outs,outNames);
|
||||
} catch(Exception e) {
|
||||
fail("Net forward failed: " + e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
public void testGetMemoryConsumption() {
|
||||
int layerId = 1;
|
||||
List<MatOfInt> netInputShapes = new ArrayList();
|
||||
netInputShapes.add(new MatOfInt(1, 3, 224, 224));
|
||||
long[] weights=null;
|
||||
long[] blobs=null;
|
||||
try {
|
||||
net.getMemoryConsumption(layerId, netInputShapes, weights, blobs);
|
||||
} catch(Exception e) {
|
||||
fail("Net getMemoryConsumption failed: " + e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
public void testGetFLOPS() {
|
||||
int layerId = 1;
|
||||
List<MatOfInt> netInputShapes = new ArrayList();
|
||||
netInputShapes.add(new MatOfInt(1, 3, 224, 224));
|
||||
try {
|
||||
net.getFLOPS(layerId, netInputShapes);
|
||||
} catch(Exception e) {
|
||||
fail("Net getFLOPS failed: " + e.getMessage());
|
||||
}
|
||||
}
|
||||
}
|
@ -1,107 +0,0 @@
|
||||
#include "../perf_precomp.hpp"
|
||||
#include "opencv2/ts/ocl_perf.hpp"
|
||||
#include <opencv2/dnn/shape_utils.hpp>
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
|
||||
namespace opencv_test { namespace ocl {
|
||||
using namespace ::perf;
|
||||
|
||||
namespace {
|
||||
enum {STRIDE_OFF = 1, STRIDE_ON = 2};
|
||||
CV_ENUM(StrideSize, STRIDE_OFF, STRIDE_ON);
|
||||
|
||||
enum {GROUP_OFF = 1, GROUP_2 = 2};
|
||||
CV_ENUM(GroupSize, GROUP_OFF, GROUP_2);
|
||||
} // namespace
|
||||
|
||||
//Squared Size
|
||||
#define SSZ(n) cv::Size(n, n)
|
||||
|
||||
typedef std::pair<MatShape, int> InpShapeNumOut;
|
||||
typedef tuple<Size, InpShapeNumOut, GroupSize, StrideSize> ConvParam; //kernel_size, inp shape, groups, stride
|
||||
typedef TestBaseWithParam<ConvParam> ConvolutionPerfTest;
|
||||
|
||||
// Build a 4-element shape {count, nplanes, height, width}.
static inline MatShape blobShape(int count, int nplanes, int height, int width)
{
    const int dims[4] = { count, nplanes, height, width };
    const int* const dimsEnd = dims + sizeof(dims) / sizeof(dims[0]);
    return MatShape(dims, dimsEnd);
}
|
||||
|
||||
// Performance test of a single Convolution layer on the OpenCL target.
// Parameters: kernel size x (input shape, output channels) x groups x stride.
OCL_PERF_TEST_P( ConvolutionPerfTest, perf, Combine(
    Values(Size(1, 1), Size(3, 3), Size(5, 5), Size(11, 11)),
    Values(make_pair(blobShape(1, 4, 224, 224), 64),
           make_pair(blobShape(1, 64, 112, 122), 128),
           make_pair(blobShape(1, 256, 28, 28), 512)),
    GroupSize::all(),
    StrideSize::all())
)
{
    RNG rng(0);

    // --- unpack the test parameters ---
    ConvParam params = GetParam();
    int ksz = get<0>(params).width;
    MatShape inpShape = get<1>(params).first;
    int outCn = get<1>(params).second;
    int groups = get<2>(params);
    // 11x11 kernels always use stride 4; smaller ones take the parameter value.
    int stride = (int)get<3>(params);
    if (ksz >= 11)
        stride = 4;

    // --- allocate and randomize weights, bias and input ---
    int inpCn = inpShape[1];
    int wgtSize[] = { outCn, inpCn/groups, ksz, ksz };
    int biasSize[] = { outCn, 1, 1, 1 };
    const int wtype = CV_32F;
    Mat wgtBlob(4, wgtSize, wtype);
    Mat biasBlob(4, biasSize, wtype);
    Mat inpBlob(4, &inpShape[0], wtype);
    // Fill order is kept fixed so the generated data stays reproducible.
    rng.fill(biasBlob, RNG::UNIFORM, -1, +1);
    rng.fill(wgtBlob, RNG::UNIFORM, -1, +1);
    rng.fill(inpBlob, RNG::UNIFORM, -1, +1);

    // --- describe and create the layer ---
    LayerParams lp;
    lp.set("num_output", outCn);
    lp.set("group", groups);
    lp.set("stride", stride);
    lp.set("kernel_size", ksz);
    lp.blobs.reserve(2);
    lp.blobs.push_back(wgtBlob);
    lp.blobs.push_back(biasBlob);

    std::vector<Mat*> inpBlobs(1, &inpBlob);
    std::vector<Mat> outBlobs, internalBlobs;

    Ptr<Layer> layer = cv::dnn::LayerFactory::createLayerInstance("Convolution", lp);

    // --- allocate output and internal buffers from the layer's shape query ---
    std::vector<MatShape> inputShapes(1, shape(inpBlob)), outShapes, internals;
    layer->getMemoryShapes(inputShapes, 0, outShapes, internals);
    for (size_t idx = 0; idx < outShapes.size(); ++idx)
    {
        outBlobs.push_back(Mat(outShapes[idx], CV_32F));
    }
    for (size_t idx = 0; idx < internals.size(); ++idx)
    {
        // Internal buffers with zero total size stay as empty Mats.
        Mat internal;
        if (total(internals[idx]))
            internal.create(internals[idx], CV_32F);
        internalBlobs.push_back(internal);
    }

    layer->finalize(inpBlobs, outBlobs);
    layer->preferableTarget = DNN_TARGET_OPENCL;

    // 2D views of the blobs for the perf framework's in/out declarations.
    Mat inpBlob2D = inpBlob.reshape(1, outCn);
    Mat wgtBlob2D = wgtBlob.reshape(1, outCn*(inpCn/groups));
    Mat outBlob2D = outBlobs[0].reshape(1, outBlobs[0].size[0]);
    declare.in(inpBlob2D, wgtBlob2D, WARMUP_RNG).out(outBlob2D);

    // warmup
    layer->forward(inpBlobs, outBlobs, internalBlobs);

    TEST_CYCLE()
    {
        layer->forward(inpBlobs, outBlobs, internalBlobs);
    }

    SANITY_CHECK_NOTHING();
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
@ -1,92 +1,674 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
#include "perf_precomp.hpp"
|
||||
#include <opencv2/dnn/shape_utils.hpp>
|
||||
|
||||
namespace opencv_test {
|
||||
|
||||
enum {STRIDE_OFF = 1, STRIDE_ON = 2};
|
||||
CV_ENUM(StrideSize, STRIDE_OFF, STRIDE_ON);
|
||||
|
||||
enum {GROUP_OFF = 1, GROUP_2 = 2};
|
||||
CV_ENUM(GroupSize, GROUP_OFF, GROUP_2);
|
||||
|
||||
typedef std::pair<MatShape, int> InpShapeNumOut;
|
||||
typedef tuple<Size, InpShapeNumOut, GroupSize, StrideSize> ConvParam; //kernel_size, inp shape, groups, stride
|
||||
typedef TestBaseWithParam<ConvParam> ConvolutionPerfTest;
|
||||
|
||||
static inline MatShape blobShape(int count, int nplanes, int height, int width)
|
||||
// Flops_Kernel_Input_OutCN_Group_Stride_Pad_Dilation_PadAdjust_PadMode_Bias
|
||||
struct TestSize_ {
|
||||
int width, height;
|
||||
operator Size() const { return Size(width, height); }
|
||||
};
|
||||
struct ConvParam_t {
|
||||
struct TestSize_ kernel;
|
||||
struct BlobShape { int dims[4]; } shapeIn;
|
||||
int outCN;
|
||||
int groups;
|
||||
struct TestSize_ stride;
|
||||
struct TestSize_ dilation;
|
||||
struct TestSize_ pad;
|
||||
struct TestSize_ padAdjust;
|
||||
const char* padMode;
|
||||
bool hasBias;
|
||||
double declared_flops;
|
||||
};
|
||||
// Details: #12142
|
||||
static const ConvParam_t testConvolutionConfigs[] = {
|
||||
/* GFLOPS 10.087 x 1 = 10.087 */ {{3, 3}, {{1, 576, 38, 50}}, 512, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 10086963200.},
|
||||
/* GFLOPS 1.704 x 5 = 8.518 */ {{3, 3}, {{1, 512, 19, 19}}, 512, 512, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 1703596544.},
|
||||
/* GFLOPS 1.704 x 5 = 8.518 */ {{3, 3}, {{1, 512, 19, 19}}, 512, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 1703596544.},
|
||||
/* GFLOPS 6.641 x 1 = 6.641 */ {{3, 3}, {{1, 64, 150, 200}}, 192, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 6641280000.},
|
||||
/* GFLOPS 1.659 x 3 = 4.977 */ {{3, 3}, {{1, 960, 10, 10}}, 960, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 1658976000.},
|
||||
/* GFLOPS 2.156 x 2 = 4.312 */ {{3, 3}, {{1, 576, 19, 19}}, 576, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 2156088384.},
|
||||
/* GFLOPS 0.958 x 4 = 3.833 */ {{3, 3}, {{1, 384, 19, 19}}, 384, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 958307712.},
|
||||
/* GFLOPS 0.830 x 4 = 3.321 */ {{3, 3}, {{1, 64, 75, 100}}, 96, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 830160000.},
|
||||
/* GFLOPS 1.245 x 2 = 2.490 */ {{3, 3}, {{1, 96, 75, 100}}, 96, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 1244880000.},
|
||||
/* GFLOPS 2.100 x 1 = 2.100 */ {{3, 3}, {{1, 144, 75, 75}}, 144, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 2100330000.},
|
||||
/* GFLOPS 1.022 x 2 = 2.044 */ {{3, 3}, {{1, 576, 19, 19}}, 273, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 1021896057.},
|
||||
/* GFLOPS 0.958 x 2 = 1.917 */ {{3, 3}, {{1, 192, 38, 38}}, 192, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 958446336.},
|
||||
/* GFLOPS 1.888 x 1 = 1.888 */ {{3, 3}, {{1, 1024, 10, 10}}, 1024, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 1887539200.},
|
||||
/* GFLOPS 1.888 x 1 = 1.888 */ {{3, 3}, {{1, 1024, 10, 10}}, 1024, 1024, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 1887539200.},
|
||||
/* GFLOPS 1.704 x 1 = 1.704 */ {{3, 3}, {{1, 256, 38, 38}}, 256, 256, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 1703781376.},
|
||||
/* GFLOPS 1.704 x 1 = 1.704 */ {{3, 3}, {{1, 256, 38, 38}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 1703781376.},
|
||||
/* GFLOPS 1.660 x 1 = 1.660 */ {{3, 3}, {{1, 128, 75, 75}}, 128, 128, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 1659600000.},
|
||||
/* GFLOPS 1.660 x 1 = 1.660 */ {{3, 3}, {{1, 128, 75, 75}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 1659600000.},
|
||||
/* GFLOPS 0.280 x 5 = 1.402 */ {{1, 1}, {{1, 576, 38, 50}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 280409600.},
|
||||
/* GFLOPS 0.701 x 2 = 1.401 */ {{3, 3}, {{1, 128, 38, 50}}, 160, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 700720000.},
|
||||
/* GFLOPS 0.231 x 6 = 1.388 */ {{3, 3}, {{1, 128, 56, 56}}, 32, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", false, 231311360.},
|
||||
/* GFLOPS 0.231 x 6 = 1.388 */ {{3, 3}, {{1, 256, 14, 14}}, 256, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", false, 231261184.},
|
||||
/* GFLOPS 0.210 x 6 = 1.262 */ {{1, 1}, {{1, 576, 38, 50}}, 96, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 210307200.},
|
||||
/* GFLOPS 0.420 x 3 = 1.261 */ {{3, 3}, {{1, 96, 38, 50}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 420492800.},
|
||||
/* GFLOPS 1.261 x 1 = 1.261 */ {{3, 3}, {{1, 192, 38, 50}}, 192, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 1261113600.},
|
||||
/* GFLOPS 1.258 x 1 = 1.258 */ {{3, 3}, {{1, 1280, 10, 10}}, 546, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 1258038600.},
|
||||
/* GFLOPS 1.245 x 1 = 1.245 */ {{3, 3}, {{1, 64, 75, 75}}, 192, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 1245240000.},
|
||||
/* GFLOPS 0.561 x 2 = 1.121 */ {{3, 3}, {{1, 128, 38, 50}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 560576000.},
|
||||
/* GFLOPS 1.051 x 1 = 1.051 */ {{3, 3}, {{1, 160, 38, 50}}, 192, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 1050988800.},
|
||||
/* GFLOPS 1.006 x 1 = 1.006 */ {{3, 3}, {{1, 1024, 10, 10}}, 546, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 1006441800.},
|
||||
/* GFLOPS 0.246 x 4 = 0.985 */ {{1, 1}, {{1, 256, 75, 100}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 246240000.},
|
||||
/* GFLOPS 0.189 x 5 = 0.947 */ {{1, 1}, {{1, 512, 19, 19}}, 512, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 189452800.},
|
||||
/* GFLOPS 0.189 x 5 = 0.947 */ {{1, 1}, {{1, 512, 19, 19}}, 512, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 189452800.},
|
||||
/* GFLOPS 0.934 x 1 = 0.934 */ {{3, 3}, {{1, 96, 150, 150}}, 96, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 933660000.},
|
||||
/* GFLOPS 0.231 x 4 = 0.925 */ {{3, 3}, {{1, 128, 28, 28}}, 128, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", false, 231311360.},
|
||||
/* GFLOPS 0.896 x 1 = 0.896 */ {{5, 5}, {{1, 96, 27, 27}}, 256, 2, {1, 1}, {1, 1}, {2, 2}, {0, 0}, "", true, 895981824.},
|
||||
/* GFLOPS 0.876 x 1 = 0.876 */ {{3, 3}, {{1, 160, 38, 50}}, 160, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 875824000.},
|
||||
/* GFLOPS 0.850 x 1 = 0.850 */ {{7, 7}, {{1, 3, 600, 800}}, 24, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 849600000.},
|
||||
/* GFLOPS 0.841 x 1 = 0.841 */ {{3, 3}, {{1, 128, 38, 50}}, 192, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 840864000.},
|
||||
/* GFLOPS 0.415 x 2 = 0.831 */ {{3, 3}, {{1, 32, 150, 150}}, 32, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 415440000.},
|
||||
/* GFLOPS 0.351 x 2 = 0.701 */ {{1, 1}, {{1, 576, 38, 50}}, 160, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 350512000.},
|
||||
/* GFLOPS 0.701 x 1 = 0.701 */ {{3, 3}, {{1, 128, 75, 100}}, 160, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 700720000.},
|
||||
/* GFLOPS 0.694 x 1 = 0.694 */ {{3, 3}, {{1, 64, 56, 56}}, 192, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 694235136.},
|
||||
/* GFLOPS 0.694 x 1 = 0.694 */ {{3, 3}, {{1, 64, 56, 56}}, 192, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 694235136.},
|
||||
/* GFLOPS 0.231 x 3 = 0.694 */ {{3, 3}, {{1, 64, 56, 56}}, 64, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", false, 231411712.},
|
||||
/* GFLOPS 0.058 x 12 = 0.694 */ {{3, 3}, {{1, 128, 28, 28}}, 32, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", false, 57827840.},
|
||||
/* GFLOPS 0.231 x 3 = 0.694 */ {{3, 3}, {{1, 512, 7, 7}}, 512, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", false, 231236096.},
|
||||
/* GFLOPS 0.160 x 4 = 0.639 */ {{3, 3}, {{1, 64, 38, 38}}, 96, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 159833472.},
|
||||
/* GFLOPS 0.103 x 6 = 0.618 */ {{1, 1}, {{1, 256, 14, 14}}, 1024, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 102961152.},
|
||||
/* GFLOPS 0.615 x 1 = 0.615 */ {{1, 1}, {{1, 320, 75, 100}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 615360000.},
|
||||
/* GFLOPS 0.597 x 1 = 0.597 */ {{3, 3}, {{1, 576, 19, 19}}, 576, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 597254400.},
|
||||
/* GFLOPS 0.185 x 3 = 0.554 */ {{1, 1}, {{1, 192, 75, 100}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 184800000.},
|
||||
/* GFLOPS 0.553 x 1 = 0.553 */ {{3, 3}, {{1, 64, 75, 100}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 553440000.},
|
||||
/* GFLOPS 0.539 x 1 = 0.539 */ {{3, 3}, {{1, 144, 75, 75}}, 144, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 539178048.},
|
||||
/* GFLOPS 0.103 x 5 = 0.514 */ {{1, 1}, {{1, 1024, 14, 14}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 102810624.},
|
||||
/* GFLOPS 0.491 x 1 = 0.491 */ {{1, 1}, {{1, 576, 38, 50}}, 224, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 490716800.},
|
||||
/* GFLOPS 0.240 x 2 = 0.479 */ {{3, 3}, {{1, 96, 38, 38}}, 96, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 239680896.},
|
||||
/* GFLOPS 0.237 x 2 = 0.474 */ {{7, 7}, {{1, 3, 224, 224}}, 64, 1, {2, 2}, {1, 1}, {3, 3}, {0, 0}, "", true, 236830720.},
|
||||
/* GFLOPS 0.472 x 1 = 0.472 */ {{3, 3}, {{1, 512, 19, 19}}, 512, 512, {2, 2}, {1, 1}, {1, 1}, {0, 0}, "", true, 471910400.},
|
||||
/* GFLOPS 0.472 x 1 = 0.472 */ {{3, 3}, {{1, 512, 19, 19}}, 512, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 471910400.},
|
||||
/* GFLOPS 0.449 x 1 = 0.449 */ {{3, 3}, {{1, 384, 13, 13}}, 384, 2, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 448626048.},
|
||||
/* GFLOPS 0.426 x 1 = 0.426 */ {{3, 3}, {{1, 128, 75, 75}}, 128, 128, {2, 2}, {1, 1}, {1, 1}, {0, 0}, "", true, 426037760.},
|
||||
/* GFLOPS 0.426 x 1 = 0.426 */ {{3, 3}, {{1, 128, 75, 75}}, 128, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 426037760.},
|
||||
/* GFLOPS 0.426 x 1 = 0.426 */ {{3, 3}, {{1, 128, 38, 38}}, 128, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", false, 426037760.},
|
||||
/* GFLOPS 0.426 x 1 = 0.426 */ {{3, 3}, {{1, 256, 38, 38}}, 256, 256, {2, 2}, {1, 1}, {1, 1}, {0, 0}, "", true, 425945344.},
|
||||
/* GFLOPS 0.426 x 1 = 0.426 */ {{3, 3}, {{1, 256, 38, 38}}, 256, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 425945344.},
|
||||
/* GFLOPS 0.426 x 1 = 0.426 */ {{3, 3}, {{1, 256, 19, 19}}, 256, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", false, 425945344.},
|
||||
/* GFLOPS 0.421 x 1 = 0.421 */ {{1, 1}, {{1, 576, 38, 50}}, 192, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 420614400.},
|
||||
/* GFLOPS 0.415 x 1 = 0.415 */ {{3, 3}, {{1, 32, 150, 150}}, 32, 32, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 415440000.},
|
||||
/* GFLOPS 0.415 x 1 = 0.415 */ {{3, 3}, {{1, 64, 150, 150}}, 64, 64, {2, 2}, {1, 1}, {1, 1}, {0, 0}, "", true, 415080000.},
|
||||
/* GFLOPS 0.415 x 1 = 0.415 */ {{3, 3}, {{1, 64, 150, 150}}, 64, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 415080000.},
|
||||
/* GFLOPS 0.104 x 4 = 0.414 */ {{1, 1}, {{1, 64, 56, 56}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 103563264.},
|
||||
/* GFLOPS 0.103 x 4 = 0.413 */ {{1, 1}, {{1, 128, 28, 28}}, 512, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 103161856.},
|
||||
/* GFLOPS 0.376 x 1 = 0.376 */ {{1, 1}, {{1, 24, 300, 400}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "VALID", true, 376320000.},
|
||||
/* GFLOPS 0.347 x 1 = 0.347 */ {{3, 3}, {{1, 128, 28, 28}}, 192, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 346967040.},
|
||||
/* GFLOPS 0.347 x 1 = 0.347 */ {{3, 3}, {{1, 128, 28, 28}}, 192, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 346967040.},
|
||||
/* GFLOPS 0.014 x 24 = 0.347 */ {{3, 3}, {{1, 128, 14, 14}}, 32, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", false, 14456960.},
|
||||
/* GFLOPS 0.053 x 6 = 0.320 */ {{1, 1}, {{1, 576, 19, 19}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 53277824.},
|
||||
/* GFLOPS 0.319 x 1 = 0.319 */ {{3, 3}, {{1, 192, 19, 19}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 319482112.},
|
||||
/* GFLOPS 0.315 x 1 = 0.315 */ {{3, 3}, {{1, 96, 75, 100}}, 96, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 315369600.},
|
||||
/* GFLOPS 0.103 x 3 = 0.309 */ {{1, 1}, {{1, 512, 7, 7}}, 2048, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 102860800.},
|
||||
/* GFLOPS 0.103 x 3 = 0.309 */ {{1, 1}, {{1, 512, 28, 28}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 102860800.},
|
||||
/* GFLOPS 0.308 x 1 = 0.308 */ {{1, 1}, {{1, 320, 75, 100}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 307680000.},
|
||||
/* GFLOPS 0.299 x 1 = 0.299 */ {{3, 3}, {{1, 256, 13, 13}}, 384, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 299105664.},
|
||||
/* GFLOPS 0.299 x 1 = 0.299 */ {{3, 3}, {{1, 384, 13, 13}}, 256, 2, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 299084032.},
|
||||
/* GFLOPS 0.017 x 17 = 0.290 */ {{1, 1}, {{1, 32, 32, 64}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 17039360.},
|
||||
/* GFLOPS 0.017 x 16 = 0.269 */ {{1, 1}, {{1, 128, 32, 64}}, 32, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 16842752.},
|
||||
/* GFLOPS 0.133 x 2 = 0.266 */ {{3, 3}, {{1, 128, 19, 19}}, 160, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 133136800.},
|
||||
/* GFLOPS 0.038 x 7 = 0.265 */ {{3, 3}, {{1, 16, 64, 128}}, 16, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 37879808.},
|
||||
/* GFLOPS 0.126 x 2 = 0.252 */ {{3, 3}, {{1, 512, 5, 5}}, 546, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 125812050.},
|
||||
/* GFLOPS 0.248 x 1 = 0.248 */ {{1, 1}, {{1, 64, 150, 200}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 247680000.},
|
||||
/* GFLOPS 0.040 x 6 = 0.240 */ {{1, 1}, {{1, 576, 19, 19}}, 96, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 39958368.},
|
||||
/* GFLOPS 0.080 x 3 = 0.240 */ {{3, 3}, {{1, 96, 19, 19}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 79893632.},
|
||||
/* GFLOPS 0.240 x 1 = 0.240 */ {{3, 3}, {{1, 192, 38, 38}}, 192, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 239611584.},
|
||||
/* GFLOPS 0.240 x 1 = 0.240 */ {{3, 3}, {{1, 192, 19, 19}}, 192, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 239611584.},
|
||||
/* GFLOPS 0.237 x 1 = 0.237 */ {{7, 7}, {{1, 3, 224, 224}}, 64, 1, {2, 2}, {1, 1}, {3, 3}, {0, 0}, "", false, 236830720.},
|
||||
/* GFLOPS 0.237 x 1 = 0.237 */ {{7, 7}, {{1, 3, 224, 224}}, 64, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 236830720.},
|
||||
/* GFLOPS 0.111 x 2 = 0.221 */ {{3, 3}, {{1, 192, 10, 10}}, 320, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 110624000.},
|
||||
/* GFLOPS 0.213 x 1 = 0.213 */ {{3, 3}, {{1, 128, 38, 38}}, 256, 1, {2, 2}, {1, 1}, {1, 1}, {0, 0}, "", false, 213018880.},
|
||||
/* GFLOPS 0.213 x 1 = 0.213 */ {{3, 3}, {{1, 128, 19, 19}}, 256, 1, {1, 1}, {2, 2}, {2, 2}, {0, 0}, "", false, 213018880.},
|
||||
/* GFLOPS 0.107 x 2 = 0.213 */ {{3, 3}, {{1, 128, 19, 19}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 106509440.},
|
||||
/* GFLOPS 0.213 x 1 = 0.213 */ {{3, 3}, {{1, 256, 19, 19}}, 128, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", false, 212972672.},
|
||||
/* GFLOPS 0.212 x 1 = 0.212 */ {{7, 7}, {{1, 3, 300, 300}}, 32, 1, {2, 2}, {1, 1}, {3, 3}, {0, 0}, "", true, 212400000.},
|
||||
/* GFLOPS 0.211 x 1 = 0.211 */ {{11, 11}, {{1, 3, 227, 227}}, 96, 1, {4, 4}, {1, 1}, {0, 0}, {0, 0}, "", true, 211120800.},
|
||||
/* GFLOPS 0.210 x 1 = 0.210 */ {{3, 3}, {{1, 64, 38, 50}}, 96, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 210307200.},
|
||||
/* GFLOPS 0.210 x 1 = 0.210 */ {{1, 1}, {{1, 1024, 10, 10}}, 1024, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 209817600.},
|
||||
/* GFLOPS 0.210 x 1 = 0.210 */ {{1, 1}, {{1, 1024, 10, 10}}, 1024, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 209817600.},
|
||||
/* GFLOPS 0.104 x 2 = 0.208 */ {{3, 3}, {{1, 32, 75, 75}}, 32, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", false, 103860000.},
|
||||
/* GFLOPS 0.206 x 1 = 0.206 */ {{1, 1}, {{1, 256, 56, 56}}, 512, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "", false, 205922304.},
|
||||
/* GFLOPS 0.206 x 1 = 0.206 */ {{1, 1}, {{1, 256, 56, 56}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 205922304.},
|
||||
/* GFLOPS 0.103 x 2 = 0.206 */ {{1, 1}, {{1, 256, 56, 56}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 102961152.},
|
||||
/* GFLOPS 0.206 x 1 = 0.206 */ {{1, 1}, {{1, 512, 28, 28}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 205721600.},
|
||||
/* GFLOPS 0.206 x 1 = 0.206 */ {{1, 1}, {{1, 512, 28, 28}}, 1024, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "", false, 205721600.},
|
||||
/* GFLOPS 0.206 x 1 = 0.206 */ {{1, 1}, {{1, 1024, 14, 14}}, 512, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 205621248.},
|
||||
/* GFLOPS 0.206 x 1 = 0.206 */ {{1, 1}, {{1, 1024, 14, 14}}, 2048, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "", false, 205621248.},
|
||||
/* GFLOPS 0.103 x 2 = 0.206 */ {{1, 1}, {{1, 2048, 7, 7}}, 512, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 102785536.},
|
||||
/* GFLOPS 0.201 x 1 = 0.201 */ {{1, 1}, {{1, 512, 14, 14}}, 1000, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 200900000.},
|
||||
/* GFLOPS 0.200 x 1 = 0.200 */ {{3, 3}, {{1, 160, 19, 19}}, 192, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 199687872.},
|
||||
/* GFLOPS 0.190 x 1 = 0.190 */ {{1, 1}, {{1, 256, 38, 38}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 189637632.},
|
||||
/* GFLOPS 0.190 x 1 = 0.190 */ {{1, 1}, {{1, 256, 38, 38}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 189637632.},
|
||||
/* GFLOPS 0.047 x 4 = 0.190 */ {{1, 1}, {{1, 256, 38, 38}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 47409408.},
|
||||
/* GFLOPS 0.038 x 5 = 0.189 */ {{3, 3}, {{1, 32, 32, 64}}, 32, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 37814272.},
|
||||
/* GFLOPS 0.185 x 1 = 0.185 */ {{1, 1}, {{1, 128, 75, 75}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 185040000.},
|
||||
/* GFLOPS 0.185 x 1 = 0.185 */ {{1, 1}, {{1, 128, 75, 75}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 185040000.},
|
||||
/* GFLOPS 0.181 x 1 = 0.181 */ {{3, 3}, {{1, 160, 14, 14}}, 320, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 180696320.},
|
||||
/* GFLOPS 0.181 x 1 = 0.181 */ {{3, 3}, {{1, 160, 14, 14}}, 320, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 180696320.},
|
||||
/* GFLOPS 0.090 x 2 = 0.181 */ {{3, 3}, {{1, 224, 10, 10}}, 224, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 90339200.},
|
||||
/* GFLOPS 0.180 x 1 = 0.180 */ {{1, 1}, {{1, 224, 56, 56}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 180232192.},
|
||||
/* GFLOPS 0.174 x 1 = 0.174 */ {{3, 3}, {{1, 96, 28, 28}}, 128, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 173508608.},
|
||||
/* GFLOPS 0.174 x 1 = 0.174 */ {{3, 3}, {{1, 96, 28, 28}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 173508608.},
|
||||
/* GFLOPS 0.166 x 1 = 0.166 */ {{3, 3}, {{1, 160, 19, 19}}, 160, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 166406560.},
|
||||
/* GFLOPS 0.080 x 2 = 0.160 */ {{1, 1}, {{1, 576, 19, 19}}, 192, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 79916736.},
|
||||
/* GFLOPS 0.160 x 1 = 0.160 */ {{3, 3}, {{1, 128, 19, 19}}, 192, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 159764160.},
|
||||
/* GFLOPS 0.159 x 1 = 0.159 */ {{7, 7}, {{1, 3, 300, 300}}, 24, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 159300000.},
|
||||
/* GFLOPS 0.155 x 1 = 0.155 */ {{1, 1}, {{1, 192, 56, 56}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 154542080.},
|
||||
/* GFLOPS 0.146 x 1 = 0.146 */ {{3, 3}, {{1, 144, 14, 14}}, 288, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 146369664.},
|
||||
/* GFLOPS 0.146 x 1 = 0.146 */ {{3, 3}, {{1, 144, 14, 14}}, 288, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 146369664.},
|
||||
/* GFLOPS 0.072 x 2 = 0.144 */ {{1, 1}, {{1, 1024, 10, 10}}, 352, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 72124800.},
|
||||
/* GFLOPS 0.140 x 1 = 0.140 */ {{1, 1}, {{1, 576, 38, 50}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 140204800.},
|
||||
/* GFLOPS 0.017 x 8 = 0.138 */ {{1, 1}, {{1, 16, 64, 128}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 17301504.},
|
||||
/* GFLOPS 0.067 x 2 = 0.133 */ {{1, 1}, {{1, 576, 19, 19}}, 160, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 66597280.},
|
||||
/* GFLOPS 0.133 x 1 = 0.133 */ {{3, 3}, {{1, 128, 38, 38}}, 160, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 133136800.},
|
||||
/* GFLOPS 0.129 x 1 = 0.129 */ {{1, 1}, {{1, 160, 56, 56}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 128851968.},
|
||||
/* GFLOPS 0.128 x 1 = 0.128 */ {{3, 3}, {{1, 64, 24, 24}}, 192, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 127512576.},
|
||||
/* GFLOPS 0.120 x 1 = 0.120 */ {{5, 5}, {{1, 32, 28, 28}}, 96, 1, {1, 1}, {1, 1}, {2, 2}, {0, 0}, "", true, 120497664.},
|
||||
/* GFLOPS 0.120 x 1 = 0.120 */ {{5, 5}, {{1, 32, 28, 28}}, 96, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 120497664.},
|
||||
/* GFLOPS 0.040 x 3 = 0.120 */ {{1, 1}, {{1, 96, 19, 19}}, 576, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 40131648.},
|
||||
/* GFLOPS 0.118 x 1 = 0.118 */ {{1, 1}, {{1, 320, 38, 38}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 118477312.},
|
||||
/* GFLOPS 0.017 x 7 = 0.118 */ {{1, 1}, {{1, 64, 64, 128}}, 16, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 16908288.},
|
||||
/* GFLOPS 0.039 x 3 = 0.118 */ {{1, 1}, {{1, 1024, 10, 10}}, 192, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 39340800.},
|
||||
/* GFLOPS 0.118 x 1 = 0.118 */ {{3, 3}, {{1, 256, 19, 19}}, 256, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 117990400.},
|
||||
/* GFLOPS 0.058 x 2 = 0.116 */ {{3, 3}, {{1, 16, 56, 56}}, 64, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 58003456.},
|
||||
/* GFLOPS 0.058 x 2 = 0.116 */ {{3, 3}, {{1, 32, 28, 28}}, 128, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 57903104.},
|
||||
/* GFLOPS 0.058 x 2 = 0.116 */ {{3, 3}, {{1, 64, 14, 14}}, 256, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 57852928.},
|
||||
/* GFLOPS 0.116 x 1 = 0.116 */ {{3, 3}, {{1, 128, 14, 14}}, 256, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 115655680.},
|
||||
/* GFLOPS 0.116 x 1 = 0.116 */ {{3, 3}, {{1, 128, 14, 14}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 115655680.},
|
||||
/* GFLOPS 0.112 x 1 = 0.112 */ {{1, 1}, {{1, 1024, 10, 10}}, 546, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 111875400.},
|
||||
/* GFLOPS 0.036 x 3 = 0.107 */ {{1, 1}, {{1, 192, 38, 38}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 35580160.},
|
||||
/* GFLOPS 0.107 x 1 = 0.107 */ {{3, 3}, {{1, 32, 75, 75}}, 128, 1, {2, 2}, {1, 1}, {1, 1}, {0, 0}, "", false, 106648064.},
|
||||
/* GFLOPS 0.107 x 1 = 0.107 */ {{3, 3}, {{1, 64, 38, 38}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 106555648.},
|
||||
/* GFLOPS 0.105 x 1 = 0.105 */ {{1, 1}, {{1, 512, 10, 10}}, 1024, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 104960000.},
|
||||
/* GFLOPS 0.105 x 1 = 0.105 */ {{1, 1}, {{1, 512, 10, 10}}, 1024, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 104960000.},
|
||||
/* GFLOPS 0.103 x 1 = 0.103 */ {{1, 1}, {{1, 128, 56, 56}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 103161856.},
|
||||
/* GFLOPS 0.051 x 2 = 0.103 */ {{1, 1}, {{1, 256, 28, 28}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 51480576.},
|
||||
/* GFLOPS 0.051 x 2 = 0.103 */ {{1, 1}, {{1, 256, 28, 28}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 51480576.},
|
||||
/* GFLOPS 0.101 x 1 = 0.101 */ {{1, 1}, {{1, 512, 19, 19}}, 273, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 101016825.},
|
||||
/* GFLOPS 0.096 x 1 = 0.096 */ {{1, 1}, {{1, 480, 28, 28}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 96438272.},
|
||||
/* GFLOPS 0.095 x 1 = 0.095 */ {{1, 1}, {{1, 128, 38, 38}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 95003648.},
|
||||
/* GFLOPS 0.095 x 1 = 0.095 */ {{1, 1}, {{1, 128, 38, 38}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 95003648.},
|
||||
/* GFLOPS 0.095 x 1 = 0.095 */ {{1, 1}, {{1, 256, 19, 19}}, 512, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 94818816.},
|
||||
/* GFLOPS 0.095 x 1 = 0.095 */ {{1, 1}, {{1, 256, 19, 19}}, 512, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 94818816.},
|
||||
/* GFLOPS 0.094 x 1 = 0.094 */ {{1, 1}, {{1, 32, 150, 150}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 93600000.},
|
||||
/* GFLOPS 0.094 x 1 = 0.094 */ {{1, 1}, {{1, 32, 150, 150}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 93600000.},
|
||||
/* GFLOPS 0.093 x 1 = 0.093 */ {{1, 1}, {{1, 512, 38, 50}}, 48, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 93480000.},
|
||||
/* GFLOPS 0.093 x 1 = 0.093 */ {{1, 1}, {{1, 576, 19, 19}}, 224, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 93236192.},
|
||||
/* GFLOPS 0.093 x 1 = 0.093 */ {{1, 1}, {{1, 64, 75, 75}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 92880000.},
|
||||
/* GFLOPS 0.093 x 1 = 0.093 */ {{1, 1}, {{1, 64, 75, 75}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 92880000.},
|
||||
/* GFLOPS 0.031 x 3 = 0.092 */ {{1, 1}, {{1, 160, 10, 10}}, 960, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 30816000.},
|
||||
/* GFLOPS 0.092 x 1 = 0.092 */ {{1, 1}, {{1, 192, 75, 100}}, 32, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 92400000.},
|
||||
/* GFLOPS 0.090 x 1 = 0.090 */ {{1, 1}, {{1, 448, 28, 28}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 90015744.},
|
||||
/* GFLOPS 0.045 x 2 = 0.090 */ {{3, 3}, {{1, 576, 19, 19}}, 12, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 44918508.},
|
||||
/* GFLOPS 0.089 x 1 = 0.089 */ {{3, 3}, {{1, 112, 14, 14}}, 224, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 88554368.},
|
||||
/* GFLOPS 0.089 x 1 = 0.089 */ {{3, 3}, {{1, 112, 14, 14}}, 224, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 88554368.},
|
||||
/* GFLOPS 0.021 x 4 = 0.084 */ {{5, 1}, {{1, 32, 32, 64}}, 32, 1, {1, 1}, {1, 1}, {2, 0}, {0, 0}, "", false, 21037056.},
|
||||
/* GFLOPS 0.021 x 4 = 0.084 */ {{1, 5}, {{1, 32, 32, 64}}, 32, 1, {1, 1}, {1, 1}, {0, 2}, {0, 0}, "", true, 21037056.},
|
||||
/* GFLOPS 0.084 x 1 = 0.084 */ {{1, 1}, {{1, 416, 28, 28}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 83593216.},
|
||||
/* GFLOPS 0.082 x 1 = 0.082 */ {{1, 1}, {{1, 320, 10, 10}}, 1280, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 82048000.},
|
||||
/* GFLOPS 0.040 x 2 = 0.080 */ {{1, 1}, {{1, 576, 19, 19}}, 96, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 39958368.},
|
||||
/* GFLOPS 0.040 x 2 = 0.079 */ {{1, 1}, {{1, 24, 75, 75}}, 144, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 39690000.},
|
||||
/* GFLOPS 0.040 x 2 = 0.079 */ {{3, 3}, {{1, 3, 300, 300}}, 32, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 39600000.},
|
||||
/* GFLOPS 0.077 x 1 = 0.077 */ {{1, 1}, {{1, 96, 56, 56}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 77471744.},
|
||||
/* GFLOPS 0.077 x 1 = 0.077 */ {{3, 3}, {{1, 192, 10, 10}}, 224, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 77436800.},
|
||||
/* GFLOPS 0.077 x 1 = 0.077 */ {{1, 1}, {{1, 384, 28, 28}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 77170688.},
|
||||
/* GFLOPS 0.038 x 2 = 0.076 */ {{3, 3}, {{1, 32, 32, 64}}, 32, 1, {1, 1}, {8, 8}, {8, 8}, {0, 0}, "", true, 37814272.},
|
||||
/* GFLOPS 0.038 x 2 = 0.076 */ {{3, 3}, {{1, 32, 32, 64}}, 32, 1, {1, 1}, {4, 4}, {4, 4}, {0, 0}, "", true, 37814272.},
|
||||
/* GFLOPS 0.038 x 2 = 0.076 */ {{3, 3}, {{1, 32, 32, 64}}, 32, 1, {1, 1}, {2, 2}, {2, 2}, {0, 0}, "", true, 37814272.},
|
||||
/* GFLOPS 0.038 x 2 = 0.076 */ {{3, 3}, {{1, 32, 32, 64}}, 32, 1, {1, 1}, {16, 16}, {16, 16}, {0, 0}, "", true, 37814272.},
|
||||
/* GFLOPS 0.018 x 4 = 0.072 */ {{1, 1}, {{1, 64, 19, 19}}, 384, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 17882496.},
|
||||
/* GFLOPS 0.071 x 1 = 0.071 */ {{1, 1}, {{1, 16, 150, 150}}, 96, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 71280000.},
|
||||
/* GFLOPS 0.071 x 1 = 0.071 */ {{1, 1}, {{1, 352, 28, 28}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 70748160.},
|
||||
/* GFLOPS 0.071 x 1 = 0.071 */ {{1, 1}, {{1, 24, 150, 150}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "VALID", true, 70560000.},
|
||||
/* GFLOPS 0.070 x 1 = 0.070 */ {{3, 3}, {{1, 96, 14, 14}}, 208, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 70487872.},
|
||||
/* GFLOPS 0.069 x 1 = 0.069 */ {{3, 3}, {{1, 96, 14, 14}}, 204, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 69132336.},
|
||||
/* GFLOPS 0.066 x 1 = 0.066 */ {{1, 1}, {{1, 1280, 10, 10}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 65561600.},
|
||||
/* GFLOPS 0.033 x 2 = 0.065 */ {{3, 3}, {{1, 48, 14, 14}}, 192, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 32551680.},
|
||||
/* GFLOPS 0.065 x 1 = 0.065 */ {{3, 3}, {{1, 192, 7, 7}}, 384, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 65046912.},
|
||||
/* GFLOPS 0.065 x 1 = 0.065 */ {{3, 3}, {{1, 192, 7, 7}}, 384, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 65046912.},
|
||||
/* GFLOPS 0.065 x 1 = 0.065 */ {{3, 3}, {{1, 160, 10, 10}}, 224, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 64534400.},
|
||||
/* GFLOPS 0.064 x 1 = 0.064 */ {{1, 1}, {{1, 320, 28, 28}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 64325632.},
|
||||
/* GFLOPS 0.032 x 2 = 0.064 */ {{3, 3}, {{1, 96, 12, 12}}, 128, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 31868928.},
|
||||
/* GFLOPS 0.061 x 1 = 0.061 */ {{1, 1}, {{1, 960, 10, 10}}, 320, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 61472000.},
|
||||
/* GFLOPS 0.031 x 2 = 0.061 */ {{1, 1}, {{1, 960, 10, 10}}, 160, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 30736000.},
|
||||
/* GFLOPS 0.060 x 1 = 0.060 */ {{3, 3}, {{1, 96, 38, 38}}, 96, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 59920224.},
|
||||
/* GFLOPS 0.059 x 1 = 0.059 */ {{1, 1}, {{1, 320, 38, 38}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 59238656.},
|
||||
/* GFLOPS 0.059 x 1 = 0.059 */ {{3, 3}, {{1, 128, 19, 19}}, 256, 1, {2, 2}, {1, 1}, {1, 1}, {0, 0}, "", true, 59008000.},
|
||||
/* GFLOPS 0.059 x 1 = 0.059 */ {{3, 3}, {{1, 256, 10, 10}}, 512, 1, {2, 2}, {1, 1}, {1, 1}, {0, 0}, "", true, 58995200.},
|
||||
/* GFLOPS 0.059 x 1 = 0.059 */ {{3, 3}, {{1, 256, 10, 10}}, 512, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 58995200.},
|
||||
/* GFLOPS 0.059 x 1 = 0.059 */ {{3, 3}, {{1, 256, 10, 10}}, 512, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 58995200.},
|
||||
/* GFLOPS 0.058 x 1 = 0.058 */ {{1, 1}, {{1, 288, 28, 28}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 57903104.},
|
||||
/* GFLOPS 0.004 x 16 = 0.058 */ {{3, 3}, {{1, 128, 7, 7}}, 32, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", false, 3614240.},
|
||||
/* GFLOPS 0.055 x 1 = 0.055 */ {{3, 3}, {{1, 1280, 10, 10}}, 24, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 55298400.},
|
||||
/* GFLOPS 0.018 x 3 = 0.054 */ {{1, 1}, {{1, 32, 38, 38}}, 192, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 18021120.},
|
||||
/* GFLOPS 0.018 x 3 = 0.053 */ {{1, 1}, {{1, 384, 19, 19}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 17766976.},
|
||||
/* GFLOPS 0.053 x 1 = 0.053 */ {{3, 3}, {{1, 128, 38, 38}}, 16, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 53254720.},
|
||||
/* GFLOPS 0.053 x 1 = 0.053 */ {{1, 1}, {{1, 528, 14, 14}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 53036032.},
|
||||
/* GFLOPS 0.053 x 1 = 0.053 */ {{1, 1}, {{1, 528, 14, 14}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 53036032.},
|
||||
/* GFLOPS 0.052 x 1 = 0.052 */ {{1, 1}, {{1, 1024, 10, 10}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 52454400.},
|
||||
/* GFLOPS 0.052 x 1 = 0.052 */ {{1, 1}, {{1, 1024, 10, 10}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 52454400.},
|
||||
/* GFLOPS 0.052 x 1 = 0.052 */ {{1, 1}, {{1, 1024, 10, 10}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 52454400.},
|
||||
/* GFLOPS 0.026 x 2 = 0.052 */ {{1, 1}, {{1, 1024, 10, 10}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 26227200.},
|
||||
/* GFLOPS 0.052 x 1 = 0.052 */ {{1, 1}, {{1, 64, 56, 56}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 51781632.},
|
||||
/* GFLOPS 0.051 x 1 = 0.051 */ {{1, 1}, {{1, 256, 56, 56}}, 128, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "", false, 51480576.},
|
||||
/* GFLOPS 0.051 x 1 = 0.051 */ {{1, 1}, {{1, 256, 28, 28}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 51480576.},
|
||||
/* GFLOPS 0.051 x 1 = 0.051 */ {{1, 1}, {{1, 512, 28, 28}}, 256, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "", false, 51430400.},
|
||||
/* GFLOPS 0.026 x 2 = 0.051 */ {{1, 1}, {{1, 512, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 25715200.},
|
||||
/* GFLOPS 0.026 x 2 = 0.051 */ {{1, 1}, {{1, 512, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 25715200.},
|
||||
/* GFLOPS 0.013 x 4 = 0.051 */ {{1, 1}, {{1, 512, 14, 14}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 12857600.},
|
||||
/* GFLOPS 0.051 x 1 = 0.051 */ {{1, 1}, {{1, 1024, 14, 14}}, 512, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "", false, 51405312.},
|
||||
/* GFLOPS 0.050 x 1 = 0.050 */ {{1, 1}, {{1, 992, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 49799680.},
|
||||
/* GFLOPS 0.048 x 1 = 0.048 */ {{1, 1}, {{1, 960, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 48194048.},
|
||||
/* GFLOPS 0.047 x 1 = 0.047 */ {{1, 1}, {{1, 256, 19, 19}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 47409408.},
|
||||
/* GFLOPS 0.047 x 1 = 0.047 */ {{1, 1}, {{1, 512, 38, 50}}, 24, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 46740000.},
|
||||
/* GFLOPS 0.047 x 1 = 0.047 */ {{1, 1}, {{1, 928, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 46588416.},
|
||||
/* GFLOPS 0.046 x 1 = 0.046 */ {{1, 1}, {{1, 64, 75, 75}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 46440000.},
|
||||
/* GFLOPS 0.023 x 2 = 0.045 */ {{3, 3}, {{1, 256, 3, 3}}, 546, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 22648626.},
|
||||
/* GFLOPS 0.045 x 1 = 0.045 */ {{3, 3}, {{1, 160, 7, 7}}, 320, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 45174080.},
|
||||
/* GFLOPS 0.045 x 1 = 0.045 */ {{3, 3}, {{1, 160, 7, 7}}, 320, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 45174080.},
|
||||
/* GFLOPS 0.045 x 1 = 0.045 */ {{1, 1}, {{1, 224, 28, 28}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 45058048.},
|
||||
/* GFLOPS 0.023 x 2 = 0.045 */ {{1, 1}, {{1, 512, 14, 14}}, 112, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 22500800.},
|
||||
/* GFLOPS 0.045 x 1 = 0.045 */ {{1, 1}, {{1, 896, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 44982784.},
|
||||
/* GFLOPS 0.045 x 1 = 0.045 */ {{3, 3}, {{1, 3, 227, 227}}, 64, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "", true, 44946880.},
|
||||
/* GFLOPS 0.044 x 1 = 0.044 */ {{3, 3}, {{1, 128, 19, 19}}, 192, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 44256000.},
|
||||
/* GFLOPS 0.044 x 1 = 0.044 */ {{3, 3}, {{1, 1024, 10, 10}}, 24, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 44239200.},
|
||||
/* GFLOPS 0.043 x 1 = 0.043 */ {{7, 7}, {{1, 3, 96, 96}}, 64, 1, {2, 2}, {1, 1}, {3, 3}, {0, 0}, "", true, 43499520.},
|
||||
/* GFLOPS 0.043 x 1 = 0.043 */ {{1, 1}, {{1, 864, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 43377152.},
|
||||
/* GFLOPS 0.042 x 1 = 0.042 */ {{1, 1}, {{1, 832, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 41771520.},
|
||||
/* GFLOPS 0.040 x 1 = 0.040 */ {{5, 5}, {{1, 32, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {2, 2}, {0, 0}, "", true, 40165888.},
|
||||
/* GFLOPS 0.040 x 1 = 0.040 */ {{5, 5}, {{1, 32, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 40165888.},
|
||||
/* GFLOPS 0.040 x 1 = 0.040 */ {{1, 1}, {{1, 800, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 40165888.},
|
||||
/* GFLOPS 0.040 x 1 = 0.040 */ {{3, 3}, {{1, 64, 19, 19}}, 96, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 39958368.},
|
||||
/* GFLOPS 0.040 x 1 = 0.040 */ {{3, 3}, {{1, 256, 19, 19}}, 24, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 39932376.},
|
||||
/* GFLOPS 0.040 x 1 = 0.040 */ {{3, 3}, {{1, 3, 300, 300}}, 32, 1, {2, 2}, {1, 1}, {1, 1}, {0, 0}, "", true, 39600000.},
|
||||
/* GFLOPS 0.039 x 1 = 0.039 */ {{1, 1}, {{1, 144, 75, 75}}, 24, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 39015000.},
|
||||
/* GFLOPS 0.039 x 1 = 0.039 */ {{1, 1}, {{1, 192, 28, 28}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 38635520.},
|
||||
/* GFLOPS 0.039 x 1 = 0.039 */ {{1, 1}, {{1, 768, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 38560256.},
|
||||
/* GFLOPS 0.037 x 1 = 0.037 */ {{1, 1}, {{1, 736, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 36954624.},
|
||||
/* GFLOPS 0.036 x 1 = 0.036 */ {{1, 1}, {{1, 480, 14, 14}}, 192, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 36164352.},
|
||||
/* GFLOPS 0.036 x 1 = 0.036 */ {{1, 1}, {{1, 480, 14, 14}}, 192, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 36164352.},
|
||||
/* GFLOPS 0.018 x 2 = 0.036 */ {{1, 1}, {{1, 192, 38, 38}}, 32, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 17790080.},
|
||||
/* GFLOPS 0.035 x 1 = 0.035 */ {{1, 1}, {{1, 704, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 35348992.},
|
||||
/* GFLOPS 0.034 x 1 = 0.034 */ {{1, 1}, {{1, 672, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 33743360.},
|
||||
/* GFLOPS 0.034 x 1 = 0.034 */ {{1, 1}, {{1, 128, 32, 64}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 33685504.},
|
||||
/* GFLOPS 0.034 x 1 = 0.034 */ {{2, 2}, {{1, 64, 64, 128}}, 32, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "", false, 33619968.},
|
||||
/* GFLOPS 0.033 x 1 = 0.033 */ {{1, 1}, {{1, 528, 14, 14}}, 160, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 33147520.},
|
||||
/* GFLOPS 0.033 x 1 = 0.033 */ {{1, 1}, {{1, 528, 14, 14}}, 160, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 33147520.},
|
||||
/* GFLOPS 0.033 x 1 = 0.033 */ {{1, 1}, {{1, 1024, 10, 10}}, 160, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 32784000.},
|
||||
/* GFLOPS 0.032 x 1 = 0.032 */ {{1, 1}, {{1, 160, 28, 28}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 32212992.},
|
||||
/* GFLOPS 0.032 x 1 = 0.032 */ {{1, 1}, {{1, 512, 14, 14}}, 160, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 32144000.},
|
||||
/* GFLOPS 0.032 x 1 = 0.032 */ {{1, 1}, {{1, 640, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 32137728.},
|
||||
/* GFLOPS 0.032 x 1 = 0.032 */ {{1, 1}, {{1, 508, 14, 14}}, 160, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 31893120.},
|
||||
/* GFLOPS 0.031 x 1 = 0.031 */ {{1, 1}, {{1, 832, 7, 7}}, 384, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 31328640.},
|
||||
/* GFLOPS 0.031 x 1 = 0.031 */ {{1, 1}, {{1, 832, 7, 7}}, 384, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 31328640.},
|
||||
/* GFLOPS 0.031 x 1 = 0.031 */ {{1, 1}, {{1, 608, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 30532096.},
|
||||
/* GFLOPS 0.015 x 2 = 0.030 */ {{5, 5}, {{1, 24, 14, 14}}, 64, 1, {1, 1}, {1, 1}, {2, 2}, {0, 0}, "", true, 15065344.},
|
||||
/* GFLOPS 0.015 x 2 = 0.030 */ {{5, 5}, {{1, 24, 14, 14}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 15065344.},
|
||||
/* GFLOPS 0.015 x 2 = 0.030 */ {{5, 5}, {{1, 48, 7, 7}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 15059072.},
|
||||
/* GFLOPS 0.029 x 1 = 0.029 */ {{3, 3}, {{1, 256, 10, 10}}, 256, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 29497600.},
|
||||
/* GFLOPS 0.029 x 1 = 0.029 */ {{1, 1}, {{1, 192, 28, 28}}, 96, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 28976640.},
|
||||
/* GFLOPS 0.029 x 1 = 0.029 */ {{1, 1}, {{1, 192, 28, 28}}, 96, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 28976640.},
|
||||
/* GFLOPS 0.029 x 1 = 0.029 */ {{1, 1}, {{1, 512, 14, 14}}, 144, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 28929600.},
|
||||
/* GFLOPS 0.029 x 1 = 0.029 */ {{1, 1}, {{1, 512, 14, 14}}, 144, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 28929600.},
|
||||
/* GFLOPS 0.029 x 1 = 0.029 */ {{1, 1}, {{1, 576, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 28926464.},
|
||||
/* GFLOPS 0.027 x 1 = 0.027 */ {{1, 1}, {{1, 544, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 27320832.},
|
||||
/* GFLOPS 0.027 x 1 = 0.027 */ {{1, 1}, {{1, 384, 19, 19}}, 96, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 26650464.},
|
||||
/* GFLOPS 0.027 x 1 = 0.027 */ {{1, 1}, {{1, 576, 19, 19}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 26638912.},
|
||||
/* GFLOPS 0.027 x 1 = 0.027 */ {{3, 3}, {{1, 128, 38, 38}}, 8, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 26627360.},
|
||||
/* GFLOPS 0.027 x 1 = 0.027 */ {{1, 1}, {{1, 528, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 26518016.},
|
||||
/* GFLOPS 0.027 x 1 = 0.027 */ {{1, 1}, {{1, 528, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 26518016.},
|
||||
/* GFLOPS 0.026 x 1 = 0.026 */ {{1, 1}, {{1, 96, 75, 75}}, 24, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 26055000.},
|
||||
/* GFLOPS 0.026 x 1 = 0.026 */ {{1, 1}, {{1, 64, 56, 56}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "VALID", true, 25890816.},
|
||||
/* GFLOPS 0.026 x 1 = 0.026 */ {{1, 1}, {{1, 64, 56, 56}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 25890816.},
|
||||
/* GFLOPS 0.026 x 1 = 0.026 */ {{1, 1}, {{1, 64, 56, 56}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 25890816.},
|
||||
/* GFLOPS 0.026 x 1 = 0.026 */ {{1, 1}, {{1, 1024, 10, 10}}, 126, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 25817400.},
|
||||
/* GFLOPS 0.026 x 1 = 0.026 */ {{1, 1}, {{1, 128, 28, 28}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 25790464.},
|
||||
/* GFLOPS 0.026 x 1 = 0.026 */ {{1, 1}, {{1, 256, 28, 28}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 25740288.},
|
||||
/* GFLOPS 0.026 x 1 = 0.026 */ {{1, 1}, {{1, 256, 28, 28}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 25740288.},
|
||||
/* GFLOPS 0.013 x 2 = 0.026 */ {{1, 1}, {{1, 256, 28, 28}}, 32, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 12870144.},
|
||||
/* GFLOPS 0.026 x 1 = 0.026 */ {{1, 1}, {{1, 512, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 25715200.},
|
||||
/* GFLOPS 0.013 x 2 = 0.026 */ {{1, 1}, {{1, 512, 14, 14}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 12857600.},
|
||||
/* GFLOPS 0.024 x 1 = 0.024 */ {{1, 1}, {{1, 480, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 24109568.},
|
||||
/* GFLOPS 0.024 x 1 = 0.024 */ {{1, 1}, {{1, 128, 38, 38}}, 256, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "", false, 23750912.},
|
||||
/* GFLOPS 0.024 x 1 = 0.024 */ {{1, 1}, {{1, 256, 19, 19}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 23704704.},
|
||||
/* GFLOPS 0.023 x 1 = 0.023 */ {{3, 3}, {{1, 3, 256, 512}}, 13, 1, {2, 2}, {1, 1}, {1, 1}, {0, 0}, "", true, 23429120.},
|
||||
/* GFLOPS 0.023 x 1 = 0.023 */ {{1, 1}, {{1, 32, 150, 150}}, 16, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 23400000.},
|
||||
/* GFLOPS 0.023 x 1 = 0.023 */ {{1, 1}, {{1, 512, 19, 19}}, 63, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 23311575.},
|
||||
/* GFLOPS 0.023 x 1 = 0.023 */ {{1, 1}, {{1, 448, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 22503936.},
|
||||
/* GFLOPS 0.023 x 1 = 0.023 */ {{1, 1}, {{1, 512, 14, 14}}, 112, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 22500800.},
|
||||
/* GFLOPS 0.022 x 1 = 0.022 */ {{1, 1}, {{1, 508, 14, 14}}, 112, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 22325184.},
|
||||
/* GFLOPS 0.021 x 1 = 0.021 */ {{3, 3}, {{1, 128, 12, 12}}, 256, 1, {2, 2}, {1, 1}, {1, 1}, {0, 0}, "", true, 21242880.},
|
||||
/* GFLOPS 0.021 x 1 = 0.021 */ {{1, 1}, {{1, 416, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 20898304.},
|
||||
/* GFLOPS 0.021 x 1 = 0.021 */ {{1, 1}, {{1, 832, 7, 7}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 20885760.},
|
||||
/* GFLOPS 0.021 x 1 = 0.021 */ {{1, 1}, {{1, 832, 7, 7}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 20885760.},
|
||||
/* GFLOPS 0.010 x 2 = 0.021 */ {{1, 1}, {{1, 832, 7, 7}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 10442880.},
|
||||
/* GFLOPS 0.010 x 2 = 0.021 */ {{1, 1}, {{1, 832, 7, 7}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 10442880.},
|
||||
/* GFLOPS 0.010 x 2 = 0.020 */ {{3, 3}, {{1, 256, 2, 2}}, 546, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 10066056.},
|
||||
/* GFLOPS 0.020 x 1 = 0.020 */ {{5, 5}, {{1, 16, 28, 28}}, 32, 1, {1, 1}, {1, 1}, {2, 2}, {0, 0}, "", true, 20095488.},
|
||||
/* GFLOPS 0.020 x 1 = 0.020 */ {{5, 5}, {{1, 16, 28, 28}}, 32, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 20095488.},
|
||||
/* GFLOPS 0.020 x 1 = 0.020 */ {{5, 5}, {{1, 32, 14, 14}}, 64, 1, {1, 1}, {1, 1}, {2, 2}, {0, 0}, "", true, 20082944.},
|
||||
/* GFLOPS 0.020 x 1 = 0.020 */ {{5, 5}, {{1, 32, 14, 14}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 20082944.},
|
||||
/* GFLOPS 0.020 x 1 = 0.020 */ {{3, 3}, {{1, 256, 19, 19}}, 12, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 19966188.},
|
||||
/* GFLOPS 0.019 x 1 = 0.019 */ {{1, 1}, {{1, 192, 28, 28}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 19317760.},
|
||||
/* GFLOPS 0.019 x 1 = 0.019 */ {{1, 1}, {{1, 192, 28, 28}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 19317760.},
|
||||
/* GFLOPS 0.019 x 1 = 0.019 */ {{1, 1}, {{1, 384, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 19292672.},
|
||||
/* GFLOPS 0.018 x 1 = 0.018 */ {{1, 1}, {{1, 576, 10, 10}}, 160, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 18448000.},
|
||||
/* GFLOPS 0.018 x 1 = 0.018 */ {{1, 1}, {{1, 480, 14, 14}}, 96, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 18082176.},
|
||||
/* GFLOPS 0.018 x 1 = 0.018 */ {{1, 1}, {{1, 480, 14, 14}}, 96, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 18082176.},
|
||||
/* GFLOPS 0.018 x 1 = 0.018 */ {{1, 1}, {{1, 192, 38, 38}}, 32, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 17790080.},
|
||||
/* GFLOPS 0.018 x 1 = 0.018 */ {{1, 1}, {{1, 352, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 17687040.},
|
||||
/* GFLOPS 0.017 x 1 = 0.017 */ {{2, 2}, {{1, 16, 128, 256}}, 16, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "", false, 16908288.},
|
||||
/* GFLOPS 0.016 x 1 = 0.016 */ {{1, 1}, {{1, 320, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 16081408.},
|
||||
/* GFLOPS 0.016 x 1 = 0.016 */ {{1, 1}, {{1, 832, 7, 7}}, 192, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 15664320.},
|
||||
/* GFLOPS 0.016 x 1 = 0.016 */ {{1, 1}, {{1, 832, 7, 7}}, 192, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 15664320.},
|
||||
/* GFLOPS 0.015 x 1 = 0.015 */ {{5, 5}, {{1, 48, 7, 7}}, 128, 1, {1, 1}, {1, 1}, {2, 2}, {0, 0}, "", true, 15059072.},
|
||||
/* GFLOPS 0.015 x 1 = 0.015 */ {{5, 5}, {{1, 32, 12, 12}}, 64, 1, {1, 1}, {1, 1}, {2, 2}, {0, 0}, "", true, 14754816.},
|
||||
/* GFLOPS 0.014 x 1 = 0.014 */ {{1, 1}, {{1, 288, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 14475776.},
|
||||
/* GFLOPS 0.014 x 1 = 0.014 */ {{1, 1}, {{1, 512, 5, 5}}, 546, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 13991250.},
|
||||
/* GFLOPS 0.013 x 1 = 0.013 */ {{1, 1}, {{1, 144, 38, 38}}, 32, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 13354112.},
|
||||
/* GFLOPS 0.007 x 2 = 0.013 */ {{1, 1}, {{1, 16, 56, 56}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 6623232.},
|
||||
/* GFLOPS 0.013 x 1 = 0.013 */ {{1, 1}, {{1, 832, 7, 7}}, 160, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 13053600.},
|
||||
/* GFLOPS 0.013 x 1 = 0.013 */ {{1, 1}, {{1, 832, 7, 7}}, 160, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 13053600.},
|
||||
/* GFLOPS 0.007 x 2 = 0.013 */ {{1, 1}, {{1, 32, 28, 28}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 6522880.},
|
||||
/* GFLOPS 0.006 x 2 = 0.013 */ {{1, 1}, {{1, 64, 14, 14}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 6472704.},
|
||||
/* GFLOPS 0.013 x 1 = 0.013 */ {{1, 1}, {{1, 128, 56, 56}}, 16, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 12895232.},
|
||||
/* GFLOPS 0.013 x 1 = 0.013 */ {{1, 1}, {{1, 256, 28, 28}}, 32, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 12870144.},
|
||||
/* GFLOPS 0.013 x 1 = 0.013 */ {{1, 1}, {{1, 256, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 12870144.},
|
||||
/* GFLOPS 0.013 x 1 = 0.013 */ {{1, 1}, {{1, 508, 14, 14}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 12757248.},
|
||||
/* GFLOPS 0.012 x 1 = 0.012 */ {{1, 1}, {{1, 992, 7, 7}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 12449920.},
|
||||
/* GFLOPS 0.012 x 1 = 0.012 */ {{1, 1}, {{1, 480, 14, 14}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 12054784.},
|
||||
/* GFLOPS 0.012 x 1 = 0.012 */ {{1, 1}, {{1, 480, 14, 14}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 12054784.},
|
||||
/* GFLOPS 0.012 x 1 = 0.012 */ {{1, 1}, {{1, 960, 7, 7}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 12048512.},
|
||||
/* GFLOPS 0.012 x 1 = 0.012 */ {{1, 1}, {{1, 32, 75, 75}}, 128, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "", false, 12014080.},
|
||||
/* GFLOPS 0.012 x 1 = 0.012 */ {{3, 3}, {{1, 96, 6, 6}}, 192, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 11950848.},
|
||||
/* GFLOPS 0.006 x 2 = 0.012 */ {{3, 3}, {{1, 96, 3, 3}}, 384, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 5975424.},
|
||||
/* GFLOPS 0.012 x 1 = 0.012 */ {{1, 1}, {{1, 320, 12, 12}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 11814912.},
|
||||
/* GFLOPS 0.012 x 1 = 0.012 */ {{1, 1}, {{1, 640, 6, 6}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 11805696.},
|
||||
/* GFLOPS 0.012 x 1 = 0.012 */ {{1, 1}, {{1, 928, 7, 7}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 11647104.},
|
||||
/* GFLOPS 0.011 x 1 = 0.011 */ {{1, 1}, {{1, 896, 7, 7}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 11245696.},
|
||||
/* GFLOPS 0.011 x 1 = 0.011 */ {{3, 3}, {{1, 256, 10, 10}}, 24, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 11061600.},
|
||||
/* GFLOPS 0.006 x 2 = 0.011 */ {{3, 3}, {{1, 512, 5, 5}}, 24, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 5530200.},
|
||||
/* GFLOPS 0.011 x 1 = 0.011 */ {{1, 1}, {{1, 864, 7, 7}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 10844288.},
|
||||
/* GFLOPS 0.010 x 1 = 0.010 */ {{1, 1}, {{1, 832, 7, 7}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 10442880.},
|
||||
/* GFLOPS 0.010 x 1 = 0.010 */ {{5, 5}, {{1, 32, 7, 7}}, 128, 1, {1, 1}, {1, 1}, {2, 2}, {0, 0}, "", true, 10041472.},
|
||||
/* GFLOPS 0.010 x 1 = 0.010 */ {{1, 1}, {{1, 800, 7, 7}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 10041472.},
|
||||
/* GFLOPS 0.010 x 1 = 0.010 */ {{1, 1}, {{1, 192, 28, 28}}, 32, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 9658880.},
|
||||
/* GFLOPS 0.010 x 1 = 0.010 */ {{1, 1}, {{1, 192, 28, 28}}, 32, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 9658880.},
|
||||
/* GFLOPS 0.010 x 1 = 0.010 */ {{1, 1}, {{1, 384, 14, 14}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 9646336.},
|
||||
/* GFLOPS 0.005 x 2 = 0.010 */ {{1, 1}, {{1, 512, 14, 14}}, 24, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 4821600.},
|
||||
/* GFLOPS 0.010 x 1 = 0.010 */ {{1, 1}, {{1, 768, 7, 7}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 9640064.},
|
||||
/* GFLOPS 0.010 x 1 = 0.010 */ {{3, 3}, {{1, 4, 128, 256}}, 4, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 9568256.},
|
||||
/* GFLOPS 0.005 x 2 = 0.009 */ {{1, 1}, {{1, 4, 128, 256}}, 16, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 4718592.},
|
||||
/* GFLOPS 0.009 x 1 = 0.009 */ {{1, 1}, {{1, 736, 7, 7}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 9238656.},
|
||||
/* GFLOPS 0.009 x 1 = 0.009 */ {{1, 1}, {{1, 192, 19, 19}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 8895040.},
|
||||
/* GFLOPS 0.009 x 1 = 0.009 */ {{1, 1}, {{1, 704, 7, 7}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 8837248.},
|
||||
/* GFLOPS 0.008 x 1 = 0.008 */ {{1, 1}, {{1, 672, 7, 7}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 8435840.},
|
||||
/* GFLOPS 0.008 x 1 = 0.008 */ {{1, 1}, {{1, 128, 32, 64}}, 16, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 8421376.},
|
||||
/* GFLOPS 0.008 x 1 = 0.008 */ {{1, 1}, {{1, 640, 7, 7}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 8034432.},
|
||||
/* GFLOPS 0.004 x 2 = 0.008 */ {{1, 1}, {{1, 832, 7, 7}}, 48, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 3916080.},
|
||||
/* GFLOPS 0.008 x 1 = 0.008 */ {{1, 1}, {{1, 608, 7, 7}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 7633024.},
|
||||
/* GFLOPS 0.008 x 1 = 0.008 */ {{5, 5}, {{1, 16, 14, 14}}, 48, 1, {1, 1}, {1, 1}, {2, 2}, {0, 0}, "", true, 7535808.},
|
||||
/* GFLOPS 0.008 x 1 = 0.008 */ {{5, 5}, {{1, 16, 14, 14}}, 48, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 7535808.},
|
||||
/* GFLOPS 0.004 x 2 = 0.007 */ {{3, 3}, {{1, 64, 5, 5}}, 128, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 3689600.},
|
||||
/* GFLOPS 0.007 x 1 = 0.007 */ {{1, 1}, {{1, 640, 6, 6}}, 160, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 7378560.},
|
||||
/* GFLOPS 0.004 x 2 = 0.007 */ {{1, 1}, {{1, 48, 14, 14}}, 192, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 3650304.},
|
||||
/* GFLOPS 0.007 x 1 = 0.007 */ {{1, 1}, {{1, 384, 14, 14}}, 48, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 7234752.},
|
||||
/* GFLOPS 0.007 x 1 = 0.007 */ {{1, 1}, {{1, 576, 7, 7}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 7231616.},
|
||||
/* GFLOPS 0.007 x 1 = 0.007 */ {{1, 1}, {{1, 256, 12, 12}}, 96, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 7091712.},
|
||||
/* GFLOPS 0.007 x 1 = 0.007 */ {{1, 1}, {{1, 544, 7, 7}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 6830208.},
|
||||
/* GFLOPS 0.007 x 1 = 0.007 */ {{3, 3}, {{1, 160, 6, 6}}, 256, 1, {2, 2}, {1, 1}, {1, 1}, {0, 0}, "", true, 6637824.},
|
||||
/* GFLOPS 0.007 x 1 = 0.007 */ {{1, 1}, {{1, 528, 14, 14}}, 32, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 6629504.},
|
||||
/* GFLOPS 0.007 x 1 = 0.007 */ {{1, 1}, {{1, 528, 14, 14}}, 32, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 6629504.},
|
||||
/* GFLOPS 0.007 x 1 = 0.007 */ {{1, 1}, {{1, 256, 5, 5}}, 512, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 6566400.},
|
||||
/* GFLOPS 0.003 x 2 = 0.007 */ {{1, 1}, {{1, 512, 5, 5}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 3280000.},
|
||||
/* GFLOPS 0.006 x 1 = 0.006 */ {{1, 1}, {{1, 64, 56, 56}}, 16, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 6472704.},
|
||||
/* GFLOPS 0.006 x 1 = 0.006 */ {{1, 1}, {{1, 128, 28, 28}}, 32, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 6447616.},
|
||||
/* GFLOPS 0.006 x 1 = 0.006 */ {{1, 1}, {{1, 512, 7, 7}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 6428800.},
|
||||
/* GFLOPS 0.006 x 1 = 0.006 */ {{1, 1}, {{1, 512, 14, 14}}, 32, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 6428800.},
|
||||
/* GFLOPS 0.006 x 1 = 0.006 */ {{1, 1}, {{1, 512, 14, 14}}, 32, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 6428800.},
|
||||
/* GFLOPS 0.006 x 1 = 0.006 */ {{3, 3}, {{1, 256, 10, 10}}, 12, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 5530800.},
|
||||
/* GFLOPS 0.005 x 1 = 0.005 */ {{1, 1}, {{1, 192, 12, 12}}, 96, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 5322240.},
|
||||
/* GFLOPS 0.005 x 1 = 0.005 */ {{3, 3}, {{1, 128, 5, 5}}, 256, 1, {2, 2}, {1, 1}, {1, 1}, {0, 0}, "", true, 5310720.},
|
||||
/* GFLOPS 0.005 x 1 = 0.005 */ {{3, 3}, {{1, 128, 5, 5}}, 256, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 5310720.},
|
||||
/* GFLOPS 0.005 x 1 = 0.005 */ {{3, 3}, {{1, 128, 5, 5}}, 256, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 5310720.},
|
||||
/* GFLOPS 0.005 x 1 = 0.005 */ {{1, 1}, {{1, 1024, 10, 10}}, 24, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 4917600.},
|
||||
/* GFLOPS 0.005 x 1 = 0.005 */ {{1, 1}, {{1, 1024, 10, 10}}, 24, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 4917600.},
|
||||
/* GFLOPS 0.005 x 1 = 0.005 */ {{1, 1}, {{1, 192, 28, 28}}, 16, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 4829440.},
|
||||
/* GFLOPS 0.005 x 1 = 0.005 */ {{1, 1}, {{1, 192, 28, 28}}, 16, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 4829440.},
|
||||
/* GFLOPS 0.005 x 1 = 0.005 */ {{1, 1}, {{1, 256, 14, 14}}, 48, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 4826304.},
|
||||
/* GFLOPS 0.005 x 1 = 0.005 */ {{1, 1}, {{1, 512, 14, 14}}, 24, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 4821600.},
|
||||
/* GFLOPS 0.005 x 1 = 0.005 */ {{1, 1}, {{1, 508, 14, 14}}, 24, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 4783968.},
|
||||
/* GFLOPS 0.005 x 1 = 0.005 */ {{1, 1}, {{1, 64, 24, 24}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 4755456.},
|
||||
/* GFLOPS 0.005 x 1 = 0.005 */ {{1, 1}, {{1, 256, 12, 12}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 4727808.},
|
||||
/* GFLOPS 0.005 x 1 = 0.005 */ {{1, 1}, {{1, 1024, 3, 3}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 4720896.},
|
||||
/* GFLOPS 0.004 x 1 = 0.004 */ {{1, 1}, {{1, 512, 19, 19}}, 12, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 4440300.},
|
||||
/* GFLOPS 0.004 x 1 = 0.004 */ {{1, 1}, {{1, 512, 19, 19}}, 12, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 4440300.},
|
||||
/* GFLOPS 0.004 x 1 = 0.004 */ {{1, 1}, {{1, 640, 6, 6}}, 96, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 4427136.},
|
||||
/* GFLOPS 0.004 x 1 = 0.004 */ {{1, 1}, {{1, 16, 128, 256}}, 4, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 4325376.},
|
||||
/* GFLOPS 0.004 x 1 = 0.004 */ {{1, 1}, {{1, 64, 64, 128}}, 4, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 4227072.},
|
||||
/* GFLOPS 0.004 x 1 = 0.004 */ {{1, 1}, {{1, 832, 7, 7}}, 48, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 3916080.},
|
||||
/* GFLOPS 0.004 x 1 = 0.004 */ {{5, 5}, {{1, 16, 12, 12}}, 32, 1, {1, 1}, {1, 1}, {2, 2}, {0, 0}, "", true, 3691008.},
|
||||
/* GFLOPS 0.004 x 1 = 0.004 */ {{3, 3}, {{1, 64, 10, 10}}, 128, 1, {2, 2}, {1, 1}, {1, 1}, {0, 0}, "", true, 3689600.},
|
||||
/* GFLOPS 0.004 x 1 = 0.004 */ {{5, 5}, {{1, 32, 6, 6}}, 64, 1, {1, 1}, {1, 1}, {2, 2}, {0, 0}, "", true, 3688704.},
|
||||
/* GFLOPS 0.004 x 1 = 0.004 */ {{5, 5}, {{1, 32, 12, 12}}, 64, 1, {2, 2}, {1, 1}, {2, 2}, {0, 0}, "", true, 3688704.},
|
||||
/* GFLOPS 0.004 x 1 = 0.004 */ {{5, 5}, {{1, 64, 6, 6}}, 128, 1, {2, 2}, {1, 1}, {2, 2}, {0, 0}, "", true, 3687552.},
|
||||
/* GFLOPS 0.004 x 1 = 0.004 */ {{1, 1}, {{1, 192, 12, 12}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 3548160.},
|
||||
/* GFLOPS 0.003 x 1 = 0.003 */ {{1, 1}, {{1, 736, 3, 3}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 3393792.},
|
||||
/* GFLOPS 0.003 x 1 = 0.003 */ {{1, 1}, {{1, 256, 10, 10}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 3283200.},
|
||||
/* GFLOPS 0.003 x 1 = 0.003 */ {{1, 1}, {{1, 512, 5, 5}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 3280000.},
|
||||
/* GFLOPS 0.003 x 1 = 0.003 */ {{1, 1}, {{1, 512, 5, 5}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 3280000.},
|
||||
/* GFLOPS 0.003 x 1 = 0.003 */ {{1, 1}, {{1, 512, 5, 5}}, 126, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 3228750.},
|
||||
/* GFLOPS 0.003 x 1 = 0.003 */ {{1, 1}, {{1, 480, 14, 14}}, 16, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 3013696.},
|
||||
/* GFLOPS 0.003 x 1 = 0.003 */ {{1, 1}, {{1, 480, 14, 14}}, 16, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 3013696.},
|
||||
/* GFLOPS 0.003 x 1 = 0.003 */ {{1, 1}, {{1, 320, 12, 12}}, 32, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 2953728.},
|
||||
/* GFLOPS 0.003 x 1 = 0.003 */ {{1, 1}, {{1, 640, 6, 6}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 2951424.},
|
||||
/* GFLOPS 0.003 x 1 = 0.003 */ {{3, 3}, {{1, 128, 5, 5}}, 128, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 2655360.},
|
||||
/* GFLOPS 0.003 x 1 = 0.003 */ {{1, 1}, {{1, 832, 7, 7}}, 32, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 2610720.},
|
||||
/* GFLOPS 0.003 x 1 = 0.003 */ {{1, 1}, {{1, 256, 3, 3}}, 546, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 2520882.},
|
||||
/* GFLOPS 0.001 x 2 = 0.003 */ {{3, 3}, {{1, 128, 1, 1}}, 546, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 1258530.},
|
||||
/* GFLOPS 0.002 x 1 = 0.002 */ {{1, 1}, {{1, 256, 12, 12}}, 32, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 2363904.},
|
||||
/* GFLOPS 0.002 x 1 = 0.002 */ {{3, 3}, {{1, 128, 3, 3}}, 256, 1, {2, 2}, {1, 1}, {1, 1}, {0, 0}, "", true, 2360320.},
|
||||
/* GFLOPS 0.002 x 1 = 0.002 */ {{3, 3}, {{1, 128, 3, 3}}, 256, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 2360320.},
|
||||
/* GFLOPS 0.002 x 1 = 0.002 */ {{3, 3}, {{1, 128, 3, 3}}, 256, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 2360320.},
|
||||
/* GFLOPS 0.002 x 1 = 0.002 */ {{1, 1}, {{1, 528, 4, 4}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 2164736.},
|
||||
/* GFLOPS 0.002 x 1 = 0.002 */ {{1, 1}, {{1, 508, 4, 4}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 2082816.},
|
||||
/* GFLOPS 0.002 x 1 = 0.002 */ {{1, 1}, {{1, 1024, 1, 1}}, 1000, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 2049000.},
|
||||
/* GFLOPS 0.001 x 2 = 0.002 */ {{3, 3}, {{1, 256, 3, 3}}, 24, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 995544.},
|
||||
/* GFLOPS 0.001 x 2 = 0.002 */ {{3, 3}, {{1, 128, 5, 5}}, 16, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 922000.},
|
||||
/* GFLOPS 0.002 x 1 = 0.002 */ {{1, 1}, {{1, 1024, 3, 3}}, 96, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 1770336.},
|
||||
/* GFLOPS 0.001 x 1 = 0.001 */ {{1, 1}, {{1, 640, 6, 6}}, 32, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 1475712.},
|
||||
/* GFLOPS 0.001 x 1 = 0.001 */ {{3, 3}, {{1, 128, 5, 5}}, 24, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 1383000.},
|
||||
/* GFLOPS 0.001 x 1 = 0.001 */ {{1, 1}, {{1, 736, 3, 3}}, 96, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 1272672.},
|
||||
/* GFLOPS 0.001 x 2 = 0.001 */ {{1, 1}, {{1, 256, 3, 3}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 590976.},
|
||||
/* GFLOPS 0.001 x 1 = 0.001 */ {{3, 3}, {{1, 128, 3, 3}}, 128, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 1180160.},
|
||||
/* GFLOPS 0.001 x 1 = 0.001 */ {{1, 1}, {{1, 256, 2, 2}}, 546, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 1120392.},
|
||||
/* GFLOPS 0.000 x 2 = 0.001 */ {{3, 3}, {{1, 128, 5, 5}}, 8, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 461000.},
|
||||
/* GFLOPS 0.001 x 1 = 0.001 */ {{1, 1}, {{1, 192, 12, 12}}, 16, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 887040.},
|
||||
/* GFLOPS 0.000 x 2 = 0.001 */ {{3, 3}, {{1, 256, 2, 2}}, 24, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 442464.},
|
||||
/* GFLOPS 0.000 x 2 = 0.001 */ {{1, 1}, {{1, 128, 5, 5}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 411200.},
|
||||
/* GFLOPS 0.001 x 1 = 0.001 */ {{3, 3}, {{1, 128, 5, 5}}, 12, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 691500.},
|
||||
/* GFLOPS 0.001 x 1 = 0.001 */ {{1, 1}, {{1, 640, 2, 2}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 655872.},
|
||||
/* GFLOPS 0.001 x 1 = 0.001 */ {{1, 1}, {{1, 512, 5, 5}}, 24, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 615000.},
|
||||
/* GFLOPS 0.001 x 1 = 0.001 */ {{1, 1}, {{1, 512, 5, 5}}, 24, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 615000.},
|
||||
/* GFLOPS 0.001 x 1 = 0.001 */ {{1, 1}, {{1, 128, 3, 3}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 592128.},
|
||||
/* GFLOPS 0.001 x 1 = 0.001 */ {{1, 1}, {{1, 256, 3, 3}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 590976.},
|
||||
/* GFLOPS 0.001 x 1 = 0.001 */ {{1, 1}, {{1, 256, 3, 3}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 590976.},
|
||||
/* GFLOPS 0.001 x 1 = 0.001 */ {{1, 1}, {{1, 256, 3, 3}}, 126, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 581742.},
|
||||
/* GFLOPS 0.001 x 1 = 0.001 */ {{1, 1}, {{1, 256, 4, 4}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 525312.},
|
||||
/* GFLOPS 0.000 x 1 = 0.000 */ {{1, 1}, {{1, 192, 5, 5}}, 32, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 308000.},
|
||||
/* GFLOPS 0.000 x 1 = 0.000 */ {{1, 1}, {{1, 128, 2, 2}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 263168.},
|
||||
/* GFLOPS 0.000 x 2 = 0.000 */ {{1, 1}, {{1, 256, 2, 2}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 131328.},
|
||||
/* GFLOPS 0.000 x 1 = 0.000 */ {{1, 1}, {{1, 256, 2, 2}}, 126, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 258552.},
|
||||
/* GFLOPS 0.000 x 1 = 0.000 */ {{1, 1}, {{1, 1024, 1, 1}}, 96, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 196704.},
|
||||
/* GFLOPS 0.000 x 1 = 0.000 */ {{3, 3}, {{1, 64, 2, 2}}, 128, 1, {2, 2}, {1, 1}, {1, 1}, {0, 0}, "", true, 147584.},
|
||||
/* GFLOPS 0.000 x 1 = 0.000 */ {{3, 3}, {{1, 64, 2, 2}}, 128, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 147584.},
|
||||
/* GFLOPS 0.000 x 1 = 0.000 */ {{3, 3}, {{1, 64, 2, 2}}, 128, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 147584.},
|
||||
/* GFLOPS 0.000 x 1 = 0.000 */ {{1, 1}, {{1, 736, 1, 1}}, 96, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 141408.},
|
||||
/* GFLOPS 0.000 x 1 = 0.000 */ {{1, 1}, {{1, 128, 1, 1}}, 546, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 140322.},
|
||||
/* GFLOPS 0.000 x 1 = 0.000 */ {{1, 1}, {{1, 256, 2, 2}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 131328.},
|
||||
/* GFLOPS 0.000 x 1 = 0.000 */ {{1, 1}, {{1, 256, 2, 2}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 131328.},
|
||||
/* GFLOPS 0.000 x 1 = 0.000 */ {{1, 1}, {{1, 256, 3, 3}}, 24, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 110808.},
|
||||
/* GFLOPS 0.000 x 1 = 0.000 */ {{1, 1}, {{1, 256, 3, 3}}, 24, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 110808.},
|
||||
/* GFLOPS 0.000 x 2 = 0.000 */ {{3, 3}, {{1, 128, 1, 1}}, 24, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 55320.},
|
||||
/* GFLOPS 0.000 x 1 = 0.000 */ {{3, 3}, {{1, 64, 2, 2}}, 64, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 73792.},
|
||||
/* GFLOPS 0.000 x 1 = 0.000 */ {{1, 1}, {{1, 256, 2, 2}}, 24, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 49248.},
|
||||
/* GFLOPS 0.000 x 1 = 0.000 */ {{1, 1}, {{1, 256, 2, 2}}, 24, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 49248.},
|
||||
/* GFLOPS 0.000 x 1 = 0.000 */ {{1, 1}, {{1, 128, 1, 1}}, 126, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 32382.},
|
||||
/* GFLOPS 0.000 x 1 = 0.000 */ {{1, 1}, {{1, 64, 1, 1}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 16512.},
|
||||
/* GFLOPS 0.000 x 1 = 0.000 */ {{1, 1}, {{1, 128, 1, 1}}, 24, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 6168.},
|
||||
/* GFLOPS 0.000 x 1 = 0.000 */ {{1, 1}, {{1, 128, 1, 1}}, 24, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 6168.}
|
||||
};
|
||||
struct ConvParamID
|
||||
{
|
||||
int data[] = {count, nplanes, height, width};
|
||||
return MatShape(data, data+4);
|
||||
enum {
|
||||
CONV_0 = 0,
|
||||
CONV_100 = 100,
|
||||
CONV_LAST = sizeof(testConvolutionConfigs) / sizeof(testConvolutionConfigs[0])
|
||||
};
|
||||
int val_; \
|
||||
ConvParamID(int val = 0) : val_(val) {}
|
||||
operator int() const { return val_; }
|
||||
static ::testing::internal::ParamGenerator<ConvParamID> all()
|
||||
{
|
||||
#if 0
|
||||
enum { NUM = (int)CONV_LAST };
|
||||
#else
|
||||
enum { NUM = (int)CONV_100 };
|
||||
#endif
|
||||
ConvParamID v_[NUM]; for (int i = 0; i < NUM; ++i) { v_[i] = ConvParamID(i); } // reduce generated code size
|
||||
return ::testing::ValuesIn(v_, v_ + NUM);
|
||||
}
|
||||
}; \
|
||||
static inline void PrintTo(const ConvParamID& v, std::ostream* os)
|
||||
{
|
||||
CV_Assert((int)v >= 0); CV_Assert((int)v < ConvParamID::CONV_LAST);
|
||||
const ConvParam_t& p = testConvolutionConfigs[(int)v];
|
||||
|
||||
*os << "GFLOPS=" << cv::format("%.3f", p.declared_flops * 1e-9)
|
||||
<< ", K=" << (Size)p.kernel
|
||||
<< ", IN={" << p.shapeIn.dims[0] << ", " << p.shapeIn.dims[1] << ", " << p.shapeIn.dims[2] << ", " << p.shapeIn.dims[3] << "}"
|
||||
<< ", OCN=" << p.outCN;
|
||||
if (p.groups > 1)
|
||||
*os << ", G=" << p.groups;
|
||||
if (((Size)p.stride).area() != 1)
|
||||
*os << ", S=" << ((Size)p.stride);
|
||||
if (((Size)p.dilation).area() != 1)
|
||||
*os << ", D=" << ((Size)p.dilation);
|
||||
if (((Size)p.pad).area() != 0)
|
||||
*os << ", P=" << ((Size)p.pad);
|
||||
if (((Size)p.padAdjust).area() != 0)
|
||||
*os << ", PAdj=" << ((Size)p.padAdjust);
|
||||
if (!((std::string)p.padMode).empty())
|
||||
*os << ", PM=" << ((std::string)p.padMode);
|
||||
if (p.hasBias)
|
||||
*os << ", BIAS";
|
||||
}
|
||||
|
||||
PERF_TEST_P( ConvolutionPerfTest, perf, Combine(
|
||||
Values(Size(1, 1), Size(3, 3), Size(5, 5), Size(11, 11)),
|
||||
Values(make_pair(blobShape(1, 4, 224, 224), 64),
|
||||
make_pair(blobShape(1, 64, 112, 122), 128),
|
||||
make_pair(blobShape(1, 256, 28, 28), 512)),
|
||||
GroupSize::all(),
|
||||
StrideSize::all())
|
||||
)
|
||||
|
||||
|
||||
typedef tuple<ConvParamID, tuple<Backend, Target> > ConvTestParam_t;
|
||||
typedef TestBaseWithParam<ConvTestParam_t> Conv;
|
||||
|
||||
PERF_TEST_P_(Conv, conv)
|
||||
{
|
||||
RNG rng(0);
|
||||
int test_id = (int)get<0>(GetParam());
|
||||
ASSERT_GE(test_id, 0); ASSERT_LT(test_id, ConvParamID::CONV_LAST);
|
||||
const ConvParam_t& params = testConvolutionConfigs[test_id];
|
||||
double declared_flops = params.declared_flops;
|
||||
Size kernel = params.kernel;
|
||||
MatShape inputShape = MatShape(params.shapeIn.dims, params.shapeIn.dims + 4);
|
||||
int outChannels = params.outCN;
|
||||
int groups = params.groups;
|
||||
Size stride = params.stride;
|
||||
Size dilation = params.dilation;
|
||||
Size pad = params.pad;
|
||||
Size padAdjust = params.padAdjust;
|
||||
std::string padMode(params.padMode);
|
||||
bool hasBias = params.hasBias;
|
||||
Backend backendId = get<0>(get<1>(GetParam()));
|
||||
Target targetId = get<1>(get<1>(GetParam()));
|
||||
|
||||
ConvParam params = GetParam();
|
||||
int ksz = get<0>(params).width;
|
||||
MatShape inpShape = get<1>(params).first;
|
||||
int outCn = get<1>(params).second;
|
||||
int groups = get<2>(params);
|
||||
int stride = (ksz >= 11) ? 4 : (int)get<3>(params);
|
||||
int inChannels = inputShape[1];
|
||||
Size inSize(inputShape[3], inputShape[2]);
|
||||
|
||||
int inpCn = inpShape[1];
|
||||
int wgtSize[] = { outCn, inpCn/groups, ksz, ksz };
|
||||
int biasSize[] = { outCn, 1, 1, 1 };
|
||||
const int wtype = CV_32F;
|
||||
Mat wgtBlob(4, wgtSize, wtype), biasBlob(4, biasSize, wtype);
|
||||
Mat inpBlob(4, &inpShape[0], wtype);
|
||||
rng.fill(biasBlob, RNG::UNIFORM, -1, +1);
|
||||
rng.fill(wgtBlob, RNG::UNIFORM, -1, +1);
|
||||
rng.fill(inpBlob, RNG::UNIFORM, -1, +1);
|
||||
int sz[] = {outChannels, inChannels / groups, kernel.height, kernel.width};
|
||||
Mat weights(4, &sz[0], CV_32F);
|
||||
randu(weights, -1.0f, 1.0f);
|
||||
|
||||
LayerParams lp;
|
||||
lp.set("num_output", outCn);
|
||||
lp.set("kernel_w", kernel.width);
|
||||
lp.set("kernel_h", kernel.height);
|
||||
lp.set("pad_w", pad.width);
|
||||
lp.set("pad_h", pad.height);
|
||||
if (padAdjust.width > 0 || padAdjust.height > 0)
|
||||
{
|
||||
lp.set("adj_w", padAdjust.width);
|
||||
lp.set("adj_h", padAdjust.height);
|
||||
}
|
||||
if (!padMode.empty())
|
||||
lp.set("pad_mode", padMode);
|
||||
lp.set("stride_w", stride.width);
|
||||
lp.set("stride_h", stride.height);
|
||||
lp.set("dilation_w", dilation.width);
|
||||
lp.set("dilation_h", dilation.height);
|
||||
lp.set("num_output", outChannels);
|
||||
lp.set("group", groups);
|
||||
lp.set("stride", stride);
|
||||
lp.set("kernel_size", ksz);
|
||||
lp.blobs.reserve(2);
|
||||
lp.blobs.push_back(wgtBlob);
|
||||
lp.blobs.push_back(biasBlob);
|
||||
|
||||
std::vector<Mat*> inpBlobs(1, &inpBlob);
|
||||
std::vector<Mat> outBlobs, internalBlobs;
|
||||
|
||||
Ptr<Layer> layer = cv::dnn::LayerFactory::createLayerInstance("Convolution", lp);
|
||||
std::vector<MatShape> inputShapes(1, shape(inpBlob)), outShapes, internals;
|
||||
layer->getMemoryShapes(inputShapes, 0, outShapes, internals);
|
||||
for (size_t i = 0; i < outShapes.size(); i++)
|
||||
lp.set("bias_term", hasBias);
|
||||
lp.type = "Convolution";
|
||||
lp.name = "testLayer";
|
||||
lp.blobs.push_back(weights);
|
||||
if (hasBias)
|
||||
{
|
||||
outBlobs.push_back(Mat(outShapes[i], CV_32F));
|
||||
Mat bias(1, outChannels, CV_32F);
|
||||
randu(bias, -1.0f, 1.0f);
|
||||
lp.blobs.push_back(bias);
|
||||
}
|
||||
for (size_t i = 0; i < internals.size(); i++)
|
||||
int inpSz[] = {1, inChannels, inSize.height, inSize.width};
|
||||
Mat input(4, &inpSz[0], CV_32F);
|
||||
randu(input, -1.0f, 1.0f);
|
||||
|
||||
Net net;
|
||||
net.addLayerToPrev(lp.name, lp.type, lp);
|
||||
|
||||
net.setInput(input);
|
||||
net.setPreferableBackend(backendId);
|
||||
net.setPreferableTarget(targetId);
|
||||
|
||||
// warmup
|
||||
Mat output = net.forward();
|
||||
|
||||
MatShape netInputShape = shape(input);
|
||||
size_t weightsMemory = 0, blobsMemory = 0;
|
||||
net.getMemoryConsumption(netInputShape, weightsMemory, blobsMemory);
|
||||
int64 flops = net.getFLOPS(netInputShape);
|
||||
CV_Assert(flops > 0);
|
||||
|
||||
std::cout
|
||||
<< "IN=" << divUp(input.total() * input.elemSize(), 1u<<10) << " Kb " << netInputShape
|
||||
<< " OUT=" << divUp(output.total() * output.elemSize(), 1u<<10) << " Kb " << shape(output)
|
||||
<< " Weights(parameters): " << divUp(weightsMemory, 1u<<10) << " Kb"
|
||||
<< " MFLOPS=" << flops * 1e-6 << std::endl;
|
||||
|
||||
TEST_CYCLE()
|
||||
{
|
||||
internalBlobs.push_back(Mat());
|
||||
if (total(internals[i]))
|
||||
internalBlobs.back().create(internals[i], CV_32F);
|
||||
Mat res = net.forward();
|
||||
}
|
||||
|
||||
layer->finalize(inpBlobs, outBlobs);
|
||||
|
||||
Mat inpBlob2D = inpBlob.reshape(1, outCn);
|
||||
Mat wgtBlob2D = wgtBlob.reshape(1, outCn*(inpCn/groups));
|
||||
Mat outBlob2D = outBlobs[0].reshape(1, outBlobs[0].size[0]);
|
||||
declare.in(inpBlob2D, wgtBlob2D, WARMUP_RNG).out(outBlob2D);
|
||||
|
||||
layer->forward(inpBlobs, outBlobs, internalBlobs); /// warmup
|
||||
|
||||
PERF_SAMPLE_BEGIN()
|
||||
layer->forward(inpBlobs, outBlobs, internalBlobs);
|
||||
PERF_SAMPLE_END()
|
||||
|
||||
EXPECT_NEAR(flops, declared_flops, declared_flops * 1e-6);
|
||||
SANITY_CHECK_NOTHING();
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(/**/, Conv, Combine(
|
||||
ConvParamID::all(),
|
||||
dnnBackendsAndTargets(false, false) // defined in ../test/test_common.hpp
|
||||
));
|
||||
|
||||
} // namespace
|
||||
|
@ -14,10 +14,7 @@
|
||||
|
||||
namespace opencv_test {
|
||||
|
||||
CV_ENUM(DNNBackend, DNN_BACKEND_DEFAULT, DNN_BACKEND_HALIDE, DNN_BACKEND_INFERENCE_ENGINE, DNN_BACKEND_OPENCV)
|
||||
CV_ENUM(DNNTarget, DNN_TARGET_CPU, DNN_TARGET_OPENCL, DNN_TARGET_OPENCL_FP16, DNN_TARGET_MYRIAD)
|
||||
|
||||
class DNNTestNetwork : public ::perf::TestBaseWithParam< tuple<DNNBackend, DNNTarget> >
|
||||
class DNNTestNetwork : public ::perf::TestBaseWithParam< tuple<Backend, Target> >
|
||||
{
|
||||
public:
|
||||
dnn::Backend backend;
|
||||
@ -269,22 +266,6 @@ PERF_TEST_P_(DNNTestNetwork, Inception_v2_Faster_RCNN)
|
||||
Mat(cv::Size(800, 600), CV_32FC3));
|
||||
}
|
||||
|
||||
const tuple<DNNBackend, DNNTarget> testCases[] = {
|
||||
#ifdef HAVE_HALIDE
|
||||
tuple<DNNBackend, DNNTarget>(DNN_BACKEND_HALIDE, DNN_TARGET_CPU),
|
||||
tuple<DNNBackend, DNNTarget>(DNN_BACKEND_HALIDE, DNN_TARGET_OPENCL),
|
||||
#endif
|
||||
#ifdef HAVE_INF_ENGINE
|
||||
tuple<DNNBackend, DNNTarget>(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_CPU),
|
||||
tuple<DNNBackend, DNNTarget>(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_OPENCL),
|
||||
tuple<DNNBackend, DNNTarget>(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_OPENCL_FP16),
|
||||
tuple<DNNBackend, DNNTarget>(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_MYRIAD),
|
||||
#endif
|
||||
tuple<DNNBackend, DNNTarget>(DNN_BACKEND_OPENCV, DNN_TARGET_CPU),
|
||||
tuple<DNNBackend, DNNTarget>(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL),
|
||||
tuple<DNNBackend, DNNTarget>(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL_FP16)
|
||||
};
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(/*nothing*/, DNNTestNetwork, testing::ValuesIn(testCases));
|
||||
INSTANTIATE_TEST_CASE_P(/*nothing*/, DNNTestNetwork, dnnBackendsAndTargets());
|
||||
|
||||
} // namespace
|
||||
|
@ -4,6 +4,8 @@
|
||||
#include <opencv2/ts.hpp>
|
||||
#include <opencv2/dnn.hpp>
|
||||
|
||||
#include "../test/test_common.hpp"
|
||||
|
||||
namespace opencv_test {
|
||||
using namespace perf;
|
||||
using namespace cv::dnn;
|
||||
|
@ -1676,14 +1676,6 @@ struct Net::Impl
|
||||
// with the current layer if they follow it. Normally, they are fused with the convolution layer,
|
||||
// but some of them (like activation) may be fused with fully-connected, elemwise (+) and
|
||||
// some other layers.
|
||||
|
||||
// TODO: OpenCL target support more fusion styles.
|
||||
if ( preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget) &&
|
||||
(!cv::ocl::useOpenCL() || (ld.layerInstance->type != "Convolution" &&
|
||||
ld.layerInstance->type != "MVN" && ld.layerInstance->type != "Pooling" &&
|
||||
ld.layerInstance->type != "Concat")) )
|
||||
continue;
|
||||
|
||||
Ptr<Layer>& currLayer = ld.layerInstance;
|
||||
if( ld.consumers.size() == 1 && pinsToKeep.count(LayerPin(lid, 0)) == 0 )
|
||||
{
|
||||
@ -1717,6 +1709,13 @@ struct Net::Impl
|
||||
if (preferableBackend != DNN_BACKEND_OPENCV)
|
||||
continue; // Go to the next layer.
|
||||
|
||||
// TODO: OpenCL target support more fusion styles.
|
||||
if ( preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget) &&
|
||||
(!cv::ocl::useOpenCL() || (ld.layerInstance->type != "Convolution" &&
|
||||
ld.layerInstance->type != "MVN" && ld.layerInstance->type != "Pooling" &&
|
||||
ld.layerInstance->type != "Concat")) )
|
||||
continue;
|
||||
|
||||
while (nextData)
|
||||
{
|
||||
// For now, OpenCL target support fusion with activation of ReLU/ChannelsPReLU/Power/Tanh
|
||||
@ -2693,8 +2692,7 @@ void Net::setInput(InputArray blob, const String& name, double scalefactor, cons
|
||||
Mat Net::getParam(LayerId layer, int numParam)
|
||||
{
|
||||
LayerData &ld = impl->getLayerData(layer);
|
||||
|
||||
std::vector<Mat> &layerBlobs = ld.layerInstance->blobs;
|
||||
std::vector<Mat> &layerBlobs = ld.getLayerInstance()->blobs;
|
||||
CV_Assert(numParam < (int)layerBlobs.size());
|
||||
return layerBlobs[numParam];
|
||||
}
|
||||
@ -2703,7 +2701,7 @@ void Net::setParam(LayerId layer, int numParam, const Mat &blob)
|
||||
{
|
||||
LayerData &ld = impl->getLayerData(layer);
|
||||
|
||||
std::vector<Mat> &layerBlobs = ld.layerInstance->blobs;
|
||||
std::vector<Mat> &layerBlobs = ld.getLayerInstance()->blobs;
|
||||
CV_Assert(numParam < (int)layerBlobs.size());
|
||||
//we don't make strong checks, use this function carefully
|
||||
layerBlobs[numParam] = blob;
|
||||
|
@ -350,12 +350,14 @@ public:
|
||||
return false;
|
||||
}
|
||||
|
||||
void fuseWeights(const Mat& w, const Mat& b)
|
||||
void fuseWeights(const Mat& w_, const Mat& b_)
|
||||
{
|
||||
// Convolution weights have OIHW data layout. Parameters fusion in case of
|
||||
// (conv(I) + b1 ) * w + b2
|
||||
// means to replace convolution's weights to [w*conv(I)] and bias to [b1 * w + b2]
|
||||
const int outCn = weightsMat.size[0];
|
||||
Mat w = w_.total() == 1 ? Mat(1, outCn, CV_32F, Scalar(w_.at<float>(0))) : w_;
|
||||
Mat b = b_.total() == 1 ? Mat(1, outCn, CV_32F, Scalar(b_.at<float>(0))) : b_;
|
||||
CV_Assert_N(!weightsMat.empty(), biasvec.size() == outCn + 2,
|
||||
w.empty() || outCn == w.total(), b.empty() || outCn == b.total());
|
||||
|
||||
|
@ -41,6 +41,7 @@
|
||||
//M*/
|
||||
|
||||
#include "../precomp.hpp"
|
||||
#include "../op_inf_engine.hpp"
|
||||
#include "layers_common.hpp"
|
||||
|
||||
namespace cv
|
||||
@ -64,6 +65,12 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
// Reports whether this layer can run on the given backend: always on
// DNN_BACKEND_OPENCV, and on DNN_BACKEND_INFERENCE_ENGINE only when
// crop_ranges currently holds exactly four ranges.
virtual bool supportBackend(int backendId) CV_OVERRIDE
{
    if (backendId == DNN_BACKEND_OPENCV)
        return true;
    return (backendId == DNN_BACKEND_INFERENCE_ENGINE) && (crop_ranges.size() == 4);
}
|
||||
|
||||
bool getMemoryShapes(const std::vector<MatShape> &inputs,
|
||||
const int requiredOutputs,
|
||||
std::vector<MatShape> &outputs,
|
||||
@ -109,7 +116,11 @@ public:
|
||||
offset_final[i] = offset[i - start_axis];
|
||||
}
|
||||
|
||||
crop_ranges.resize(dims, Range::all());
|
||||
crop_ranges.resize(dims);
|
||||
for (int i = 0; i < start_axis; i++)
|
||||
{
|
||||
crop_ranges[i] = Range(0, inpBlob.size[i]);
|
||||
}
|
||||
for (int i = start_axis; i < dims; i++)
|
||||
{
|
||||
if (offset_final[i] < 0 || offset_final[i] + inpSzBlob.size[i] > inpBlob.size[i])
|
||||
@ -138,6 +149,38 @@ public:
|
||||
input(&crop_ranges[0]).copyTo(output);
|
||||
}
|
||||
|
||||
// Builds the Inference Engine counterpart of this layer: a "Crop" layer whose
// axis/offset/dim lists are filled from the four entries of crop_ranges.
// Returns an empty Ptr<BackendNode> when built without HAVE_INF_ENGINE.
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
{
#ifdef HAVE_INF_ENGINE
    InferenceEngine::LayerParams lp;
    lp.name = name;
    lp.type = "Crop";
    lp.precision = InferenceEngine::Precision::FP32;
    std::shared_ptr<InferenceEngine::CropLayer> ieLayer(new InferenceEngine::CropLayer(lp));

    CV_Assert(crop_ranges.size() == 4);

    // Axis id reported to Inference Engine for crop_ranges[0..3]. The original
    // code labels these batch, channels, height (axis 3) and width (axis 2).
    // NOTE(review): the 3/2 swap for the last two ranges is kept exactly as written - confirm it is intentional.
    const int ieAxes[4] = {0, 1, 3, 2};
    for (int i = 0; i < 4; ++i)
    {
        const Range& r = crop_ranges[i];
        ieLayer->axis.push_back(ieAxes[i]);
        ieLayer->offset.push_back(r.start);
        ieLayer->dim.push_back(r.end - r.start);
    }

    return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
#endif  // HAVE_INF_ENGINE
    return Ptr<BackendNode>();
}
|
||||
|
||||
std::vector<Range> crop_ranges;
|
||||
};
|
||||
|
||||
|
@ -161,6 +161,16 @@ public:
|
||||
return Ptr<BackendNode>();
|
||||
}
|
||||
|
||||
virtual bool tryFuse(Ptr<dnn::Layer>& top) CV_OVERRIDE
|
||||
{
|
||||
return func.tryFuse(top);
|
||||
}
|
||||
|
||||
void getScaleShift(Mat& scale_, Mat& shift_) const CV_OVERRIDE
|
||||
{
|
||||
func.getScaleShift(scale_, shift_);
|
||||
}
|
||||
|
||||
bool getMemoryShapes(const std::vector<MatShape> &inputs,
|
||||
const int requiredOutputs,
|
||||
std::vector<MatShape> &outputs,
|
||||
@ -343,6 +353,10 @@ struct ReLUFunctor
|
||||
}
|
||||
#endif // HAVE_INF_ENGINE
|
||||
|
||||
bool tryFuse(Ptr<dnn::Layer>&) { return false; }
|
||||
|
||||
void getScaleShift(Mat&, Mat&) const {}
|
||||
|
||||
int64 getFLOPSPerElement() const { return 1; }
|
||||
};
|
||||
|
||||
@ -448,6 +462,10 @@ struct ReLU6Functor
|
||||
}
|
||||
#endif // HAVE_INF_ENGINE
|
||||
|
||||
bool tryFuse(Ptr<dnn::Layer>&) { return false; }
|
||||
|
||||
void getScaleShift(Mat&, Mat&) const {}
|
||||
|
||||
int64 getFLOPSPerElement() const { return 2; }
|
||||
};
|
||||
|
||||
@ -518,6 +536,10 @@ struct TanHFunctor
|
||||
}
|
||||
#endif // HAVE_INF_ENGINE
|
||||
|
||||
bool tryFuse(Ptr<dnn::Layer>&) { return false; }
|
||||
|
||||
void getScaleShift(Mat&, Mat&) const {}
|
||||
|
||||
int64 getFLOPSPerElement() const { return 1; }
|
||||
};
|
||||
|
||||
@ -588,6 +610,10 @@ struct SigmoidFunctor
|
||||
}
|
||||
#endif // HAVE_INF_ENGINE
|
||||
|
||||
bool tryFuse(Ptr<dnn::Layer>&) { return false; }
|
||||
|
||||
void getScaleShift(Mat&, Mat&) const {}
|
||||
|
||||
int64 getFLOPSPerElement() const { return 3; }
|
||||
};
|
||||
|
||||
@ -659,6 +685,10 @@ struct ELUFunctor
|
||||
}
|
||||
#endif // HAVE_INF_ENGINE
|
||||
|
||||
bool tryFuse(Ptr<dnn::Layer>&) { return false; }
|
||||
|
||||
void getScaleShift(Mat&, Mat&) const {}
|
||||
|
||||
int64 getFLOPSPerElement() const { return 2; }
|
||||
};
|
||||
|
||||
@ -727,6 +757,10 @@ struct AbsValFunctor
|
||||
}
|
||||
#endif // HAVE_INF_ENGINE
|
||||
|
||||
bool tryFuse(Ptr<dnn::Layer>&) { return false; }
|
||||
|
||||
void getScaleShift(Mat&, Mat&) const {}
|
||||
|
||||
int64 getFLOPSPerElement() const { return 1; }
|
||||
};
|
||||
|
||||
@ -775,6 +809,10 @@ struct BNLLFunctor
|
||||
}
|
||||
#endif // HAVE_INF_ENGINE
|
||||
|
||||
bool tryFuse(Ptr<dnn::Layer>&) { return false; }
|
||||
|
||||
void getScaleShift(Mat&, Mat&) const {}
|
||||
|
||||
int64 getFLOPSPerElement() const { return 5; }
|
||||
};
|
||||
|
||||
@ -875,15 +913,51 @@ struct PowerFunctor
|
||||
#ifdef HAVE_INF_ENGINE
|
||||
InferenceEngine::CNNLayerPtr initInfEngine(InferenceEngine::LayerParams& lp)
|
||||
{
|
||||
lp.type = "Power";
|
||||
std::shared_ptr<InferenceEngine::PowerLayer> ieLayer(new InferenceEngine::PowerLayer(lp));
|
||||
ieLayer->power = power;
|
||||
ieLayer->scale = scale;
|
||||
ieLayer->offset = shift;
|
||||
return ieLayer;
|
||||
if (power == 1.0f && scale == 1.0f && shift == 0.0f)
|
||||
{
|
||||
// It looks like there is a bug in Inference Engine for DNN_TARGET_OPENCL and DNN_TARGET_OPENCL_FP16
|
||||
// if power layer do nothing so we replace it to Identity.
|
||||
lp.type = "Split";
|
||||
return std::shared_ptr<InferenceEngine::SplitLayer>(new InferenceEngine::SplitLayer(lp));
|
||||
}
|
||||
else
|
||||
{
|
||||
lp.type = "Power";
|
||||
std::shared_ptr<InferenceEngine::PowerLayer> ieLayer(new InferenceEngine::PowerLayer(lp));
|
||||
ieLayer->power = power;
|
||||
ieLayer->scale = scale;
|
||||
ieLayer->offset = shift;
|
||||
return ieLayer;
|
||||
}
|
||||
}
|
||||
#endif // HAVE_INF_ENGINE
|
||||
|
||||
// Tries to absorb the following layer `top` into this functor.
//
// `top` is asked for its scale/shift via getScaleShift(). Fusion happens only
// when it reports at least one of them and each has at most one element
// (a scalar scale and/or a scalar shift).
// On success `scale` and `shift` are updated in place and true is returned;
// otherwise the functor is left untouched and false is returned.
bool tryFuse(Ptr<dnn::Layer>& top)
{
    // Folding a*y + b into (scale, shift) is exact only while this functor is
    // itself affine, i.e. power == 1. For any other exponent,
    // a*(scale*x + shift)^power + b cannot be rewritten as
    // (scale'*x + shift')^power, so fusing would change the result.
    // NOTE(review): assumes the forward pass is (scale*x + shift)^power, as in
    // the Caffe Power layer - confirm against the functor's apply().
    if (power != 1.0f)
        return false;

    Mat w, b;
    top->getScaleShift(w, b);
    if ((w.empty() && b.empty()) || w.total() > 1 || b.total() > 1)
        return false;

    // A missing scale acts as 1, a missing shift as 0.
    const float nextScale = w.empty() ? 1.0f : w.at<float>(0);
    const float nextShift = b.empty() ? 0.0f : b.at<float>(0);

    // a*(scale*x + shift) + b == (a*scale)*x + (a*shift + b)
    scale *= nextScale;
    shift = nextScale * shift + nextShift;
    return true;
}
|
||||
|
||||
// Exposes this functor as a scale/shift pair when power == 1: _scale and
// _shift each receive a 1x1 CV_32F matrix holding `scale` and `shift`.
// For any other power both outputs are left untouched.
void getScaleShift(Mat& _scale, Mat& _shift) const
{
    if (power != 1.0f)
        return;

    _scale = Mat(1, 1, CV_32F, Scalar(scale));
    _shift = Mat(1, 1, CV_32F, Scalar(shift));
}
|
||||
|
||||
int64 getFLOPSPerElement() const { return power == 1 ? 2 : 10; }
|
||||
};
|
||||
|
||||
@ -989,6 +1063,10 @@ struct ChannelsPReLUFunctor
|
||||
}
|
||||
#endif // HAVE_INF_ENGINE
|
||||
|
||||
bool tryFuse(Ptr<dnn::Layer>&) { return false; }
|
||||
|
||||
void getScaleShift(Mat&, Mat&) const {}
|
||||
|
||||
int64 getFLOPSPerElement() const { return 1; }
|
||||
};
|
||||
|
||||
|
@ -83,12 +83,6 @@ public:
|
||||
int startAxis = clamp(_startAxis, numAxes);
|
||||
int endAxis = clamp(_endAxis, numAxes);
|
||||
|
||||
for (size_t i = 1; i < inputs.size(); i++)
|
||||
{
|
||||
CV_Assert(inputs[i] == inputs[0]);
|
||||
}
|
||||
|
||||
|
||||
CV_Assert(startAxis >= 0);
|
||||
CV_Assert(endAxis >= startAxis && endAxis < (int)numAxes);
|
||||
|
||||
|
@ -350,17 +350,33 @@ public:
|
||||
inshape = shape(outerSize, innerSize);
|
||||
outshape = shape(outerSize, numOutput);
|
||||
|
||||
UMat srcMat, dstMat;
|
||||
UMat srcMat, dstMat, srcMat_fp32, dstMat_fp32;
|
||||
srcMat = inputs[i].reshape(1, inshape.size(), &inshape[0]);
|
||||
dstMat = outputs[i].reshape(1, outshape.size(), &outshape[0]);
|
||||
|
||||
cv::gemm(srcMat, weights, 1, noArray(), 0, dstMat, GEMM_2_T);
|
||||
if (use_half)
|
||||
{
|
||||
convertFp16(srcMat, srcMat_fp32);
|
||||
convertFp16(dstMat, dstMat_fp32);
|
||||
}
|
||||
else
|
||||
{
|
||||
srcMat_fp32 = srcMat;
|
||||
dstMat_fp32 = dstMat;
|
||||
}
|
||||
|
||||
cv::gemm(srcMat_fp32, weights, 1, noArray(), 0, dstMat_fp32, GEMM_2_T);
|
||||
|
||||
if (bias)
|
||||
{
|
||||
UMat biasOnesMat = UMat::ones(outerSize, 1, umat_blobs[0].type());
|
||||
UMat& biases = umat_blobs[1];
|
||||
cv::gemm(biasOnesMat, biases, 1, dstMat, 1, dstMat, 0);
|
||||
cv::gemm(biasOnesMat, biases, 1, dstMat_fp32, 1, dstMat_fp32, 0);
|
||||
}
|
||||
if (use_half)
|
||||
{
|
||||
convertFp16(srcMat_fp32, srcMat);
|
||||
convertFp16(dstMat_fp32, dstMat);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -453,8 +453,8 @@ public:
|
||||
outputPtr = outputs[0].ptr<float>(0, 1);
|
||||
if(_variance.size() == 1)
|
||||
{
|
||||
Mat secondChannel(outputs[0].size[2], outputs[0].size[3], CV_32F, outputPtr);
|
||||
secondChannel.setTo(Scalar(_variance[0]));
|
||||
Mat secondChannel(1, outputs[0].size[2], CV_32F, outputPtr);
|
||||
secondChannel.setTo(Scalar::all(_variance[0]));
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -161,6 +161,7 @@ InfEngineBackendNet::InfEngineBackendNet(InferenceEngine::CNNNetwork& net)
|
||||
inputs = net.getInputsInfo();
|
||||
outputs = net.getOutputsInfo();
|
||||
layers.resize(net.layerCount()); // A hack to execute InfEngineBackendNet::layerCount correctly.
|
||||
netOwner = net;
|
||||
}
|
||||
|
||||
void InfEngineBackendNet::Release() noexcept
|
||||
|
@ -131,6 +131,8 @@ private:
|
||||
InferenceEngine::InferencePlugin plugin;
|
||||
InferenceEngine::ExecutableNetwork netExec;
|
||||
InferenceEngine::InferRequest infRequest;
|
||||
// In case of models from Model Optimizer we need to manage their lifetime.
|
||||
InferenceEngine::CNNNetwork netOwner;
|
||||
|
||||
std::string name;
|
||||
|
||||
|
@ -782,6 +782,108 @@ void releaseTensor(tensorflow::TensorProto* tensor)
|
||||
}
|
||||
}
|
||||
|
||||
// Reorders *data in place using element swaps so that, for every i, position i
// ends up holding the element that was originally at index indices[i].
// indices must have as many entries as *data (asserted); it is assumed to be a
// permutation of 0..size-1 - the code does not verify that.
static void permute(google::protobuf::RepeatedPtrField<tensorflow::NodeDef>* data,
                    const std::vector<int>& indices)
{
    const int num = data->size();
    CV_Assert(num == indices.size());

    // whereIs[e] - current position of the element that started at index e.
    // whoIsAt[p] - original index of the element currently stored at position p.
    std::vector<int> whereIs(num);
    std::vector<int> whoIsAt(num);
    for (int k = 0; k < num; ++k)
    {
        whereIs[k] = k;
        whoIsAt[k] = k;
    }

    for (int dst = 0; dst < num; ++dst)
    {
        const int wanted = indices[dst];
        const int src = whereIs[wanted];
        if (src == dst)
            continue;  // already in place

        data->SwapElements(dst, src);

        // The element previously at dst has moved to src; update both lookup tables.
        const int displaced = whoIsAt[dst];
        whereIs[wanted] = dst;
        whereIs[displaced] = src;
        whoIsAt[dst] = wanted;
        whoIsAt[src] = displaced;
    }
}
|
||||
|
||||
// Is based on tensorflow::graph_transforms::SortByExecutionOrder
|
||||
void sortByExecutionOrder(tensorflow::GraphDef& net)
|
||||
{
|
||||
// Maps node's name to index at net.node() list.
|
||||
std::map<std::string, int> nodesMap;
|
||||
std::map<std::string, int>::iterator nodesMapIt;
|
||||
for (int i = 0; i < net.node_size(); ++i)
|
||||
{
|
||||
const tensorflow::NodeDef& node = net.node(i);
|
||||
nodesMap.insert(std::make_pair(node.name(), i));
|
||||
}
|
||||
|
||||
// Indices of nodes which use specific node as input.
|
||||
std::vector<std::vector<int> > edges(nodesMap.size());
|
||||
std::vector<int> numRefsToAdd(nodesMap.size(), 0);
|
||||
std::vector<int> nodesToAdd;
|
||||
for (int i = 0; i < net.node_size(); ++i)
|
||||
{
|
||||
const tensorflow::NodeDef& node = net.node(i);
|
||||
for (int j = 0; j < node.input_size(); ++j)
|
||||
{
|
||||
std::string inpName = node.input(j);
|
||||
inpName = inpName.substr(0, inpName.rfind(':'));
|
||||
inpName = inpName.substr(inpName.find('^') + 1);
|
||||
|
||||
nodesMapIt = nodesMap.find(inpName);
|
||||
CV_Assert(nodesMapIt != nodesMap.end());
|
||||
edges[nodesMapIt->second].push_back(i);
|
||||
}
|
||||
if (node.input_size() == 0)
|
||||
nodesToAdd.push_back(i);
|
||||
else
|
||||
{
|
||||
if (node.op() == "Merge" || node.op() == "RefMerge")
|
||||
{
|
||||
int numControlEdges = 0;
|
||||
for (int j = 0; j < node.input_size(); ++j)
|
||||
numControlEdges += node.input(j)[0] == '^';
|
||||
numRefsToAdd[i] = numControlEdges + 1;
|
||||
}
|
||||
else
|
||||
numRefsToAdd[i] = node.input_size();
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<int> permIds;
|
||||
permIds.reserve(net.node_size());
|
||||
while (!nodesToAdd.empty())
|
||||
{
|
||||
int nodeToAdd = nodesToAdd.back();
|
||||
nodesToAdd.pop_back();
|
||||
|
||||
permIds.push_back(nodeToAdd);
|
||||
// std::cout << net.node(nodeToAdd).name() << '\n';
|
||||
|
||||
for (int i = 0; i < edges[nodeToAdd].size(); ++i)
|
||||
{
|
||||
int consumerId = edges[nodeToAdd][i];
|
||||
if (numRefsToAdd[consumerId] > 0)
|
||||
{
|
||||
if (numRefsToAdd[consumerId] == 1)
|
||||
nodesToAdd.push_back(consumerId);
|
||||
else
|
||||
CV_Assert(numRefsToAdd[consumerId] >= 0);
|
||||
numRefsToAdd[consumerId] -= 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
CV_Assert(permIds.size() == net.node_size());
|
||||
permute(net.mutable_node(), permIds);
|
||||
}
|
||||
|
||||
CV__DNN_INLINE_NS_END
|
||||
}} // namespace dnn, namespace cv
|
||||
|
||||
|
@ -25,6 +25,8 @@ Mat getTensorContent(const tensorflow::TensorProto &tensor);
|
||||
|
||||
void releaseTensor(tensorflow::TensorProto* tensor);
|
||||
|
||||
void sortByExecutionOrder(tensorflow::GraphDef& net);
|
||||
|
||||
CV__DNN_INLINE_NS_END
|
||||
}} // namespace dnn, namespace cv
|
||||
|
||||
|
@ -1950,5 +1950,34 @@ Net readNetFromTensorflow(const std::vector<uchar>& bufferModel, const std::vect
|
||||
bufferConfigPtr, bufferConfig.size());
|
||||
}
|
||||
|
||||
void writeTextGraph(const String& _model, const String& output)
|
||||
{
|
||||
String model = _model;
|
||||
const std::string modelExt = model.substr(model.rfind('.') + 1);
|
||||
if (modelExt != "pb")
|
||||
CV_Error(Error::StsNotImplemented, "Only TensorFlow models support export to text file");
|
||||
|
||||
tensorflow::GraphDef net;
|
||||
ReadTFNetParamsFromBinaryFileOrDie(model.c_str(), &net);
|
||||
|
||||
sortByExecutionOrder(net);
|
||||
|
||||
RepeatedPtrField<tensorflow::NodeDef>::iterator it;
|
||||
for (it = net.mutable_node()->begin(); it != net.mutable_node()->end(); ++it)
|
||||
{
|
||||
if (it->op() == "Const")
|
||||
{
|
||||
it->mutable_attr()->at("value").mutable_tensor()->clear_tensor_content();
|
||||
}
|
||||
}
|
||||
|
||||
std::string content;
|
||||
google::protobuf::TextFormat::PrintToString(net, &content);
|
||||
|
||||
std::ofstream ofs(output.c_str());
|
||||
ofs << content;
|
||||
ofs.close();
|
||||
}
|
||||
|
||||
CV__DNN_INLINE_NS_END
|
||||
}} // namespace
|
||||
|
@ -161,7 +161,7 @@ TEST_P(DNNTestNetwork, MobileNet_SSD_v1_TensorFlow)
|
||||
if (backend == DNN_BACKEND_HALIDE)
|
||||
throw SkipTestException("");
|
||||
Mat sample = imread(findDataFile("dnn/street.png", false));
|
||||
Mat inp = blobFromImage(sample, 1.0f / 127.5, Size(300, 300), Scalar(127.5, 127.5, 127.5), false);
|
||||
Mat inp = blobFromImage(sample, 1.0f, Size(300, 300), Scalar(), false);
|
||||
float l1 = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.011 : 0.0;
|
||||
float lInf = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.06 : 0.0;
|
||||
processNet("dnn/ssd_mobilenet_v1_coco_2017_11_17.pb", "dnn/ssd_mobilenet_v1_coco_2017_11_17.pbtxt",
|
||||
@ -173,7 +173,7 @@ TEST_P(DNNTestNetwork, MobileNet_SSD_v2_TensorFlow)
|
||||
if (backend == DNN_BACKEND_HALIDE)
|
||||
throw SkipTestException("");
|
||||
Mat sample = imread(findDataFile("dnn/street.png", false));
|
||||
Mat inp = blobFromImage(sample, 1.0f / 127.5, Size(300, 300), Scalar(127.5, 127.5, 127.5), false);
|
||||
Mat inp = blobFromImage(sample, 1.0f, Size(300, 300), Scalar(), false);
|
||||
float l1 = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.011 : 0.0;
|
||||
float lInf = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.062 : 0.0;
|
||||
processNet("dnn/ssd_mobilenet_v2_coco_2018_03_29.pb", "dnn/ssd_mobilenet_v2_coco_2018_03_29.pbtxt",
|
||||
@ -247,8 +247,8 @@ TEST_P(DNNTestNetwork, Inception_v2_SSD_TensorFlow)
|
||||
if (backend == DNN_BACKEND_HALIDE)
|
||||
throw SkipTestException("");
|
||||
Mat sample = imread(findDataFile("dnn/street.png", false));
|
||||
Mat inp = blobFromImage(sample, 1.0f / 127.5, Size(300, 300), Scalar(127.5, 127.5, 127.5), false);
|
||||
float l1 = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.008 : 0.0;
|
||||
Mat inp = blobFromImage(sample, 1.0f, Size(300, 300), Scalar(), false);
|
||||
float l1 = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.015 : 0.0;
|
||||
float lInf = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.0731 : 0.0;
|
||||
processNet("dnn/ssd_inception_v2_coco_2017_11_17.pb", "dnn/ssd_inception_v2_coco_2017_11_17.pbtxt",
|
||||
inp, "detection_out", "", l1, lInf);
|
||||
@ -285,21 +285,6 @@ TEST_P(DNNTestNetwork, FastNeuralStyle_eccv16)
|
||||
processNet("dnn/fast_neural_style_eccv16_starry_night.t7", "", inp, "", "", l1, lInf);
|
||||
}
|
||||
|
||||
const tuple<Backend, Target> testCases[] = {
|
||||
#ifdef HAVE_HALIDE
|
||||
tuple<Backend, Target>(DNN_BACKEND_HALIDE, DNN_TARGET_CPU),
|
||||
tuple<Backend, Target>(DNN_BACKEND_HALIDE, DNN_TARGET_OPENCL),
|
||||
#endif
|
||||
#ifdef HAVE_INF_ENGINE
|
||||
tuple<Backend, Target>(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_CPU),
|
||||
tuple<Backend, Target>(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_OPENCL),
|
||||
tuple<Backend, Target>(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_OPENCL_FP16),
|
||||
tuple<Backend, Target>(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_MYRIAD),
|
||||
#endif
|
||||
tuple<Backend, Target>(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL),
|
||||
tuple<Backend, Target>(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL_FP16)
|
||||
};
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(/*nothing*/, DNNTestNetwork, testing::ValuesIn(testCases));
|
||||
INSTANTIATE_TEST_CASE_P(/*nothing*/, DNNTestNetwork, dnnBackendsAndTargets(true, true, false));
|
||||
|
||||
}} // namespace
|
||||
|
@ -417,7 +417,7 @@ TEST_P(Test_Caffe_nets, DenseNet_121)
|
||||
float l1 = default_l1, lInf = default_lInf;
|
||||
if (target == DNN_TARGET_OPENCL_FP16)
|
||||
{
|
||||
l1 = 0.017; lInf = 0.067;
|
||||
l1 = 0.017; lInf = 0.0795;
|
||||
}
|
||||
else if (target == DNN_TARGET_MYRIAD)
|
||||
{
|
||||
@ -490,8 +490,7 @@ INSTANTIATE_TEST_CASE_P(Test_Caffe, opencv_face_detector,
|
||||
|
||||
TEST_P(Test_Caffe_nets, FasterRCNN_vgg16)
|
||||
{
|
||||
if ((backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD) ||
|
||||
(backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16))
|
||||
if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD)
|
||||
throw SkipTestException("");
|
||||
static Mat ref = (Mat_<float>(3, 7) << 0, 2, 0.949398, 99.2454, 210.141, 601.205, 462.849,
|
||||
0, 7, 0.997022, 481.841, 92.3218, 722.685, 175.953,
|
||||
@ -502,8 +501,7 @@ TEST_P(Test_Caffe_nets, FasterRCNN_vgg16)
|
||||
TEST_P(Test_Caffe_nets, FasterRCNN_zf)
|
||||
{
|
||||
if ((backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_OPENCL_FP16) ||
|
||||
(backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD) ||
|
||||
(backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16))
|
||||
(backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD))
|
||||
throw SkipTestException("");
|
||||
static Mat ref = (Mat_<float>(3, 7) << 0, 2, 0.90121, 120.407, 115.83, 570.586, 528.395,
|
||||
0, 7, 0.988779, 469.849, 75.1756, 718.64, 186.762,
|
||||
@ -514,12 +512,13 @@ TEST_P(Test_Caffe_nets, FasterRCNN_zf)
|
||||
TEST_P(Test_Caffe_nets, RFCN)
|
||||
{
|
||||
if ((backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_OPENCL_FP16) ||
|
||||
(backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD) ||
|
||||
(backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16))
|
||||
(backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD))
|
||||
throw SkipTestException("");
|
||||
double scoreDiff = (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16) ? 4e-3 : default_l1;
|
||||
double iouDiff = (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16) ? 8e-2 : default_lInf;
|
||||
static Mat ref = (Mat_<float>(2, 7) << 0, 7, 0.991359, 491.822, 81.1668, 702.573, 178.234,
|
||||
0, 12, 0.94786, 132.093, 223.903, 338.077, 566.16);
|
||||
testFaster("rfcn_pascal_voc_resnet50.prototxt", "resnet50_rfcn_final.caffemodel", ref);
|
||||
testFaster("rfcn_pascal_voc_resnet50.prototxt", "resnet50_rfcn_final.caffemodel", ref, scoreDiff, iouDiff);
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(/**/, Test_Caffe_nets, dnnBackendsAndTargets());
|
||||
|
@ -42,6 +42,47 @@
|
||||
#ifndef __OPENCV_TEST_COMMON_HPP__
|
||||
#define __OPENCV_TEST_COMMON_HPP__
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
#include "opencv2/core/ocl.hpp"
|
||||
#endif
|
||||
|
||||
namespace cv { namespace dnn {
|
||||
CV__DNN_INLINE_NS_BEGIN
|
||||
// Writes a short tag ("DEFAULT", "HALIDE", "DLIE", "OCV") for a
// cv::dnn::Backend value to *os; presumably picked up by googletest when it
// prints test parameters.
static inline void PrintTo(const cv::dnn::Backend& v, std::ostream* os)
|
||||
{
|
||||
switch (v) {
|
||||
case DNN_BACKEND_DEFAULT: *os << "DEFAULT"; return;
|
||||
case DNN_BACKEND_HALIDE: *os << "HALIDE"; return;
|
||||
case DNN_BACKEND_INFERENCE_ENGINE: *os << "DLIE"; return;
|
||||
case DNN_BACKEND_OPENCV: *os << "OCV"; return;
|
||||
} // don't use "default:" to emit compiler warnings
|
||||
// Reached only when v matched none of the cases above.
*os << "DNN_BACKEND_UNKNOWN(" << v << ")";
|
||||
}
|
||||
|
||||
// Writes a short tag ("CPU", "OCL", "OCL_FP16", "MYRIAD") for a
// cv::dnn::Target value to *os; presumably picked up by googletest when it
// prints test parameters.
static inline void PrintTo(const cv::dnn::Target& v, std::ostream* os)
|
||||
{
|
||||
switch (v) {
|
||||
case DNN_TARGET_CPU: *os << "CPU"; return;
|
||||
case DNN_TARGET_OPENCL: *os << "OCL"; return;
|
||||
case DNN_TARGET_OPENCL_FP16: *os << "OCL_FP16"; return;
|
||||
case DNN_TARGET_MYRIAD: *os << "MYRIAD"; return;
|
||||
} // don't use "default:" to emit compiler warnings
|
||||
// Reached only when v matched none of the cases above.
*os << "DNN_TARGET_UNKNOWN(" << v << ")";
|
||||
}
|
||||
|
||||
using opencv_test::tuple;
|
||||
using opencv_test::get;
|
||||
// Prints a (backend, target) pair as "<backend>/<target>" by calling PrintTo
// on each tuple element in turn.
static inline void PrintTo(const tuple<cv::dnn::Backend, cv::dnn::Target> v, std::ostream* os)
|
||||
{
|
||||
PrintTo(get<0>(v), os);
|
||||
*os << "/";
|
||||
PrintTo(get<1>(v), os);
|
||||
}
|
||||
|
||||
CV__DNN_INLINE_NS_END
|
||||
}} // namespace
|
||||
|
||||
|
||||
static inline const std::string &getOpenCVExtraDir()
|
||||
{
|
||||
return cvtest::TS::ptr()->get_data_path();
|
||||
@ -190,4 +231,54 @@ static inline bool readFileInMemory(const std::string& filename, std::string& co
|
||||
return true;
|
||||
}
|
||||
|
||||
namespace opencv_test {
|
||||
|
||||
using namespace cv::dnn;
|
||||
|
||||
// Builds the list of (backend, target) pairs used to parameterize tests.
// Unlike a fixed table, the list is assembled at run time: OpenCL targets are
// added only when cv::ocl::useOpenCL() returns true, and the Myriad target only
// when checkMyriadTarget() returns true.
//
// withInferenceEngine - include Inference Engine pairs (only if HAVE_INF_ENGINE is defined)
// withHalide          - include Halide pairs (only if HAVE_HALIDE is defined)
// withCpuOCV          - include the OpenCV backend with the CPU target
//
// The OpenCV backend's OpenCL and OpenCL_FP16 targets are not controlled by
// any of the flags; they are added whenever OpenCL is compiled in and usable.
static testing::internal::ParamGenerator<tuple<Backend, Target> > dnnBackendsAndTargets(
|
||||
bool withInferenceEngine = true,
|
||||
bool withHalide = false,
|
||||
bool withCpuOCV = true
|
||||
)
|
||||
{
|
||||
std::vector<tuple<Backend, Target> > targets;
|
||||
#ifdef HAVE_HALIDE
|
||||
if (withHalide)
|
||||
{
|
||||
targets.push_back(make_tuple(DNN_BACKEND_HALIDE, DNN_TARGET_CPU));
|
||||
#ifdef HAVE_OPENCL
|
||||
if (cv::ocl::useOpenCL())
|
||||
targets.push_back(make_tuple(DNN_BACKEND_HALIDE, DNN_TARGET_OPENCL));
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
#ifdef HAVE_INF_ENGINE
|
||||
if (withInferenceEngine)
|
||||
{
|
||||
targets.push_back(make_tuple(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_CPU));
|
||||
#ifdef HAVE_OPENCL
|
||||
if (cv::ocl::useOpenCL())
|
||||
{
|
||||
targets.push_back(make_tuple(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_OPENCL));
|
||||
targets.push_back(make_tuple(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_OPENCL_FP16));
|
||||
}
|
||||
#endif
|
||||
// The Myriad check sits outside the HAVE_OPENCL section, so it does not depend on OpenCL.
if (checkMyriadTarget())
|
||||
targets.push_back(make_tuple(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_MYRIAD));
|
||||
}
|
||||
#endif
|
||||
if (withCpuOCV)
|
||||
targets.push_back(make_tuple(DNN_BACKEND_OPENCV, DNN_TARGET_CPU));
|
||||
#ifdef HAVE_OPENCL
|
||||
if (cv::ocl::useOpenCL())
|
||||
{
|
||||
targets.push_back(make_tuple(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL));
|
||||
targets.push_back(make_tuple(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL_FP16));
|
||||
}
|
||||
#endif
|
||||
return testing::ValuesIn(targets);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
#endif
|
||||
|
@ -44,23 +44,9 @@ static void test(LayerParams& params, Mat& input, Backend backendId, Target targ
|
||||
test(input, net, backendId, targetId, skipCheck);
|
||||
}
|
||||
|
||||
static testing::internal::ParamGenerator<tuple<Backend, Target> > dnnBackendsAndTargetsWithHalide()
|
||||
static inline testing::internal::ParamGenerator<tuple<Backend, Target> > dnnBackendsAndTargetsWithHalide()
|
||||
{
|
||||
static const tuple<Backend, Target> testCases[] = {
|
||||
#ifdef HAVE_HALIDE
|
||||
tuple<Backend, Target>(DNN_BACKEND_HALIDE, DNN_TARGET_CPU),
|
||||
tuple<Backend, Target>(DNN_BACKEND_HALIDE, DNN_TARGET_OPENCL),
|
||||
#endif
|
||||
#ifdef HAVE_INF_ENGINE
|
||||
tuple<Backend, Target>(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_CPU),
|
||||
tuple<Backend, Target>(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_OPENCL),
|
||||
tuple<Backend, Target>(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_OPENCL_FP16),
|
||||
tuple<Backend, Target>(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_MYRIAD),
|
||||
#endif
|
||||
tuple<Backend, Target>(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL),
|
||||
tuple<Backend, Target>(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL_FP16)
|
||||
};
|
||||
return testing::ValuesIn(testCases);
|
||||
return dnnBackendsAndTargets(true, true, false); // OpenCV/CPU is used as reference
|
||||
}
|
||||
|
||||
// Test fixture for the TEST_P cases registered under this name; adds nothing to DNNTestLayer.
class Test_Halide_layers : public DNNTestLayer {};
|
||||
|
@ -177,10 +177,6 @@ TEST_P(DNNTestOpenVINO, models)
|
||||
Target target = (dnn::Target)(int)get<0>(GetParam());
|
||||
std::string modelName = get<1>(GetParam());
|
||||
|
||||
if ((modelName == "semantic-segmentation-adas-0001" && target == DNN_TARGET_OPENCL_FP16) ||
|
||||
(modelName == "vehicle-license-plate-detection-barrier-0106"))
|
||||
throw SkipTestException("");
|
||||
|
||||
std::string precision = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? "FP16" : "FP32";
|
||||
std::string prefix = utils::fs::join("intel_models",
|
||||
utils::fs::join(modelName,
|
||||
|
@ -49,35 +49,6 @@
|
||||
#include "opencv2/dnn.hpp"
|
||||
#include "test_common.hpp"
|
||||
|
||||
namespace cv {
|
||||
namespace dnn {
|
||||
CV__DNN_INLINE_NS_BEGIN
|
||||
|
||||
// Writes the full enumerator name of a cv::dnn::Backend value to *os, or
// "DNN_BACKEND_UNKNOWN(<value>)" when the value matches no known enumerator.
static inline void PrintTo(const cv::dnn::Backend& v, std::ostream* os)
|
||||
{
|
||||
switch (v) {
|
||||
case DNN_BACKEND_DEFAULT: *os << "DNN_BACKEND_DEFAULT"; return;
|
||||
case DNN_BACKEND_HALIDE: *os << "DNN_BACKEND_HALIDE"; return;
|
||||
case DNN_BACKEND_INFERENCE_ENGINE: *os << "DNN_BACKEND_INFERENCE_ENGINE"; return;
|
||||
case DNN_BACKEND_OPENCV: *os << "DNN_BACKEND_OPENCV"; return;
|
||||
} // don't use "default:" to emit compiler warnings
|
||||
*os << "DNN_BACKEND_UNKNOWN(" << v << ")";
|
||||
}
|
||||
|
||||
// Writes the full enumerator name of a cv::dnn::Target value to *os, or
// "DNN_TARGET_UNKNOWN(<value>)" when the value matches no known enumerator.
static inline void PrintTo(const cv::dnn::Target& v, std::ostream* os)
|
||||
{
|
||||
switch (v) {
|
||||
case DNN_TARGET_CPU: *os << "DNN_TARGET_CPU"; return;
|
||||
case DNN_TARGET_OPENCL: *os << "DNN_TARGET_OPENCL"; return;
|
||||
case DNN_TARGET_OPENCL_FP16: *os << "DNN_TARGET_OPENCL_FP16"; return;
|
||||
case DNN_TARGET_MYRIAD: *os << "DNN_TARGET_MYRIAD"; return;
|
||||
} // don't use "default:" to emit compiler warnings
|
||||
*os << "DNN_TARGET_UNKNOWN(" << v << ")";
|
||||
}
|
||||
|
||||
CV__DNN_INLINE_NS_END
|
||||
}} // namespace
|
||||
|
||||
namespace opencv_test {
|
||||
using namespace cv::dnn;
|
||||
|
||||
@ -95,22 +66,6 @@ static testing::internal::ParamGenerator<Target> availableDnnTargets()
|
||||
return testing::ValuesIn(targets);
|
||||
}
|
||||
|
||||
// Returns a parameter generator over a fixed table of (backend, target) pairs.
// The table is decided at compile time only: Inference Engine pairs depend on
// HAVE_INF_ENGINE, and no run-time availability check is made for any target.
static testing::internal::ParamGenerator<tuple<Backend, Target> > dnnBackendsAndTargets()
|
||||
{
|
||||
static const tuple<Backend, Target> testCases[] = {
|
||||
#ifdef HAVE_INF_ENGINE
|
||||
tuple<Backend, Target>(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_CPU),
|
||||
tuple<Backend, Target>(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_OPENCL),
|
||||
tuple<Backend, Target>(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_OPENCL_FP16),
|
||||
tuple<Backend, Target>(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_MYRIAD),
|
||||
#endif
|
||||
tuple<Backend, Target>(DNN_BACKEND_OPENCV, DNN_TARGET_CPU),
|
||||
tuple<Backend, Target>(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL),
|
||||
tuple<Backend, Target>(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL_FP16)
|
||||
};
|
||||
return testing::ValuesIn(testCases);
|
||||
}
|
||||
|
||||
class DNNTestLayer : public TestWithParam<tuple<Backend, Target> >
|
||||
{
|
||||
public:
|
||||
|
@ -296,7 +296,7 @@ TEST_P(Test_TensorFlow_nets, Inception_v2_SSD)
|
||||
|
||||
Net net = readNetFromTensorflow(model, proto);
|
||||
Mat img = imread(findDataFile("dnn/street.png", false));
|
||||
Mat blob = blobFromImage(img, 1.0f / 127.5, Size(300, 300), Scalar(127.5, 127.5, 127.5), true, false);
|
||||
Mat blob = blobFromImage(img, 1.0f, Size(300, 300), Scalar(), true, false);
|
||||
|
||||
net.setPreferableBackend(backend);
|
||||
net.setPreferableTarget(target);
|
||||
@ -310,32 +310,61 @@ TEST_P(Test_TensorFlow_nets, Inception_v2_SSD)
|
||||
0, 3, 0.75838411, 0.44668293, 0.45907149, 0.49459291, 0.52197015,
|
||||
0, 10, 0.95932811, 0.38349164, 0.32528657, 0.40387636, 0.39165527,
|
||||
0, 10, 0.93973452, 0.66561931, 0.37841269, 0.68074018, 0.42907384);
|
||||
double scoreDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 5e-3 : default_l1;
|
||||
double scoreDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.0097 : default_l1;
|
||||
double iouDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.09 : default_lInf;
|
||||
normAssertDetections(ref, out, "", 0.5, scoreDiff, iouDiff);
|
||||
}
|
||||
|
||||
TEST_P(Test_TensorFlow_nets, Inception_v2_Faster_RCNN)
|
||||
// Runs the ssd_mobilenet_v1_coco_2017_11_17 TensorFlow model on dnn/dog416.png
// with the parameterized backend/target and compares the network output with
// the stored reference detections.
TEST_P(Test_TensorFlow_nets, MobileNet_v1_SSD)
|
||||
{
|
||||
checkBackend();
|
||||
|
||||
std::string model = findDataFile("dnn/ssd_mobilenet_v1_coco_2017_11_17.pb", false);
|
||||
std::string proto = findDataFile("dnn/ssd_mobilenet_v1_coco_2017_11_17.pbtxt", false);
|
||||
|
||||
Net net = readNetFromTensorflow(model, proto);
|
||||
Mat img = imread(findDataFile("dnn/dog416.png", false));
|
||||
// Scale factor 1.0 and an empty mean, resized to 300x300: no normalization is
// applied here. NOTE(review): presumably the .pbtxt graph handles
// preprocessing itself — confirm.
Mat blob = blobFromImage(img, 1.0f, Size(300, 300), Scalar(), true, false);
|
||||
|
||||
net.setPreferableBackend(backend);
|
||||
net.setPreferableTarget(target);
|
||||
|
||||
net.setInput(blob);
|
||||
Mat out = net.forward();
|
||||
|
||||
Mat ref = blobFromNPY(findDataFile("dnn/tensorflow/ssd_mobilenet_v1_coco_2017_11_17.detection_out.npy"));
|
||||
// Looser tolerances for the OpenCL FP16 and Myriad targets than for the others.
float scoreDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 7e-3 : 1e-5;
|
||||
float iouDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.0098 : 1e-3;
|
||||
// 0.3 is presumably the confidence threshold — verify against normAssertDetections.
normAssertDetections(ref, out, "", 0.3, scoreDiff, iouDiff);
|
||||
}
|
||||
|
||||
TEST_P(Test_TensorFlow_nets, Faster_RCNN)
|
||||
{
|
||||
static std::string names[] = {"faster_rcnn_inception_v2_coco_2018_01_28",
|
||||
"faster_rcnn_resnet50_coco_2018_01_28"};
|
||||
|
||||
checkBackend();
|
||||
if ((backend == DNN_BACKEND_INFERENCE_ENGINE && target != DNN_TARGET_CPU) ||
|
||||
(backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16))
|
||||
throw SkipTestException("");
|
||||
|
||||
std::string proto = findDataFile("dnn/faster_rcnn_inception_v2_coco_2018_01_28.pbtxt", false);
|
||||
std::string model = findDataFile("dnn/faster_rcnn_inception_v2_coco_2018_01_28.pb", false);
|
||||
for (int i = 1; i < 2; ++i)
|
||||
{
|
||||
std::string proto = findDataFile("dnn/" + names[i] + ".pbtxt", false);
|
||||
std::string model = findDataFile("dnn/" + names[i] + ".pb", false);
|
||||
|
||||
Net net = readNetFromTensorflow(model, proto);
|
||||
net.setPreferableBackend(backend);
|
||||
net.setPreferableTarget(target);
|
||||
Mat img = imread(findDataFile("dnn/dog416.png", false));
|
||||
Mat blob = blobFromImage(img, 1.0f / 127.5, Size(800, 600), Scalar(127.5, 127.5, 127.5), true, false);
|
||||
Net net = readNetFromTensorflow(model, proto);
|
||||
net.setPreferableBackend(backend);
|
||||
net.setPreferableTarget(target);
|
||||
Mat img = imread(findDataFile("dnn/dog416.png", false));
|
||||
Mat blob = blobFromImage(img, 1.0f, Size(800, 600), Scalar(), true, false);
|
||||
|
||||
net.setInput(blob);
|
||||
Mat out = net.forward();
|
||||
net.setInput(blob);
|
||||
Mat out = net.forward();
|
||||
|
||||
Mat ref = blobFromNPY(findDataFile("dnn/tensorflow/faster_rcnn_inception_v2_coco_2018_01_28.detection_out.npy"));
|
||||
normAssertDetections(ref, out, "", 0.3);
|
||||
Mat ref = blobFromNPY(findDataFile("dnn/tensorflow/" + names[i] + ".detection_out.npy"));
|
||||
normAssertDetections(ref, out, names[i].c_str(), 0.3);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_P(Test_TensorFlow_nets, MobileNet_v1_SSD_PPN)
|
||||
@ -347,15 +376,17 @@ TEST_P(Test_TensorFlow_nets, MobileNet_v1_SSD_PPN)
|
||||
Net net = readNetFromTensorflow(model, proto);
|
||||
Mat img = imread(findDataFile("dnn/dog416.png", false));
|
||||
Mat ref = blobFromNPY(findDataFile("dnn/tensorflow/ssd_mobilenet_v1_ppn_coco.detection_out.npy", false));
|
||||
Mat blob = blobFromImage(img, 1.0f / 127.5, Size(300, 300), Scalar(127.5, 127.5, 127.5), true, false);
|
||||
Mat blob = blobFromImage(img, 1.0f, Size(300, 300), Scalar(), true, false);
|
||||
|
||||
net.setPreferableBackend(backend);
|
||||
net.setPreferableTarget(target);
|
||||
|
||||
net.setInput(blob);
|
||||
Mat out = net.forward();
|
||||
double scoreDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.006 : default_l1;
|
||||
normAssertDetections(ref, out, "", 0.4, scoreDiff, default_lInf);
|
||||
|
||||
double scoreDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.011 : default_l1;
|
||||
double iouDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.021 : default_lInf;
|
||||
normAssertDetections(ref, out, "", 0.4, scoreDiff, iouDiff);
|
||||
}
|
||||
|
||||
TEST_P(Test_TensorFlow_nets, opencv_face_detector_uint8)
|
||||
|
@ -301,14 +301,14 @@ TEST_P(Test_Torch_nets, ENet_accuracy)
|
||||
// Due to numerical instability in Pooling-Unpooling layers (indexes jittering)
|
||||
// thresholds for ENet must be changed. Accuracy of results was checked on
|
||||
// Cityscapes dataset and difference in mIOU with Torch is 10E-4%
|
||||
normAssert(ref, out, "", 0.00044, 0.44);
|
||||
normAssert(ref, out, "", 0.00044, target == DNN_TARGET_CPU ? 0.453 : 0.44);
|
||||
|
||||
const int N = 3;
|
||||
for (int i = 0; i < N; i++)
|
||||
{
|
||||
net.setInput(inputBlob, "");
|
||||
Mat out = net.forward();
|
||||
normAssert(ref, out, "", 0.00044, 0.44);
|
||||
normAssert(ref, out, "", 0.00044, target == DNN_TARGET_CPU ? 0.453 : 0.44);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -54,15 +54,21 @@
|
||||
|
||||
#include "opencv2/imgproc.hpp"
|
||||
|
||||
const size_t WEBP_HEADER_SIZE = 32;
|
||||
#include <opencv2/core/utils/configuration.private.hpp>
|
||||
|
||||
namespace cv
|
||||
{
|
||||
|
||||
// 64 MiB file size limit to guard against memory exhaustion (DoS) from oversized inputs
|
||||
static size_t param_maxFileSize = utils::getConfigurationParameterSizeT("OPENCV_IMGCODECS_WEBP_MAX_FILE_SIZE", 64*1024*1024);
|
||||
|
||||
static const size_t WEBP_HEADER_SIZE = 32;
|
||||
|
||||
// Sets the decoder's initial state: the channel count and cached file size
// start at zero.
WebPDecoder::WebPDecoder()
|
||||
{
|
||||
// presumably advertises that this decoder can read from an in-memory buffer — confirm against the base decoder
m_buf_supported = true;
|
||||
channels = 0;
|
||||
fs_size = 0;
|
||||
}
|
||||
|
||||
// Empty destructor body: no explicit cleanup is performed here.
WebPDecoder::~WebPDecoder() {}
|
||||
@ -96,48 +102,29 @@ ImageDecoder WebPDecoder::newDecoder() const
|
||||
|
||||
bool WebPDecoder::readHeader()
|
||||
{
|
||||
uint8_t header[WEBP_HEADER_SIZE] = { 0 };
|
||||
if (m_buf.empty())
|
||||
{
|
||||
FILE * wfile = NULL;
|
||||
fs.open(m_filename.c_str(), std::ios::binary);
|
||||
fs.seekg(0, std::ios::end);
|
||||
fs_size = safeCastToSizeT(fs.tellg(), "File is too large");
|
||||
fs.seekg(0, std::ios::beg);
|
||||
CV_Assert(fs && "File stream error");
|
||||
CV_CheckGE(fs_size, WEBP_HEADER_SIZE, "File is too small");
|
||||
CV_CheckLE(fs_size, param_maxFileSize, "File is too large. Increase OPENCV_IMGCODECS_WEBP_MAX_FILE_SIZE parameter if you want to process large files");
|
||||
|
||||
wfile = fopen(m_filename.c_str(), "rb");
|
||||
|
||||
if(wfile == NULL)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
fseek(wfile, 0, SEEK_END);
|
||||
long int wfile_size = ftell(wfile);
|
||||
fseek(wfile, 0, SEEK_SET);
|
||||
|
||||
if(wfile_size > static_cast<long int>(INT_MAX))
|
||||
{
|
||||
fclose(wfile);
|
||||
return false;
|
||||
}
|
||||
|
||||
data.create(1, (int)wfile_size, CV_8U);
|
||||
|
||||
size_t data_size = fread(data.ptr(), 1, wfile_size, wfile);
|
||||
|
||||
if(wfile)
|
||||
{
|
||||
fclose(wfile);
|
||||
}
|
||||
|
||||
if(static_cast<long int>(data_size) != wfile_size)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
fs.read((char*)header, sizeof(header));
|
||||
CV_Assert(fs && "Can't read WEBP_HEADER_SIZE bytes");
|
||||
}
|
||||
else
|
||||
{
|
||||
CV_CheckGE(m_buf.total(), WEBP_HEADER_SIZE, "Buffer is too small");
|
||||
memcpy(header, m_buf.ptr(), sizeof(header));
|
||||
data = m_buf;
|
||||
}
|
||||
|
||||
WebPBitstreamFeatures features;
|
||||
if(VP8_STATUS_OK == WebPGetFeatures(data.ptr(), WEBP_HEADER_SIZE, &features))
|
||||
if (VP8_STATUS_OK == WebPGetFeatures(header, sizeof(header), &features))
|
||||
{
|
||||
m_width = features.width;
|
||||
m_height = features.height;
|
||||
@ -161,41 +148,75 @@ bool WebPDecoder::readHeader()
|
||||
|
||||
bool WebPDecoder::readData(Mat &img)
|
||||
{
|
||||
if( m_width > 0 && m_height > 0 )
|
||||
{
|
||||
bool convert_grayscale = (img.type() == CV_8UC1); // IMREAD_GRAYSCALE requested
|
||||
CV_CheckGE(m_width, 0, ""); CV_CheckGE(m_height, 0, "");
|
||||
|
||||
if (img.cols != m_width || img.rows != m_height || img.type() != m_type)
|
||||
CV_CheckEQ(img.cols, m_width, "");
|
||||
CV_CheckEQ(img.rows, m_height, "");
|
||||
|
||||
if (m_buf.empty())
|
||||
{
|
||||
fs.seekg(0, std::ios::beg); CV_Assert(fs && "File stream error");
|
||||
data.create(1, validateToInt(fs_size), CV_8UC1);
|
||||
fs.read((char*)data.ptr(), fs_size);
|
||||
CV_Assert(fs && "Can't read file data");
|
||||
fs.close();
|
||||
}
|
||||
CV_Assert(data.type() == CV_8UC1); CV_Assert(data.rows == 1);
|
||||
|
||||
{
|
||||
Mat read_img;
|
||||
CV_CheckType(img.type(), img.type() == CV_8UC1 || img.type() == CV_8UC3 || img.type() == CV_8UC4, "");
|
||||
if (img.type() != m_type)
|
||||
{
|
||||
img.create(m_height, m_width, m_type);
|
||||
read_img.create(m_height, m_width, m_type);
|
||||
}
|
||||
else
|
||||
{
|
||||
read_img = img; // copy header
|
||||
}
|
||||
|
||||
uchar* out_data = img.ptr();
|
||||
size_t out_data_size = img.cols * img.rows * img.elemSize();
|
||||
uchar* out_data = read_img.ptr();
|
||||
size_t out_data_size = read_img.dataend - out_data;
|
||||
|
||||
uchar *res_ptr = 0;
|
||||
uchar *res_ptr = NULL;
|
||||
if (channels == 3)
|
||||
{
|
||||
CV_CheckTypeEQ(read_img.type(), CV_8UC3, "");
|
||||
res_ptr = WebPDecodeBGRInto(data.ptr(), data.total(), out_data,
|
||||
(int)out_data_size, (int)img.step);
|
||||
(int)out_data_size, (int)read_img.step);
|
||||
}
|
||||
else if (channels == 4)
|
||||
{
|
||||
CV_CheckTypeEQ(read_img.type(), CV_8UC4, "");
|
||||
res_ptr = WebPDecodeBGRAInto(data.ptr(), data.total(), out_data,
|
||||
(int)out_data_size, (int)img.step);
|
||||
(int)out_data_size, (int)read_img.step);
|
||||
}
|
||||
|
||||
if(res_ptr == out_data)
|
||||
if (res_ptr != out_data)
|
||||
return false;
|
||||
|
||||
if (read_img.data == img.data && img.type() == m_type)
|
||||
{
|
||||
if (convert_grayscale)
|
||||
{
|
||||
cvtColor(img, img, COLOR_BGR2GRAY);
|
||||
}
|
||||
return true;
|
||||
// nothing
|
||||
}
|
||||
else if (img.type() == CV_8UC1)
|
||||
{
|
||||
cvtColor(read_img, img, COLOR_BGR2GRAY);
|
||||
}
|
||||
else if (img.type() == CV_8UC3 && m_type == CV_8UC4)
|
||||
{
|
||||
cvtColor(read_img, img, COLOR_BGRA2BGR);
|
||||
}
|
||||
else if (img.type() == CV_8UC3 && m_type == CV_8UC4)
|
||||
{
|
||||
cvtColor(read_img, img, COLOR_BGRA2BGR);
|
||||
}
|
||||
else
|
||||
{
|
||||
CV_Error(Error::StsInternal, "");
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
WebPEncoder::WebPEncoder()
|
||||
@ -213,12 +234,9 @@ ImageEncoder WebPEncoder::newEncoder() const
|
||||
|
||||
bool WebPEncoder::write(const Mat& img, const std::vector<int>& params)
|
||||
{
|
||||
int channels = img.channels(), depth = img.depth();
|
||||
int width = img.cols, height = img.rows;
|
||||
CV_CheckDepthEQ(img.depth(), CV_8U, "WebP codec supports 8U images only");
|
||||
|
||||
const Mat *image = &img;
|
||||
Mat temp;
|
||||
size_t size = 0;
|
||||
const int width = img.cols, height = img.rows;
|
||||
|
||||
bool comp_lossless = true;
|
||||
float quality = 100.0f;
|
||||
@ -240,69 +258,64 @@ bool WebPEncoder::write(const Mat& img, const std::vector<int>& params)
|
||||
}
|
||||
}
|
||||
|
||||
uint8_t *out = NULL;
|
||||
int channels = img.channels();
|
||||
CV_Check(channels, channels == 1 || channels == 3 || channels == 4, "");
|
||||
|
||||
if(depth != CV_8U)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
const Mat *image = &img;
|
||||
Mat temp;
|
||||
|
||||
if(channels == 1)
|
||||
if (channels == 1)
|
||||
{
|
||||
cvtColor(*image, temp, CV_GRAY2BGR);
|
||||
image = &temp;
|
||||
channels = 3;
|
||||
}
|
||||
else if (channels == 2)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
uint8_t *out = NULL;
|
||||
size_t size = 0;
|
||||
if (comp_lossless)
|
||||
{
|
||||
if(channels == 3)
|
||||
if (channels == 3)
|
||||
{
|
||||
size = WebPEncodeLosslessBGR(image->ptr(), width, height, (int)image->step, &out);
|
||||
}
|
||||
else if(channels == 4)
|
||||
else if (channels == 4)
|
||||
{
|
||||
size = WebPEncodeLosslessBGRA(image->ptr(), width, height, (int)image->step, &out);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if(channels == 3)
|
||||
if (channels == 3)
|
||||
{
|
||||
size = WebPEncodeBGR(image->ptr(), width, height, (int)image->step, quality, &out);
|
||||
}
|
||||
else if(channels == 4)
|
||||
else if (channels == 4)
|
||||
{
|
||||
size = WebPEncodeBGRA(image->ptr(), width, height, (int)image->step, quality, &out);
|
||||
}
|
||||
}
|
||||
#if WEBP_DECODER_ABI_VERSION >= 0x0206
|
||||
Ptr<uint8_t> out_cleaner(out, WebPFree);
|
||||
#else
|
||||
Ptr<uint8_t> out_cleaner(out, free);
|
||||
#endif
|
||||
|
||||
if(size > 0)
|
||||
CV_Assert(size > 0);
|
||||
|
||||
if (m_buf)
|
||||
{
|
||||
if(m_buf)
|
||||
{
|
||||
m_buf->resize(size);
|
||||
memcpy(&(*m_buf)[0], out, size);
|
||||
}
|
||||
else
|
||||
{
|
||||
FILE *fd = fopen(m_filename.c_str(), "wb");
|
||||
if(fd != NULL)
|
||||
{
|
||||
fwrite(out, size, sizeof(uint8_t), fd);
|
||||
fclose(fd); fd = NULL;
|
||||
}
|
||||
}
|
||||
m_buf->resize(size);
|
||||
memcpy(&(*m_buf)[0], out, size);
|
||||
}
|
||||
|
||||
if (out != NULL)
|
||||
else
|
||||
{
|
||||
free(out);
|
||||
out = NULL;
|
||||
FILE *fd = fopen(m_filename.c_str(), "wb");
|
||||
if (fd != NULL)
|
||||
{
|
||||
fwrite(out, size, sizeof(uint8_t), fd);
|
||||
fclose(fd); fd = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
return size > 0;
|
||||
|
@ -47,7 +47,7 @@
|
||||
|
||||
#ifdef HAVE_WEBP
|
||||
|
||||
|
||||
#include <fstream>
|
||||
|
||||
namespace cv
|
||||
{
|
||||
@ -61,7 +61,6 @@ public:
|
||||
|
||||
bool readData( Mat& img ) CV_OVERRIDE;
|
||||
bool readHeader() CV_OVERRIDE;
|
||||
void close();
|
||||
|
||||
size_t signatureLength() const CV_OVERRIDE;
|
||||
bool checkSignature( const String& signature) const CV_OVERRIDE;
|
||||
@ -69,6 +68,8 @@ public:
|
||||
ImageDecoder newDecoder() const CV_OVERRIDE;
|
||||
|
||||
protected:
|
||||
std::ifstream fs;
|
||||
size_t fs_size;
|
||||
Mat data;
|
||||
int channels;
|
||||
};
|
||||
|
@ -400,6 +400,8 @@ static void ApplyExifOrientation(const Mat& buf, Mat& img)
|
||||
static void*
|
||||
imread_( const String& filename, int flags, int hdrtype, Mat* mat=0 )
|
||||
{
|
||||
CV_Assert(mat || hdrtype != LOAD_MAT); // mat is required in LOAD_MAT case
|
||||
|
||||
IplImage* image = 0;
|
||||
CvMat *matrix = 0;
|
||||
Mat temp, *data = &temp;
|
||||
@ -711,11 +713,22 @@ static bool imwrite_( const String& filename, const std::vector<Mat>& img_vec,
|
||||
|
||||
encoder->setDestination( filename );
|
||||
CV_Assert(params.size() <= CV_IO_MAX_IMAGE_PARAMS*2);
|
||||
bool code;
|
||||
if (!isMultiImg)
|
||||
code = encoder->write( write_vec[0], params );
|
||||
else
|
||||
code = encoder->writemulti( write_vec, params ); //to be implemented
|
||||
bool code = false;
|
||||
try
|
||||
{
|
||||
if (!isMultiImg)
|
||||
code = encoder->write( write_vec[0], params );
|
||||
else
|
||||
code = encoder->writemulti( write_vec, params ); //to be implemented
|
||||
}
|
||||
catch (const cv::Exception& e)
|
||||
{
|
||||
std::cerr << "imwrite_('" << filename << "'): can't write data: " << e.what() << std::endl << std::flush;
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
std::cerr << "imwrite_('" << filename << "'): can't write data: unknown exception" << std::endl << std::flush;
|
||||
}
|
||||
|
||||
// CV_Assert( code );
|
||||
return code;
|
||||
|
@ -44,6 +44,15 @@
|
||||
|
||||
int validateToInt(size_t step);
|
||||
|
||||
// Converts v_origin to size_t and raises cv::Error::StsError if converting the
// result back to _Tp does not reproduce the original value.
//
// v_origin - value to convert
// msg      - error text to report; a default message is used when it is NULL
//
// NOTE(review): a negative value of a signed type at least as wide as size_t
// appears to survive this round trip and would be returned as a very large
// size_t — confirm whether callers need a separate sign check.
template <typename _Tp> static inline
|
||||
size_t safeCastToSizeT(const _Tp v_origin, const char* msg)
|
||||
{
|
||||
const size_t value_cast = (size_t)v_origin;
|
||||
// Round-trip check: a mismatch means the value did not fit into size_t.
if ((_Tp)value_cast != v_origin)
|
||||
CV_Error(cv::Error::StsError, msg ? msg : "Can't cast value into size_t");
|
||||
return value_cast;
|
||||
}
|
||||
|
||||
struct PaletteEntry
|
||||
{
|
||||
unsigned char b, g, r, a;
|
||||
|
@ -96,12 +96,17 @@ TEST(Imgcodecs_WebP, encode_decode_with_alpha_webp)
|
||||
string output = cv::tempfile(".webp");
|
||||
|
||||
EXPECT_NO_THROW(cv::imwrite(output, img));
|
||||
cv::Mat img_webp = cv::imread(output);
|
||||
cv::Mat img_webp = cv::imread(output, IMREAD_UNCHANGED);
|
||||
cv::Mat img_webp_bgr = cv::imread(output); // IMREAD_COLOR by default
|
||||
EXPECT_EQ(0, remove(output.c_str()));
|
||||
EXPECT_FALSE(img_webp.empty());
|
||||
EXPECT_EQ(4, img_webp.channels());
|
||||
EXPECT_EQ(512, img_webp.cols);
|
||||
EXPECT_EQ(512, img_webp.rows);
|
||||
EXPECT_FALSE(img_webp_bgr.empty());
|
||||
EXPECT_EQ(3, img_webp_bgr.channels());
|
||||
EXPECT_EQ(512, img_webp_bgr.cols);
|
||||
EXPECT_EQ(512, img_webp_bgr.rows);
|
||||
}
|
||||
|
||||
#endif // HAVE_WEBP
|
||||
|
@ -52,7 +52,7 @@ public class Subdiv2DTest extends OpenCVTestCase {
|
||||
s2d.insert( new Point(10, 20) );
|
||||
MatOfFloat6 triangles = new MatOfFloat6();
|
||||
s2d.getTriangleList(triangles);
|
||||
assertEquals(10, triangles.rows());
|
||||
assertEquals(2, triangles.rows());
|
||||
/*
|
||||
int cnt = triangles.rows();
|
||||
float buff[] = new float[cnt*6];
|
||||
|
@ -332,7 +332,7 @@ void cv::accumulate( InputArray _src, InputOutputArray _dst, InputArray _mask )
|
||||
CV_Assert( func != 0 );
|
||||
|
||||
const Mat* arrays[] = {&src, &dst, &mask, 0};
|
||||
uchar* ptrs[3]{};
|
||||
uchar* ptrs[3] = {};
|
||||
NAryMatIterator it(arrays, ptrs);
|
||||
int len = (int)it.size;
|
||||
|
||||
@ -430,7 +430,7 @@ void cv::accumulateSquare( InputArray _src, InputOutputArray _dst, InputArray _m
|
||||
CV_Assert( func != 0 );
|
||||
|
||||
const Mat* arrays[] = {&src, &dst, &mask, 0};
|
||||
uchar* ptrs[3]{};
|
||||
uchar* ptrs[3] = {};
|
||||
NAryMatIterator it(arrays, ptrs);
|
||||
int len = (int)it.size;
|
||||
|
||||
@ -533,7 +533,7 @@ void cv::accumulateProduct( InputArray _src1, InputArray _src2,
|
||||
CV_Assert( func != 0 );
|
||||
|
||||
const Mat* arrays[] = {&src1, &src2, &dst, &mask, 0};
|
||||
uchar* ptrs[4]{};
|
||||
uchar* ptrs[4] = {};
|
||||
NAryMatIterator it(arrays, ptrs);
|
||||
int len = (int)it.size;
|
||||
|
||||
@ -635,7 +635,7 @@ void cv::accumulateWeighted( InputArray _src, InputOutputArray _dst,
|
||||
CV_Assert( func != 0 );
|
||||
|
||||
const Mat* arrays[] = {&src, &dst, &mask, 0};
|
||||
uchar* ptrs[3]{};
|
||||
uchar* ptrs[3] = {};
|
||||
NAryMatIterator it(arrays, ptrs);
|
||||
int len = (int)it.size;
|
||||
|
||||
|
@ -1123,7 +1123,6 @@ cvFindNextContour( CvContourScanner scanner )
|
||||
#endif
|
||||
{
|
||||
_CvContourInfo *par_info = 0;
|
||||
_CvContourInfo *l_cinfo = 0;
|
||||
CvSeq *seq = 0;
|
||||
int is_hole = 0;
|
||||
CvPoint origin;
|
||||
@ -1215,6 +1214,7 @@ cvFindNextContour( CvContourScanner scanner )
|
||||
seq->flags |= is_hole ? CV_SEQ_FLAG_HOLE : 0;
|
||||
|
||||
/* initialize header */
|
||||
_CvContourInfo *l_cinfo = 0;
|
||||
if( mode <= 1 )
|
||||
{
|
||||
l_cinfo = &(scanner->cinfo_temp);
|
||||
@ -1225,10 +1225,8 @@ cvFindNextContour( CvContourScanner scanner )
|
||||
}
|
||||
else
|
||||
{
|
||||
union { _CvContourInfo* ci; CvSetElem* se; } v;
|
||||
v.ci = l_cinfo;
|
||||
cvSetAdd( scanner->cinfo_set, 0, &v.se );
|
||||
l_cinfo = v.ci;
|
||||
cvSetAdd(scanner->cinfo_set, 0, (CvSetElem**)&l_cinfo);
|
||||
CV_Assert(l_cinfo);
|
||||
int lval;
|
||||
|
||||
if( img_i )
|
||||
@ -1298,16 +1296,16 @@ cvFindNextContour( CvContourScanner scanner )
|
||||
scanner->img = (schar *) img;
|
||||
scanner->nbd = nbd;
|
||||
return l_cinfo->contour;
|
||||
|
||||
resume_scan:
|
||||
|
||||
}
|
||||
resume_scan:
|
||||
{
|
||||
prev = p;
|
||||
/* update lnbd */
|
||||
if( prev & -2 )
|
||||
{
|
||||
lnbd.x = x;
|
||||
}
|
||||
} /* end of prev != p */
|
||||
}
|
||||
} /* end of loop on x */
|
||||
|
||||
lnbd.x = 0;
|
||||
|
@ -45,7 +45,8 @@ namespace cv
|
||||
{
|
||||
|
||||
static const int DIST_SHIFT = 16;
|
||||
static const int INIT_DIST0 = (INT_MAX >> 2);
|
||||
static const int INIT_DIST0 = INT_MAX;
|
||||
static const int DIST_MAX = (INT_MAX >> 2);
|
||||
#define CV_FLT_TO_FIX(x,n) cvRound((x)*(1<<(n)))
|
||||
|
||||
static void
|
||||
@ -71,8 +72,8 @@ distanceTransform_3x3( const Mat& _src, Mat& _temp, Mat& _dist, const float* met
|
||||
{
|
||||
const int BORDER = 1;
|
||||
int i, j;
|
||||
const int HV_DIST = CV_FLT_TO_FIX( metrics[0], DIST_SHIFT );
|
||||
const int DIAG_DIST = CV_FLT_TO_FIX( metrics[1], DIST_SHIFT );
|
||||
const unsigned int HV_DIST = CV_FLT_TO_FIX( metrics[0], DIST_SHIFT );
|
||||
const unsigned int DIAG_DIST = CV_FLT_TO_FIX( metrics[1], DIST_SHIFT );
|
||||
const float scale = 1.f/(1 << DIST_SHIFT);
|
||||
|
||||
const uchar* src = _src.ptr();
|
||||
@ -89,7 +90,7 @@ distanceTransform_3x3( const Mat& _src, Mat& _temp, Mat& _dist, const float* met
|
||||
for( i = 0; i < size.height; i++ )
|
||||
{
|
||||
const uchar* s = src + i*srcstep;
|
||||
int* tmp = (int*)(temp + (i+BORDER)*step) + BORDER;
|
||||
unsigned int* tmp = (unsigned int*)(temp + (i+BORDER)*step) + BORDER;
|
||||
|
||||
for( j = 0; j < BORDER; j++ )
|
||||
tmp[-j-1] = tmp[size.width + j] = INIT_DIST0;
|
||||
@ -100,8 +101,8 @@ distanceTransform_3x3( const Mat& _src, Mat& _temp, Mat& _dist, const float* met
|
||||
tmp[j] = 0;
|
||||
else
|
||||
{
|
||||
int t0 = tmp[j-step-1] + DIAG_DIST;
|
||||
int t = tmp[j-step] + HV_DIST;
|
||||
unsigned int t0 = tmp[j-step-1] + DIAG_DIST;
|
||||
unsigned int t = tmp[j-step] + HV_DIST;
|
||||
if( t0 > t ) t0 = t;
|
||||
t = tmp[j-step+1] + DIAG_DIST;
|
||||
if( t0 > t ) t0 = t;
|
||||
@ -116,14 +117,14 @@ distanceTransform_3x3( const Mat& _src, Mat& _temp, Mat& _dist, const float* met
|
||||
for( i = size.height - 1; i >= 0; i-- )
|
||||
{
|
||||
float* d = (float*)(dist + i*dststep);
|
||||
int* tmp = (int*)(temp + (i+BORDER)*step) + BORDER;
|
||||
unsigned int* tmp = (unsigned int*)(temp + (i+BORDER)*step) + BORDER;
|
||||
|
||||
for( j = size.width - 1; j >= 0; j-- )
|
||||
{
|
||||
int t0 = tmp[j];
|
||||
unsigned int t0 = tmp[j];
|
||||
if( t0 > HV_DIST )
|
||||
{
|
||||
int t = tmp[j+step+1] + DIAG_DIST;
|
||||
unsigned int t = tmp[j+step+1] + DIAG_DIST;
|
||||
if( t0 > t ) t0 = t;
|
||||
t = tmp[j+step] + HV_DIST;
|
||||
if( t0 > t ) t0 = t;
|
||||
@ -133,6 +134,7 @@ distanceTransform_3x3( const Mat& _src, Mat& _temp, Mat& _dist, const float* met
|
||||
if( t0 > t ) t0 = t;
|
||||
tmp[j] = t0;
|
||||
}
|
||||
t0 = (t0 > DIST_MAX) ? DIST_MAX : t0;
|
||||
d[j] = (float)(t0 * scale);
|
||||
}
|
||||
}
|
||||
@ -144,9 +146,9 @@ distanceTransform_5x5( const Mat& _src, Mat& _temp, Mat& _dist, const float* met
|
||||
{
|
||||
const int BORDER = 2;
|
||||
int i, j;
|
||||
const int HV_DIST = CV_FLT_TO_FIX( metrics[0], DIST_SHIFT );
|
||||
const int DIAG_DIST = CV_FLT_TO_FIX( metrics[1], DIST_SHIFT );
|
||||
const int LONG_DIST = CV_FLT_TO_FIX( metrics[2], DIST_SHIFT );
|
||||
const unsigned int HV_DIST = CV_FLT_TO_FIX( metrics[0], DIST_SHIFT );
|
||||
const unsigned int DIAG_DIST = CV_FLT_TO_FIX( metrics[1], DIST_SHIFT );
|
||||
const unsigned int LONG_DIST = CV_FLT_TO_FIX( metrics[2], DIST_SHIFT );
|
||||
const float scale = 1.f/(1 << DIST_SHIFT);
|
||||
|
||||
const uchar* src = _src.ptr();
|
||||
@ -163,7 +165,7 @@ distanceTransform_5x5( const Mat& _src, Mat& _temp, Mat& _dist, const float* met
|
||||
for( i = 0; i < size.height; i++ )
|
||||
{
|
||||
const uchar* s = src + i*srcstep;
|
||||
int* tmp = (int*)(temp + (i+BORDER)*step) + BORDER;
|
||||
unsigned int* tmp = (unsigned int*)(temp + (i+BORDER)*step) + BORDER;
|
||||
|
||||
for( j = 0; j < BORDER; j++ )
|
||||
tmp[-j-1] = tmp[size.width + j] = INIT_DIST0;
|
||||
@ -174,8 +176,8 @@ distanceTransform_5x5( const Mat& _src, Mat& _temp, Mat& _dist, const float* met
|
||||
tmp[j] = 0;
|
||||
else
|
||||
{
|
||||
int t0 = tmp[j-step*2-1] + LONG_DIST;
|
||||
int t = tmp[j-step*2+1] + LONG_DIST;
|
||||
unsigned int t0 = tmp[j-step*2-1] + LONG_DIST;
|
||||
unsigned int t = tmp[j-step*2+1] + LONG_DIST;
|
||||
if( t0 > t ) t0 = t;
|
||||
t = tmp[j-step-2] + LONG_DIST;
|
||||
if( t0 > t ) t0 = t;
|
||||
@ -198,14 +200,14 @@ distanceTransform_5x5( const Mat& _src, Mat& _temp, Mat& _dist, const float* met
|
||||
for( i = size.height - 1; i >= 0; i-- )
|
||||
{
|
||||
float* d = (float*)(dist + i*dststep);
|
||||
int* tmp = (int*)(temp + (i+BORDER)*step) + BORDER;
|
||||
unsigned int* tmp = (unsigned int*)(temp + (i+BORDER)*step) + BORDER;
|
||||
|
||||
for( j = size.width - 1; j >= 0; j-- )
|
||||
{
|
||||
int t0 = tmp[j];
|
||||
unsigned int t0 = tmp[j];
|
||||
if( t0 > HV_DIST )
|
||||
{
|
||||
int t = tmp[j+step*2+1] + LONG_DIST;
|
||||
unsigned int t = tmp[j+step*2+1] + LONG_DIST;
|
||||
if( t0 > t ) t0 = t;
|
||||
t = tmp[j+step*2-1] + LONG_DIST;
|
||||
if( t0 > t ) t0 = t;
|
||||
@ -223,6 +225,7 @@ distanceTransform_5x5( const Mat& _src, Mat& _temp, Mat& _dist, const float* met
|
||||
if( t0 > t ) t0 = t;
|
||||
tmp[j] = t0;
|
||||
}
|
||||
t0 = (t0 > DIST_MAX) ? DIST_MAX : t0;
|
||||
d[j] = (float)(t0 * scale);
|
||||
}
|
||||
}
|
||||
@ -235,9 +238,9 @@ distanceTransformEx_5x5( const Mat& _src, Mat& _temp, Mat& _dist, Mat& _labels,
|
||||
const int BORDER = 2;
|
||||
|
||||
int i, j;
|
||||
const int HV_DIST = CV_FLT_TO_FIX( metrics[0], DIST_SHIFT );
|
||||
const int DIAG_DIST = CV_FLT_TO_FIX( metrics[1], DIST_SHIFT );
|
||||
const int LONG_DIST = CV_FLT_TO_FIX( metrics[2], DIST_SHIFT );
|
||||
const unsigned int HV_DIST = CV_FLT_TO_FIX( metrics[0], DIST_SHIFT );
|
||||
const unsigned int DIAG_DIST = CV_FLT_TO_FIX( metrics[1], DIST_SHIFT );
|
||||
const unsigned int LONG_DIST = CV_FLT_TO_FIX( metrics[2], DIST_SHIFT );
|
||||
const float scale = 1.f/(1 << DIST_SHIFT);
|
||||
|
||||
const uchar* src = _src.ptr();
|
||||
@ -247,7 +250,7 @@ distanceTransformEx_5x5( const Mat& _src, Mat& _temp, Mat& _dist, Mat& _labels,
|
||||
int srcstep = (int)(_src.step/sizeof(src[0]));
|
||||
int step = (int)(_temp.step/sizeof(temp[0]));
|
||||
int dststep = (int)(_dist.step/sizeof(dist[0]));
|
||||
int lstep = (int)(_labels.step/sizeof(dist[0]));
|
||||
int lstep = (int)(_labels.step/sizeof(labels[0]));
|
||||
Size size = _src.size();
|
||||
|
||||
initTopBottom( _temp, BORDER );
|
||||
@ -256,7 +259,7 @@ distanceTransformEx_5x5( const Mat& _src, Mat& _temp, Mat& _dist, Mat& _labels,
|
||||
for( i = 0; i < size.height; i++ )
|
||||
{
|
||||
const uchar* s = src + i*srcstep;
|
||||
int* tmp = (int*)(temp + (i+BORDER)*step) + BORDER;
|
||||
unsigned int* tmp = (unsigned int*)(temp + (i+BORDER)*step) + BORDER;
|
||||
int* lls = (int*)(labels + i*lstep);
|
||||
|
||||
for( j = 0; j < BORDER; j++ )
|
||||
@ -271,7 +274,7 @@ distanceTransformEx_5x5( const Mat& _src, Mat& _temp, Mat& _dist, Mat& _labels,
|
||||
}
|
||||
else
|
||||
{
|
||||
int t0 = INIT_DIST0, t;
|
||||
unsigned int t0 = INIT_DIST0, t;
|
||||
int l0 = 0;
|
||||
|
||||
t = tmp[j-step*2-1] + LONG_DIST;
|
||||
@ -333,16 +336,16 @@ distanceTransformEx_5x5( const Mat& _src, Mat& _temp, Mat& _dist, Mat& _labels,
|
||||
for( i = size.height - 1; i >= 0; i-- )
|
||||
{
|
||||
float* d = (float*)(dist + i*dststep);
|
||||
int* tmp = (int*)(temp + (i+BORDER)*step) + BORDER;
|
||||
unsigned int* tmp = (unsigned int*)(temp + (i+BORDER)*step) + BORDER;
|
||||
int* lls = (int*)(labels + i*lstep);
|
||||
|
||||
for( j = size.width - 1; j >= 0; j-- )
|
||||
{
|
||||
int t0 = tmp[j];
|
||||
unsigned int t0 = tmp[j];
|
||||
int l0 = lls[j];
|
||||
if( t0 > HV_DIST )
|
||||
{
|
||||
int t = tmp[j+step*2+1] + LONG_DIST;
|
||||
unsigned int t = tmp[j+step*2+1] + LONG_DIST;
|
||||
if( t0 > t )
|
||||
{
|
||||
t0 = t;
|
||||
@ -393,6 +396,7 @@ distanceTransformEx_5x5( const Mat& _src, Mat& _temp, Mat& _dist, Mat& _labels,
|
||||
tmp[j] = t0;
|
||||
lls[j] = l0;
|
||||
}
|
||||
t0 = (t0 > DIST_MAX) ? DIST_MAX : t0;
|
||||
d[j] = (float)(t0 * scale);
|
||||
}
|
||||
}
|
||||
|
@ -340,51 +340,199 @@ static void hlineResizeCn(ET* src, int cn, int *ofst, FT* m, FT* dst, int dst_mi
|
||||
hline<ET, FT, n, mulall, cncnt>::ResizeCn(src, cn, ofst, m, dst, dst_min, dst_max, dst_width);
|
||||
};
|
||||
|
||||
#if CV_SIMD512
|
||||
inline void v_load_indexed1(uint8_t* src, int *ofst, v_uint16 &v_src0, v_uint16 &v_src1)
|
||||
{
|
||||
v_expand(v_reinterpret_as_u8(v_uint16(
|
||||
*((uint16_t*)(src + ofst[ 0])), *((uint16_t*)(src + ofst[ 1])), *((uint16_t*)(src + ofst[ 2])), *((uint16_t*)(src + ofst[ 3])),
|
||||
*((uint16_t*)(src + ofst[ 4])), *((uint16_t*)(src + ofst[ 5])), *((uint16_t*)(src + ofst[ 6])), *((uint16_t*)(src + ofst[ 7])),
|
||||
*((uint16_t*)(src + ofst[ 8])), *((uint16_t*)(src + ofst[ 9])), *((uint16_t*)(src + ofst[10])), *((uint16_t*)(src + ofst[11])),
|
||||
*((uint16_t*)(src + ofst[12])), *((uint16_t*)(src + ofst[13])), *((uint16_t*)(src + ofst[14])), *((uint16_t*)(src + ofst[15])),
|
||||
*((uint16_t*)(src + ofst[16])), *((uint16_t*)(src + ofst[17])), *((uint16_t*)(src + ofst[14])), *((uint16_t*)(src + ofst[15])),
|
||||
*((uint16_t*)(src + ofst[20])), *((uint16_t*)(src + ofst[21])), *((uint16_t*)(src + ofst[14])), *((uint16_t*)(src + ofst[15])),
|
||||
*((uint16_t*)(src + ofst[24])), *((uint16_t*)(src + ofst[25])), *((uint16_t*)(src + ofst[14])), *((uint16_t*)(src + ofst[15])),
|
||||
*((uint16_t*)(src + ofst[28])), *((uint16_t*)(src + ofst[29])), *((uint16_t*)(src + ofst[14])), *((uint16_t*)(src + ofst[15])))),
|
||||
v_src0, v_src1);
|
||||
}
|
||||
// 512-bit variant of the two-channel 8-bit gather.
// Reads one 32-bit word (four bytes) at src + 2 * ofst[k] for k in [0, 16),
// widens the gathered bytes to 16-bit lanes with v_expand, then reorders the
// lanes through a fixed chain of v_zip calls before writing v_src0 / v_src1.
// NOTE(review): the final lane order presumably pairs same-channel samples of
// the two neighbouring pixels for the caller's v_dotprod - confirm against caller.
inline void v_load_indexed2(uint8_t* src, int *ofst, v_uint16 &v_src0, v_uint16 &v_src1)
|
||||
{
|
||||
v_expand(v_reinterpret_as_u8(v_uint32(
|
||||
*((uint32_t*)(src + 2 * ofst[ 0])), *((uint32_t*)(src + 2 * ofst[ 1])), *((uint32_t*)(src + 2 * ofst[ 2])), *((uint32_t*)(src + 2 * ofst[ 3])),
|
||||
*((uint32_t*)(src + 2 * ofst[ 4])), *((uint32_t*)(src + 2 * ofst[ 5])), *((uint32_t*)(src + 2 * ofst[ 6])), *((uint32_t*)(src + 2 * ofst[ 7])),
|
||||
*((uint32_t*)(src + 2 * ofst[ 8])), *((uint32_t*)(src + 2 * ofst[ 9])), *((uint32_t*)(src + 2 * ofst[10])), *((uint32_t*)(src + 2 * ofst[11])),
|
||||
*((uint32_t*)(src + 2 * ofst[12])), *((uint32_t*)(src + 2 * ofst[13])), *((uint32_t*)(src + 2 * ofst[14])), *((uint32_t*)(src + 2 * ofst[15])))),
|
||||
v_src0, v_src1);
|
||||
v_uint32 v_tmp0, v_tmp1, v_tmp2, v_tmp3;
|
||||
// Four 32-bit zips ping-pong between the (tmp2,tmp3) and (tmp0,tmp1) pairs;
// the last zip works on 16-bit lanes and produces the outputs.
v_zip(v_reinterpret_as_u32(v_src0), v_reinterpret_as_u32(v_src1), v_tmp2, v_tmp3);
|
||||
v_zip(v_tmp2, v_tmp3, v_tmp0, v_tmp1);
|
||||
v_zip(v_tmp0, v_tmp1, v_tmp2, v_tmp3);
|
||||
v_zip(v_tmp2, v_tmp3, v_tmp0, v_tmp1);
|
||||
v_zip(v_reinterpret_as_u16(v_tmp0), v_reinterpret_as_u16(v_tmp1), v_src0, v_src1);
|
||||
}
|
||||
// 512-bit variant of the four-channel 8-bit gather.
// Reads one 64-bit word (eight bytes) at src + 4 * ofst[k] for k in [0, 8),
// widens the gathered bytes to 16-bit lanes with v_expand, then reorders the
// lanes with three 64-bit v_zip passes and a final 16-bit v_zip.
// NOTE(review): the resulting layout presumably places matching channels of
// the two neighbouring pixels in adjacent lanes - confirm against caller.
inline void v_load_indexed4(uint8_t* src, int *ofst, v_uint16 &v_src0, v_uint16 &v_src1)
|
||||
{
|
||||
v_expand(v_reinterpret_as_u8(v_uint64(
|
||||
*((uint64_t*)(src + 4 * ofst[0])), *((uint64_t*)(src + 4 * ofst[1])), *((uint64_t*)(src + 4 * ofst[2])), *((uint64_t*)(src + 4 * ofst[3])),
|
||||
*((uint64_t*)(src + 4 * ofst[4])), *((uint64_t*)(src + 4 * ofst[5])), *((uint64_t*)(src + 4 * ofst[6])), *((uint64_t*)(src + 4 * ofst[7])))),
|
||||
v_src0, v_src1);
|
||||
v_uint64 v_tmp0, v_tmp1, v_tmp2, v_tmp3;
|
||||
// 64-bit zips alternate between the (tmp2,tmp3) and (tmp0,tmp1) pairs.
v_zip(v_reinterpret_as_u64(v_src0), v_reinterpret_as_u64(v_src1), v_tmp2, v_tmp3);
|
||||
v_zip(v_tmp2, v_tmp3, v_tmp0, v_tmp1);
|
||||
v_zip(v_tmp0, v_tmp1, v_tmp2, v_tmp3);
|
||||
v_zip(v_reinterpret_as_u16(v_tmp2), v_reinterpret_as_u16(v_tmp3), v_src0, v_src1);
|
||||
}
|
||||
// 512-bit variant of the single-channel 16-bit gather.
// Reads one 32-bit word (two adjacent uint16_t samples) at src + ofst[k] for
// k in [0, 16); ofst is counted in uint16_t elements because src is uint16_t*.
// The gathered samples are widened to 32-bit lanes with v_expand and then
// passed through four v_zip rounds.
// NOTE(review): going by the function name, v_src0 should end up with the
// first sample of every pair and v_src1 with the second - confirm.
inline void v_load_indexed_deinterleave(uint16_t* src, int *ofst, v_uint32 &v_src0, v_uint32 &v_src1)
|
||||
{
|
||||
v_expand(v_reinterpret_as_u16(v_uint32(
|
||||
*((uint32_t*)(src + ofst[ 0])), *((uint32_t*)(src + ofst[ 1])), *((uint32_t*)(src + ofst[ 2])), *((uint32_t*)(src + ofst[ 3])),
|
||||
*((uint32_t*)(src + ofst[ 4])), *((uint32_t*)(src + ofst[ 5])), *((uint32_t*)(src + ofst[ 6])), *((uint32_t*)(src + ofst[ 7])),
|
||||
*((uint32_t*)(src + ofst[ 8])), *((uint32_t*)(src + ofst[ 9])), *((uint32_t*)(src + ofst[10])), *((uint32_t*)(src + ofst[11])),
|
||||
*((uint32_t*)(src + ofst[12])), *((uint32_t*)(src + ofst[13])), *((uint32_t*)(src + ofst[14])), *((uint32_t*)(src + ofst[15])))),
|
||||
v_src0, v_src1);
|
||||
v_uint32 v_tmp0, v_tmp1;
|
||||
// Each zip swaps roles between the (src0,src1) and (tmp0,tmp1) pairs; the
// fourth zip leaves the result in the output arguments.
v_zip(v_src0, v_src1, v_tmp0, v_tmp1);
|
||||
v_zip(v_tmp0, v_tmp1, v_src0, v_src1);
|
||||
v_zip(v_src0, v_src1, v_tmp0, v_tmp1);
|
||||
v_zip(v_tmp0, v_tmp1, v_src0, v_src1);
|
||||
}
|
||||
#elif CV_SIMD256
|
||||
// 256-bit variant of the single-channel 8-bit gather.
// Reads the 16-bit word (byte pair) at src + ofst[k] for k in [0, 16) and
// widens the 32 gathered bytes to 16-bit lanes with v_expand, writing the
// two halves to v_src0 and v_src1. No further lane reordering is done.
inline void v_load_indexed1(uint8_t* src, int *ofst, v_uint16 &v_src0, v_uint16 &v_src1)
|
||||
{
|
||||
v_expand(v_reinterpret_as_u8(v_uint16(
|
||||
*((uint16_t*)(src + ofst[ 0])), *((uint16_t*)(src + ofst[ 1])), *((uint16_t*)(src + ofst[ 2])), *((uint16_t*)(src + ofst[ 3])),
|
||||
*((uint16_t*)(src + ofst[ 4])), *((uint16_t*)(src + ofst[ 5])), *((uint16_t*)(src + ofst[ 6])), *((uint16_t*)(src + ofst[ 7])),
|
||||
*((uint16_t*)(src + ofst[ 8])), *((uint16_t*)(src + ofst[ 9])), *((uint16_t*)(src + ofst[10])), *((uint16_t*)(src + ofst[11])),
|
||||
*((uint16_t*)(src + ofst[12])), *((uint16_t*)(src + ofst[13])), *((uint16_t*)(src + ofst[14])), *((uint16_t*)(src + ofst[15])))),
|
||||
v_src0, v_src1);
|
||||
}
|
||||
// 256-bit variant of the two-channel 8-bit gather.
// Reads one 32-bit word (four bytes) at src + 2 * ofst[k] for k in [0, 8),
// widens the gathered bytes to 16-bit lanes with v_expand, then reorders the
// lanes with three 32-bit v_zip passes and a final 16-bit v_zip.
// NOTE(review): the final lane order presumably pairs same-channel samples of
// the two neighbouring pixels for the caller's v_dotprod - confirm against caller.
inline void v_load_indexed2(uint8_t* src, int *ofst, v_uint16 &v_src0, v_uint16 &v_src1)
|
||||
{
|
||||
v_expand(v_reinterpret_as_u8(v_uint32(
|
||||
*((uint32_t*)(src + 2 * ofst[0])), *((uint32_t*)(src + 2 * ofst[1])), *((uint32_t*)(src + 2 * ofst[2])), *((uint32_t*)(src + 2 * ofst[3])),
|
||||
*((uint32_t*)(src + 2 * ofst[4])), *((uint32_t*)(src + 2 * ofst[5])), *((uint32_t*)(src + 2 * ofst[6])), *((uint32_t*)(src + 2 * ofst[7])))),
|
||||
v_src0, v_src1);
|
||||
v_uint32 v_tmp0, v_tmp1, v_tmp2, v_tmp3;
|
||||
// 32-bit zips alternate between the (tmp2,tmp3) and (tmp0,tmp1) pairs.
v_zip(v_reinterpret_as_u32(v_src0), v_reinterpret_as_u32(v_src1), v_tmp2, v_tmp3);
|
||||
v_zip(v_tmp2, v_tmp3, v_tmp0, v_tmp1);
|
||||
v_zip(v_tmp0, v_tmp1, v_tmp2, v_tmp3);
|
||||
v_zip(v_reinterpret_as_u16(v_tmp2), v_reinterpret_as_u16(v_tmp3), v_src0, v_src1);
|
||||
}
|
||||
// 256-bit variant of the four-channel 8-bit gather.
// Reads one 64-bit word (eight bytes) at src + 4 * ofst[k] for k in [0, 4),
// widens the gathered bytes to 16-bit lanes with v_expand, then reorders the
// lanes with two 64-bit v_zip passes and a final 16-bit v_zip.
// NOTE(review): v_tmp2 / v_tmp3 only hold the first zip's result here; the
// outputs are built from v_tmp0 / v_tmp1.
inline void v_load_indexed4(uint8_t* src, int *ofst, v_uint16 &v_src0, v_uint16 &v_src1)
|
||||
{
|
||||
v_expand(v_reinterpret_as_u8(v_uint64(
|
||||
*((uint64_t*)(src + 4 * ofst[0])), *((uint64_t*)(src + 4 * ofst[1])), *((uint64_t*)(src + 4 * ofst[2])), *((uint64_t*)(src + 4 * ofst[3])))),
|
||||
v_src0, v_src1);
|
||||
v_uint64 v_tmp0, v_tmp1, v_tmp2, v_tmp3;
|
||||
v_zip(v_reinterpret_as_u64(v_src0), v_reinterpret_as_u64(v_src1), v_tmp2, v_tmp3);
|
||||
v_zip(v_tmp2, v_tmp3, v_tmp0, v_tmp1);
|
||||
v_zip(v_reinterpret_as_u16(v_tmp0), v_reinterpret_as_u16(v_tmp1), v_src0, v_src1);
|
||||
}
|
||||
// 256-bit variant of the single-channel 16-bit gather.
// Reads one 32-bit word (two adjacent uint16_t samples) at src + ofst[k] for
// k in [0, 8); ofst is counted in uint16_t elements because src is uint16_t*.
// The samples are widened to 32-bit lanes into temporaries and then passed
// through three v_zip rounds, the last of which writes v_src0 / v_src1.
// NOTE(review): going by the function name, v_src0 should end up with the
// first sample of every pair and v_src1 with the second - confirm.
inline void v_load_indexed_deinterleave(uint16_t* src, int *ofst, v_uint32 &v_src0, v_uint32 &v_src1)
|
||||
{
|
||||
v_uint32 v_tmp0, v_tmp1;
|
||||
v_expand(v_reinterpret_as_u16(v_uint32(
|
||||
*((uint32_t*)(src + ofst[0])), *((uint32_t*)(src + ofst[1])), *((uint32_t*)(src + ofst[2])), *((uint32_t*)(src + ofst[3])),
|
||||
*((uint32_t*)(src + ofst[4])), *((uint32_t*)(src + ofst[5])), *((uint32_t*)(src + ofst[6])), *((uint32_t*)(src + ofst[7])))),
|
||||
v_tmp0, v_tmp1);
|
||||
v_zip(v_tmp0, v_tmp1, v_src0, v_src1);
|
||||
v_zip(v_src0, v_src1, v_tmp0, v_tmp1);
|
||||
v_zip(v_tmp0, v_tmp1, v_src0, v_src1);
|
||||
}
|
||||
#elif CV_SIMD128
|
||||
inline void v_load_indexed1(uint8_t* src, int *ofst, v_uint16 &v_src0, v_uint16 &v_src1)
|
||||
{
|
||||
uint16_t buf[8];
|
||||
buf[0] = *((uint16_t*)(src + ofst[0]));
|
||||
buf[1] = *((uint16_t*)(src + ofst[1]));
|
||||
buf[2] = *((uint16_t*)(src + ofst[2]));
|
||||
buf[3] = *((uint16_t*)(src + ofst[3]));
|
||||
buf[4] = *((uint16_t*)(src + ofst[4]));
|
||||
buf[5] = *((uint16_t*)(src + ofst[5]));
|
||||
buf[6] = *((uint16_t*)(src + ofst[6]));
|
||||
buf[7] = *((uint16_t*)(src + ofst[7]));
|
||||
v_src0 = vx_load_expand((uint8_t*)buf);
|
||||
v_src1 = vx_load_expand((uint8_t*)buf + 8);
|
||||
}
|
||||
inline void v_load_indexed2(uint8_t* src, int *ofst, v_uint16 &v_src0, v_uint16 &v_src1)
|
||||
{
|
||||
uint32_t buf[4];
|
||||
buf[0] = *((uint32_t*)(src + 2 * ofst[0]));
|
||||
buf[1] = *((uint32_t*)(src + 2 * ofst[1]));
|
||||
buf[2] = *((uint32_t*)(src + 2 * ofst[2]));
|
||||
buf[3] = *((uint32_t*)(src + 2 * ofst[3]));
|
||||
v_uint32 v_tmp0, v_tmp1, v_tmp2, v_tmp3;
|
||||
v_tmp0 = v_reinterpret_as_u32(vx_load_expand((uint8_t*)buf));
|
||||
v_tmp1 = v_reinterpret_as_u32(vx_load_expand((uint8_t*)buf + 8));
|
||||
v_zip(v_tmp0, v_tmp1, v_tmp2, v_tmp3);
|
||||
v_zip(v_tmp2, v_tmp3, v_tmp0, v_tmp1);
|
||||
v_zip(v_reinterpret_as_u16(v_tmp0), v_reinterpret_as_u16(v_tmp1), v_src0, v_src1);
|
||||
}
|
||||
inline void v_load_indexed4(uint8_t* src, int *ofst, v_uint16 &v_src0, v_uint16 &v_src1)
|
||||
{
|
||||
v_uint16 v_tmp0, v_tmp1;
|
||||
v_src0 = vx_load_expand(src + 4 * ofst[0]);
|
||||
v_src1 = vx_load_expand(src + 4 * ofst[1]);
|
||||
v_recombine(v_src0, v_src1, v_tmp0, v_tmp1);
|
||||
v_zip(v_tmp0, v_tmp1, v_src0, v_src1);
|
||||
}
|
||||
inline void v_load_indexed_deinterleave(uint16_t* src, int *ofst, v_uint32 &v_src0, v_uint32 &v_src1)
|
||||
{
|
||||
uint32_t buf[4];
|
||||
buf[0] = *((uint32_t*)(src + ofst[0]));
|
||||
buf[1] = *((uint32_t*)(src + ofst[1]));
|
||||
buf[2] = *((uint32_t*)(src + ofst[2]));
|
||||
buf[3] = *((uint32_t*)(src + ofst[3]));
|
||||
v_src0 = vx_load_expand((uint16_t*)buf);
|
||||
v_src1 = vx_load_expand((uint16_t*)buf + 4);
|
||||
v_uint32 v_tmp0, v_tmp1;
|
||||
v_zip(v_src0, v_src1, v_tmp0, v_tmp1);
|
||||
v_zip(v_tmp0, v_tmp1, v_src0, v_src1);
|
||||
}
|
||||
#endif
|
||||
template <>
|
||||
void hlineResizeCn<uint8_t, ufixedpoint16, 2, true, 1>(uint8_t* src, int, int *ofst, ufixedpoint16* m, ufixedpoint16* dst, int dst_min, int dst_max, int dst_width)
|
||||
{
|
||||
int i = 0;
|
||||
ufixedpoint16 src_0(src[0]);
|
||||
v_uint16x8 v_src_0 = v_setall_u16(*((uint16_t*)&src_0));
|
||||
for (; i < dst_min - 7; i += 8, m += 16, dst += 8) // Points that fall left from src image so became equal to leftmost src point
|
||||
#if CV_SIMD
|
||||
const int VECSZ = v_uint16::nlanes;
|
||||
v_uint16 v_src_0 = vx_setall_u16(*((uint16_t*)&src_0));
|
||||
for (; i <= dst_min - VECSZ; i += VECSZ, m += 2*VECSZ, dst += VECSZ) // Points that fall left from src image so became equal to leftmost src point
|
||||
{
|
||||
v_store((uint16_t*)dst, v_src_0);
|
||||
}
|
||||
#endif
|
||||
for (; i < dst_min; i++, m += 2)
|
||||
{
|
||||
*(dst++) = src_0;
|
||||
}
|
||||
for (; i < dst_max - 7 && ofst[i + 7] + 15 <= ofst[dst_width - 1]; i += 8, m += 16, dst += 8)
|
||||
#if CV_SIMD
|
||||
for (; i <= dst_max - VECSZ; i += VECSZ, m += 2*VECSZ, dst += VECSZ)
|
||||
{
|
||||
v_uint32x4 v_src01 = v_combine_low(v_reinterpret_as_u32(v_load_expand(src + ofst[i ])), v_reinterpret_as_u32(v_load_expand(src + ofst[i + 1])));
|
||||
v_uint32x4 v_src23 = v_combine_low(v_reinterpret_as_u32(v_load_expand(src + ofst[i + 2])), v_reinterpret_as_u32(v_load_expand(src + ofst[i + 3])));
|
||||
v_uint32x4 v_src45 = v_combine_low(v_reinterpret_as_u32(v_load_expand(src + ofst[i + 4])), v_reinterpret_as_u32(v_load_expand(src + ofst[i + 5])));
|
||||
v_uint32x4 v_src67 = v_combine_low(v_reinterpret_as_u32(v_load_expand(src + ofst[i + 6])), v_reinterpret_as_u32(v_load_expand(src + ofst[i + 7])));
|
||||
v_uint16 v_src0, v_src1;
|
||||
v_load_indexed1(src, ofst + i, v_src0, v_src1);
|
||||
|
||||
v_uint32x4 v_zip02, v_zip13, v_zip46, v_zip57;
|
||||
v_zip(v_src01, v_src23, v_zip02, v_zip13);
|
||||
v_zip(v_src45, v_src67, v_zip46, v_zip57);
|
||||
|
||||
v_uint32x4 v_src0, v_src1;
|
||||
v_zip(v_combine_low(v_zip02, v_zip46), v_combine_low(v_zip13, v_zip57), v_src0, v_src1);
|
||||
|
||||
v_int16x8 v_mul0 = v_load((int16_t*)m);
|
||||
v_int16x8 v_mul1 = v_load((int16_t*)m + 8);
|
||||
v_uint32x4 v_res0 = v_reinterpret_as_u32(v_dotprod(v_reinterpret_as_s16(v_src0), v_mul0));
|
||||
v_uint32x4 v_res1 = v_reinterpret_as_u32(v_dotprod(v_reinterpret_as_s16(v_src1), v_mul1));
|
||||
v_int16 v_mul0 = vx_load((int16_t*)m);
|
||||
v_int16 v_mul1 = vx_load((int16_t*)m + VECSZ);
|
||||
v_uint32 v_res0 = v_reinterpret_as_u32(v_dotprod(v_reinterpret_as_s16(v_src0), v_mul0));
|
||||
v_uint32 v_res1 = v_reinterpret_as_u32(v_dotprod(v_reinterpret_as_s16(v_src1), v_mul1));
|
||||
v_store((uint16_t*)dst, v_pack(v_res0, v_res1));
|
||||
}
|
||||
#endif
|
||||
for (; i < dst_max; i += 1, m += 2)
|
||||
{
|
||||
uint8_t* px = src + ofst[i];
|
||||
*(dst++) = m[0] * px[0] + m[1] * px[1];
|
||||
}
|
||||
src_0 = (src + ofst[dst_width - 1])[0];
|
||||
v_src_0 = v_setall_u16(*((uint16_t*)&src_0));
|
||||
for (; i < dst_width - 7; i += 8, dst += 8) // Points that fall left from src image so became equal to leftmost src point
|
||||
#if CV_SIMD
|
||||
v_src_0 = vx_setall_u16(*((uint16_t*)&src_0));
|
||||
for (; i <= dst_width - VECSZ; i += VECSZ, dst += VECSZ) // Points that fall left from src image so became equal to leftmost src point
|
||||
{
|
||||
v_store((uint16_t*)dst, v_src_0);
|
||||
}
|
||||
vx_cleanup();
|
||||
#endif
|
||||
for (; i < dst_width; i++)
|
||||
{
|
||||
*(dst++) = src_0;
|
||||
@ -394,87 +542,109 @@ template <>
|
||||
void hlineResizeCn<uint8_t, ufixedpoint16, 2, true, 2>(uint8_t* src, int, int *ofst, ufixedpoint16* m, ufixedpoint16* dst, int dst_min, int dst_max, int dst_width)
|
||||
{
|
||||
int i = 0;
|
||||
ufixedpoint16 srccn[8] = { src[0], src[1], src[0], src[1], src[0], src[1], src[0], src[1] };
|
||||
v_uint16x8 v_srccn = v_load((uint16_t*)srccn);
|
||||
for (; i < dst_min - 3; i += 4, m += 8, dst += 8) // Points that fall left from src image so became equal to leftmost src point
|
||||
union {
|
||||
uint32_t d;
|
||||
uint16_t w[2];
|
||||
} srccn;
|
||||
((ufixedpoint16*)(srccn.w))[0] = src[0];
|
||||
((ufixedpoint16*)(srccn.w))[1] = src[1];
|
||||
#if CV_SIMD
|
||||
const int VECSZ = v_uint16::nlanes;
|
||||
v_uint16 v_srccn = v_reinterpret_as_u16(vx_setall_u32(srccn.d));
|
||||
for (; i <= dst_min - VECSZ/2; i += VECSZ/2, m += VECSZ, dst += VECSZ) // Points that fall left from src image so became equal to leftmost src point
|
||||
{
|
||||
v_store((uint16_t*)dst, v_srccn);
|
||||
}
|
||||
#endif
|
||||
for (; i < dst_min; i++, m += 2)
|
||||
{
|
||||
*(dst++) = srccn[0];
|
||||
*(dst++) = srccn[1];
|
||||
*(dst++) = ((ufixedpoint16*)(srccn.w))[0];
|
||||
*(dst++) = ((ufixedpoint16*)(srccn.w))[1];
|
||||
}
|
||||
for (; i < dst_max - 3 && ofst[i + 3] + 7 <= ofst[dst_width - 1]; i += 4, m += 8, dst += 8)
|
||||
#if CV_SIMD
|
||||
for (; i <= dst_max - VECSZ/2; i += VECSZ/2, m += VECSZ, dst += VECSZ)
|
||||
{
|
||||
v_uint32x4 v_src0 = v_combine_low(v_reinterpret_as_u32(v_load_expand(src + 2 * ofst[i ])), v_reinterpret_as_u32(v_load_expand(src + 2 * ofst[i + 1])));
|
||||
v_uint32x4 v_src1 = v_combine_low(v_reinterpret_as_u32(v_load_expand(src + 2 * ofst[i + 2])), v_reinterpret_as_u32(v_load_expand(src + 2 * ofst[i + 3])));
|
||||
v_uint16 v_src0, v_src1;
|
||||
v_load_indexed2(src, ofst + i, v_src0, v_src1);
|
||||
|
||||
v_uint32x4 v_zip0, v_zip1;
|
||||
v_zip(v_src0, v_src1, v_zip0, v_zip1);
|
||||
v_zip(v_zip0, v_zip1, v_src0, v_src1);
|
||||
|
||||
v_int16x8 v_src0123, v_src4567;
|
||||
v_zip(v_reinterpret_as_s16(v_src0), v_reinterpret_as_s16(v_src1), v_src0123, v_src4567);
|
||||
|
||||
v_uint32x4 v_mul = v_load((uint32_t*)m);//AaBbCcDd
|
||||
v_uint32 v_mul = vx_load((uint32_t*)m);//AaBbCcDd
|
||||
v_uint32 v_zip0, v_zip1;
|
||||
v_zip(v_mul, v_mul, v_zip0, v_zip1);//AaAaBbBb CcCcDdDd
|
||||
v_uint32x4 v_res0 = v_reinterpret_as_u32(v_dotprod(v_src0123, v_reinterpret_as_s16(v_zip0)));
|
||||
v_uint32x4 v_res1 = v_reinterpret_as_u32(v_dotprod(v_src4567, v_reinterpret_as_s16(v_zip1)));
|
||||
v_uint32 v_res0 = v_reinterpret_as_u32(v_dotprod(v_reinterpret_as_s16(v_src0), v_reinterpret_as_s16(v_zip0)));
|
||||
v_uint32 v_res1 = v_reinterpret_as_u32(v_dotprod(v_reinterpret_as_s16(v_src1), v_reinterpret_as_s16(v_zip1)));
|
||||
v_store((uint16_t*)dst, v_pack(v_res0, v_res1));//AB1AB2CD1CD2
|
||||
}
|
||||
#endif
|
||||
for (; i < dst_max; i += 1, m += 2)
|
||||
{
|
||||
uint8_t* px = src + 2 * ofst[i];
|
||||
*(dst++) = m[0] * px[0] + m[1] * px[2];
|
||||
*(dst++) = m[0] * px[1] + m[1] * px[3];
|
||||
}
|
||||
srccn[0] = (src + 2 * ofst[dst_width - 1])[0]; srccn[1] = (src + 2 * ofst[dst_width - 1])[1]; srccn[2] = (src + 2 * ofst[dst_width - 1])[0]; srccn[3] = (src + 2 * ofst[dst_width - 1])[1];
|
||||
srccn[4] = (src + 2 * ofst[dst_width - 1])[0]; srccn[5] = (src + 2 * ofst[dst_width - 1])[1]; srccn[6] = (src + 2 * ofst[dst_width - 1])[0]; srccn[7] = (src + 2 * ofst[dst_width - 1])[1];
|
||||
v_srccn = v_load((uint16_t*)srccn);
|
||||
for (; i < dst_width - 3; i += 4, dst += 8) // Points that fall left from src image so became equal to leftmost src point
|
||||
((ufixedpoint16*)(srccn.w))[0] = (src + 2 * ofst[dst_width - 1])[0]; ((ufixedpoint16*)(srccn.w))[1] = (src + 2 * ofst[dst_width - 1])[1];
|
||||
#if CV_SIMD
|
||||
v_srccn = v_reinterpret_as_u16(vx_setall_u32(srccn.d));
|
||||
for (; i <= dst_width - VECSZ/2; i += VECSZ/2, dst += VECSZ) // Points that fall left from src image so became equal to leftmost src point
|
||||
{
|
||||
v_store((uint16_t*)dst, v_srccn);
|
||||
}
|
||||
vx_cleanup();
|
||||
#endif
|
||||
for (; i < dst_width; i++)
|
||||
{
|
||||
*(dst++) = srccn[0];
|
||||
*(dst++) = srccn[1];
|
||||
*(dst++) = ((ufixedpoint16*)(srccn.w))[0];
|
||||
*(dst++) = ((ufixedpoint16*)(srccn.w))[1];
|
||||
}
|
||||
}
|
||||
template <>
|
||||
void hlineResizeCn<uint8_t, ufixedpoint16, 2, true, 4>(uint8_t* src, int, int *ofst, ufixedpoint16* m, ufixedpoint16* dst, int dst_min, int dst_max, int dst_width)
|
||||
{
|
||||
int i = 0;
|
||||
ufixedpoint16 srccn[8] = { src[0], src[1], src[2], src[3], src[0], src[1], src[2], src[3] };
|
||||
v_uint16x8 v_srccn = v_load((uint16_t*)srccn);
|
||||
for (; i < dst_min - 1; i += 2, m += 4, dst += 8) // Points that fall left from src image so became equal to leftmost src point
|
||||
union {
|
||||
uint64_t q;
|
||||
uint16_t w[4];
|
||||
} srccn;
|
||||
((ufixedpoint16*)(srccn.w))[0] = src[0];
|
||||
((ufixedpoint16*)(srccn.w))[1] = src[1];
|
||||
((ufixedpoint16*)(srccn.w))[2] = src[2];
|
||||
((ufixedpoint16*)(srccn.w))[3] = src[3];
|
||||
#if CV_SIMD
|
||||
const int VECSZ = v_uint16::nlanes;
|
||||
v_uint16 v_srccn = v_reinterpret_as_u16(vx_setall_u64(srccn.q));
|
||||
for (; i <= dst_min - VECSZ/4; i += VECSZ/4, m += VECSZ/2, dst += VECSZ) // Points that fall left from src image so became equal to leftmost src point
|
||||
{
|
||||
v_store((uint16_t*)dst, v_srccn);
|
||||
}
|
||||
#endif
|
||||
if (i < dst_min) // Points that fall left from src image so became equal to leftmost src point
|
||||
{
|
||||
*(dst++) = srccn[0];
|
||||
*(dst++) = srccn[1];
|
||||
*(dst++) = srccn[2];
|
||||
*(dst++) = srccn[3];
|
||||
*(dst++) = ((ufixedpoint16*)(srccn.w))[0];
|
||||
*(dst++) = ((ufixedpoint16*)(srccn.w))[1];
|
||||
*(dst++) = ((ufixedpoint16*)(srccn.w))[2];
|
||||
*(dst++) = ((ufixedpoint16*)(srccn.w))[3];
|
||||
i++; m += 2;
|
||||
}
|
||||
for (; i < dst_max - 1 && ofst[i + 1] + 3 <= ofst[dst_width - 1]; i += 2, m += 4, dst += 8)
|
||||
#if CV_SIMD
|
||||
for (; i <= dst_max - VECSZ/2; i += VECSZ/2, m += VECSZ, dst += 2*VECSZ)
|
||||
{
|
||||
v_int16x8 v_src01 = v_reinterpret_as_s16(v_load_expand(src + 4 * ofst[i ]));
|
||||
v_int16x8 v_src23 = v_reinterpret_as_s16(v_load_expand(src + 4 * ofst[i + 1]));
|
||||
v_uint16 v_src0, v_src1, v_src2, v_src3;
|
||||
v_load_indexed4(src, ofst + i, v_src0, v_src1);
|
||||
v_load_indexed4(src, ofst + i + VECSZ/4, v_src2, v_src3);
|
||||
|
||||
v_int16x8 v_tmp0, v_tmp1;
|
||||
v_recombine(v_src01, v_src23, v_tmp0, v_tmp1);
|
||||
v_zip(v_tmp0, v_tmp1, v_src01, v_src23);
|
||||
v_uint32 v_mul0, v_mul1, v_mul2, v_mul3, v_tmp;
|
||||
v_mul0 = vx_load((uint32_t*)m);//AaBbCcDd
|
||||
v_zip(v_mul0, v_mul0, v_mul3, v_tmp );//AaAaBbBb CcCcDdDd
|
||||
v_zip(v_mul3, v_mul3, v_mul0, v_mul1);//AaAaAaAa BbBbBbBb
|
||||
v_zip(v_tmp , v_tmp , v_mul2, v_mul3);//CcCcCcCc DdDdDdDd
|
||||
|
||||
v_int16x8 v_mul01 = v_reinterpret_as_s16(v_setall_u32(((uint32_t*)m)[0]));//AaAaAaAa
|
||||
v_int16x8 v_mul23 = v_reinterpret_as_s16(v_setall_u32(((uint32_t*)m)[1]));//BbBbBbBb
|
||||
v_uint32x4 v_res0 = v_reinterpret_as_u32(v_dotprod(v_src01, v_mul01));
|
||||
v_uint32x4 v_res1 = v_reinterpret_as_u32(v_dotprod(v_src23, v_mul23));
|
||||
v_store((uint16_t*)dst, v_pack(v_res0, v_res1));//AB1AB2CD1CD2
|
||||
v_uint32 v_res0 = v_reinterpret_as_u32(v_dotprod(v_reinterpret_as_s16(v_src0), v_reinterpret_as_s16(v_mul0)));
|
||||
v_uint32 v_res1 = v_reinterpret_as_u32(v_dotprod(v_reinterpret_as_s16(v_src1), v_reinterpret_as_s16(v_mul1)));
|
||||
v_uint32 v_res2 = v_reinterpret_as_u32(v_dotprod(v_reinterpret_as_s16(v_src2), v_reinterpret_as_s16(v_mul2)));
|
||||
v_uint32 v_res3 = v_reinterpret_as_u32(v_dotprod(v_reinterpret_as_s16(v_src3), v_reinterpret_as_s16(v_mul3)));
|
||||
v_store((uint16_t*)dst , v_pack(v_res0, v_res1));
|
||||
v_store((uint16_t*)dst + VECSZ, v_pack(v_res2, v_res3));
|
||||
}
|
||||
#endif
|
||||
for (; i < dst_max; i += 1, m += 2)
|
||||
{
|
||||
uint8_t* px = src + 4 * ofst[i];
|
||||
@ -483,19 +653,22 @@ void hlineResizeCn<uint8_t, ufixedpoint16, 2, true, 4>(uint8_t* src, int, int *o
|
||||
*(dst++) = m[0] * px[2] + m[1] * px[6];
|
||||
*(dst++) = m[0] * px[3] + m[1] * px[7];
|
||||
}
|
||||
srccn[0] = (src + 4 * ofst[dst_width - 1])[0]; srccn[1] = (src + 4 * ofst[dst_width - 1])[1]; srccn[2] = (src + 4 * ofst[dst_width - 1])[2]; srccn[3] = (src + 4 * ofst[dst_width - 1])[3];
|
||||
srccn[4] = (src + 4 * ofst[dst_width - 1])[0]; srccn[5] = (src + 4 * ofst[dst_width - 1])[1]; srccn[6] = (src + 4 * ofst[dst_width - 1])[2]; srccn[7] = (src + 4 * ofst[dst_width - 1])[3];
|
||||
v_srccn = v_load((uint16_t*)srccn);
|
||||
for (; i < dst_width - 1; i += 2, dst += 8) // Points that fall right from src image so became equal to rightmost src point
|
||||
((ufixedpoint16*)(srccn.w))[0] = (src + 4 * ofst[dst_width - 1])[0]; ((ufixedpoint16*)(srccn.w))[1] = (src + 4 * ofst[dst_width - 1])[1];
|
||||
((ufixedpoint16*)(srccn.w))[2] = (src + 4 * ofst[dst_width - 1])[2]; ((ufixedpoint16*)(srccn.w))[3] = (src + 4 * ofst[dst_width - 1])[3];
|
||||
#if CV_SIMD
|
||||
v_srccn = v_reinterpret_as_u16(vx_setall_u64(srccn.q));
|
||||
for (; i <= dst_width - VECSZ/4; i += VECSZ/4, dst += VECSZ) // Points that fall right from src image so became equal to rightmost src point
|
||||
{
|
||||
v_store((uint16_t*)dst, v_srccn);
|
||||
}
|
||||
vx_cleanup();
|
||||
#endif
|
||||
if (i < dst_width)
|
||||
{
|
||||
*(dst++) = srccn[0];
|
||||
*(dst++) = srccn[1];
|
||||
*(dst++) = srccn[2];
|
||||
*(dst++) = srccn[3];
|
||||
*(dst++) = ((ufixedpoint16*)(srccn.w))[0];
|
||||
*(dst++) = ((ufixedpoint16*)(srccn.w))[1];
|
||||
*(dst++) = ((ufixedpoint16*)(srccn.w))[2];
|
||||
*(dst++) = ((ufixedpoint16*)(srccn.w))[3];
|
||||
}
|
||||
}
|
||||
template <>
|
||||
@ -503,40 +676,42 @@ void hlineResizeCn<uint16_t, ufixedpoint32, 2, true, 1>(uint16_t* src, int, int
|
||||
{
|
||||
int i = 0;
|
||||
ufixedpoint32 src_0(src[0]);
|
||||
v_uint32x4 v_src_0 = v_setall_u32(*((uint32_t*)&src_0));
|
||||
for (; i < dst_min - 3; i += 4, m += 8, dst += 4) // Points that fall left from src image so became equal to leftmost src point
|
||||
#if CV_SIMD
|
||||
const int VECSZ = v_uint32::nlanes;
|
||||
v_uint32 v_src_0 = vx_setall_u32(*((uint32_t*)&src_0));
|
||||
for (; i <= dst_min - VECSZ; i += VECSZ, m += 2*VECSZ, dst += VECSZ) // Points that fall left from src image so became equal to leftmost src point
|
||||
{
|
||||
v_store((uint32_t*)dst, v_src_0);
|
||||
}
|
||||
#endif
|
||||
for (; i < dst_min; i++, m += 2)
|
||||
{
|
||||
*(dst++) = src_0;
|
||||
}
|
||||
for (; i < dst_max - 3 && ofst[i + 3] + 8 <= ofst[dst_width - 1]; i += 4, m += 8, dst += 4)
|
||||
#if CV_SIMD
|
||||
for (; i <= dst_max - VECSZ; i += VECSZ, m += 2*VECSZ, dst += VECSZ)
|
||||
{
|
||||
v_uint32x4 v_src0 = v_combine_low(v_load_expand(src + ofst[i]), v_load_expand(src + ofst[i + 1]));
|
||||
v_uint32x4 v_mul0 = v_load((uint32_t*)m);
|
||||
v_uint32x4 v_src1 = v_combine_low(v_load_expand(src + ofst[i + 2]), v_load_expand(src + ofst[i + 3]));
|
||||
v_uint32x4 v_mul1 = v_load((uint32_t*)m + 4);
|
||||
v_uint32x4 v_res0 = v_src0 * v_mul0;//a1a2b1b2
|
||||
v_uint32x4 v_res1 = v_src1 * v_mul1;//c1c2d1d2
|
||||
v_uint32x4 v_tmp0, v_tmp1;
|
||||
v_recombine(v_res0, v_res1, v_tmp0, v_tmp1);//a1a2c1c2 b1b2d1d2
|
||||
v_zip(v_tmp0, v_tmp1, v_res0, v_res1);//a1b1a2b2 c1d1c2d2
|
||||
v_recombine(v_res0, v_res1, v_tmp0, v_tmp1);//a1b1c1d1 a2b2c2d2
|
||||
v_store((uint32_t*)dst, v_tmp0 + v_tmp1);//abcd
|
||||
v_uint32 v_src0, v_src1;
|
||||
v_load_indexed_deinterleave(src, ofst + i, v_src0, v_src1);
|
||||
v_uint32 v_mul0, v_mul1;
|
||||
v_load_deinterleave((uint32_t*)m, v_mul0, v_mul1);
|
||||
v_store((uint32_t*)dst, v_src0 * v_mul0 + v_src1 * v_mul1);//abcd
|
||||
}
|
||||
#endif
|
||||
for (; i < dst_max; i += 1, m += 2)
|
||||
{
|
||||
uint16_t* px = src + ofst[i];
|
||||
*(dst++) = m[0] * px[0] + m[1] * px[1];
|
||||
}
|
||||
src_0 = (src + ofst[dst_width - 1])[0];
|
||||
v_src_0 = v_setall_u32(*((uint32_t*)&src_0));
|
||||
for (; i < dst_width - 3; i += 4, dst += 4)
|
||||
#if CV_SIMD
|
||||
v_src_0 = vx_setall_u32(*((uint32_t*)&src_0));
|
||||
for (; i <= dst_width - VECSZ; i += VECSZ, dst += VECSZ)
|
||||
{
|
||||
v_store((uint32_t*)dst, v_src_0);
|
||||
}
|
||||
vx_cleanup();
|
||||
#endif
|
||||
for (; i < dst_width; i++)
|
||||
{
|
||||
*(dst++) = src_0;
|
||||
@ -552,18 +727,22 @@ void vlineSet(FT* src, ET* dst, int dst_width)
|
||||
template <>
|
||||
void vlineSet<uint8_t, ufixedpoint16>(ufixedpoint16* src, uint8_t* dst, int dst_width)
|
||||
{
|
||||
static const v_uint16x8 v_fixedRound = v_setall_u16((uint16_t)((1U << 8) >> 1));
|
||||
int i = 0;
|
||||
for (; i < dst_width - 15; i += 16, src += 16, dst += 16)
|
||||
#if CV_SIMD
|
||||
const int VECSZ = v_uint8::nlanes;
|
||||
static const v_uint16 v_fixedRound = vx_setall_u16((uint16_t)((1U << 8) >> 1));
|
||||
for (; i <= dst_width - VECSZ; i += VECSZ, src += VECSZ, dst += VECSZ)
|
||||
{
|
||||
v_uint16x8 v_src0 = v_load((uint16_t*)src);
|
||||
v_uint16x8 v_src1 = v_load((uint16_t*)src + 8);
|
||||
v_uint16 v_src0 = vx_load((uint16_t*)src);
|
||||
v_uint16 v_src1 = vx_load((uint16_t*)src + VECSZ/2);
|
||||
|
||||
v_uint16x8 v_res0 = (v_src0 + v_fixedRound) >> 8;
|
||||
v_uint16x8 v_res1 = (v_src1 + v_fixedRound) >> 8;
|
||||
v_uint16 v_res0 = (v_src0 + v_fixedRound) >> 8;
|
||||
v_uint16 v_res1 = (v_src1 + v_fixedRound) >> 8;
|
||||
|
||||
v_store(dst, v_pack(v_res0, v_res1));
|
||||
}
|
||||
vx_cleanup();
|
||||
#endif
|
||||
for (; i < dst_width; i++)
|
||||
*(dst++) = *(src++);
|
||||
}
|
||||
@ -582,36 +761,40 @@ void vlineResize(FT* src, size_t src_step, FT* m, ET* dst, int dst_width)
|
||||
template <>
|
||||
void vlineResize<uint8_t, ufixedpoint16, 2>(ufixedpoint16* src, size_t src_step, ufixedpoint16* m, uint8_t* dst, int dst_width)
|
||||
{
|
||||
static const v_int32x4 v_fixedRound = v_setall_s32((int32_t)((1 << 16) >> 1));
|
||||
static const v_int16x8 v_128 = v_reinterpret_as_s16(v_setall_u16((uint16_t)1<<15));
|
||||
static const v_int8x16 v_128_16 = v_reinterpret_as_s8 (v_setall_u8 ((uint8_t) 1<<7));
|
||||
|
||||
int i = 0;
|
||||
ufixedpoint16* src1 = src + src_step;
|
||||
v_int16x8 v_mul = v_reinterpret_as_s16(v_setall_u32(((uint32_t*)m)[0]));
|
||||
for (; i < dst_width - 15; i += 16, src += 16, src1 += 16, dst += 16)
|
||||
#if CV_SIMD
|
||||
const int VECSZ = v_uint8::nlanes;
|
||||
static const v_int32 v_fixedRound = vx_setall_s32((int32_t)((1 << 16) >> 1));
|
||||
static const v_int16 v_128 = v_reinterpret_as_s16(vx_setall_u16((uint16_t)1<<15));
|
||||
static const v_int8 v_128_16 = v_reinterpret_as_s8 (vx_setall_u8 ((uint8_t) 1<<7));
|
||||
|
||||
v_int16 v_mul = v_reinterpret_as_s16(vx_setall_u32(((uint32_t*)m)[0]));
|
||||
for (; i <= dst_width - VECSZ; i += VECSZ, src += VECSZ, src1 += VECSZ, dst += VECSZ)
|
||||
{
|
||||
v_int16x8 v_src00 = v_load((int16_t*)src);
|
||||
v_int16x8 v_src10 = v_load((int16_t*)src1);
|
||||
v_int16x8 v_tmp0, v_tmp1;
|
||||
v_int16 v_src00 = vx_load((int16_t*)src);
|
||||
v_int16 v_src10 = vx_load((int16_t*)src1);
|
||||
v_int16 v_tmp0, v_tmp1;
|
||||
v_zip(v_add_wrap(v_src00,v_128), v_add_wrap(v_src10,v_128), v_tmp0, v_tmp1);
|
||||
|
||||
v_int32x4 v_res0 = v_dotprod(v_tmp0, v_mul);
|
||||
v_int32x4 v_res1 = v_dotprod(v_tmp1, v_mul);
|
||||
v_int32 v_res0 = v_dotprod(v_tmp0, v_mul);
|
||||
v_int32 v_res1 = v_dotprod(v_tmp1, v_mul);
|
||||
|
||||
v_int16x8 v_src01 = v_load((int16_t*)src + 8);
|
||||
v_int16x8 v_src11 = v_load((int16_t*)src1 + 8);
|
||||
v_int16 v_src01 = vx_load((int16_t*)src + VECSZ/2);
|
||||
v_int16 v_src11 = vx_load((int16_t*)src1 + VECSZ/2);
|
||||
v_zip(v_add_wrap(v_src01,v_128), v_add_wrap(v_src11,v_128), v_tmp0, v_tmp1);
|
||||
v_int32x4 v_res2 = v_dotprod(v_tmp0, v_mul);
|
||||
v_int32x4 v_res3 = v_dotprod(v_tmp1, v_mul);
|
||||
v_int32 v_res2 = v_dotprod(v_tmp0, v_mul);
|
||||
v_int32 v_res3 = v_dotprod(v_tmp1, v_mul);
|
||||
|
||||
v_int8x16 v_res = v_pack(v_pack((v_res0 + v_fixedRound) >> 16,
|
||||
(v_res1 + v_fixedRound) >> 16),
|
||||
v_pack((v_res2 + v_fixedRound) >> 16,
|
||||
(v_res3 + v_fixedRound) >> 16));
|
||||
v_int8 v_res = v_pack(v_pack((v_res0 + v_fixedRound) >> 16,
|
||||
(v_res1 + v_fixedRound) >> 16),
|
||||
v_pack((v_res2 + v_fixedRound) >> 16,
|
||||
(v_res3 + v_fixedRound) >> 16));
|
||||
|
||||
v_store(dst, v_reinterpret_as_u8(v_sub_wrap(v_res, v_128_16)));
|
||||
}
|
||||
vx_cleanup();
|
||||
#endif
|
||||
for (; i < dst_width; i++)
|
||||
{
|
||||
*(dst++) = (uint8_t)(*(src++) * m[0] + *(src1++) * m[1]);
|
||||
|
@ -407,27 +407,25 @@ void cv::pyrMeanShiftFiltering( InputArray _src, OutputArray _dst,
|
||||
cv::Size size = src.size();
|
||||
const uchar* sptr = src.ptr();
|
||||
int sstep = (int)src.step;
|
||||
uchar* mask = 0;
|
||||
int mstep = 0;
|
||||
uchar* dptr;
|
||||
int dstep;
|
||||
float sp = (float)(sp0 / (1 << level));
|
||||
sp = MAX( sp, 1 );
|
||||
|
||||
cv::Mat m;
|
||||
if( level < max_level )
|
||||
{
|
||||
cv::Size size1 = dst_pyramid[level+1].size();
|
||||
cv::Mat m( size.height, size.width, CV_8UC1, mask0.ptr() );
|
||||
m = cv::Mat(size.height, size.width, CV_8UC1, mask0.ptr());
|
||||
dstep = (int)dst_pyramid[level+1].step;
|
||||
dptr = dst_pyramid[level+1].ptr() + dstep + cn;
|
||||
mstep = (int)m.step;
|
||||
mask = m.ptr() + mstep;
|
||||
//cvResize( dst_pyramid[level+1], dst_pyramid[level], CV_INTER_CUBIC );
|
||||
cv::pyrUp( dst_pyramid[level+1], dst_pyramid[level], dst_pyramid[level].size() );
|
||||
m.setTo(cv::Scalar::all(0));
|
||||
|
||||
for( i = 1; i < size1.height-1; i++, dptr += dstep - (size1.width-2)*3, mask += mstep*2 )
|
||||
for( i = 1; i < size1.height-1; i++, dptr += dstep - (size1.width-2)*3)
|
||||
{
|
||||
uchar* mask = m.ptr(1 + i * 2);
|
||||
for( j = 1; j < size1.width-1; j++, dptr += cn )
|
||||
{
|
||||
int c0 = dptr[0], c1 = dptr[1], c2 = dptr[2];
|
||||
@ -437,16 +435,16 @@ void cv::pyrMeanShiftFiltering( InputArray _src, OutputArray _dst,
|
||||
}
|
||||
|
||||
cv::dilate( m, m, cv::Mat() );
|
||||
mask = m.ptr();
|
||||
}
|
||||
|
||||
dptr = dst_pyramid[level].ptr();
|
||||
dstep = (int)dst_pyramid[level].step;
|
||||
|
||||
for( i = 0; i < size.height; i++, sptr += sstep - size.width*3,
|
||||
dptr += dstep - size.width*3,
|
||||
mask += mstep )
|
||||
dptr += dstep - size.width*3
|
||||
)
|
||||
{
|
||||
uchar* mask = m.empty() ? NULL : m.ptr(i);
|
||||
for( j = 0; j < size.width; j++, sptr += 3, dptr += 3 )
|
||||
{
|
||||
int x0 = j, y0 = i, x1, y1, iter;
|
||||
|
@ -1820,22 +1820,13 @@ template <>
|
||||
void hlineSmooth1N<uint8_t, ufixedpoint16>(const uint8_t* src, int cn, const ufixedpoint16* m, int, ufixedpoint16* dst, int len, int)
|
||||
{
|
||||
int lencn = len*cn;
|
||||
v_uint16x8 v_mul = v_setall_u16(*((uint16_t*)m));
|
||||
int i = 0;
|
||||
for (; i <= lencn - 16; i += 16)
|
||||
{
|
||||
v_uint8x16 v_src = v_load(src + i);
|
||||
v_uint16x8 v_tmp0, v_tmp1;
|
||||
v_expand(v_src, v_tmp0, v_tmp1);
|
||||
v_store((uint16_t*)dst + i, v_mul*v_tmp0);
|
||||
v_store((uint16_t*)dst + i + 8, v_mul*v_tmp1);
|
||||
}
|
||||
if (i <= lencn - 8)
|
||||
{
|
||||
v_uint16x8 v_src = v_load_expand(src + i);
|
||||
v_store((uint16_t*)dst + i, v_mul*v_src);
|
||||
i += 8;
|
||||
}
|
||||
#if CV_SIMD
|
||||
const int VECSZ = v_uint16::nlanes;
|
||||
v_uint16 v_mul = vx_setall_u16(*((uint16_t*)m));
|
||||
for (; i <= lencn - VECSZ; i += VECSZ)
|
||||
v_store((uint16_t*)dst + i, v_mul*vx_load_expand(src + i));
|
||||
#endif
|
||||
for (; i < lencn; i++)
|
||||
dst[i] = m[0] * src[i];
|
||||
}
|
||||
@ -1850,20 +1841,11 @@ void hlineSmooth1N1<uint8_t, ufixedpoint16>(const uint8_t* src, int cn, const uf
|
||||
{
|
||||
int lencn = len*cn;
|
||||
int i = 0;
|
||||
for (; i <= lencn - 16; i += 16)
|
||||
{
|
||||
v_uint8x16 v_src = v_load(src + i);
|
||||
v_uint16x8 v_tmp0, v_tmp1;
|
||||
v_expand(v_src, v_tmp0, v_tmp1);
|
||||
v_store((uint16_t*)dst + i, v_shl<8>(v_tmp0));
|
||||
v_store((uint16_t*)dst + i + 8, v_shl<8>(v_tmp1));
|
||||
}
|
||||
if (i <= lencn - 8)
|
||||
{
|
||||
v_uint16x8 v_src = v_load_expand(src + i);
|
||||
v_store((uint16_t*)dst + i, v_shl<8>(v_src));
|
||||
i += 8;
|
||||
}
|
||||
#if CV_SIMD
|
||||
const int VECSZ = v_uint16::nlanes;
|
||||
for (; i <= lencn - VECSZ; i += VECSZ)
|
||||
v_store((uint16_t*)dst + i, v_shl<8>(vx_load_expand(src + i)));
|
||||
#endif
|
||||
for (; i < lencn; i++)
|
||||
dst[i] = src[i];
|
||||
}
|
||||
@ -1926,18 +1908,15 @@ void hlineSmooth3N<uint8_t, ufixedpoint16>(const uint8_t* src, int cn, const ufi
|
||||
|
||||
src += cn; dst += cn;
|
||||
int i = cn, lencn = (len - 1)*cn;
|
||||
v_uint16x8 v_mul0 = v_setall_u16(*((uint16_t*)m));
|
||||
v_uint16x8 v_mul1 = v_setall_u16(*((uint16_t*)(m + 1)));
|
||||
v_uint16x8 v_mul2 = v_setall_u16(*((uint16_t*)(m + 2)));
|
||||
for (; i <= lencn - 16; i += 16, src += 16, dst += 16)
|
||||
{
|
||||
v_uint16x8 v_src00, v_src01, v_src10, v_src11, v_src20, v_src21;
|
||||
v_expand(v_load(src - cn), v_src00, v_src01);
|
||||
v_expand(v_load(src), v_src10, v_src11);
|
||||
v_expand(v_load(src + cn), v_src20, v_src21);
|
||||
v_store((uint16_t*)dst, v_src00 * v_mul0 + v_src10 * v_mul1 + v_src20 * v_mul2);
|
||||
v_store((uint16_t*)dst + 8, v_src01 * v_mul0 + v_src11 * v_mul1 + v_src21 * v_mul2);
|
||||
}
|
||||
#if CV_SIMD
|
||||
const uint16_t* _m = (const uint16_t*)m;
|
||||
const int VECSZ = v_uint16::nlanes;
|
||||
v_uint16 v_mul0 = vx_setall_u16(_m[0]);
|
||||
v_uint16 v_mul1 = vx_setall_u16(_m[1]);
|
||||
v_uint16 v_mul2 = vx_setall_u16(_m[2]);
|
||||
for (; i <= lencn - VECSZ; i += VECSZ, src += VECSZ, dst += VECSZ)
|
||||
v_store((uint16_t*)dst, vx_load_expand(src - cn) * v_mul0 + vx_load_expand(src) * v_mul1 + vx_load_expand(src + cn) * v_mul2);
|
||||
#endif
|
||||
for (; i < lencn; i++, src++, dst++)
|
||||
*dst = m[0] * src[-cn] + m[1] * src[0] + m[2] * src[cn];
|
||||
|
||||
@ -2017,15 +1996,11 @@ void hlineSmooth3N121<uint8_t, ufixedpoint16>(const uint8_t* src, int cn, const
|
||||
|
||||
src += cn; dst += cn;
|
||||
int i = cn, lencn = (len - 1)*cn;
|
||||
for (; i <= lencn - 16; i += 16, src += 16, dst += 16)
|
||||
{
|
||||
v_uint16x8 v_src00, v_src01, v_src10, v_src11, v_src20, v_src21;
|
||||
v_expand(v_load(src - cn), v_src00, v_src01);
|
||||
v_expand(v_load(src), v_src10, v_src11);
|
||||
v_expand(v_load(src + cn), v_src20, v_src21);
|
||||
v_store((uint16_t*)dst, (v_src00 + v_src20 + (v_src10 << 1)) << 6);
|
||||
v_store((uint16_t*)dst + 8, (v_src01 + v_src21 + (v_src11 << 1)) << 6);
|
||||
}
|
||||
#if CV_SIMD
|
||||
const int VECSZ = v_uint16::nlanes;
|
||||
for (; i <= lencn - VECSZ; i += VECSZ, src += VECSZ, dst += VECSZ)
|
||||
v_store((uint16_t*)dst, (vx_load_expand(src - cn) + vx_load_expand(src + cn) + (vx_load_expand(src) << 1)) << 6);
|
||||
#endif
|
||||
for (; i < lencn; i++, src++, dst++)
|
||||
*((uint16_t*)dst) = (uint16_t(src[-cn]) + uint16_t(src[cn]) + (uint16_t(src[0]) << 1)) << 6;
|
||||
|
||||
@ -2108,17 +2083,14 @@ void hlineSmooth3Naba<uint8_t, ufixedpoint16>(const uint8_t* src, int cn, const
|
||||
|
||||
src += cn; dst += cn;
|
||||
int i = cn, lencn = (len - 1)*cn;
|
||||
v_uint16x8 v_mul0 = v_setall_u16(*((uint16_t*)m));
|
||||
v_uint16x8 v_mul1 = v_setall_u16(*((uint16_t*)m+1));
|
||||
for (; i <= lencn - 16; i += 16, src += 16, dst += 16)
|
||||
{
|
||||
v_uint16x8 v_src00, v_src01, v_src10, v_src11, v_src20, v_src21;
|
||||
v_expand(v_load(src - cn), v_src00, v_src01);
|
||||
v_expand(v_load(src), v_src10, v_src11);
|
||||
v_expand(v_load(src + cn), v_src20, v_src21);
|
||||
v_store((uint16_t*)dst, (v_src00 + v_src20) * v_mul0 + v_src10 * v_mul1);
|
||||
v_store((uint16_t*)dst + 8, (v_src01 + v_src21) * v_mul0 + v_src11 * v_mul1);
|
||||
}
|
||||
#if CV_SIMD
|
||||
const uint16_t* _m = (const uint16_t*)m;
|
||||
const int VECSZ = v_uint16::nlanes;
|
||||
v_uint16 v_mul0 = vx_setall_u16(_m[0]);
|
||||
v_uint16 v_mul1 = vx_setall_u16(_m[1]);
|
||||
for (; i <= lencn - VECSZ; i += VECSZ, src += VECSZ, dst += VECSZ)
|
||||
v_store((uint16_t*)dst, (vx_load_expand(src - cn) + vx_load_expand(src + cn)) * v_mul0 + vx_load_expand(src) * v_mul1);
|
||||
#endif
|
||||
for (; i < lencn; i++, src++, dst++)
|
||||
*((uint16_t*)dst) = ((uint16_t*)m)[1] * src[0] + ((uint16_t*)m)[0] * ((uint16_t)(src[-cn]) + (uint16_t)(src[cn]));
|
||||
|
||||
@ -2304,22 +2276,17 @@ void hlineSmooth5N<uint8_t, ufixedpoint16>(const uint8_t* src, int cn, const ufi
|
||||
|
||||
src += 2 * cn; dst += 2 * cn;
|
||||
int i = 2*cn, lencn = (len - 2)*cn;
|
||||
v_uint16x8 v_mul0 = v_setall_u16(*((uint16_t*)m));
|
||||
v_uint16x8 v_mul1 = v_setall_u16(*((uint16_t*)(m + 1)));
|
||||
v_uint16x8 v_mul2 = v_setall_u16(*((uint16_t*)(m + 2)));
|
||||
v_uint16x8 v_mul3 = v_setall_u16(*((uint16_t*)(m + 3)));
|
||||
v_uint16x8 v_mul4 = v_setall_u16(*((uint16_t*)(m + 4)));
|
||||
for (; i <= lencn - 16; i += 16, src += 16, dst += 16)
|
||||
{
|
||||
v_uint16x8 v_src00, v_src01, v_src10, v_src11, v_src20, v_src21, v_src30, v_src31, v_src40, v_src41;
|
||||
v_expand(v_load(src - 2*cn), v_src00, v_src01);
|
||||
v_expand(v_load(src - cn), v_src10, v_src11);
|
||||
v_expand(v_load(src), v_src20, v_src21);
|
||||
v_expand(v_load(src + cn), v_src30, v_src31);
|
||||
v_expand(v_load(src + 2*cn), v_src40, v_src41);
|
||||
v_store((uint16_t*)dst, v_src00 * v_mul0 + v_src10 * v_mul1 + v_src20 * v_mul2 + v_src30 * v_mul3 + v_src40 * v_mul4);
|
||||
v_store((uint16_t*)dst + 8, v_src01 * v_mul0 + v_src11 * v_mul1 + v_src21 * v_mul2 + v_src31 * v_mul3 + v_src41 * v_mul4);
|
||||
}
|
||||
#if CV_SIMD
|
||||
const uint16_t* _m = (const uint16_t*)m;
|
||||
const int VECSZ = v_uint16::nlanes;
|
||||
v_uint16 v_mul0 = vx_setall_u16(_m[0]);
|
||||
v_uint16 v_mul1 = vx_setall_u16(_m[1]);
|
||||
v_uint16 v_mul2 = vx_setall_u16(_m[2]);
|
||||
v_uint16 v_mul3 = vx_setall_u16(_m[3]);
|
||||
v_uint16 v_mul4 = vx_setall_u16(_m[4]);
|
||||
for (; i <= lencn - VECSZ; i += VECSZ, src += VECSZ, dst += VECSZ)
|
||||
v_store((uint16_t*)dst, vx_load_expand(src - 2 * cn) * v_mul0 + vx_load_expand(src - cn) * v_mul1 + vx_load_expand(src) * v_mul2 + vx_load_expand(src + cn) * v_mul3 + vx_load_expand(src + 2 * cn) * v_mul4);
|
||||
#endif
|
||||
for (; i < lencn; i++, src++, dst++)
|
||||
*dst = m[0] * src[-2*cn] + m[1] * src[-cn] + m[2] * src[0] + m[3] * src[cn] + m[4] * src[2*cn];
|
||||
|
||||
@ -2517,18 +2484,12 @@ void hlineSmooth5N14641<uint8_t, ufixedpoint16>(const uint8_t* src, int cn, cons
|
||||
|
||||
src += 2 * cn; dst += 2 * cn;
|
||||
int i = 2 * cn, lencn = (len - 2)*cn;
|
||||
v_uint16x8 v_6 = v_setall_u16(6);
|
||||
for (; i <= lencn - 16; i += 16, src += 16, dst += 16)
|
||||
{
|
||||
v_uint16x8 v_src00, v_src01, v_src10, v_src11, v_src20, v_src21, v_src30, v_src31, v_src40, v_src41;
|
||||
v_expand(v_load(src - 2*cn), v_src00, v_src01);
|
||||
v_expand(v_load(src - cn), v_src10, v_src11);
|
||||
v_expand(v_load(src), v_src20, v_src21);
|
||||
v_expand(v_load(src + cn), v_src30, v_src31);
|
||||
v_expand(v_load(src + 2*cn), v_src40, v_src41);
|
||||
v_store((uint16_t*)dst, (v_src20 * v_6 + ((v_src10 + v_src30) << 2) + v_src00 + v_src40) << 4);
|
||||
v_store((uint16_t*)dst + 8, (v_src21 * v_6 + ((v_src11 + v_src31) << 2) + v_src01 + v_src41) << 4);
|
||||
}
|
||||
#if CV_SIMD
|
||||
const int VECSZ = v_uint16::nlanes;
|
||||
v_uint16 v_6 = vx_setall_u16(6);
|
||||
for (; i <= lencn - VECSZ; i += VECSZ, src += VECSZ, dst += VECSZ)
|
||||
v_store((uint16_t*)dst, (vx_load_expand(src) * v_6 + ((vx_load_expand(src - cn) + vx_load_expand(src + cn)) << 2) + vx_load_expand(src - 2 * cn) + vx_load_expand(src + 2 * cn)) << 4);
|
||||
#endif
|
||||
for (; i < lencn; i++, src++, dst++)
|
||||
*((uint16_t*)dst) = (uint16_t(src[0]) * 6 + ((uint16_t(src[-cn]) + uint16_t(src[cn])) << 2) + uint16_t(src[-2 * cn]) + uint16_t(src[2 * cn])) << 4;
|
||||
|
||||
@ -2721,20 +2682,15 @@ void hlineSmooth5Nabcba<uint8_t, ufixedpoint16>(const uint8_t* src, int cn, cons
|
||||
|
||||
src += 2 * cn; dst += 2 * cn;
|
||||
int i = 2 * cn, lencn = (len - 2)*cn;
|
||||
v_uint16x8 v_mul0 = v_setall_u16(*((uint16_t*)m));
|
||||
v_uint16x8 v_mul1 = v_setall_u16(*((uint16_t*)(m + 1)));
|
||||
v_uint16x8 v_mul2 = v_setall_u16(*((uint16_t*)(m + 2)));
|
||||
for (; i <= lencn - 16; i += 16, src += 16, dst += 16)
|
||||
{
|
||||
v_uint16x8 v_src00, v_src01, v_src10, v_src11, v_src20, v_src21, v_src30, v_src31, v_src40, v_src41;
|
||||
v_expand(v_load(src - 2 * cn), v_src00, v_src01);
|
||||
v_expand(v_load(src - cn), v_src10, v_src11);
|
||||
v_expand(v_load(src), v_src20, v_src21);
|
||||
v_expand(v_load(src + cn), v_src30, v_src31);
|
||||
v_expand(v_load(src + 2 * cn), v_src40, v_src41);
|
||||
v_store((uint16_t*)dst, (v_src00 + v_src40) * v_mul0 + (v_src10 + v_src30)* v_mul1 + v_src20 * v_mul2);
|
||||
v_store((uint16_t*)dst + 8, (v_src01 + v_src41) * v_mul0 + (v_src11 + v_src31) * v_mul1 + v_src21 * v_mul2);
|
||||
}
|
||||
#if CV_SIMD
|
||||
const uint16_t* _m = (const uint16_t*)m;
|
||||
const int VECSZ = v_uint16::nlanes;
|
||||
v_uint16 v_mul0 = vx_setall_u16(_m[0]);
|
||||
v_uint16 v_mul1 = vx_setall_u16(_m[1]);
|
||||
v_uint16 v_mul2 = vx_setall_u16(_m[2]);
|
||||
for (; i <= lencn - VECSZ; i += VECSZ, src += VECSZ, dst += VECSZ)
|
||||
v_store((uint16_t*)dst, (vx_load_expand(src - 2 * cn) + vx_load_expand(src + 2 * cn)) * v_mul0 + (vx_load_expand(src - cn) + vx_load_expand(src + cn))* v_mul1 + vx_load_expand(src) * v_mul2);
|
||||
#endif
|
||||
for (; i < lencn; i++, src++, dst++)
|
||||
*((uint16_t*)dst) = ((uint16_t*)m)[0] * ((uint16_t)(src[-2 * cn]) + (uint16_t)(src[2 * cn])) + ((uint16_t*)m)[1] * ((uint16_t)(src[-cn]) + (uint16_t)(src[cn])) + ((uint16_t*)m)[2] * src[0];
|
||||
|
||||
@ -2844,23 +2800,16 @@ void hlineSmooth<uint8_t, ufixedpoint16>(const uint8_t* src, int cn, const ufixe
|
||||
}
|
||||
i *= cn;
|
||||
int lencn = (len - post_shift + 1)*cn;
|
||||
for (; i <= lencn - 16; i+=16, src+=16, dst+=16)
|
||||
#if CV_SIMD
|
||||
const int VECSZ = v_uint16::nlanes;
|
||||
for (; i <= lencn - VECSZ; i+=VECSZ, src+=VECSZ, dst+=VECSZ)
|
||||
{
|
||||
v_uint16x8 v_src0, v_src1;
|
||||
v_uint16x8 v_mul = v_setall_u16(*((uint16_t*)m));
|
||||
v_expand(v_load(src), v_src0, v_src1);
|
||||
v_uint16x8 v_res0 = v_src0 * v_mul;
|
||||
v_uint16x8 v_res1 = v_src1 * v_mul;
|
||||
v_uint16 v_res0 = vx_load_expand(src) * vx_setall_u16(*((uint16_t*)m));
|
||||
for (int j = 1; j < n; j++)
|
||||
{
|
||||
v_mul = v_setall_u16(*((uint16_t*)(m + j)));
|
||||
v_expand(v_load(src + j * cn), v_src0, v_src1);
|
||||
v_res0 += v_src0 * v_mul;
|
||||
v_res1 += v_src1 * v_mul;
|
||||
}
|
||||
v_res0 += vx_load_expand(src + j * cn) * vx_setall_u16(*((uint16_t*)(m + j)));
|
||||
v_store((uint16_t*)dst, v_res0);
|
||||
v_store((uint16_t*)dst+8, v_res1);
|
||||
}
|
||||
#endif
|
||||
for (; i < lencn; i++, src++, dst++)
|
||||
{
|
||||
*dst = m[0] * src[0];
|
||||
@ -2970,26 +2919,16 @@ void hlineSmoothONa_yzy_a<uint8_t, ufixedpoint16>(const uint8_t* src, int cn, co
|
||||
}
|
||||
i *= cn;
|
||||
int lencn = (len - post_shift + 1)*cn;
|
||||
for (; i <= lencn - 16; i += 16, src += 16, dst += 16)
|
||||
#if CV_SIMD
|
||||
const int VECSZ = v_uint16::nlanes;
|
||||
for (; i <= lencn - VECSZ; i += VECSZ, src += VECSZ, dst += VECSZ)
|
||||
{
|
||||
v_uint16x8 v_src00, v_src01, v_srcN00, v_srcN01;
|
||||
|
||||
v_uint16x8 v_mul = v_setall_u16(*((uint16_t*)(m + pre_shift)));
|
||||
v_expand(v_load(src + pre_shift * cn), v_src00, v_src01);
|
||||
v_uint16x8 v_res0 = v_src00 * v_mul;
|
||||
v_uint16x8 v_res1 = v_src01 * v_mul;
|
||||
v_uint16 v_res0 = vx_load_expand(src + pre_shift * cn) * vx_setall_u16(*((uint16_t*)(m + pre_shift)));
|
||||
for (int j = 0; j < pre_shift; j ++)
|
||||
{
|
||||
v_mul = v_setall_u16(*((uint16_t*)(m + j)));
|
||||
v_expand(v_load(src + j * cn), v_src00, v_src01);
|
||||
v_expand(v_load(src + (n - 1 - j)*cn), v_srcN00, v_srcN01);
|
||||
v_res0 += (v_src00 + v_srcN00) * v_mul;
|
||||
v_res1 += (v_src01 + v_srcN01) * v_mul;
|
||||
}
|
||||
|
||||
v_res0 += (vx_load_expand(src + j * cn) + vx_load_expand(src + (n - 1 - j)*cn)) * vx_setall_u16(*((uint16_t*)(m + j)));
|
||||
v_store((uint16_t*)dst, v_res0);
|
||||
v_store((uint16_t*)dst + 8, v_res1);
|
||||
}
|
||||
#endif
|
||||
for (; i < lencn; i++, src++, dst++)
|
||||
{
|
||||
*dst = m[pre_shift] * src[pre_shift*cn];
|
||||
@ -3025,28 +2964,13 @@ template <>
|
||||
void vlineSmooth1N<uint8_t, ufixedpoint16>(const ufixedpoint16* const * src, const ufixedpoint16* m, int, uint8_t* dst, int len)
|
||||
{
|
||||
const ufixedpoint16* src0 = src[0];
|
||||
v_uint16x8 v_mul = v_setall_u16(*((uint16_t*)m));
|
||||
#if CV_SSE2
|
||||
v_uint16x8 v_1 = v_setall_u16(1);
|
||||
v_mul += v_mul;
|
||||
#endif
|
||||
int i = 0;
|
||||
for (; i <= len - 16; i += 16)
|
||||
{
|
||||
v_uint16x8 v_src0 = v_load((uint16_t*)src0 + i);
|
||||
v_uint16x8 v_src1 = v_load((uint16_t*)src0 + i + 8);
|
||||
v_uint8x16 v_res;
|
||||
#if CV_SSE2
|
||||
v_res.val = _mm_packus_epi16(_mm_srli_epi16(_mm_add_epi16(v_1.val, _mm_mulhi_epu16(v_src0.val, v_mul.val)),1),
|
||||
_mm_srli_epi16(_mm_add_epi16(v_1.val, _mm_mulhi_epu16(v_src1.val, v_mul.val)),1));
|
||||
#else
|
||||
v_uint32x4 v_res0, v_res1, v_res2, v_res3;
|
||||
v_mul_expand(v_src0, v_mul, v_res0, v_res1);
|
||||
v_mul_expand(v_src1, v_mul, v_res2, v_res3);
|
||||
v_res = v_pack(v_rshr_pack<16>(v_res0, v_res1), v_rshr_pack<16>(v_res2, v_res3));
|
||||
#if CV_SIMD
|
||||
const int VECSZ = v_uint16::nlanes;
|
||||
v_uint16 v_mul = vx_setall_u16(*((uint16_t*)m)<<1);
|
||||
for (; i <= len - VECSZ; i += VECSZ)
|
||||
v_rshr_pack_store<1>(dst + i, v_mul_hi(vx_load((uint16_t*)src0 + i), v_mul));
|
||||
#endif
|
||||
v_store(dst + i, v_res);
|
||||
}
|
||||
for (; i < len; i++)
|
||||
dst[i] = m[0] * src0[i];
|
||||
}
|
||||
@ -3062,8 +2986,11 @@ void vlineSmooth1N1<uint8_t, ufixedpoint16>(const ufixedpoint16* const * src, co
|
||||
{
|
||||
const ufixedpoint16* src0 = src[0];
|
||||
int i = 0;
|
||||
for (; i <= len - 8; i += 8)
|
||||
v_rshr_pack_store<8>(dst + i, v_load((uint16_t*)(src0 + i)));
|
||||
#if CV_SIMD
|
||||
const int VECSZ = v_uint16::nlanes;
|
||||
for (; i <= len - VECSZ; i += VECSZ)
|
||||
v_rshr_pack_store<8>(dst + i, vx_load((uint16_t*)(src0 + i)));
|
||||
#endif
|
||||
for (; i < len; i++)
|
||||
dst[i] = src0[i];
|
||||
}
|
||||
@ -3077,46 +3004,51 @@ template <>
|
||||
void vlineSmooth3N<uint8_t, ufixedpoint16>(const ufixedpoint16* const * src, const ufixedpoint16* m, int, uint8_t* dst, int len)
|
||||
{
|
||||
int i = 0;
|
||||
static const v_int16x8 v_128 = v_reinterpret_as_s16(v_setall_u16((uint16_t)1 << 15));
|
||||
v_int32x4 v_128_4 = v_setall_s32(128 << 16);
|
||||
if (len > 7)
|
||||
#if CV_SIMD
|
||||
static const v_int16 v_128 = v_reinterpret_as_s16(vx_setall_u16((uint16_t)1 << 15));
|
||||
v_int32 v_128_4 = vx_setall_s32(128 << 16);
|
||||
const int VECSZ = v_uint16::nlanes;
|
||||
if (len >= VECSZ)
|
||||
{
|
||||
ufixedpoint32 val[] = { (m[0] + m[1] + m[2]) * ufixedpoint16((uint8_t)128) };
|
||||
v_128_4 = v_setall_s32(*((int32_t*)val));
|
||||
v_128_4 = vx_setall_s32(*((int32_t*)val));
|
||||
}
|
||||
v_int16x8 v_mul01 = v_reinterpret_as_s16(v_setall_u32(*((uint32_t*)m)));
|
||||
v_int16x8 v_mul2 = v_reinterpret_as_s16(v_setall_u16(*((uint16_t*)(m + 2))));
|
||||
for (; i <= len - 32; i += 32)
|
||||
v_int16 v_mul01 = v_reinterpret_as_s16(vx_setall_u32(*((uint32_t*)m)));
|
||||
v_int16 v_mul2 = v_reinterpret_as_s16(vx_setall_u16(*((uint16_t*)(m + 2))));
|
||||
for (; i <= len - 4*VECSZ; i += 4*VECSZ)
|
||||
{
|
||||
v_int16x8 v_src00, v_src10, v_src01, v_src11, v_src02, v_src12, v_src03, v_src13;
|
||||
v_int16x8 v_tmp0, v_tmp1;
|
||||
v_int16 v_src00, v_src10, v_src01, v_src11, v_src02, v_src12, v_src03, v_src13;
|
||||
v_int16 v_tmp0, v_tmp1;
|
||||
|
||||
v_src00 = v_load((int16_t*)(src[0]) + i);
|
||||
v_src01 = v_load((int16_t*)(src[0]) + i + 8);
|
||||
v_src02 = v_load((int16_t*)(src[0]) + i + 16);
|
||||
v_src03 = v_load((int16_t*)(src[0]) + i + 24);
|
||||
v_src10 = v_load((int16_t*)(src[1]) + i);
|
||||
v_src11 = v_load((int16_t*)(src[1]) + i + 8);
|
||||
v_src12 = v_load((int16_t*)(src[1]) + i + 16);
|
||||
v_src13 = v_load((int16_t*)(src[1]) + i + 24);
|
||||
const int16_t* src0 = (const int16_t*)src[0] + i;
|
||||
const int16_t* src1 = (const int16_t*)src[1] + i;
|
||||
v_src00 = vx_load(src0);
|
||||
v_src01 = vx_load(src0 + VECSZ);
|
||||
v_src02 = vx_load(src0 + 2*VECSZ);
|
||||
v_src03 = vx_load(src0 + 3*VECSZ);
|
||||
v_src10 = vx_load(src1);
|
||||
v_src11 = vx_load(src1 + VECSZ);
|
||||
v_src12 = vx_load(src1 + 2*VECSZ);
|
||||
v_src13 = vx_load(src1 + 3*VECSZ);
|
||||
v_zip(v_add_wrap(v_src00, v_128), v_add_wrap(v_src10, v_128), v_tmp0, v_tmp1);
|
||||
v_int32x4 v_res0 = v_dotprod(v_tmp0, v_mul01);
|
||||
v_int32x4 v_res1 = v_dotprod(v_tmp1, v_mul01);
|
||||
v_int32 v_res0 = v_dotprod(v_tmp0, v_mul01);
|
||||
v_int32 v_res1 = v_dotprod(v_tmp1, v_mul01);
|
||||
v_zip(v_add_wrap(v_src01, v_128), v_add_wrap(v_src11, v_128), v_tmp0, v_tmp1);
|
||||
v_int32x4 v_res2 = v_dotprod(v_tmp0, v_mul01);
|
||||
v_int32x4 v_res3 = v_dotprod(v_tmp1, v_mul01);
|
||||
v_int32 v_res2 = v_dotprod(v_tmp0, v_mul01);
|
||||
v_int32 v_res3 = v_dotprod(v_tmp1, v_mul01);
|
||||
v_zip(v_add_wrap(v_src02, v_128), v_add_wrap(v_src12, v_128), v_tmp0, v_tmp1);
|
||||
v_int32x4 v_res4 = v_dotprod(v_tmp0, v_mul01);
|
||||
v_int32x4 v_res5 = v_dotprod(v_tmp1, v_mul01);
|
||||
v_int32 v_res4 = v_dotprod(v_tmp0, v_mul01);
|
||||
v_int32 v_res5 = v_dotprod(v_tmp1, v_mul01);
|
||||
v_zip(v_add_wrap(v_src03, v_128), v_add_wrap(v_src13, v_128), v_tmp0, v_tmp1);
|
||||
v_int32x4 v_res6 = v_dotprod(v_tmp0, v_mul01);
|
||||
v_int32x4 v_res7 = v_dotprod(v_tmp1, v_mul01);
|
||||
v_int32 v_res6 = v_dotprod(v_tmp0, v_mul01);
|
||||
v_int32 v_res7 = v_dotprod(v_tmp1, v_mul01);
|
||||
|
||||
v_int32x4 v_resj0, v_resj1;
|
||||
v_src00 = v_load((int16_t*)(src[2]) + i);
|
||||
v_src01 = v_load((int16_t*)(src[2]) + i + 8);
|
||||
v_src02 = v_load((int16_t*)(src[2]) + i + 16);
|
||||
v_src03 = v_load((int16_t*)(src[2]) + i + 24);
|
||||
v_int32 v_resj0, v_resj1;
|
||||
const int16_t* src2 = (const int16_t*)src[2] + i;
|
||||
v_src00 = vx_load(src2);
|
||||
v_src01 = vx_load(src2 + VECSZ);
|
||||
v_src02 = vx_load(src2 + 2*VECSZ);
|
||||
v_src03 = vx_load(src2 + 3*VECSZ);
|
||||
v_mul_expand(v_add_wrap(v_src00, v_128), v_mul2, v_resj0, v_resj1);
|
||||
v_res0 += v_resj0;
|
||||
v_res1 += v_resj1;
|
||||
@ -3139,11 +3071,12 @@ void vlineSmooth3N<uint8_t, ufixedpoint16>(const ufixedpoint16* const * src, con
|
||||
v_res6 += v_128_4;
|
||||
v_res7 += v_128_4;
|
||||
|
||||
v_store(dst + i , v_pack(v_reinterpret_as_u16(v_rshr_pack<16>(v_res0, v_res1)),
|
||||
v_reinterpret_as_u16(v_rshr_pack<16>(v_res2, v_res3))));
|
||||
v_store(dst + i + 16, v_pack(v_reinterpret_as_u16(v_rshr_pack<16>(v_res4, v_res5)),
|
||||
v_reinterpret_as_u16(v_rshr_pack<16>(v_res6, v_res7))));
|
||||
v_store(dst + i , v_pack(v_reinterpret_as_u16(v_rshr_pack<16>(v_res0, v_res1)),
|
||||
v_reinterpret_as_u16(v_rshr_pack<16>(v_res2, v_res3))));
|
||||
v_store(dst + i + 2*VECSZ, v_pack(v_reinterpret_as_u16(v_rshr_pack<16>(v_res4, v_res5)),
|
||||
v_reinterpret_as_u16(v_rshr_pack<16>(v_res6, v_res7))));
|
||||
}
|
||||
#endif
|
||||
for (; i < len; i++)
|
||||
dst[i] = m[0] * src[0][i] + m[1] * src[1][i] + m[2] * src[2][i];
|
||||
}
|
||||
@ -3157,18 +3090,21 @@ template <>
|
||||
void vlineSmooth3N121<uint8_t, ufixedpoint16>(const ufixedpoint16* const * src, const ufixedpoint16*, int, uint8_t* dst, int len)
|
||||
{
|
||||
int i = 0;
|
||||
for (; i <= len - 16; i += 16)
|
||||
#if CV_SIMD
|
||||
const int VECSZ = v_uint16::nlanes;
|
||||
for (; i <= len - 2*VECSZ; i += 2*VECSZ)
|
||||
{
|
||||
v_uint32x4 v_src00, v_src01, v_src02, v_src03, v_src10, v_src11, v_src12, v_src13, v_src20, v_src21, v_src22, v_src23;
|
||||
v_expand(v_load((uint16_t*)(src[0]) + i), v_src00, v_src01);
|
||||
v_expand(v_load((uint16_t*)(src[0]) + i + 8), v_src02, v_src03);
|
||||
v_expand(v_load((uint16_t*)(src[1]) + i), v_src10, v_src11);
|
||||
v_expand(v_load((uint16_t*)(src[1]) + i + 8), v_src12, v_src13);
|
||||
v_expand(v_load((uint16_t*)(src[2]) + i), v_src20, v_src21);
|
||||
v_expand(v_load((uint16_t*)(src[2]) + i + 8), v_src22, v_src23);
|
||||
v_uint32 v_src00, v_src01, v_src02, v_src03, v_src10, v_src11, v_src12, v_src13, v_src20, v_src21, v_src22, v_src23;
|
||||
v_expand(vx_load((uint16_t*)(src[0]) + i), v_src00, v_src01);
|
||||
v_expand(vx_load((uint16_t*)(src[0]) + i + VECSZ), v_src02, v_src03);
|
||||
v_expand(vx_load((uint16_t*)(src[1]) + i), v_src10, v_src11);
|
||||
v_expand(vx_load((uint16_t*)(src[1]) + i + VECSZ), v_src12, v_src13);
|
||||
v_expand(vx_load((uint16_t*)(src[2]) + i), v_src20, v_src21);
|
||||
v_expand(vx_load((uint16_t*)(src[2]) + i + VECSZ), v_src22, v_src23);
|
||||
v_store(dst + i, v_pack(v_rshr_pack<10>(v_src00 + v_src20 + (v_src10 + v_src10), v_src01 + v_src21 + (v_src11 + v_src11)),
|
||||
v_rshr_pack<10>(v_src02 + v_src22 + (v_src12 + v_src12), v_src03 + v_src23 + (v_src13 + v_src13))));
|
||||
}
|
||||
#endif
|
||||
for (; i < len; i++)
|
||||
dst[i] = (((uint32_t)(((uint16_t*)(src[0]))[i]) + (uint32_t)(((uint16_t*)(src[2]))[i]) + ((uint32_t)(((uint16_t*)(src[1]))[i]) << 1)) + (1 << 9)) >> 10;
|
||||
}
|
||||
@ -3182,95 +3118,102 @@ template <>
|
||||
void vlineSmooth5N<uint8_t, ufixedpoint16>(const ufixedpoint16* const * src, const ufixedpoint16* m, int, uint8_t* dst, int len)
|
||||
{
|
||||
int i = 0;
|
||||
static const v_int16x8 v_128 = v_reinterpret_as_s16(v_setall_u16((uint16_t)1 << 15));
|
||||
v_int32x4 v_128_4 = v_setall_s32(128 << 16);
|
||||
if (len > 7)
|
||||
#if CV_SIMD
|
||||
const int VECSZ = v_uint16::nlanes;
|
||||
if (len >= 4 * VECSZ)
|
||||
{
|
||||
ufixedpoint32 val[] = { (m[0] + m[1] + m[2] + m[3] + m[4]) * ufixedpoint16((uint8_t)128) };
|
||||
v_128_4 = v_setall_s32(*((int32_t*)val));
|
||||
}
|
||||
v_int16x8 v_mul01 = v_reinterpret_as_s16(v_setall_u32(*((uint32_t*)m)));
|
||||
v_int16x8 v_mul23 = v_reinterpret_as_s16(v_setall_u32(*((uint32_t*)(m + 2))));
|
||||
v_int16x8 v_mul4 = v_reinterpret_as_s16(v_setall_u16(*((uint16_t*)(m + 4))));
|
||||
for (; i <= len - 32; i += 32)
|
||||
{
|
||||
v_int16x8 v_src00, v_src10, v_src01, v_src11, v_src02, v_src12, v_src03, v_src13;
|
||||
v_int16x8 v_tmp0, v_tmp1;
|
||||
|
||||
v_src00 = v_load((int16_t*)(src[0]) + i);
|
||||
v_src01 = v_load((int16_t*)(src[0]) + i + 8);
|
||||
v_src02 = v_load((int16_t*)(src[0]) + i + 16);
|
||||
v_src03 = v_load((int16_t*)(src[0]) + i + 24);
|
||||
v_src10 = v_load((int16_t*)(src[1]) + i);
|
||||
v_src11 = v_load((int16_t*)(src[1]) + i + 8);
|
||||
v_src12 = v_load((int16_t*)(src[1]) + i + 16);
|
||||
v_src13 = v_load((int16_t*)(src[1]) + i + 24);
|
||||
v_zip(v_add_wrap(v_src00, v_128), v_add_wrap(v_src10, v_128), v_tmp0, v_tmp1);
|
||||
v_int32x4 v_res0 = v_dotprod(v_tmp0, v_mul01);
|
||||
v_int32x4 v_res1 = v_dotprod(v_tmp1, v_mul01);
|
||||
v_zip(v_add_wrap(v_src01, v_128), v_add_wrap(v_src11, v_128), v_tmp0, v_tmp1);
|
||||
v_int32x4 v_res2 = v_dotprod(v_tmp0, v_mul01);
|
||||
v_int32x4 v_res3 = v_dotprod(v_tmp1, v_mul01);
|
||||
v_zip(v_add_wrap(v_src02, v_128), v_add_wrap(v_src12, v_128), v_tmp0, v_tmp1);
|
||||
v_int32x4 v_res4 = v_dotprod(v_tmp0, v_mul01);
|
||||
v_int32x4 v_res5 = v_dotprod(v_tmp1, v_mul01);
|
||||
v_zip(v_add_wrap(v_src03, v_128), v_add_wrap(v_src13, v_128), v_tmp0, v_tmp1);
|
||||
v_int32x4 v_res6 = v_dotprod(v_tmp0, v_mul01);
|
||||
v_int32x4 v_res7 = v_dotprod(v_tmp1, v_mul01);
|
||||
|
||||
v_src00 = v_load((int16_t*)(src[2]) + i);
|
||||
v_src01 = v_load((int16_t*)(src[2]) + i + 8);
|
||||
v_src02 = v_load((int16_t*)(src[2]) + i + 16);
|
||||
v_src03 = v_load((int16_t*)(src[2]) + i + 24);
|
||||
v_src10 = v_load((int16_t*)(src[3]) + i);
|
||||
v_src11 = v_load((int16_t*)(src[3]) + i + 8);
|
||||
v_src12 = v_load((int16_t*)(src[3]) + i + 16);
|
||||
v_src13 = v_load((int16_t*)(src[3]) + i + 24);
|
||||
v_zip(v_add_wrap(v_src00, v_128), v_add_wrap(v_src10, v_128), v_tmp0, v_tmp1);
|
||||
v_res0 += v_dotprod(v_tmp0, v_mul23);
|
||||
v_res1 += v_dotprod(v_tmp1, v_mul23);
|
||||
v_zip(v_add_wrap(v_src01, v_128), v_add_wrap(v_src11, v_128), v_tmp0, v_tmp1);
|
||||
v_res2 += v_dotprod(v_tmp0, v_mul23);
|
||||
v_res3 += v_dotprod(v_tmp1, v_mul23);
|
||||
v_zip(v_add_wrap(v_src02, v_128), v_add_wrap(v_src12, v_128), v_tmp0, v_tmp1);
|
||||
v_res4 += v_dotprod(v_tmp0, v_mul23);
|
||||
v_res5 += v_dotprod(v_tmp1, v_mul23);
|
||||
v_zip(v_add_wrap(v_src03, v_128), v_add_wrap(v_src13, v_128), v_tmp0, v_tmp1);
|
||||
v_res6 += v_dotprod(v_tmp0, v_mul23);
|
||||
v_res7 += v_dotprod(v_tmp1, v_mul23);
|
||||
|
||||
v_int32x4 v_resj0, v_resj1;
|
||||
v_src00 = v_load((int16_t*)(src[4]) + i);
|
||||
v_src01 = v_load((int16_t*)(src[4]) + i + 8);
|
||||
v_src02 = v_load((int16_t*)(src[4]) + i + 16);
|
||||
v_src03 = v_load((int16_t*)(src[4]) + i + 24);
|
||||
v_mul_expand(v_add_wrap(v_src00, v_128), v_mul4, v_resj0, v_resj1);
|
||||
v_res0 += v_resj0;
|
||||
v_res1 += v_resj1;
|
||||
v_mul_expand(v_add_wrap(v_src01, v_128), v_mul4, v_resj0, v_resj1);
|
||||
v_res2 += v_resj0;
|
||||
v_res3 += v_resj1;
|
||||
v_mul_expand(v_add_wrap(v_src02, v_128), v_mul4, v_resj0, v_resj1);
|
||||
v_res4 += v_resj0;
|
||||
v_res5 += v_resj1;
|
||||
v_mul_expand(v_add_wrap(v_src03, v_128), v_mul4, v_resj0, v_resj1);
|
||||
v_res6 += v_resj0;
|
||||
v_res7 += v_resj1;
|
||||
|
||||
v_res0 += v_128_4;
|
||||
v_res1 += v_128_4;
|
||||
v_res2 += v_128_4;
|
||||
v_res3 += v_128_4;
|
||||
v_res4 += v_128_4;
|
||||
v_res5 += v_128_4;
|
||||
v_res6 += v_128_4;
|
||||
v_res7 += v_128_4;
|
||||
|
||||
v_store(dst + i , v_pack(v_reinterpret_as_u16(v_rshr_pack<16>(v_res0, v_res1)),
|
||||
v_reinterpret_as_u16(v_rshr_pack<16>(v_res2, v_res3))));
|
||||
v_store(dst + i + 16, v_pack(v_reinterpret_as_u16(v_rshr_pack<16>(v_res4, v_res5)),
|
||||
v_reinterpret_as_u16(v_rshr_pack<16>(v_res6, v_res7))));
|
||||
v_int32 v_128_4 = vx_setall_s32(*((int32_t*)val));
|
||||
static const v_int16 v_128 = v_reinterpret_as_s16(vx_setall_u16((uint16_t)1 << 15));
|
||||
v_int16 v_mul01 = v_reinterpret_as_s16(vx_setall_u32(*((uint32_t*)m)));
|
||||
v_int16 v_mul23 = v_reinterpret_as_s16(vx_setall_u32(*((uint32_t*)(m + 2))));
|
||||
v_int16 v_mul4 = v_reinterpret_as_s16(vx_setall_u16(*((uint16_t*)(m + 4))));
|
||||
for (; i <= len - 4*VECSZ; i += 4*VECSZ)
|
||||
{
|
||||
v_int16 v_src00, v_src10, v_src01, v_src11, v_src02, v_src12, v_src03, v_src13;
|
||||
v_int16 v_tmp0, v_tmp1;
|
||||
|
||||
const int16_t* src0 = (const int16_t*)src[0] + i;
|
||||
const int16_t* src1 = (const int16_t*)src[1] + i;
|
||||
v_src00 = vx_load(src0);
|
||||
v_src01 = vx_load(src0 + VECSZ);
|
||||
v_src02 = vx_load(src0 + 2*VECSZ);
|
||||
v_src03 = vx_load(src0 + 3*VECSZ);
|
||||
v_src10 = vx_load(src1);
|
||||
v_src11 = vx_load(src1 + VECSZ);
|
||||
v_src12 = vx_load(src1 + 2*VECSZ);
|
||||
v_src13 = vx_load(src1 + 3*VECSZ);
|
||||
v_zip(v_add_wrap(v_src00, v_128), v_add_wrap(v_src10, v_128), v_tmp0, v_tmp1);
|
||||
v_int32 v_res0 = v_dotprod(v_tmp0, v_mul01);
|
||||
v_int32 v_res1 = v_dotprod(v_tmp1, v_mul01);
|
||||
v_zip(v_add_wrap(v_src01, v_128), v_add_wrap(v_src11, v_128), v_tmp0, v_tmp1);
|
||||
v_int32 v_res2 = v_dotprod(v_tmp0, v_mul01);
|
||||
v_int32 v_res3 = v_dotprod(v_tmp1, v_mul01);
|
||||
v_zip(v_add_wrap(v_src02, v_128), v_add_wrap(v_src12, v_128), v_tmp0, v_tmp1);
|
||||
v_int32 v_res4 = v_dotprod(v_tmp0, v_mul01);
|
||||
v_int32 v_res5 = v_dotprod(v_tmp1, v_mul01);
|
||||
v_zip(v_add_wrap(v_src03, v_128), v_add_wrap(v_src13, v_128), v_tmp0, v_tmp1);
|
||||
v_int32 v_res6 = v_dotprod(v_tmp0, v_mul01);
|
||||
v_int32 v_res7 = v_dotprod(v_tmp1, v_mul01);
|
||||
|
||||
const int16_t* src2 = (const int16_t*)src[2] + i;
|
||||
const int16_t* src3 = (const int16_t*)src[3] + i;
|
||||
v_src00 = vx_load(src2);
|
||||
v_src01 = vx_load(src2 + VECSZ);
|
||||
v_src02 = vx_load(src2 + 2*VECSZ);
|
||||
v_src03 = vx_load(src2 + 3*VECSZ);
|
||||
v_src10 = vx_load(src3);
|
||||
v_src11 = vx_load(src3 + VECSZ);
|
||||
v_src12 = vx_load(src3 + 2*VECSZ);
|
||||
v_src13 = vx_load(src3 + 3*VECSZ);
|
||||
v_zip(v_add_wrap(v_src00, v_128), v_add_wrap(v_src10, v_128), v_tmp0, v_tmp1);
|
||||
v_res0 += v_dotprod(v_tmp0, v_mul23);
|
||||
v_res1 += v_dotprod(v_tmp1, v_mul23);
|
||||
v_zip(v_add_wrap(v_src01, v_128), v_add_wrap(v_src11, v_128), v_tmp0, v_tmp1);
|
||||
v_res2 += v_dotprod(v_tmp0, v_mul23);
|
||||
v_res3 += v_dotprod(v_tmp1, v_mul23);
|
||||
v_zip(v_add_wrap(v_src02, v_128), v_add_wrap(v_src12, v_128), v_tmp0, v_tmp1);
|
||||
v_res4 += v_dotprod(v_tmp0, v_mul23);
|
||||
v_res5 += v_dotprod(v_tmp1, v_mul23);
|
||||
v_zip(v_add_wrap(v_src03, v_128), v_add_wrap(v_src13, v_128), v_tmp0, v_tmp1);
|
||||
v_res6 += v_dotprod(v_tmp0, v_mul23);
|
||||
v_res7 += v_dotprod(v_tmp1, v_mul23);
|
||||
|
||||
v_int32 v_resj0, v_resj1;
|
||||
const int16_t* src4 = (const int16_t*)src[4] + i;
|
||||
v_src00 = vx_load(src4);
|
||||
v_src01 = vx_load(src4 + VECSZ);
|
||||
v_src02 = vx_load(src4 + 2*VECSZ);
|
||||
v_src03 = vx_load(src4 + 3*VECSZ);
|
||||
v_mul_expand(v_add_wrap(v_src00, v_128), v_mul4, v_resj0, v_resj1);
|
||||
v_res0 += v_resj0;
|
||||
v_res1 += v_resj1;
|
||||
v_mul_expand(v_add_wrap(v_src01, v_128), v_mul4, v_resj0, v_resj1);
|
||||
v_res2 += v_resj0;
|
||||
v_res3 += v_resj1;
|
||||
v_mul_expand(v_add_wrap(v_src02, v_128), v_mul4, v_resj0, v_resj1);
|
||||
v_res4 += v_resj0;
|
||||
v_res5 += v_resj1;
|
||||
v_mul_expand(v_add_wrap(v_src03, v_128), v_mul4, v_resj0, v_resj1);
|
||||
v_res6 += v_resj0;
|
||||
v_res7 += v_resj1;
|
||||
|
||||
v_res0 += v_128_4;
|
||||
v_res1 += v_128_4;
|
||||
v_res2 += v_128_4;
|
||||
v_res3 += v_128_4;
|
||||
v_res4 += v_128_4;
|
||||
v_res5 += v_128_4;
|
||||
v_res6 += v_128_4;
|
||||
v_res7 += v_128_4;
|
||||
|
||||
v_store(dst + i , v_pack(v_reinterpret_as_u16(v_rshr_pack<16>(v_res0, v_res1)),
|
||||
v_reinterpret_as_u16(v_rshr_pack<16>(v_res2, v_res3))));
|
||||
v_store(dst + i + 2*VECSZ, v_pack(v_reinterpret_as_u16(v_rshr_pack<16>(v_res4, v_res5)),
|
||||
v_reinterpret_as_u16(v_rshr_pack<16>(v_res6, v_res7))));
|
||||
}
|
||||
}
|
||||
#endif
|
||||
for (; i < len; i++)
|
||||
dst[i] = m[0] * src[0][i] + m[1] * src[1][i] + m[2] * src[2][i] + m[3] * src[3][i] + m[4] * src[4][i];
|
||||
}
|
||||
@ -3284,28 +3227,31 @@ template <>
|
||||
void vlineSmooth5N14641<uint8_t, ufixedpoint16>(const ufixedpoint16* const * src, const ufixedpoint16*, int, uint8_t* dst, int len)
|
||||
{
|
||||
int i = 0;
|
||||
v_uint32x4 v_6 = v_setall_u32(6);
|
||||
for (; i <= len - 16; i += 16)
|
||||
#if CV_SIMD
|
||||
v_uint32 v_6 = vx_setall_u32(6);
|
||||
const int VECSZ = v_uint16::nlanes;
|
||||
for (; i <= len - 2*VECSZ; i += 2*VECSZ)
|
||||
{
|
||||
v_uint32x4 v_src00, v_src10, v_src20, v_src30, v_src40;
|
||||
v_uint32x4 v_src01, v_src11, v_src21, v_src31, v_src41;
|
||||
v_uint32x4 v_src02, v_src12, v_src22, v_src32, v_src42;
|
||||
v_uint32x4 v_src03, v_src13, v_src23, v_src33, v_src43;
|
||||
v_expand(v_load((uint16_t*)(src[0]) + i), v_src00, v_src01);
|
||||
v_expand(v_load((uint16_t*)(src[0]) + i + 8), v_src02, v_src03);
|
||||
v_expand(v_load((uint16_t*)(src[1]) + i), v_src10, v_src11);
|
||||
v_expand(v_load((uint16_t*)(src[1]) + i + 8), v_src12, v_src13);
|
||||
v_expand(v_load((uint16_t*)(src[2]) + i), v_src20, v_src21);
|
||||
v_expand(v_load((uint16_t*)(src[2]) + i + 8), v_src22, v_src23);
|
||||
v_expand(v_load((uint16_t*)(src[3]) + i), v_src30, v_src31);
|
||||
v_expand(v_load((uint16_t*)(src[3]) + i + 8), v_src32, v_src33);
|
||||
v_expand(v_load((uint16_t*)(src[4]) + i), v_src40, v_src41);
|
||||
v_expand(v_load((uint16_t*)(src[4]) + i + 8), v_src42, v_src43);
|
||||
v_uint32 v_src00, v_src10, v_src20, v_src30, v_src40;
|
||||
v_uint32 v_src01, v_src11, v_src21, v_src31, v_src41;
|
||||
v_uint32 v_src02, v_src12, v_src22, v_src32, v_src42;
|
||||
v_uint32 v_src03, v_src13, v_src23, v_src33, v_src43;
|
||||
v_expand(vx_load((uint16_t*)(src[0]) + i), v_src00, v_src01);
|
||||
v_expand(vx_load((uint16_t*)(src[0]) + i + VECSZ), v_src02, v_src03);
|
||||
v_expand(vx_load((uint16_t*)(src[1]) + i), v_src10, v_src11);
|
||||
v_expand(vx_load((uint16_t*)(src[1]) + i + VECSZ), v_src12, v_src13);
|
||||
v_expand(vx_load((uint16_t*)(src[2]) + i), v_src20, v_src21);
|
||||
v_expand(vx_load((uint16_t*)(src[2]) + i + VECSZ), v_src22, v_src23);
|
||||
v_expand(vx_load((uint16_t*)(src[3]) + i), v_src30, v_src31);
|
||||
v_expand(vx_load((uint16_t*)(src[3]) + i + VECSZ), v_src32, v_src33);
|
||||
v_expand(vx_load((uint16_t*)(src[4]) + i), v_src40, v_src41);
|
||||
v_expand(vx_load((uint16_t*)(src[4]) + i + VECSZ), v_src42, v_src43);
|
||||
v_store(dst + i, v_pack(v_rshr_pack<12>(v_src20*v_6 + ((v_src10 + v_src30) << 2) + v_src00 + v_src40,
|
||||
v_src21*v_6 + ((v_src11 + v_src31) << 2) + v_src01 + v_src41),
|
||||
v_rshr_pack<12>(v_src22*v_6 + ((v_src12 + v_src32) << 2) + v_src02 + v_src42,
|
||||
v_src23*v_6 + ((v_src13 + v_src33) << 2) + v_src03 + v_src43)));
|
||||
}
|
||||
#endif
|
||||
for (; i < len; i++)
|
||||
dst[i] = ((uint32_t)(((uint16_t*)(src[2]))[i]) * 6 +
|
||||
(((uint32_t)(((uint16_t*)(src[1]))[i]) + (uint32_t)(((uint16_t*)(src[3]))[i])) << 2) +
|
||||
@ -3326,57 +3272,63 @@ template <>
|
||||
void vlineSmooth<uint8_t, ufixedpoint16>(const ufixedpoint16* const * src, const ufixedpoint16* m, int n, uint8_t* dst, int len)
|
||||
{
|
||||
int i = 0;
|
||||
static const v_int16x8 v_128 = v_reinterpret_as_s16(v_setall_u16((uint16_t)1 << 15));
|
||||
v_int32x4 v_128_4 = v_setall_s32(128 << 16);
|
||||
if (len > 7)
|
||||
#if CV_SIMD
|
||||
static const v_int16 v_128 = v_reinterpret_as_s16(vx_setall_u16((uint16_t)1 << 15));
|
||||
v_int32 v_128_4 = vx_setall_s32(128 << 16);
|
||||
const int VECSZ = v_uint16::nlanes;
|
||||
if (len >= VECSZ)
|
||||
{
|
||||
ufixedpoint16 msum = m[0] + m[1];
|
||||
for (int j = 2; j < n; j++)
|
||||
msum = msum + m[j];
|
||||
ufixedpoint32 val[] = { msum * ufixedpoint16((uint8_t)128) };
|
||||
v_128_4 = v_setall_s32(*((int32_t*)val));
|
||||
v_128_4 = vx_setall_s32(*((int32_t*)val));
|
||||
}
|
||||
for (; i <= len - 32; i += 32)
|
||||
for (; i <= len - 4*VECSZ; i += 4*VECSZ)
|
||||
{
|
||||
v_int16x8 v_src00, v_src10, v_src01, v_src11, v_src02, v_src12, v_src03, v_src13;
|
||||
v_int16x8 v_tmp0, v_tmp1;
|
||||
v_int16 v_src00, v_src10, v_src01, v_src11, v_src02, v_src12, v_src03, v_src13;
|
||||
v_int16 v_tmp0, v_tmp1;
|
||||
|
||||
v_int16x8 v_mul = v_reinterpret_as_s16(v_setall_u32(*((uint32_t*)m)));
|
||||
v_int16 v_mul = v_reinterpret_as_s16(vx_setall_u32(*((uint32_t*)m)));
|
||||
|
||||
v_src00 = v_load((int16_t*)(src[0]) + i);
|
||||
v_src01 = v_load((int16_t*)(src[0]) + i + 8);
|
||||
v_src02 = v_load((int16_t*)(src[0]) + i + 16);
|
||||
v_src03 = v_load((int16_t*)(src[0]) + i + 24);
|
||||
v_src10 = v_load((int16_t*)(src[1]) + i);
|
||||
v_src11 = v_load((int16_t*)(src[1]) + i + 8);
|
||||
v_src12 = v_load((int16_t*)(src[1]) + i + 16);
|
||||
v_src13 = v_load((int16_t*)(src[1]) + i + 24);
|
||||
const int16_t* src0 = (const int16_t*)src[0] + i;
|
||||
const int16_t* src1 = (const int16_t*)src[1] + i;
|
||||
v_src00 = vx_load(src0);
|
||||
v_src01 = vx_load(src0 + VECSZ);
|
||||
v_src02 = vx_load(src0 + 2*VECSZ);
|
||||
v_src03 = vx_load(src0 + 3*VECSZ);
|
||||
v_src10 = vx_load(src1);
|
||||
v_src11 = vx_load(src1 + VECSZ);
|
||||
v_src12 = vx_load(src1 + 2*VECSZ);
|
||||
v_src13 = vx_load(src1 + 3*VECSZ);
|
||||
v_zip(v_add_wrap(v_src00, v_128), v_add_wrap(v_src10, v_128), v_tmp0, v_tmp1);
|
||||
v_int32x4 v_res0 = v_dotprod(v_tmp0, v_mul);
|
||||
v_int32x4 v_res1 = v_dotprod(v_tmp1, v_mul);
|
||||
v_int32 v_res0 = v_dotprod(v_tmp0, v_mul);
|
||||
v_int32 v_res1 = v_dotprod(v_tmp1, v_mul);
|
||||
v_zip(v_add_wrap(v_src01, v_128), v_add_wrap(v_src11, v_128), v_tmp0, v_tmp1);
|
||||
v_int32x4 v_res2 = v_dotprod(v_tmp0, v_mul);
|
||||
v_int32x4 v_res3 = v_dotprod(v_tmp1, v_mul);
|
||||
v_int32 v_res2 = v_dotprod(v_tmp0, v_mul);
|
||||
v_int32 v_res3 = v_dotprod(v_tmp1, v_mul);
|
||||
v_zip(v_add_wrap(v_src02, v_128), v_add_wrap(v_src12, v_128), v_tmp0, v_tmp1);
|
||||
v_int32x4 v_res4 = v_dotprod(v_tmp0, v_mul);
|
||||
v_int32x4 v_res5 = v_dotprod(v_tmp1, v_mul);
|
||||
v_int32 v_res4 = v_dotprod(v_tmp0, v_mul);
|
||||
v_int32 v_res5 = v_dotprod(v_tmp1, v_mul);
|
||||
v_zip(v_add_wrap(v_src03, v_128), v_add_wrap(v_src13, v_128), v_tmp0, v_tmp1);
|
||||
v_int32x4 v_res6 = v_dotprod(v_tmp0, v_mul);
|
||||
v_int32x4 v_res7 = v_dotprod(v_tmp1, v_mul);
|
||||
v_int32 v_res6 = v_dotprod(v_tmp0, v_mul);
|
||||
v_int32 v_res7 = v_dotprod(v_tmp1, v_mul);
|
||||
|
||||
int j = 2;
|
||||
for (; j < n - 1; j+=2)
|
||||
{
|
||||
v_mul = v_reinterpret_as_s16(v_setall_u32(*((uint32_t*)(m+j))));
|
||||
v_mul = v_reinterpret_as_s16(vx_setall_u32(*((uint32_t*)(m+j))));
|
||||
|
||||
v_src00 = v_load((int16_t*)(src[j]) + i);
|
||||
v_src01 = v_load((int16_t*)(src[j]) + i + 8);
|
||||
v_src02 = v_load((int16_t*)(src[j]) + i + 16);
|
||||
v_src03 = v_load((int16_t*)(src[j]) + i + 24);
|
||||
v_src10 = v_load((int16_t*)(src[j+1]) + i);
|
||||
v_src11 = v_load((int16_t*)(src[j+1]) + i + 8);
|
||||
v_src12 = v_load((int16_t*)(src[j+1]) + i + 16);
|
||||
v_src13 = v_load((int16_t*)(src[j+1]) + i + 24);
|
||||
const int16_t* srcj0 = (const int16_t*)src[j] + i;
|
||||
const int16_t* srcj1 = (const int16_t*)src[j + 1] + i;
|
||||
v_src00 = vx_load(srcj0);
|
||||
v_src01 = vx_load(srcj0 + VECSZ);
|
||||
v_src02 = vx_load(srcj0 + 2*VECSZ);
|
||||
v_src03 = vx_load(srcj0 + 3*VECSZ);
|
||||
v_src10 = vx_load(srcj1);
|
||||
v_src11 = vx_load(srcj1 + VECSZ);
|
||||
v_src12 = vx_load(srcj1 + 2*VECSZ);
|
||||
v_src13 = vx_load(srcj1 + 3*VECSZ);
|
||||
v_zip(v_add_wrap(v_src00, v_128), v_add_wrap(v_src10, v_128), v_tmp0, v_tmp1);
|
||||
v_res0 += v_dotprod(v_tmp0, v_mul);
|
||||
v_res1 += v_dotprod(v_tmp1, v_mul);
|
||||
@ -3392,12 +3344,13 @@ void vlineSmooth<uint8_t, ufixedpoint16>(const ufixedpoint16* const * src, const
|
||||
}
|
||||
if(j < n)
|
||||
{
|
||||
v_int32x4 v_resj0, v_resj1;
|
||||
v_mul = v_reinterpret_as_s16(v_setall_u16(*((uint16_t*)(m + j))));
|
||||
v_src00 = v_load((int16_t*)(src[j]) + i);
|
||||
v_src01 = v_load((int16_t*)(src[j]) + i + 8);
|
||||
v_src02 = v_load((int16_t*)(src[j]) + i + 16);
|
||||
v_src03 = v_load((int16_t*)(src[j]) + i + 24);
|
||||
v_int32 v_resj0, v_resj1;
|
||||
v_mul = v_reinterpret_as_s16(vx_setall_u16(*((uint16_t*)(m + j))));
|
||||
const int16_t* srcj = (const int16_t*)src[j] + i;
|
||||
v_src00 = vx_load(srcj);
|
||||
v_src01 = vx_load(srcj + VECSZ);
|
||||
v_src02 = vx_load(srcj + 2*VECSZ);
|
||||
v_src03 = vx_load(srcj + 3*VECSZ);
|
||||
v_mul_expand(v_add_wrap(v_src00, v_128), v_mul, v_resj0, v_resj1);
|
||||
v_res0 += v_resj0;
|
||||
v_res1 += v_resj1;
|
||||
@ -3420,11 +3373,12 @@ void vlineSmooth<uint8_t, ufixedpoint16>(const ufixedpoint16* const * src, const
|
||||
v_res6 += v_128_4;
|
||||
v_res7 += v_128_4;
|
||||
|
||||
v_store(dst + i , v_pack(v_reinterpret_as_u16(v_rshr_pack<16>(v_res0, v_res1)),
|
||||
v_reinterpret_as_u16(v_rshr_pack<16>(v_res2, v_res3))));
|
||||
v_store(dst + i + 16, v_pack(v_reinterpret_as_u16(v_rshr_pack<16>(v_res4, v_res5)),
|
||||
v_reinterpret_as_u16(v_rshr_pack<16>(v_res6, v_res7))));
|
||||
v_store(dst + i , v_pack(v_reinterpret_as_u16(v_rshr_pack<16>(v_res0, v_res1)),
|
||||
v_reinterpret_as_u16(v_rshr_pack<16>(v_res2, v_res3))));
|
||||
v_store(dst + i + 2*VECSZ, v_pack(v_reinterpret_as_u16(v_rshr_pack<16>(v_res4, v_res5)),
|
||||
v_reinterpret_as_u16(v_rshr_pack<16>(v_res6, v_res7))));
|
||||
}
|
||||
#endif
|
||||
for (; i < len; i++)
|
||||
{
|
||||
ufixedpoint32 val = m[0] * src[0][i];
|
||||
@ -3450,29 +3404,32 @@ void vlineSmoothONa_yzy_a(const FT* const * src, const FT* m, int n, ET* dst, in
|
||||
template <>
|
||||
void vlineSmoothONa_yzy_a<uint8_t, ufixedpoint16>(const ufixedpoint16* const * src, const ufixedpoint16* m, int n, uint8_t* dst, int len)
|
||||
{
|
||||
int pre_shift = n / 2;
|
||||
int i = 0;
|
||||
static const v_int16x8 v_128 = v_reinterpret_as_s16(v_setall_u16((uint16_t)1 << 15));
|
||||
v_int32x4 v_128_4 = v_setall_s32(128 << 16);
|
||||
if (len > 7)
|
||||
#if CV_SIMD
|
||||
int pre_shift = n / 2;
|
||||
static const v_int16 v_128 = v_reinterpret_as_s16(vx_setall_u16((uint16_t)1 << 15));
|
||||
v_int32 v_128_4 = vx_setall_s32(128 << 16);
|
||||
const int VECSZ = v_uint16::nlanes;
|
||||
if (len >= VECSZ)
|
||||
{
|
||||
ufixedpoint16 msum = m[0] + m[pre_shift] + m[n - 1];
|
||||
for (int j = 1; j < pre_shift; j++)
|
||||
msum = msum + m[j] + m[n - 1 - j];
|
||||
ufixedpoint32 val[] = { msum * ufixedpoint16((uint8_t)128) };
|
||||
v_128_4 = v_setall_s32(*((int32_t*)val));
|
||||
v_128_4 = vx_setall_s32(*((int32_t*)val));
|
||||
}
|
||||
for (; i <= len - 32; i += 32)
|
||||
for (; i <= len - 4*VECSZ; i += 4*VECSZ)
|
||||
{
|
||||
v_int16x8 v_src00, v_src10, v_src20, v_src30, v_src01, v_src11, v_src21, v_src31;
|
||||
v_int32x4 v_res0, v_res1, v_res2, v_res3, v_res4, v_res5, v_res6, v_res7;
|
||||
v_int16x8 v_tmp0, v_tmp1, v_tmp2, v_tmp3, v_tmp4, v_tmp5, v_tmp6, v_tmp7;
|
||||
v_int16 v_src00, v_src10, v_src20, v_src30, v_src01, v_src11, v_src21, v_src31;
|
||||
v_int32 v_res0, v_res1, v_res2, v_res3, v_res4, v_res5, v_res6, v_res7;
|
||||
v_int16 v_tmp0, v_tmp1, v_tmp2, v_tmp3, v_tmp4, v_tmp5, v_tmp6, v_tmp7;
|
||||
|
||||
v_int16x8 v_mul = v_reinterpret_as_s16(v_setall_u16(*((uint16_t*)(m + pre_shift))));
|
||||
v_src00 = v_load((int16_t*)(src[pre_shift]) + i);
|
||||
v_src10 = v_load((int16_t*)(src[pre_shift]) + i + 8);
|
||||
v_src20 = v_load((int16_t*)(src[pre_shift]) + i + 16);
|
||||
v_src30 = v_load((int16_t*)(src[pre_shift]) + i + 24);
|
||||
v_int16 v_mul = v_reinterpret_as_s16(vx_setall_u16(*((uint16_t*)(m + pre_shift))));
|
||||
const int16_t* srcp = (const int16_t*)src[pre_shift] + i;
|
||||
v_src00 = vx_load(srcp);
|
||||
v_src10 = vx_load(srcp + VECSZ);
|
||||
v_src20 = vx_load(srcp + 2*VECSZ);
|
||||
v_src30 = vx_load(srcp + 3*VECSZ);
|
||||
v_mul_expand(v_add_wrap(v_src00, v_128), v_mul, v_res0, v_res1);
|
||||
v_mul_expand(v_add_wrap(v_src10, v_128), v_mul, v_res2, v_res3);
|
||||
v_mul_expand(v_add_wrap(v_src20, v_128), v_mul, v_res4, v_res5);
|
||||
@ -3481,16 +3438,18 @@ void vlineSmoothONa_yzy_a<uint8_t, ufixedpoint16>(const ufixedpoint16* const * s
|
||||
int j = 0;
|
||||
for (; j < pre_shift; j++)
|
||||
{
|
||||
v_mul = v_reinterpret_as_s16(v_setall_u16(*((uint16_t*)(m + j))));
|
||||
v_mul = v_reinterpret_as_s16(vx_setall_u16(*((uint16_t*)(m + j))));
|
||||
|
||||
v_src00 = v_load((int16_t*)(src[j]) + i);
|
||||
v_src10 = v_load((int16_t*)(src[j]) + i + 8);
|
||||
v_src20 = v_load((int16_t*)(src[j]) + i + 16);
|
||||
v_src30 = v_load((int16_t*)(src[j]) + i + 24);
|
||||
v_src01 = v_load((int16_t*)(src[n - 1 - j]) + i);
|
||||
v_src11 = v_load((int16_t*)(src[n - 1 - j]) + i + 8);
|
||||
v_src21 = v_load((int16_t*)(src[n - 1 - j]) + i + 16);
|
||||
v_src31 = v_load((int16_t*)(src[n - 1 - j]) + i + 24);
|
||||
const int16_t* srcj0 = (const int16_t*)src[j] + i;
|
||||
const int16_t* srcj1 = (const int16_t*)src[n - 1 - j] + i;
|
||||
v_src00 = vx_load(srcj0);
|
||||
v_src10 = vx_load(srcj0 + VECSZ);
|
||||
v_src20 = vx_load(srcj0 + 2*VECSZ);
|
||||
v_src30 = vx_load(srcj0 + 3*VECSZ);
|
||||
v_src01 = vx_load(srcj1);
|
||||
v_src11 = vx_load(srcj1 + VECSZ);
|
||||
v_src21 = vx_load(srcj1 + 2*VECSZ);
|
||||
v_src31 = vx_load(srcj1 + 3*VECSZ);
|
||||
v_zip(v_add_wrap(v_src00, v_128), v_add_wrap(v_src01, v_128), v_tmp0, v_tmp1);
|
||||
v_res0 += v_dotprod(v_tmp0, v_mul);
|
||||
v_res1 += v_dotprod(v_tmp1, v_mul);
|
||||
@ -3514,11 +3473,12 @@ void vlineSmoothONa_yzy_a<uint8_t, ufixedpoint16>(const ufixedpoint16* const * s
|
||||
v_res6 += v_128_4;
|
||||
v_res7 += v_128_4;
|
||||
|
||||
v_store(dst + i , v_pack(v_reinterpret_as_u16(v_rshr_pack<16>(v_res0, v_res1)),
|
||||
v_reinterpret_as_u16(v_rshr_pack<16>(v_res2, v_res3))));
|
||||
v_store(dst + i + 16, v_pack(v_reinterpret_as_u16(v_rshr_pack<16>(v_res4, v_res5)),
|
||||
v_reinterpret_as_u16(v_rshr_pack<16>(v_res6, v_res7))));
|
||||
v_store(dst + i , v_pack(v_reinterpret_as_u16(v_rshr_pack<16>(v_res0, v_res1)),
|
||||
v_reinterpret_as_u16(v_rshr_pack<16>(v_res2, v_res3))));
|
||||
v_store(dst + i + 2*VECSZ, v_pack(v_reinterpret_as_u16(v_rshr_pack<16>(v_res4, v_res5)),
|
||||
v_reinterpret_as_u16(v_rshr_pack<16>(v_res6, v_res7))));
|
||||
}
|
||||
#endif
|
||||
for (; i < len; i++)
|
||||
{
|
||||
ufixedpoint32 val = m[0] * src[0][i];
|
||||
@ -3816,8 +3776,8 @@ static void createGaussianKernels( T & kx, T & ky, int type, Size &ksize,
|
||||
if( ksize.height <= 0 && sigma2 > 0 )
|
||||
ksize.height = cvRound(sigma2*(depth == CV_8U ? 3 : 4)*2 + 1)|1;
|
||||
|
||||
CV_Assert( ksize.width > 0 && ksize.width % 2 == 1 &&
|
||||
ksize.height > 0 && ksize.height % 2 == 1 );
|
||||
CV_Assert( ksize.width > 0 && ksize.width % 2 == 1 &&
|
||||
ksize.height > 0 && ksize.height % 2 == 1 );
|
||||
|
||||
sigma1 = std::max( sigma1, 0. );
|
||||
sigma2 = std::max( sigma2, 0. );
|
||||
@ -4146,20 +4106,6 @@ void cv::GaussianBlur( InputArray _src, OutputArray _dst, Size ksize,
|
||||
|
||||
int sdepth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
|
||||
|
||||
if(sdepth == CV_8U && ((borderType & BORDER_ISOLATED) || !_src.getMat().isSubmatrix()))
|
||||
{
|
||||
std::vector<ufixedpoint16> fkx, fky;
|
||||
createGaussianKernels(fkx, fky, type, ksize, sigma1, sigma2);
|
||||
Mat src = _src.getMat();
|
||||
Mat dst = _dst.getMat();
|
||||
if (src.data == dst.data)
|
||||
src = src.clone();
|
||||
fixedSmoothInvoker<uint8_t, ufixedpoint16> invoker(src.ptr<uint8_t>(), src.step1(), dst.ptr<uint8_t>(), dst.step1(), dst.cols, dst.rows, dst.channels(), &fkx[0], (int)fkx.size(), &fky[0], (int)fky.size(), borderType & ~BORDER_ISOLATED);
|
||||
parallel_for_(Range(0, dst.rows), invoker, std::max(1, std::min(getNumThreads(), getNumberOfCPUs())));
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
Mat kx, ky;
|
||||
createGaussianKernels(kx, ky, type, ksize, sigma1, sigma2);
|
||||
|
||||
@ -4185,6 +4131,17 @@ void cv::GaussianBlur( InputArray _src, OutputArray _dst, Size ksize,
|
||||
|
||||
CV_IPP_RUN_FAST(ipp_GaussianBlur(src, dst, ksize, sigma1, sigma2, borderType));
|
||||
|
||||
if(sdepth == CV_8U && ((borderType & BORDER_ISOLATED) || !_src.getMat().isSubmatrix()))
|
||||
{
|
||||
std::vector<ufixedpoint16> fkx, fky;
|
||||
createGaussianKernels(fkx, fky, type, ksize, sigma1, sigma2);
|
||||
if (src.data == dst.data)
|
||||
src = src.clone();
|
||||
fixedSmoothInvoker<uint8_t, ufixedpoint16> invoker(src.ptr<uint8_t>(), src.step1(), dst.ptr<uint8_t>(), dst.step1(), dst.cols, dst.rows, dst.channels(), &fkx[0], (int)fkx.size(), &fky[0], (int)fky.size(), borderType & ~BORDER_ISOLATED);
|
||||
parallel_for_(Range(0, dst.rows), invoker, std::max(1, std::min(getNumThreads(), getNumberOfCPUs())));
|
||||
return;
|
||||
}
|
||||
|
||||
sepFilter2D(src, dst, sdepth, kx, ky, Point(-1, -1), 0, borderType);
|
||||
}
|
||||
|
||||
|
@ -758,6 +758,7 @@ void Subdiv2D::getTriangleList(std::vector<Vec6f>& triangleList) const
|
||||
triangleList.clear();
|
||||
int i, total = (int)(qedges.size()*4);
|
||||
std::vector<bool> edgemask(total, false);
|
||||
Rect2f rect(topLeft.x, topLeft.y, bottomRight.x, bottomRight.y);
|
||||
|
||||
for( i = 4; i < total; i += 2 )
|
||||
{
|
||||
@ -773,7 +774,8 @@ void Subdiv2D::getTriangleList(std::vector<Vec6f>& triangleList) const
|
||||
edge = getEdge(edge, NEXT_AROUND_LEFT);
|
||||
edgeOrg(edge, &c);
|
||||
edgemask[edge] = true;
|
||||
triangleList.push_back(Vec6f(a.x, a.y, b.x, b.y, c.x, c.y));
|
||||
if( rect.contains(a) && rect.contains(b) && rect.contains(c) )
|
||||
triangleList.push_back(Vec6f(a.x, a.y, b.x, b.y, c.x, c.y));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -283,4 +283,23 @@ void CV_DisTransTest::prepare_to_validation( int /*test_case_idx*/ )
|
||||
|
||||
TEST(Imgproc_DistanceTransform, accuracy) { CV_DisTransTest test; test.safe_run(); }
|
||||
|
||||
// Runs the labelled distance transform on a very large (8000 x 20000) image
// and checks how many of the resulting pixel labels survive a down-scaling
// by 1 / lls_maxcnt.
BIGDATA_TEST(Imgproc_DistanceTransform, large_image_12218)
{
    const int lls_maxcnt = 79992000;   // labels's maximum count
    const int lls_mincnt = 1;          // labels's minimum count

    // Columns 0 .. cols/2 (inclusive) are 255, every column to the right is 0.
    Mat src(8000, 20000, CV_8UC1, Scalar::all(255));
    src.colRange(src.cols / 2 + 1, src.cols).setTo(Scalar::all(0));

    Mat dst, labels;
    distanceTransform(src, dst, labels, cv::DIST_L2, cv::DIST_MASK_3, DIST_LABEL_PIXEL);

    // Rescale the labels in place into CV_32SC1 and count what stays non-zero.
    const double scale = (double)lls_mincnt / (double)lls_maxcnt;
    labels.convertTo(labels, CV_32SC1, scale);

    const Size size = labels.size();
    const int nz = cv::countNonZero(labels);
    EXPECT_EQ(nz, (size.height*size.width / 2));
}
|
||||
|
||||
}} // namespace
|
||||
|
53
modules/imgproc/test/test_subdivision2d.cpp
Normal file
53
modules/imgproc/test/test_subdivision2d.cpp
Normal file
@ -0,0 +1,53 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
//M*/
|
||||
#include "test_precomp.hpp"
|
||||
|
||||
namespace opencv_test { namespace {
|
||||
// Regression test: every triangle returned by Subdiv2D::getTriangleList()
// must have all three vertices inside the rectangle the subdivision was
// created with, and the fixed 65-point input must yield exactly 105 triangles.
TEST(Imgproc_Subdiv2D_getTriangleList, regression_5788)
{
    const float points[65][2] = {
        { 390,  802}, { 397,  883}, { 414,  963}, { 439, 1042}, { 472, 1113},
        { 521, 1181}, { 591, 1238}, { 678, 1284}, { 771, 1292}, { 853, 1281},
        { 921, 1243}, { 982, 1191}, {1030, 1121}, {1059, 1038}, {1072,  945},
        {1081,  849}, {1082,  749}, { 459,  734}, { 502,  704}, { 554,  696},
        { 609,  698}, { 660,  707}, { 818,  688}, { 874,  661}, { 929,  646},
        { 982,  653}, {1026,  682}, { 740,  771}, { 748,  834}, { 756,  897},
        { 762,  960}, { 700,  998}, { 733, 1006}, { 766, 1011}, { 797,  999},
        { 825,  987}, { 528,  796}, { 566,  766}, { 617,  763}, { 659,  794},
        { 619,  808}, { 569,  812}, { 834,  777}, { 870,  735}, { 918,  729},
        { 958,  750}, { 929,  773}, { 882,  780}, { 652, 1102}, { 701, 1079},
        { 743, 1063}, { 774, 1068}, { 807, 1057}, { 852, 1065}, { 896, 1077},
        { 860, 1117}, { 820, 1135}, { 783, 1141}, { 751, 1140}, { 706, 1130},
        { 675, 1102}, { 743, 1094}, { 774, 1094}, { 809, 1088}, { 878, 1082}
    };
    const int npoints = (int)(sizeof(points) / sizeof(points[0]));

    // Single source of truth for the subdivision bounds and the checks below.
    const int width = 1500, height = 2000;

    std::vector<cv::Point2f> pts;
    pts.reserve(npoints);
    for( int i = 0; i < npoints; i++ )
        pts.push_back(cv::Point2f(points[i][0], points[i][1]));

    cv::Subdiv2D subdiv(cv::Rect(0, 0, width, height));
    subdiv.insert(pts);

    std::vector<cv::Vec6f> triangles;
    subdiv.getTriangleList(triangles);

    // Each Vec6f is laid out as (x0, y0, x1, y1, x2, y2); check every vertex
    // separately so a failure names the offending triangle and coordinate.
    for( size_t t = 0; t < triangles.size(); t++ )
    {
        const cv::Vec6f& tri = triangles[t];
        for( int k = 0; k < 6; k += 2 )
        {
            EXPECT_TRUE(0 <= tri[k] && tri[k] < width)
                << "triangle " << t << ", vertex " << k / 2 << ": x = " << tri[k];
            EXPECT_TRUE(0 <= tri[k + 1] && tri[k + 1] < height)
                << "triangle " << t << ", vertex " << k / 2 << ": y = " << tri[k + 1];
        }
    }
    EXPECT_EQ((int)triangles.size(), 105);
}
|
||||
|
||||
}};
|
@ -29,7 +29,7 @@
|
||||
|
||||
#define ARRAYLIST(ENV) static_cast<jclass>(ENV->NewGlobalRef(ENV->FindClass("java/util/ArrayList")))
|
||||
#define LIST_ADD(ENV, LIST) ENV->GetMethodID(LIST, "add", "(Ljava/lang/Object;)Z")
|
||||
#define LIST_GET(ENV, LIST) ENV->GetMethodID(LIST, "get", "((I)Ljava/lang/Object;")
|
||||
#define LIST_GET(ENV, LIST) ENV->GetMethodID(LIST, "get", "(I)Ljava/lang/Object;")
|
||||
#define LIST_SIZE(ENV, LIST) ENV->GetMethodID(LIST, "size", "()I")
|
||||
#define LIST_CLEAR(ENV, LIST) ENV->GetMethodID(LIST, "clear", "()V")
|
||||
|
||||
|
@ -56,7 +56,7 @@ add_custom_command(
|
||||
DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/src/embindgen.py
|
||||
DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/src/templates.py
|
||||
DEPENDS ${scripts_hdr_parser}
|
||||
DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/headers.txt
|
||||
#(not needed - generated by CMake) DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/headers.txt
|
||||
DEPENDS ${opencv_hdrs}
|
||||
DEPENDS ${JS_HELPER})
|
||||
|
||||
|
@ -68,15 +68,10 @@
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//M*/
|
||||
|
||||
#include "opencv2/core.hpp"
|
||||
#include "opencv2/imgproc.hpp"
|
||||
#include "opencv2/video/tracking.hpp"
|
||||
#include "opencv2/video/background_segm.hpp"
|
||||
#include "opencv2/objdetect.hpp"
|
||||
#include "opencv2/dnn.hpp"
|
||||
|
||||
#include <emscripten/bind.h>
|
||||
|
||||
@INCLUDES@
|
||||
|
||||
using namespace emscripten;
|
||||
using namespace cv;
|
||||
using namespace dnn;
|
||||
|
@ -733,12 +733,14 @@ class JSWrapperGenerator(object):
|
||||
|
||||
def gen(self, dst_file, src_files, core_bindings):
|
||||
# step 1: scan the headers and extract classes, enums and functions
|
||||
headers = []
|
||||
for hdr in src_files:
|
||||
decls = self.parser.parse(hdr)
|
||||
# print(hdr);
|
||||
# self.print_decls(decls);
|
||||
if len(decls) == 0:
|
||||
continue
|
||||
headers.append(hdr[hdr.rindex('opencv2/'):])
|
||||
for decl in decls:
|
||||
name = decl[0]
|
||||
type = name[:name.find(" ")]
|
||||
@ -890,6 +892,9 @@ class JSWrapperGenerator(object):
|
||||
with open(core_bindings) as f:
|
||||
ret = f.read()
|
||||
|
||||
header_includes = '\n'.join(['#include "{}"'.format(hdr) for hdr in headers])
|
||||
ret = ret.replace('@INCLUDES@', header_includes)
|
||||
|
||||
defis = '\n'.join(self.wrapper_funcs)
|
||||
ret += wrapper_codes_template.substitute(ns=wrapper_namespace, defs=defis)
|
||||
ret += emscripten_binding_template.substitute(binding_name='testBinding', bindings=''.join(self.bindings))
|
||||
|
@ -140,13 +140,12 @@ public:
|
||||
String getModelName() const CV_OVERRIDE { return NAME_BRUTE_FORCE; }
|
||||
int getType() const CV_OVERRIDE { return ml::KNearest::BRUTE_FORCE; }
|
||||
|
||||
void findNearestCore( const Mat& _samples, int k0, const Range& range,
|
||||
void findNearestCore( const Mat& _samples, int k, const Range& range,
|
||||
Mat* results, Mat* neighbor_responses,
|
||||
Mat* dists, float* presult ) const
|
||||
{
|
||||
int testidx, baseidx, i, j, d = samples.cols, nsamples = samples.rows;
|
||||
int testcount = range.end - range.start;
|
||||
int k = std::min(k0, nsamples);
|
||||
|
||||
AutoBuffer<float> buf(testcount*k*2);
|
||||
float* dbuf = buf.data();
|
||||
@ -215,7 +214,7 @@ public:
|
||||
float* nr = neighbor_responses->ptr<float>(testidx + range.start);
|
||||
for( j = 0; j < k; j++ )
|
||||
nr[j] = rbuf[testidx*k + j];
|
||||
for( ; j < k0; j++ )
|
||||
for( ; j < k; j++ )
|
||||
nr[j] = 0.f;
|
||||
}
|
||||
|
||||
@ -224,7 +223,7 @@ public:
|
||||
float* dptr = dists->ptr<float>(testidx + range.start);
|
||||
for( j = 0; j < k; j++ )
|
||||
dptr[j] = dbuf[testidx*k + j];
|
||||
for( ; j < k0; j++ )
|
||||
for( ; j < k; j++ )
|
||||
dptr[j] = 0.f;
|
||||
}
|
||||
|
||||
@ -307,6 +306,7 @@ public:
|
||||
{
|
||||
float result = 0.f;
|
||||
CV_Assert( 0 < k );
|
||||
k = std::min(k, samples.rows);
|
||||
|
||||
Mat test_samples = _samples.getMat();
|
||||
CV_Assert( test_samples.type() == CV_32F && test_samples.cols == samples.cols );
|
||||
@ -363,6 +363,7 @@ public:
|
||||
{
|
||||
float result = 0.f;
|
||||
CV_Assert( 0 < k );
|
||||
k = std::min(k, samples.rows);
|
||||
|
||||
Mat test_samples = _samples.getMat();
|
||||
CV_Assert( test_samples.type() == CV_32F && test_samples.cols == samples.cols );
|
||||
|
@ -702,4 +702,26 @@ TEST(ML_EM, accuracy) { CV_EMTest test; test.safe_run(); }
|
||||
TEST(ML_EM, save_load) { CV_EMTest_SaveLoad test; test.safe_run(); }
|
||||
TEST(ML_EM, classification) { CV_EMTest_Classification test; test.safe_run(); }
|
||||
|
||||
// Regression test for KNearest::findNearest(): first with K larger than the
// number of training samples (checks output shapes), then with K = 2 (checks
// the predicted labels).
TEST(ML_KNearest, regression_12347)
{
    // Five 2-D training samples with their class responses.
    Mat trainSamples = (Mat_<float>(5,2) << 1, 1.1, 1.1, 1, 2, 2, 2.1, 2, 2.1, 2.1);
    Mat trainResponses = (Mat_<float>(5,1) << 1, 1, 2, 2, 2);

    Ptr<KNearest> model = KNearest::create();
    model->train(trainSamples, ml::ROW_SAMPLE, trainResponses);

    Mat querySamples = (Mat_<float>(2,2) << 1.1, 1.1, 2, 2.2);
    Mat predicted, neighbourResponses, distances;

    // Output shapes: the neighbour count is expected to be capped at the
    // number of training samples.
    const int requestedK = 16;
    const int expectedK = std::min(requestedK, trainSamples.rows);
    model->findNearest(querySamples, requestedK, predicted, neighbourResponses, distances);
    EXPECT_EQ(querySamples.rows, predicted.rows);
    EXPECT_EQ(neighbourResponses.cols, expectedK);
    EXPECT_EQ(distances.cols, expectedK);

    // Predicted labels with a small K, reusing the same output matrices.
    const int smallK = 2;
    model->findNearest(querySamples, smallK, predicted, neighbourResponses, distances);
    EXPECT_EQ(1, predicted.at<float>(0,0));
    EXPECT_EQ(2, predicted.at<float>(1,0));
}
|
||||
|
||||
}} // namespace
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -40,6 +40,9 @@ if __name__ == "__main__":
|
||||
parser.add_argument("--valgrind_supp", metavar="FILE", action='append', help="Path to valgrind suppression file (example: --valgrind_supp opencv/platforms/scripts/valgrind.supp)")
|
||||
parser.add_argument("--valgrind_opt", metavar="OPT", action="append", default=[], help="Add command line option to valgrind (example: --valgrind_opt=--leak-check=full)")
|
||||
|
||||
# QEMU
|
||||
parser.add_argument("--qemu", default="", help="Specify qemu binary and base parameters")
|
||||
|
||||
# Android
|
||||
parser.add_argument("--android", action="store_true", default=False, help="Android: force all tests to run on device")
|
||||
parser.add_argument("--android_sdk", metavar="PATH", help="Android: path to SDK to use adb and aapt tools")
|
||||
|
@ -77,7 +77,7 @@ class TestSuite(object):
|
||||
return False
|
||||
return os.access(fullpath, os.X_OK)
|
||||
|
||||
def wrapInValgrind(self, cmd=[]):
|
||||
def wrapCommand(self, cmd, env):
|
||||
if self.options.valgrind:
|
||||
res = ['valgrind']
|
||||
supp = self.options.valgrind_supp or []
|
||||
@ -89,6 +89,14 @@ class TestSuite(object):
|
||||
res.extend(self.options.valgrind_opt)
|
||||
has_gtest_filter = next((True for x in cmd if x.startswith('--gtest_filter=')), False)
|
||||
return res + cmd + ([longTestFilter(LONG_TESTS_DEBUG_VALGRIND)] if not has_gtest_filter else [])
|
||||
elif self.options.qemu:
|
||||
import shlex
|
||||
res = shlex.split(self.options.qemu)
|
||||
for (name, value) in [entry for entry in os.environ.items() if entry[0].startswith('OPENCV') and not entry[0] in env]:
|
||||
res += ['-E', '"{}={}"'.format(name, value)]
|
||||
for (name, value) in env.items():
|
||||
res += ['-E', '"{}={}"'.format(name, value)]
|
||||
return res + ['--'] + cmd
|
||||
return cmd
|
||||
|
||||
def tryCommand(self, cmd, workingDir):
|
||||
@ -125,7 +133,6 @@ class TestSuite(object):
|
||||
else:
|
||||
if isColorEnabled(args):
|
||||
args.append("--gtest_color=yes")
|
||||
cmd = self.wrapInValgrind([exe] + args)
|
||||
env = {}
|
||||
if not self.options.valgrind and self.options.trace:
|
||||
env['OPENCV_TRACE'] = '1'
|
||||
@ -133,6 +140,7 @@ class TestSuite(object):
|
||||
env['OPENCV_TRACE_SYNC_OPENCL'] = '1'
|
||||
tempDir = TempEnvDir('OPENCV_TEMP_PATH', "__opencv_temp.")
|
||||
tempDir.init()
|
||||
cmd = self.wrapCommand([exe] + args, env)
|
||||
log.warning("Run: %s" % " ".join(cmd))
|
||||
ret = execute(cmd, cwd=workingDir, env=env)
|
||||
try:
|
||||
|
@ -721,6 +721,7 @@ void checkIppStatus()
|
||||
}
|
||||
}
|
||||
|
||||
static bool checkTestData = false;
|
||||
bool skipUnstableTests = false;
|
||||
bool runBigDataTests = false;
|
||||
int testThreads = 0;
|
||||
@ -733,6 +734,7 @@ void parseCustomOptions(int argc, char **argv)
|
||||
"{ test_threads |-1 |the number of worker threads, if parallel execution is enabled}"
|
||||
"{ skip_unstable |false |skip unstable tests }"
|
||||
"{ test_bigdata |false |run BigData tests (>=2Gb) }"
|
||||
"{ test_require_data |false |fail on missing non-required test data instead of skip}"
|
||||
"{ h help |false |print help info }";
|
||||
|
||||
cv::CommandLineParser parser(argc, argv, command_line_keys);
|
||||
@ -756,6 +758,7 @@ void parseCustomOptions(int argc, char **argv)
|
||||
|
||||
skipUnstableTests = parser.get<bool>("skip_unstable");
|
||||
runBigDataTests = parser.get<bool>("test_bigdata");
|
||||
checkTestData = parser.get<bool>("test_require_data");
|
||||
}
|
||||
|
||||
|
||||
@ -870,7 +873,7 @@ static std::string findData(const std::string& relative_path, bool required, boo
|
||||
#endif
|
||||
#endif
|
||||
const char* type = findDirectory ? "directory" : "data file";
|
||||
if (required)
|
||||
if (required || checkTestData)
|
||||
CV_Error(cv::Error::StsError, cv::format("OpenCV tests: Can't find required %s: %s", type, relative_path.c_str()));
|
||||
throw SkipTestException(cv::format("OpenCV tests: Can't find %s: %s", type, relative_path.c_str()));
|
||||
}
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -616,7 +616,7 @@ class SourceReaderCB : public IMFSourceReaderCallback
|
||||
{
|
||||
public:
|
||||
SourceReaderCB() :
|
||||
m_nRefCount(1), m_hEvent(CreateEvent(NULL, FALSE, FALSE, NULL)), m_bEOS(FALSE), m_hrStatus(S_OK), m_dwStreamIndex(0)
|
||||
m_nRefCount(0), m_hEvent(CreateEvent(NULL, FALSE, FALSE, NULL)), m_bEOS(FALSE), m_hrStatus(S_OK), m_reader(NULL), m_dwStreamIndex(0)
|
||||
{
|
||||
}
|
||||
|
||||
@ -677,7 +677,7 @@ public:
|
||||
BOOL m_bEOS;
|
||||
HRESULT m_hrStatus;
|
||||
|
||||
_ComPtr<IMFSourceReader> m_reader;
|
||||
IMFSourceReader *m_reader;
|
||||
DWORD m_dwStreamIndex;
|
||||
_ComPtr<IMFSample> m_lastSample;
|
||||
};
|
||||
@ -1140,7 +1140,7 @@ bool CvCapture_MSMF::grabFrame()
|
||||
if (!reader->m_reader)
|
||||
{
|
||||
// Initiate capturing with async callback
|
||||
reader->m_reader = videoFileSource;
|
||||
reader->m_reader = videoFileSource.Get();
|
||||
reader->m_dwStreamIndex = dwStreamIndex;
|
||||
if (FAILED(hr = videoFileSource->ReadSample(dwStreamIndex, 0, NULL, NULL, NULL, NULL)))
|
||||
{
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user