mirror of
https://github.com/opencv/opencv.git
synced 2025-06-07 17:44:04 +08:00
use correct name for NVIDIA
* remove NVidia and Nvidia * replace Cuda with CUDA * keep the letters for API
This commit is contained in:
parent
fdd83e5027
commit
a40354d16f
@ -49,13 +49,13 @@
|
|||||||
/* C= */
|
/* C= */
|
||||||
#cmakedefine HAVE_CSTRIPES
|
#cmakedefine HAVE_CSTRIPES
|
||||||
|
|
||||||
/* NVidia Cuda Basic Linear Algebra Subprograms (BLAS) API*/
|
/* NVIDIA CUDA Basic Linear Algebra Subprograms (BLAS) API*/
|
||||||
#cmakedefine HAVE_CUBLAS
|
#cmakedefine HAVE_CUBLAS
|
||||||
|
|
||||||
/* NVidia Cuda Runtime API*/
|
/* NVIDIA CUDA Runtime API*/
|
||||||
#cmakedefine HAVE_CUDA
|
#cmakedefine HAVE_CUDA
|
||||||
|
|
||||||
/* NVidia Cuda Fast Fourier Transform (FFT) API*/
|
/* NVIDIA CUDA Fast Fourier Transform (FFT) API*/
|
||||||
#cmakedefine HAVE_CUFFT
|
#cmakedefine HAVE_CUFFT
|
||||||
|
|
||||||
/* IEEE1394 capturing support */
|
/* IEEE1394 capturing support */
|
||||||
@ -127,10 +127,10 @@
|
|||||||
/* Microsoft Media Foundation Capture library */
|
/* Microsoft Media Foundation Capture library */
|
||||||
#cmakedefine HAVE_MSMF
|
#cmakedefine HAVE_MSMF
|
||||||
|
|
||||||
/* NVidia Video Decoding API*/
|
/* NVIDIA Video Decoding API*/
|
||||||
#cmakedefine HAVE_NVCUVID
|
#cmakedefine HAVE_NVCUVID
|
||||||
|
|
||||||
/* NVidia Video Encoding API*/
|
/* NVIDIA Video Encoding API*/
|
||||||
#cmakedefine HAVE_NVCUVENC
|
#cmakedefine HAVE_NVCUVENC
|
||||||
|
|
||||||
/* OpenCL Support */
|
/* OpenCL Support */
|
||||||
|
@ -8,7 +8,7 @@ Goal
|
|||||||
In the @ref tutorial_video_input_psnr_ssim tutorial I already presented the PSNR and SSIM methods for checking
|
In the @ref tutorial_video_input_psnr_ssim tutorial I already presented the PSNR and SSIM methods for checking
|
||||||
the similarity between the two images. And as you could see, the execution process takes quite some
|
the similarity between the two images. And as you could see, the execution process takes quite some
|
||||||
time, especially in the case of the SSIM. However, if the performance numbers of an OpenCV
|
time, especially in the case of the SSIM. However, if the performance numbers of an OpenCV
|
||||||
implementation for the CPU do not satisfy you and you happen to have an NVidia CUDA GPU device in
|
implementation for the CPU do not satisfy you and you happen to have an NVIDIA CUDA GPU device in
|
||||||
your system, all is not lost. You may try to port or write your own algorithm for the video card.
|
your system, all is not lost. You may try to port or write your own algorithm for the video card.
|
||||||
|
|
||||||
This tutorial will give a good grasp on how to approach coding by using the GPU module of OpenCV. As
|
This tutorial will give a good grasp on how to approach coding by using the GPU module of OpenCV. As
|
||||||
@ -187,7 +187,7 @@ introduce asynchronous OpenCV GPU calls too with the help of the @ref cv::cuda::
|
|||||||
Result and conclusion
|
Result and conclusion
|
||||||
---------------------
|
---------------------
|
||||||
|
|
||||||
On an Intel P8700 laptop CPU paired with a low end NVidia GT220M, here are the performance numbers:
|
On an Intel P8700 laptop CPU paired with a low end NVIDIA GT220M, here are the performance numbers:
|
||||||
@code
|
@code
|
||||||
Time of PSNR CPU (averaged for 10 runs): 41.4122 milliseconds. With result of: 19.2506
|
Time of PSNR CPU (averaged for 10 runs): 41.4122 milliseconds. With result of: 19.2506
|
||||||
Time of PSNR GPU (averaged for 10 runs): 158.977 milliseconds. With result of: 19.2506
|
Time of PSNR GPU (averaged for 10 runs): 158.977 milliseconds. With result of: 19.2506
|
||||||
|
@ -50,7 +50,7 @@ syntax = "proto2";
|
|||||||
|
|
||||||
package opencv_caffe;
|
package opencv_caffe;
|
||||||
|
|
||||||
// NVidia's Caffe feature is used to store fp16 weights, https://github.com/NVIDIA/caffe:
|
// NVIDIA's Caffe feature is used to store fp16 weights, https://github.com/NVIDIA/caffe:
|
||||||
// Math and storage types
|
// Math and storage types
|
||||||
enum Type {
|
enum Type {
|
||||||
DOUBLE = 0;
|
DOUBLE = 0;
|
||||||
@ -72,10 +72,10 @@ message BlobProto {
|
|||||||
repeated double double_data = 8 [packed = true];
|
repeated double double_data = 8 [packed = true];
|
||||||
repeated double double_diff = 9 [packed = true];
|
repeated double double_diff = 9 [packed = true];
|
||||||
|
|
||||||
// NVidia's Caffe fields begin.
|
// NVIDIA's Caffe fields begin.
|
||||||
optional Type raw_data_type = 10;
|
optional Type raw_data_type = 10;
|
||||||
optional bytes raw_data = 12 [packed = false];
|
optional bytes raw_data = 12 [packed = false];
|
||||||
// NVidia's Caffe fields end.
|
// NVIDIA's Caffe fields end.
|
||||||
|
|
||||||
// 4D dimensions -- deprecated. Use "shape" instead.
|
// 4D dimensions -- deprecated. Use "shape" instead.
|
||||||
optional int32 num = 1 [default = 0];
|
optional int32 num = 1 [default = 0];
|
||||||
|
@ -547,7 +547,7 @@ static bool ocl_Laplacian5(InputArray _src, OutputArray _dst,
|
|||||||
size_t src_step = _src.step(), src_offset = _src.offset();
|
size_t src_step = _src.step(), src_offset = _src.offset();
|
||||||
const size_t tileSizeYmax = wgs / tileSizeX;
|
const size_t tileSizeYmax = wgs / tileSizeX;
|
||||||
|
|
||||||
// workaround for Nvidia: 3 channel vector type takes 4*elem_size in local memory
|
// workaround for NVIDIA: 3 channel vector type takes 4*elem_size in local memory
|
||||||
int loc_mem_cn = dev.vendorID() == ocl::Device::VENDOR_NVIDIA && cn == 3 ? 4 : cn;
|
int loc_mem_cn = dev.vendorID() == ocl::Device::VENDOR_NVIDIA && cn == 3 ? 4 : cn;
|
||||||
|
|
||||||
if (((src_offset % src_step) % esz == 0) &&
|
if (((src_offset % src_step) % esz == 0) &&
|
||||||
|
Loading…
Reference in New Issue
Block a user