mirror of
https://github.com/opencv/opencv.git
synced 2024-11-27 20:50:25 +08:00
updated gpu initialization functions, added compile-time error on CC 1.0
This commit is contained in:
parent
6187b97199
commit
574b3f94a1
@ -708,47 +708,36 @@ if(WITH_CUDA)
|
||||
message(STATUS "CUDA detected: " ${CUDA_VERSION})
|
||||
|
||||
set(CUDA_ARCH_GPU "1.3 2.0" CACHE STRING "Specify 'real' GPU architectures to build binaries for")
|
||||
set(CUDA_ARCH_PTX "1.1 1.3" CACHE STRING "Specify 'virtual' PTX architectures to build PTX intermediate code for")
|
||||
set(CUDA_ARCH_PTX "1.1 1.3" CACHE STRING "Specify 'virtual' PTX architectures to build PTX intermediate code for")
|
||||
|
||||
# Architectures to be searched for in user's input
|
||||
set (CUDA_ARCH_ALL 1.0 1.1 1.2 1.3 2.0 2.1)
|
||||
# These variables are used in config templates
|
||||
string(REGEX REPLACE "\\." "" ARCH_GPU_NO_POINTS "${CUDA_ARCH_GPU}")
|
||||
string(REGEX REPLACE "\\." "" ARCH_PTX_NO_POINTS "${CUDA_ARCH_PTX}")
|
||||
|
||||
# Parse user's input
|
||||
foreach(ARCH IN LISTS CUDA_ARCH_ALL)
|
||||
string(REGEX MATCH ${ARCH} ARCH_GPU_MATCH "${CUDA_ARCH_GPU}")
|
||||
string(REGEX MATCH ${ARCH} ARCH_PTX_MATCH "${CUDA_ARCH_PTX}")
|
||||
string(REGEX REPLACE "\\." "" ARCH_GPU_AS_NUM "${ARCH_GPU_MATCH}")
|
||||
string(REGEX REPLACE "\\." "" ARCH_PTX_AS_NUM "${ARCH_PTX_MATCH}")
|
||||
|
||||
# Define variables indicating the architectures specified by user
|
||||
if(NOT ${ARCH_GPU_AS_NUM} STREQUAL "")
|
||||
set(OPENCV_ARCH_GPU_${ARCH_GPU_AS_NUM} 1)
|
||||
endif()
|
||||
if(NOT ${ARCH_PTX_AS_NUM} STREQUAL "")
|
||||
set(OPENCV_ARCH_PTX_${ARCH_PTX_AS_NUM} 1)
|
||||
endif()
|
||||
endforeach()
|
||||
# Check if user specified 1.0 compute capability
|
||||
string(REGEX MATCH "1.0" HAS_ARCH_10 "${CUDA_ARCH_GPU} ${CUDA_ARCH_PTX}")
|
||||
if(NOT ${HAS_ARCH_10} STREQUAL "")
|
||||
set(OPENCV_ARCH_GPU_OR_PTX_10 1)
|
||||
endif()
|
||||
|
||||
set(NVCC_FLAGS_EXTRA "")
|
||||
|
||||
# Tell nvcc to add binaries for the specified GPUs
|
||||
string(REGEX REPLACE "\\." "" CUDA_ARCH_GPU "${CUDA_ARCH_GPU}")
|
||||
string(REGEX MATCHALL "[0-9]+" CUDA_ARCH_GPU_LIST "${CUDA_ARCH_GPU}")
|
||||
foreach(ARCH_GPU IN LISTS CUDA_ARCH_GPU_LIST)
|
||||
set(NVCC_FLAGS_EXTRA ${NVCC_FLAGS_EXTRA} -gencode arch=compute_${ARCH_GPU},code=sm_${ARCH_GPU})
|
||||
string(REGEX MATCHALL "[0-9]+" ARCH_LIST "${ARCH_GPU_NO_POINTS}")
|
||||
foreach(ARCH IN LISTS ARCH_LIST)
|
||||
set(NVCC_FLAGS_EXTRA ${NVCC_FLAGS_EXTRA} -gencode arch=compute_${ARCH},code=sm_${ARCH})
|
||||
endforeach()
|
||||
|
||||
# Tell nvcc to add PTX intermediate code for the specified architectures
|
||||
string(REGEX REPLACE "\\." "" CUDA_ARCH_PTX "${CUDA_ARCH_PTX}")
|
||||
string(REGEX MATCHALL "[0-9]+" CUDA_ARCH_PTX_LIST "${CUDA_ARCH_PTX}")
|
||||
foreach(ARCH_PTX IN LISTS CUDA_ARCH_PTX_LIST)
|
||||
set(NVCC_FLAGS_EXTRA ${NVCC_FLAGS_EXTRA} -gencode arch=compute_${ARCH_PTX},code=compute_${ARCH_PTX})
|
||||
string(REGEX MATCHALL "[0-9]+" ARCH_LIST "${ARCH_PTX_NO_POINTS}")
|
||||
foreach(ARCH IN LISTS ARCH_LIST)
|
||||
set(NVCC_FLAGS_EXTRA ${NVCC_FLAGS_EXTRA} -gencode arch=compute_${ARCH},code=compute_${ARCH})
|
||||
endforeach()
|
||||
|
||||
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} ${NVCC_FLAGS_EXTRA})
|
||||
message(STATUS "CUDA NVCC flags: ${CUDA_NVCC_FLAGS}")
|
||||
|
||||
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} ${NVCC_FLAGS_EXTRA})
|
||||
set(OpenCV_CUDA_CC "${NVCC_FLAGS_EXTRA}")
|
||||
|
||||
message(STATUS "CUDA NVCC flags: ${CUDA_NVCC_FLAGS}")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
|
@ -163,21 +163,14 @@
|
||||
/* NVidia Cuda Runtime API*/
|
||||
#cmakedefine HAVE_CUDA
|
||||
|
||||
/* Compile for 'real' NVIDIA GPU architecture */
|
||||
#cmakedefine OPENCV_ARCH_GPU_10
|
||||
#cmakedefine OPENCV_ARCH_GPU_11
|
||||
#cmakedefine OPENCV_ARCH_GPU_12
|
||||
#cmakedefine OPENCV_ARCH_GPU_13
|
||||
#cmakedefine OPENCV_ARCH_GPU_20
|
||||
#cmakedefine OPENCV_ARCH_GPU_21
|
||||
/* Compile for 'real' NVIDIA GPU architectures */
|
||||
#define OPENCV_ARCH_GPU "${ARCH_GPU_NO_POINTS}"
|
||||
|
||||
/* Compile for 'virtual' NVIDIA PTX architecture */
|
||||
#cmakedefine OPENCV_ARCH_PTX_10
|
||||
#cmakedefine OPENCV_ARCH_PTX_11
|
||||
#cmakedefine OPENCV_ARCH_PTX_12
|
||||
#cmakedefine OPENCV_ARCH_PTX_13
|
||||
#cmakedefine OPENCV_ARCH_PTX_20
|
||||
#cmakedefine OPENCV_ARCH_PTX_21
|
||||
/* Compile for 'virtual' NVIDIA PTX architectures */
|
||||
#define OPENCV_ARCH_PTX "${ARCH_PTX_NO_POINTS}"
|
||||
|
||||
/* Create PTX or CUBIN for 1.0 compute capability */
|
||||
#cmakedefine OPENCV_ARCH_GPU_OR_PTX_10
|
||||
|
||||
/* VideoInput library */
|
||||
#cmakedefine HAVE_VIDEOINPUT
|
||||
|
@ -232,10 +232,10 @@ private:
|
||||
|
||||
\cvCppFunc{gpu::ConvolveBuf::ConvolveBuf}
|
||||
|
||||
\cvdefCpp{ConvolveBuf();}
|
||||
\cvdefCpp{ConvolveBuf::ConvolveBuf();}
|
||||
Constructs an empty buffer which will be properly resized after the first call to the convolve function.
|
||||
|
||||
\cvdefCpp{ConvolveBuf(Size image\_size, Size templ\_size);}
|
||||
\cvdefCpp{ConvolveBuf::ConvolveBuf(Size image\_size, Size templ\_size);}
|
||||
Constructs a buffer for the convolve function with the respective arguments.
|
||||
|
||||
|
||||
|
@ -82,13 +82,13 @@ Creates HOG descriptor and detector.
|
||||
\cvCppFunc{gpu::HOGDescriptor::getDescriptorSize}
|
||||
Returns number of coefficients required for the classification.
|
||||
|
||||
\cvdefCpp{size\_t getDescriptorSize() const;}
|
||||
\cvdefCpp{size\_t HOGDescriptor::getDescriptorSize() const;}
|
||||
|
||||
|
||||
\cvCppFunc{gpu::HOGDescriptor::getBlockHistogramSize}
|
||||
Returns block histogram size.
|
||||
|
||||
\cvdefCpp{size\_t getBlockHistogramSize() const;}
|
||||
\cvdefCpp{size\_t HOGDescriptor::getBlockHistogramSize() const;}
|
||||
|
||||
|
||||
\cvCppFunc{gpu::HOGDescriptor::setSVMDetector}
|
||||
@ -100,25 +100,25 @@ Sets coefficients for the linear SVM classifier.
|
||||
\cvCppFunc{gpu::HOGDescriptor::getDefaultPeopleDetector}
|
||||
Returns coefficients of the classifier trained for people detection (for default window size).
|
||||
|
||||
\cvdefCpp{static vector<float> getDefaultPeopleDetector();}
|
||||
\cvdefCpp{static vector<float> HOGDescriptor::getDefaultPeopleDetector();}
|
||||
|
||||
|
||||
\cvCppFunc{gpu::HOGDescriptor::getPeopleDetector48x96}
|
||||
Returns coefficients of the classifier trained for people detection (for 48x96 windows).
|
||||
|
||||
\cvdefCpp{static vector<float> getPeopleDetector48x96();}
|
||||
\cvdefCpp{static vector<float> HOGDescriptor::getPeopleDetector48x96();}
|
||||
|
||||
|
||||
\cvCppFunc{gpu::HOGDescriptor::getPeopleDetector64x128}
|
||||
Returns coefficients of the classifier trained for people detection (for 64x128 windows).
|
||||
|
||||
\cvdefCpp{static vector<float> getPeopleDetector64x128();}
|
||||
\cvdefCpp{static vector<float> HOGDescriptor::getPeopleDetector64x128();}
|
||||
|
||||
|
||||
\cvCppFunc{gpu::HOGDescriptor::detect}
|
||||
Performs object detection without a multiscale window.
|
||||
|
||||
\cvdefCpp{void detect(const GpuMat\& img, vector<Point>\& found\_locations,\par
|
||||
\cvdefCpp{void HOGDescriptor::detect(const GpuMat\& img, vector<Point>\& found\_locations,\par
|
||||
double hit\_threshold=0, Size win\_stride=Size(),\par
|
||||
Size padding=Size());}
|
||||
|
||||
@ -134,10 +134,10 @@ Perfroms object detection without multiscale window.
|
||||
\cvCppFunc{gpu::HOGDescriptor::detectMultiScale}
|
||||
Performs object detection with a multiscale window.
|
||||
|
||||
\cvdefCpp{void detectMultiScale(const GpuMat\& img, vector<Rect>\& found\_locations,\par
|
||||
double hit\_threshold=0, Size win\_stride=Size(),\par
|
||||
Size padding=Size(), double scale0=1.05,\par
|
||||
int group\_threshold=2);}
|
||||
\cvdefCpp{void HOGDescriptor::detectMultiScale(const GpuMat\& img,\par
|
||||
vector<Rect>\& found\_locations, double hit\_threshold=0,\par
|
||||
Size win\_stride=Size(), Size padding=Size(),\par
|
||||
double scale0=1.05, int group\_threshold=2);}
|
||||
|
||||
\begin{description}
|
||||
\cvarg{img}{Source image. See \cvCppCross{gpu::HOGDescriptor::detect} for type limitations.}
|
||||
@ -154,9 +154,9 @@ See \cvCppCross{groupRectangles}.}
|
||||
\cvCppFunc{gpu::HOGDescriptor::getDescriptors}
|
||||
Returns block descriptors computed for the whole image. It's mainly used for classifier learning purposes.
|
||||
|
||||
\cvdefCpp{void getDescriptors(const GpuMat\& img, Size win\_stride,\par
|
||||
GpuMat\& descriptors,\par
|
||||
int descr\_format=DESCR\_FORMAT\_COL\_BY\_COL);}
|
||||
\cvdefCpp{void HOGDescriptor::getDescriptors(const GpuMat\& img,\par
|
||||
Size win\_stride, GpuMat\& descriptors,\par
|
||||
int descr\_format=DESCR\_FORMAT\_COL\_BY\_COL);}
|
||||
|
||||
\begin{description}
|
||||
\cvarg{img}{Source image. See \cvCppCross{gpu::HOGDescriptor::detect} for type limitations.}
|
||||
|
@ -41,6 +41,7 @@
|
||||
//M*/
|
||||
|
||||
#include "precomp.hpp"
|
||||
#include <functional>
|
||||
|
||||
using namespace cv;
|
||||
using namespace cv::gpu;
|
||||
@ -58,12 +59,12 @@ CV_EXPORTS void cv::gpu::getGpuMemInfo(size_t& /*free*/, size_t& /*total*/) { t
|
||||
CV_EXPORTS bool cv::gpu::hasNativeDoubleSupport(int /*device*/) { throw_nogpu(); return false; }
|
||||
CV_EXPORTS bool cv::gpu::hasAtomicsSupport(int /*device*/) { throw_nogpu(); return false; }
|
||||
CV_EXPORTS bool cv::gpu::hasPtxVersion(int major, int minor) { throw_nogpu(); return false; }
|
||||
CV_EXPORTS bool cv::gpu::hasLessOrEqualPtxVersion(int major, int minor) { throw_nogpu(); return false; }
|
||||
CV_EXPORTS bool cv::gpu::hasGreaterOrEqualPtxVersion(int major, int minor) { throw_nogpu(); return false; }
|
||||
CV_EXPORTS bool cv::gpu::hasCubinVersion(int major, int minor) { throw_nogpu(); return false; }
|
||||
CV_EXPORTS bool cv::gpu::hasGreaterOrEqualCubinVersion(int major, int minor) { throw_nogpu(); return false; }
|
||||
CV_EXPORTS bool cv::gpu::hasVersion(int major, int minor) { throw_nogpu(); return false; }
|
||||
CV_EXPORTS bool cv::gpu::hasGreaterOrEqualVersion(int major, int minor) { throw_nogpu(); return false; }
|
||||
CV_EXPORTS bool cv::gpu::hasLessOrEqualPtxVersion(int major, int minor) { return false; }
|
||||
CV_EXPORTS bool cv::gpu::hasGreaterOrEqualPtxVersion(int major, int minor) { return false; }
|
||||
CV_EXPORTS bool cv::gpu::hasCubinVersion(int major, int minor) { return false; }
|
||||
CV_EXPORTS bool cv::gpu::hasGreaterOrEqualCubinVersion(int major, int minor) { return false; }
|
||||
CV_EXPORTS bool cv::gpu::hasVersion(int major, int minor) { return false; }
|
||||
CV_EXPORTS bool cv::gpu::hasGreaterOrEqualVersion(int major, int minor) { return false; }
|
||||
CV_EXPORTS bool cv::gpu::isCompatibleWith(int device) { throw_nogpu(); return false; }
|
||||
|
||||
|
||||
@ -142,118 +143,55 @@ CV_EXPORTS bool cv::gpu::hasAtomicsSupport(int device)
|
||||
namespace
|
||||
{
|
||||
template <typename Comparer>
|
||||
bool checkPtxVersion(int major, int minor, Comparer cmp)
|
||||
bool compare(const std::string& str, int x, Comparer cmp)
|
||||
{
|
||||
#ifdef OPENCV_ARCH_PTX_10
|
||||
if (cmp(1, 0, major, minor)) return true;
|
||||
#endif
|
||||
std::stringstream stream(str);
|
||||
|
||||
#ifdef OPENCV_ARCH_PTX_11
|
||||
if (cmp(1, 1, major, minor)) return true;
|
||||
#endif
|
||||
int val;
|
||||
stream >> val;
|
||||
|
||||
#ifdef OPENCV_ARCH_PTX_12
|
||||
if (cmp(1, 2, major, minor)) return true;
|
||||
#endif
|
||||
|
||||
#ifdef OPENCV_ARCH_PTX_13
|
||||
if (cmp(1, 3, major, minor)) return true;
|
||||
#endif
|
||||
|
||||
#ifdef OPENCV_ARCH_PTX_20
|
||||
if (cmp(2, 0, major, minor)) return true;
|
||||
#endif
|
||||
|
||||
#ifdef OPENCV_ARCH_PTX_21
|
||||
if (cmp(2, 1, major, minor)) return true;
|
||||
#endif
|
||||
while (!stream.eof() && !stream.fail())
|
||||
{
|
||||
if (cmp(val, x))
|
||||
return true;
|
||||
stream >> val;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
template <typename Comparer>
|
||||
bool checkCubinVersion(int major, int minor, Comparer cmp)
|
||||
{
|
||||
#ifdef OPENCV_ARCH_GPU_10
|
||||
if (cmp(1, 0, major, minor)) return true;
|
||||
#endif
|
||||
|
||||
#ifdef OPENCV_ARCH_GPU_11
|
||||
if (cmp(1, 1, major, minor)) return true;
|
||||
#endif
|
||||
|
||||
#ifdef OPENCV_ARCH_GPU_12
|
||||
if (cmp(1, 2, major, minor)) return true;
|
||||
#endif
|
||||
|
||||
#ifdef OPENCV_ARCH_GPU_13
|
||||
if (cmp(1, 3, major, minor)) return true;
|
||||
#endif
|
||||
|
||||
#ifdef OPENCV_ARCH_GPU_20
|
||||
if (cmp(2, 0, major, minor)) return true;
|
||||
#endif
|
||||
|
||||
#ifdef OPENCV_ARCH_GPU_21
|
||||
if (cmp(2, 1, major, minor)) return true;
|
||||
#endif
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
struct ComparerEqual
|
||||
{
|
||||
bool operator()(int lhs1, int lhs2, int rhs1, int rhs2) const
|
||||
{
|
||||
return lhs1 == rhs1 && lhs2 == rhs2;
|
||||
}
|
||||
};
|
||||
|
||||
struct ComparerLessOrEqual
|
||||
{
|
||||
bool operator()(int lhs1, int lhs2, int rhs1, int rhs2) const
|
||||
{
|
||||
return lhs1 < rhs1 || (lhs1 == rhs1 && lhs2 <= rhs2);
|
||||
}
|
||||
};
|
||||
|
||||
struct ComparerGreaterOrEqual
|
||||
{
|
||||
bool operator()(int lhs1, int lhs2, int rhs1, int rhs2) const
|
||||
{
|
||||
return lhs1 > rhs1 || (lhs1 == rhs1 && lhs2 >= rhs2);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
CV_EXPORTS bool cv::gpu::hasPtxVersion(int major, int minor)
|
||||
{
|
||||
return checkPtxVersion(major, minor, ComparerEqual());
|
||||
return ::compare(OPENCV_ARCH_PTX, major * 10 + minor, std::equal_to<int>());
|
||||
}
|
||||
|
||||
|
||||
CV_EXPORTS bool cv::gpu::hasLessOrEqualPtxVersion(int major, int minor)
|
||||
{
|
||||
return checkPtxVersion(major, minor, ComparerLessOrEqual());
|
||||
return ::compare(OPENCV_ARCH_PTX, major * 10 + minor,
|
||||
std::less_equal<int>());
|
||||
}
|
||||
|
||||
|
||||
CV_EXPORTS bool cv::gpu::hasGreaterOrEqualPtxVersion(int major, int minor)
|
||||
{
|
||||
return checkPtxVersion(major, minor, ComparerGreaterOrEqual());
|
||||
return ::compare(OPENCV_ARCH_PTX, major * 10 + minor,
|
||||
std::greater_equal<int>());
|
||||
}
|
||||
|
||||
|
||||
CV_EXPORTS bool cv::gpu::hasCubinVersion(int major, int minor)
|
||||
{
|
||||
return checkCubinVersion(major, minor, ComparerEqual());
|
||||
return ::compare(OPENCV_ARCH_GPU, major * 10 + minor, std::equal_to<int>());
|
||||
}
|
||||
|
||||
|
||||
CV_EXPORTS bool cv::gpu::hasGreaterOrEqualCubinVersion(int major, int minor)
|
||||
{
|
||||
return checkCubinVersion(major, minor, ComparerGreaterOrEqual());
|
||||
return ::compare(OPENCV_ARCH_GPU, major * 10 + minor,
|
||||
std::greater_equal<int>());
|
||||
}
|
||||
|
||||
|
||||
@ -284,7 +222,7 @@ CV_EXPORTS bool cv::gpu::isCompatibleWith(int device)
|
||||
return true;
|
||||
|
||||
// Check CUBIN compatibility
|
||||
for (int i = 0; i <= minor; ++i)
|
||||
for (int i = minor; i >= 0; --i)
|
||||
if (hasCubinVersion(major, i))
|
||||
return true;
|
||||
|
||||
|
@ -85,6 +85,10 @@
|
||||
#error "Insufficient NPP version, please update it."
|
||||
#endif
|
||||
|
||||
#if defined(OPENCV_ARCH_GPU_OR_PTX_10)
|
||||
#error "OpenCV GPU module doesn't support NVIDIA compute capability 1.0"
|
||||
#endif
|
||||
|
||||
static inline void throw_nogpu() { CV_Error(CV_GpuNotSupported, "The called functionality is disabled for current build or platform"); }
|
||||
|
||||
#else /* defined(HAVE_CUDA) */
|
||||
|
Loading…
Reference in New Issue
Block a user