mirror of
https://github.com/opencv/opencv.git
synced 2024-11-28 13:10:12 +08:00
CUDA related func tables refactored to remove unneeded dependencies.
This commit is contained in:
parent
6da7c50fb5
commit
64c94cb22c
@ -239,23 +239,23 @@ static DeviceInfoFuncTable* deviceInfoFuncTable()
|
|||||||
|
|
||||||
//////////////////////////////// Initialization & Info ////////////////////////
|
//////////////////////////////// Initialization & Info ////////////////////////
|
||||||
|
|
||||||
int cv::gpu::getCudaEnabledDeviceCount() { return gpuFuncTable()->getCudaEnabledDeviceCount(); }
|
int cv::gpu::getCudaEnabledDeviceCount() { return deviceInfoFuncTable()->getCudaEnabledDeviceCount(); }
|
||||||
|
|
||||||
void cv::gpu::setDevice(int device) { gpuFuncTable()->setDevice(device); }
|
void cv::gpu::setDevice(int device) { deviceInfoFuncTable()->setDevice(device); }
|
||||||
int cv::gpu::getDevice() { return gpuFuncTable()->getDevice(); }
|
int cv::gpu::getDevice() { return deviceInfoFuncTable()->getDevice(); }
|
||||||
|
|
||||||
void cv::gpu::resetDevice() { gpuFuncTable()->resetDevice(); }
|
void cv::gpu::resetDevice() { deviceInfoFuncTable()->resetDevice(); }
|
||||||
|
|
||||||
bool cv::gpu::deviceSupports(FeatureSet feature_set) { return gpuFuncTable()->deviceSupports(feature_set); }
|
bool cv::gpu::deviceSupports(FeatureSet feature_set) { return deviceInfoFuncTable()->deviceSupports(feature_set); }
|
||||||
|
|
||||||
bool cv::gpu::TargetArchs::builtWith(FeatureSet feature_set) { return gpuFuncTable()->builtWith(feature_set); }
|
bool cv::gpu::TargetArchs::builtWith(FeatureSet feature_set) { return deviceInfoFuncTable()->builtWith(feature_set); }
|
||||||
bool cv::gpu::TargetArchs::has(int major, int minor) { return gpuFuncTable()->has(major, minor); }
|
bool cv::gpu::TargetArchs::has(int major, int minor) { return deviceInfoFuncTable()->has(major, minor); }
|
||||||
bool cv::gpu::TargetArchs::hasPtx(int major, int minor) { return gpuFuncTable()->hasPtx(major, minor); }
|
bool cv::gpu::TargetArchs::hasPtx(int major, int minor) { return deviceInfoFuncTable()->hasPtx(major, minor); }
|
||||||
bool cv::gpu::TargetArchs::hasBin(int major, int minor) { return gpuFuncTable()->hasBin(major, minor); }
|
bool cv::gpu::TargetArchs::hasBin(int major, int minor) { return deviceInfoFuncTable()->hasBin(major, minor); }
|
||||||
bool cv::gpu::TargetArchs::hasEqualOrLessPtx(int major, int minor) { return gpuFuncTable()->hasEqualOrLessPtx(major, minor); }
|
bool cv::gpu::TargetArchs::hasEqualOrLessPtx(int major, int minor) { return deviceInfoFuncTable()->hasEqualOrLessPtx(major, minor); }
|
||||||
bool cv::gpu::TargetArchs::hasEqualOrGreater(int major, int minor) { return gpuFuncTable()->hasEqualOrGreater(major, minor); }
|
bool cv::gpu::TargetArchs::hasEqualOrGreater(int major, int minor) { return deviceInfoFuncTable()->hasEqualOrGreater(major, minor); }
|
||||||
bool cv::gpu::TargetArchs::hasEqualOrGreaterPtx(int major, int minor) { return gpuFuncTable()->hasEqualOrGreaterPtx(major, minor); }
|
bool cv::gpu::TargetArchs::hasEqualOrGreaterPtx(int major, int minor) { return deviceInfoFuncTable()->hasEqualOrGreaterPtx(major, minor); }
|
||||||
bool cv::gpu::TargetArchs::hasEqualOrGreaterBin(int major, int minor) { return gpuFuncTable()->hasEqualOrGreaterBin(major, minor); }
|
bool cv::gpu::TargetArchs::hasEqualOrGreaterBin(int major, int minor) { return deviceInfoFuncTable()->hasEqualOrGreaterBin(major, minor); }
|
||||||
|
|
||||||
size_t cv::gpu::DeviceInfo::sharedMemPerBlock() const { return deviceInfoFuncTable()->sharedMemPerBlock(); }
|
size_t cv::gpu::DeviceInfo::sharedMemPerBlock() const { return deviceInfoFuncTable()->sharedMemPerBlock(); }
|
||||||
void cv::gpu::DeviceInfo::queryMemory(size_t& total_memory, size_t& free_memory) const { deviceInfoFuncTable()->queryMemory(total_memory, free_memory); }
|
void cv::gpu::DeviceInfo::queryMemory(size_t& total_memory, size_t& free_memory) const { deviceInfoFuncTable()->queryMemory(total_memory, free_memory); }
|
||||||
@ -270,8 +270,8 @@ std::string cv::gpu::DeviceInfo::name() const { return deviceInfoFuncTable()->na
|
|||||||
int cv::gpu::DeviceInfo::multiProcessorCount() const { return deviceInfoFuncTable()->multiProcessorCount(); }
|
int cv::gpu::DeviceInfo::multiProcessorCount() const { return deviceInfoFuncTable()->multiProcessorCount(); }
|
||||||
void cv::gpu::DeviceInfo::query() { deviceInfoFuncTable()->query(); }
|
void cv::gpu::DeviceInfo::query() { deviceInfoFuncTable()->query(); }
|
||||||
|
|
||||||
void cv::gpu::printCudaDeviceInfo(int device) { gpuFuncTable()->printCudaDeviceInfo(device); }
|
void cv::gpu::printCudaDeviceInfo(int device) { deviceInfoFuncTable()->printCudaDeviceInfo(device); }
|
||||||
void cv::gpu::printShortCudaDeviceInfo(int device) { gpuFuncTable()->printShortCudaDeviceInfo(device); }
|
void cv::gpu::printShortCudaDeviceInfo(int device) { deviceInfoFuncTable()->printShortCudaDeviceInfo(device); }
|
||||||
|
|
||||||
#ifdef HAVE_CUDA
|
#ifdef HAVE_CUDA
|
||||||
|
|
||||||
|
@ -4,6 +4,7 @@
|
|||||||
class DeviceInfoFuncTable
|
class DeviceInfoFuncTable
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
|
// cv::DeviceInfo
|
||||||
virtual size_t sharedMemPerBlock() const = 0;
|
virtual size_t sharedMemPerBlock() const = 0;
|
||||||
virtual void queryMemory(size_t&, size_t&) const = 0;
|
virtual void queryMemory(size_t&, size_t&) const = 0;
|
||||||
virtual size_t freeMemory() const = 0;
|
virtual size_t freeMemory() const = 0;
|
||||||
@ -16,25 +17,13 @@
|
|||||||
virtual int majorVersion() const = 0;
|
virtual int majorVersion() const = 0;
|
||||||
virtual int minorVersion() const = 0;
|
virtual int minorVersion() const = 0;
|
||||||
virtual int multiProcessorCount() const = 0;
|
virtual int multiProcessorCount() const = 0;
|
||||||
virtual ~DeviceInfoFuncTable() {};
|
|
||||||
};
|
|
||||||
|
|
||||||
class GpuFuncTable
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
virtual ~GpuFuncTable() {}
|
|
||||||
|
|
||||||
// DeviceInfo routines
|
|
||||||
virtual int getCudaEnabledDeviceCount() const = 0;
|
virtual int getCudaEnabledDeviceCount() const = 0;
|
||||||
|
|
||||||
virtual void setDevice(int) const = 0;
|
virtual void setDevice(int) const = 0;
|
||||||
virtual int getDevice() const = 0;
|
virtual int getDevice() const = 0;
|
||||||
|
|
||||||
virtual void resetDevice() const = 0;
|
virtual void resetDevice() const = 0;
|
||||||
|
|
||||||
virtual bool deviceSupports(FeatureSet) const = 0;
|
virtual bool deviceSupports(FeatureSet) const = 0;
|
||||||
|
|
||||||
// TargetArchs
|
// cv::TargetArchs
|
||||||
virtual bool builtWith(FeatureSet) const = 0;
|
virtual bool builtWith(FeatureSet) const = 0;
|
||||||
virtual bool has(int, int) const = 0;
|
virtual bool has(int, int) const = 0;
|
||||||
virtual bool hasPtx(int, int) const = 0;
|
virtual bool hasPtx(int, int) const = 0;
|
||||||
@ -47,6 +36,14 @@
|
|||||||
virtual void printCudaDeviceInfo(int) const = 0;
|
virtual void printCudaDeviceInfo(int) const = 0;
|
||||||
virtual void printShortCudaDeviceInfo(int) const = 0;
|
virtual void printShortCudaDeviceInfo(int) const = 0;
|
||||||
|
|
||||||
|
virtual ~DeviceInfoFuncTable() {};
|
||||||
|
};
|
||||||
|
|
||||||
|
class GpuFuncTable
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
virtual ~GpuFuncTable() {}
|
||||||
|
|
||||||
// GpuMat routines
|
// GpuMat routines
|
||||||
virtual void copy(const Mat& src, GpuMat& dst) const = 0;
|
virtual void copy(const Mat& src, GpuMat& dst) const = 0;
|
||||||
virtual void copy(const GpuMat& src, Mat& dst) const = 0;
|
virtual void copy(const GpuMat& src, Mat& dst) const = 0;
|
||||||
@ -80,13 +77,7 @@
|
|||||||
int majorVersion() const { throw_nogpu; return -1; }
|
int majorVersion() const { throw_nogpu; return -1; }
|
||||||
int minorVersion() const { throw_nogpu; return -1; }
|
int minorVersion() const { throw_nogpu; return -1; }
|
||||||
int multiProcessorCount() const { throw_nogpu; return -1; }
|
int multiProcessorCount() const { throw_nogpu; return -1; }
|
||||||
};
|
|
||||||
|
|
||||||
class EmptyFuncTable : public GpuFuncTable
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
|
|
||||||
// DeviceInfo routines
|
|
||||||
int getCudaEnabledDeviceCount() const { return 0; }
|
int getCudaEnabledDeviceCount() const { return 0; }
|
||||||
|
|
||||||
void setDevice(int) const { throw_nogpu; }
|
void setDevice(int) const { throw_nogpu; }
|
||||||
@ -107,6 +98,11 @@
|
|||||||
|
|
||||||
void printCudaDeviceInfo(int) const { throw_nogpu; }
|
void printCudaDeviceInfo(int) const { throw_nogpu; }
|
||||||
void printShortCudaDeviceInfo(int) const { throw_nogpu; }
|
void printShortCudaDeviceInfo(int) const { throw_nogpu; }
|
||||||
|
};
|
||||||
|
|
||||||
|
class EmptyFuncTable : public GpuFuncTable
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
|
||||||
void copy(const Mat&, GpuMat&) const { throw_nogpu; }
|
void copy(const Mat&, GpuMat&) const { throw_nogpu; }
|
||||||
void copy(const GpuMat&, Mat&) const { throw_nogpu; }
|
void copy(const GpuMat&, Mat&) const { throw_nogpu; }
|
||||||
@ -568,12 +564,12 @@ namespace cv { namespace gpu { namespace device
|
|||||||
bool isCompatible() const
|
bool isCompatible() const
|
||||||
{
|
{
|
||||||
// Check PTX compatibility
|
// Check PTX compatibility
|
||||||
if (TargetArchs::hasEqualOrLessPtx(majorVersion_, minorVersion_))
|
if (hasEqualOrLessPtx(majorVersion_, minorVersion_))
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
// Check BIN compatibility
|
// Check BIN compatibility
|
||||||
for (int i = minorVersion_; i >= 0; --i)
|
for (int i = minorVersion_; i >= 0; --i)
|
||||||
if (TargetArchs::hasBin(majorVersion_, i))
|
if (hasBin(majorVersion_, i))
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
@ -614,44 +610,6 @@ namespace cv { namespace gpu { namespace device
|
|||||||
return multi_processor_count_;
|
return multi_processor_count_;
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
|
||||||
int device_id_;
|
|
||||||
|
|
||||||
std::string name_;
|
|
||||||
int multi_processor_count_;
|
|
||||||
int majorVersion_;
|
|
||||||
int minorVersion_;
|
|
||||||
};
|
|
||||||
|
|
||||||
class CudaFuncTable : public GpuFuncTable
|
|
||||||
{
|
|
||||||
protected:
|
|
||||||
|
|
||||||
const CudaArch cudaArch;
|
|
||||||
|
|
||||||
int convertSMVer2Cores(int major, int minor) const
|
|
||||||
{
|
|
||||||
// Defines for GPU Architecture types (using the SM version to determine the # of cores per SM
|
|
||||||
typedef struct {
|
|
||||||
int SM; // 0xMm (hexidecimal notation), M = SM Major version, and m = SM minor version
|
|
||||||
int Cores;
|
|
||||||
} SMtoCores;
|
|
||||||
|
|
||||||
SMtoCores gpuArchCoresPerSM[] = { { 0x10, 8 }, { 0x11, 8 }, { 0x12, 8 }, { 0x13, 8 }, { 0x20, 32 }, { 0x21, 48 }, {0x30, 192}, {0x35, 192}, { -1, -1 } };
|
|
||||||
|
|
||||||
int index = 0;
|
|
||||||
while (gpuArchCoresPerSM[index].SM != -1)
|
|
||||||
{
|
|
||||||
if (gpuArchCoresPerSM[index].SM == ((major << 4) + minor) )
|
|
||||||
return gpuArchCoresPerSM[index].Cores;
|
|
||||||
index++;
|
|
||||||
}
|
|
||||||
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
public:
|
|
||||||
|
|
||||||
int getCudaEnabledDeviceCount() const
|
int getCudaEnabledDeviceCount() const
|
||||||
{
|
{
|
||||||
int count;
|
int count;
|
||||||
@ -790,11 +748,11 @@ namespace cv { namespace gpu { namespace device
|
|||||||
printf(" GPU Clock Speed: %.2f GHz\n", prop.clockRate * 1e-6f);
|
printf(" GPU Clock Speed: %.2f GHz\n", prop.clockRate * 1e-6f);
|
||||||
|
|
||||||
printf(" Max Texture Dimension Size (x,y,z) 1D=(%d), 2D=(%d,%d), 3D=(%d,%d,%d)\n",
|
printf(" Max Texture Dimension Size (x,y,z) 1D=(%d), 2D=(%d,%d), 3D=(%d,%d,%d)\n",
|
||||||
prop.maxTexture1D, prop.maxTexture2D[0], prop.maxTexture2D[1],
|
prop.maxTexture1D, prop.maxTexture2D[0], prop.maxTexture2D[1],
|
||||||
prop.maxTexture3D[0], prop.maxTexture3D[1], prop.maxTexture3D[2]);
|
prop.maxTexture3D[0], prop.maxTexture3D[1], prop.maxTexture3D[2]);
|
||||||
printf(" Max Layered Texture Size (dim) x layers 1D=(%d) x %d, 2D=(%d,%d) x %d\n",
|
printf(" Max Layered Texture Size (dim) x layers 1D=(%d) x %d, 2D=(%d,%d) x %d\n",
|
||||||
prop.maxTexture1DLayered[0], prop.maxTexture1DLayered[1],
|
prop.maxTexture1DLayered[0], prop.maxTexture1DLayered[1],
|
||||||
prop.maxTexture2DLayered[0], prop.maxTexture2DLayered[1], prop.maxTexture2DLayered[2]);
|
prop.maxTexture2DLayered[0], prop.maxTexture2DLayered[1], prop.maxTexture2DLayered[2]);
|
||||||
|
|
||||||
printf(" Total amount of constant memory: %u bytes\n", (int)prop.totalConstMem);
|
printf(" Total amount of constant memory: %u bytes\n", (int)prop.totalConstMem);
|
||||||
printf(" Total amount of shared memory per block: %u bytes\n", (int)prop.sharedMemPerBlock);
|
printf(" Total amount of shared memory per block: %u bytes\n", (int)prop.sharedMemPerBlock);
|
||||||
@ -859,6 +817,42 @@ namespace cv { namespace gpu { namespace device
|
|||||||
fflush(stdout);
|
fflush(stdout);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
int device_id_;
|
||||||
|
|
||||||
|
std::string name_;
|
||||||
|
int multi_processor_count_;
|
||||||
|
int majorVersion_;
|
||||||
|
int minorVersion_;
|
||||||
|
|
||||||
|
const CudaArch cudaArch;
|
||||||
|
|
||||||
|
int convertSMVer2Cores(int major, int minor) const
|
||||||
|
{
|
||||||
|
// Defines for GPU Architecture types (using the SM version to determine the # of cores per SM
|
||||||
|
typedef struct {
|
||||||
|
int SM; // 0xMm (hexidecimal notation), M = SM Major version, and m = SM minor version
|
||||||
|
int Cores;
|
||||||
|
} SMtoCores;
|
||||||
|
|
||||||
|
SMtoCores gpuArchCoresPerSM[] = { { 0x10, 8 }, { 0x11, 8 }, { 0x12, 8 }, { 0x13, 8 }, { 0x20, 32 }, { 0x21, 48 }, {0x30, 192}, {0x35, 192}, { -1, -1 } };
|
||||||
|
|
||||||
|
int index = 0;
|
||||||
|
while (gpuArchCoresPerSM[index].SM != -1)
|
||||||
|
{
|
||||||
|
if (gpuArchCoresPerSM[index].SM == ((major << 4) + minor) )
|
||||||
|
return gpuArchCoresPerSM[index].Cores;
|
||||||
|
index++;
|
||||||
|
}
|
||||||
|
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
class CudaFuncTable : public GpuFuncTable
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
|
||||||
void copy(const Mat& src, GpuMat& dst) const
|
void copy(const Mat& src, GpuMat& dst) const
|
||||||
{
|
{
|
||||||
cudaSafeCall( cudaMemcpy2D(dst.data, dst.step, src.data, src.step, src.cols * src.elemSize(), src.rows, cudaMemcpyHostToDevice) );
|
cudaSafeCall( cudaMemcpy2D(dst.data, dst.step, src.data, src.step, src.cols * src.elemSize(), src.rows, cudaMemcpyHostToDevice) );
|
||||||
|
Loading…
Reference in New Issue
Block a user