CUDA related func tables refactored to remove unneeded dependencies.

This commit is contained in:
Alexander Smorkalov 2013-12-19 11:18:04 +04:00
parent 6da7c50fb5
commit 64c94cb22c
2 changed files with 204 additions and 210 deletions

View File

@ -239,23 +239,23 @@ static DeviceInfoFuncTable* deviceInfoFuncTable()
//////////////////////////////// Initialization & Info //////////////////////// //////////////////////////////// Initialization & Info ////////////////////////
int cv::gpu::getCudaEnabledDeviceCount() { return gpuFuncTable()->getCudaEnabledDeviceCount(); } int cv::gpu::getCudaEnabledDeviceCount() { return deviceInfoFuncTable()->getCudaEnabledDeviceCount(); }
void cv::gpu::setDevice(int device) { gpuFuncTable()->setDevice(device); } void cv::gpu::setDevice(int device) { deviceInfoFuncTable()->setDevice(device); }
int cv::gpu::getDevice() { return gpuFuncTable()->getDevice(); } int cv::gpu::getDevice() { return deviceInfoFuncTable()->getDevice(); }
void cv::gpu::resetDevice() { gpuFuncTable()->resetDevice(); } void cv::gpu::resetDevice() { deviceInfoFuncTable()->resetDevice(); }
bool cv::gpu::deviceSupports(FeatureSet feature_set) { return gpuFuncTable()->deviceSupports(feature_set); } bool cv::gpu::deviceSupports(FeatureSet feature_set) { return deviceInfoFuncTable()->deviceSupports(feature_set); }
bool cv::gpu::TargetArchs::builtWith(FeatureSet feature_set) { return gpuFuncTable()->builtWith(feature_set); } bool cv::gpu::TargetArchs::builtWith(FeatureSet feature_set) { return deviceInfoFuncTable()->builtWith(feature_set); }
bool cv::gpu::TargetArchs::has(int major, int minor) { return gpuFuncTable()->has(major, minor); } bool cv::gpu::TargetArchs::has(int major, int minor) { return deviceInfoFuncTable()->has(major, minor); }
bool cv::gpu::TargetArchs::hasPtx(int major, int minor) { return gpuFuncTable()->hasPtx(major, minor); } bool cv::gpu::TargetArchs::hasPtx(int major, int minor) { return deviceInfoFuncTable()->hasPtx(major, minor); }
bool cv::gpu::TargetArchs::hasBin(int major, int minor) { return gpuFuncTable()->hasBin(major, minor); } bool cv::gpu::TargetArchs::hasBin(int major, int minor) { return deviceInfoFuncTable()->hasBin(major, minor); }
bool cv::gpu::TargetArchs::hasEqualOrLessPtx(int major, int minor) { return gpuFuncTable()->hasEqualOrLessPtx(major, minor); } bool cv::gpu::TargetArchs::hasEqualOrLessPtx(int major, int minor) { return deviceInfoFuncTable()->hasEqualOrLessPtx(major, minor); }
bool cv::gpu::TargetArchs::hasEqualOrGreater(int major, int minor) { return gpuFuncTable()->hasEqualOrGreater(major, minor); } bool cv::gpu::TargetArchs::hasEqualOrGreater(int major, int minor) { return deviceInfoFuncTable()->hasEqualOrGreater(major, minor); }
bool cv::gpu::TargetArchs::hasEqualOrGreaterPtx(int major, int minor) { return gpuFuncTable()->hasEqualOrGreaterPtx(major, minor); } bool cv::gpu::TargetArchs::hasEqualOrGreaterPtx(int major, int minor) { return deviceInfoFuncTable()->hasEqualOrGreaterPtx(major, minor); }
bool cv::gpu::TargetArchs::hasEqualOrGreaterBin(int major, int minor) { return gpuFuncTable()->hasEqualOrGreaterBin(major, minor); } bool cv::gpu::TargetArchs::hasEqualOrGreaterBin(int major, int minor) { return deviceInfoFuncTable()->hasEqualOrGreaterBin(major, minor); }
size_t cv::gpu::DeviceInfo::sharedMemPerBlock() const { return deviceInfoFuncTable()->sharedMemPerBlock(); } size_t cv::gpu::DeviceInfo::sharedMemPerBlock() const { return deviceInfoFuncTable()->sharedMemPerBlock(); }
void cv::gpu::DeviceInfo::queryMemory(size_t& total_memory, size_t& free_memory) const { deviceInfoFuncTable()->queryMemory(total_memory, free_memory); } void cv::gpu::DeviceInfo::queryMemory(size_t& total_memory, size_t& free_memory) const { deviceInfoFuncTable()->queryMemory(total_memory, free_memory); }
@ -270,8 +270,8 @@ std::string cv::gpu::DeviceInfo::name() const { return deviceInfoFuncTable()->na
int cv::gpu::DeviceInfo::multiProcessorCount() const { return deviceInfoFuncTable()->multiProcessorCount(); } int cv::gpu::DeviceInfo::multiProcessorCount() const { return deviceInfoFuncTable()->multiProcessorCount(); }
void cv::gpu::DeviceInfo::query() { deviceInfoFuncTable()->query(); } void cv::gpu::DeviceInfo::query() { deviceInfoFuncTable()->query(); }
void cv::gpu::printCudaDeviceInfo(int device) { gpuFuncTable()->printCudaDeviceInfo(device); } void cv::gpu::printCudaDeviceInfo(int device) { deviceInfoFuncTable()->printCudaDeviceInfo(device); }
void cv::gpu::printShortCudaDeviceInfo(int device) { gpuFuncTable()->printShortCudaDeviceInfo(device); } void cv::gpu::printShortCudaDeviceInfo(int device) { deviceInfoFuncTable()->printShortCudaDeviceInfo(device); }
#ifdef HAVE_CUDA #ifdef HAVE_CUDA

View File

@ -4,6 +4,7 @@
class DeviceInfoFuncTable class DeviceInfoFuncTable
{ {
public: public:
// cv::DeviceInfo
virtual size_t sharedMemPerBlock() const = 0; virtual size_t sharedMemPerBlock() const = 0;
virtual void queryMemory(size_t&, size_t&) const = 0; virtual void queryMemory(size_t&, size_t&) const = 0;
virtual size_t freeMemory() const = 0; virtual size_t freeMemory() const = 0;
@ -16,25 +17,13 @@
virtual int majorVersion() const = 0; virtual int majorVersion() const = 0;
virtual int minorVersion() const = 0; virtual int minorVersion() const = 0;
virtual int multiProcessorCount() const = 0; virtual int multiProcessorCount() const = 0;
virtual ~DeviceInfoFuncTable() {};
};
class GpuFuncTable
{
public:
virtual ~GpuFuncTable() {}
// DeviceInfo routines
virtual int getCudaEnabledDeviceCount() const = 0; virtual int getCudaEnabledDeviceCount() const = 0;
virtual void setDevice(int) const = 0; virtual void setDevice(int) const = 0;
virtual int getDevice() const = 0; virtual int getDevice() const = 0;
virtual void resetDevice() const = 0; virtual void resetDevice() const = 0;
virtual bool deviceSupports(FeatureSet) const = 0; virtual bool deviceSupports(FeatureSet) const = 0;
// TargetArchs // cv::TargetArchs
virtual bool builtWith(FeatureSet) const = 0; virtual bool builtWith(FeatureSet) const = 0;
virtual bool has(int, int) const = 0; virtual bool has(int, int) const = 0;
virtual bool hasPtx(int, int) const = 0; virtual bool hasPtx(int, int) const = 0;
@ -47,6 +36,14 @@
virtual void printCudaDeviceInfo(int) const = 0; virtual void printCudaDeviceInfo(int) const = 0;
virtual void printShortCudaDeviceInfo(int) const = 0; virtual void printShortCudaDeviceInfo(int) const = 0;
virtual ~DeviceInfoFuncTable() {};
};
class GpuFuncTable
{
public:
virtual ~GpuFuncTable() {}
// GpuMat routines // GpuMat routines
virtual void copy(const Mat& src, GpuMat& dst) const = 0; virtual void copy(const Mat& src, GpuMat& dst) const = 0;
virtual void copy(const GpuMat& src, Mat& dst) const = 0; virtual void copy(const GpuMat& src, Mat& dst) const = 0;
@ -80,13 +77,7 @@
int majorVersion() const { throw_nogpu; return -1; } int majorVersion() const { throw_nogpu; return -1; }
int minorVersion() const { throw_nogpu; return -1; } int minorVersion() const { throw_nogpu; return -1; }
int multiProcessorCount() const { throw_nogpu; return -1; } int multiProcessorCount() const { throw_nogpu; return -1; }
};
class EmptyFuncTable : public GpuFuncTable
{
public:
// DeviceInfo routines
int getCudaEnabledDeviceCount() const { return 0; } int getCudaEnabledDeviceCount() const { return 0; }
void setDevice(int) const { throw_nogpu; } void setDevice(int) const { throw_nogpu; }
@ -107,6 +98,11 @@
void printCudaDeviceInfo(int) const { throw_nogpu; } void printCudaDeviceInfo(int) const { throw_nogpu; }
void printShortCudaDeviceInfo(int) const { throw_nogpu; } void printShortCudaDeviceInfo(int) const { throw_nogpu; }
};
class EmptyFuncTable : public GpuFuncTable
{
public:
void copy(const Mat&, GpuMat&) const { throw_nogpu; } void copy(const Mat&, GpuMat&) const { throw_nogpu; }
void copy(const GpuMat&, Mat&) const { throw_nogpu; } void copy(const GpuMat&, Mat&) const { throw_nogpu; }
@ -568,12 +564,12 @@ namespace cv { namespace gpu { namespace device
bool isCompatible() const bool isCompatible() const
{ {
// Check PTX compatibility // Check PTX compatibility
if (TargetArchs::hasEqualOrLessPtx(majorVersion_, minorVersion_)) if (hasEqualOrLessPtx(majorVersion_, minorVersion_))
return true; return true;
// Check BIN compatibility // Check BIN compatibility
for (int i = minorVersion_; i >= 0; --i) for (int i = minorVersion_; i >= 0; --i)
if (TargetArchs::hasBin(majorVersion_, i)) if (hasBin(majorVersion_, i))
return true; return true;
return false; return false;
@ -614,44 +610,6 @@ namespace cv { namespace gpu { namespace device
return multi_processor_count_; return multi_processor_count_;
} }
private:
int device_id_;
std::string name_;
int multi_processor_count_;
int majorVersion_;
int minorVersion_;
};
class CudaFuncTable : public GpuFuncTable
{
protected:
const CudaArch cudaArch;
// Looks up the number of cores per SM for the SM version major.minor.
// Returns the core count of the matching table entry, or -1 when the
// requested version is not listed in the table below.
int convertSMVer2Cores(int major, int minor) const
{
// Defines for GPU Architecture types (using the SM version to determine the # of cores per SM
typedef struct {
int SM; // 0xMm (hexadecimal notation), M = SM Major version, and m = SM minor version
int Cores;
} SMtoCores;
// Known SM versions and their core counts; the trailing { -1, -1 } entry marks the end of the table.
SMtoCores gpuArchCoresPerSM[] = { { 0x10, 8 }, { 0x11, 8 }, { 0x12, 8 }, { 0x13, 8 }, { 0x20, 32 }, { 0x21, 48 }, {0x30, 192}, {0x35, 192}, { -1, -1 } };
int index = 0;
// Linear scan that stops at the sentinel entry.
while (gpuArchCoresPerSM[index].SM != -1)
{
// (major << 4) + minor packs the requested version into the same 0xMm form as the table keys.
if (gpuArchCoresPerSM[index].SM == ((major << 4) + minor) )
return gpuArchCoresPerSM[index].Cores;
index++;
}
// No entry matched the requested version.
return -1;
}
public:
int getCudaEnabledDeviceCount() const int getCudaEnabledDeviceCount() const
{ {
int count; int count;
@ -790,11 +748,11 @@ namespace cv { namespace gpu { namespace device
printf(" GPU Clock Speed: %.2f GHz\n", prop.clockRate * 1e-6f); printf(" GPU Clock Speed: %.2f GHz\n", prop.clockRate * 1e-6f);
printf(" Max Texture Dimension Size (x,y,z) 1D=(%d), 2D=(%d,%d), 3D=(%d,%d,%d)\n", printf(" Max Texture Dimension Size (x,y,z) 1D=(%d), 2D=(%d,%d), 3D=(%d,%d,%d)\n",
prop.maxTexture1D, prop.maxTexture2D[0], prop.maxTexture2D[1], prop.maxTexture1D, prop.maxTexture2D[0], prop.maxTexture2D[1],
prop.maxTexture3D[0], prop.maxTexture3D[1], prop.maxTexture3D[2]); prop.maxTexture3D[0], prop.maxTexture3D[1], prop.maxTexture3D[2]);
printf(" Max Layered Texture Size (dim) x layers 1D=(%d) x %d, 2D=(%d,%d) x %d\n", printf(" Max Layered Texture Size (dim) x layers 1D=(%d) x %d, 2D=(%d,%d) x %d\n",
prop.maxTexture1DLayered[0], prop.maxTexture1DLayered[1], prop.maxTexture1DLayered[0], prop.maxTexture1DLayered[1],
prop.maxTexture2DLayered[0], prop.maxTexture2DLayered[1], prop.maxTexture2DLayered[2]); prop.maxTexture2DLayered[0], prop.maxTexture2DLayered[1], prop.maxTexture2DLayered[2]);
printf(" Total amount of constant memory: %u bytes\n", (int)prop.totalConstMem); printf(" Total amount of constant memory: %u bytes\n", (int)prop.totalConstMem);
printf(" Total amount of shared memory per block: %u bytes\n", (int)prop.sharedMemPerBlock); printf(" Total amount of shared memory per block: %u bytes\n", (int)prop.sharedMemPerBlock);
@ -859,6 +817,42 @@ namespace cv { namespace gpu { namespace device
fflush(stdout); fflush(stdout);
} }
private:
int device_id_;
std::string name_;
int multi_processor_count_;
int majorVersion_;
int minorVersion_;
const CudaArch cudaArch;
// Looks up the number of cores per SM for the SM version major.minor.
// Returns the core count of the matching table entry, or -1 when the
// requested version is not listed in the table below.
int convertSMVer2Cores(int major, int minor) const
{
// Defines for GPU Architecture types (using the SM version to determine the # of cores per SM
typedef struct {
int SM; // 0xMm (hexadecimal notation), M = SM Major version, and m = SM minor version
int Cores;
} SMtoCores;
// Known SM versions and their core counts; the trailing { -1, -1 } entry marks the end of the table.
SMtoCores gpuArchCoresPerSM[] = { { 0x10, 8 }, { 0x11, 8 }, { 0x12, 8 }, { 0x13, 8 }, { 0x20, 32 }, { 0x21, 48 }, {0x30, 192}, {0x35, 192}, { -1, -1 } };
int index = 0;
// Linear scan that stops at the sentinel entry.
while (gpuArchCoresPerSM[index].SM != -1)
{
// (major << 4) + minor packs the requested version into the same 0xMm form as the table keys.
if (gpuArchCoresPerSM[index].SM == ((major << 4) + minor) )
return gpuArchCoresPerSM[index].Cores;
index++;
}
// No entry matched the requested version.
return -1;
}
};
class CudaFuncTable : public GpuFuncTable
{
public:
void copy(const Mat& src, GpuMat& dst) const void copy(const Mat& src, GpuMat& dst) const
{ {
cudaSafeCall( cudaMemcpy2D(dst.data, dst.step, src.data, src.step, src.cols * src.elemSize(), src.rows, cudaMemcpyHostToDevice) ); cudaSafeCall( cudaMemcpy2D(dst.data, dst.step, src.data, src.step, src.cols * src.elemSize(), src.rows, cudaMemcpyHostToDevice) );