Minor changes in GPU code (initialization interface)

This commit is contained in:
Anatoly Baksheev 2010-07-15 09:13:15 +00:00
parent 7f6fb6ef97
commit c4ef619ac5
5 changed files with 16 additions and 28 deletions

View File

@ -42,10 +42,6 @@
#include "cuda_shared.hpp" #include "cuda_shared.hpp"
using namespace cv::gpu;
#define cudaSafeCall
#define ROWSperTHREAD 21 // the number of rows a thread will process #define ROWSperTHREAD 21 // the number of rows a thread will process
#define BLOCK_W 128 // the thread block width (464) #define BLOCK_W 128 // the thread block width (464)
#define N_DISPARITIES 8 #define N_DISPARITIES 8
@ -218,7 +214,7 @@ __device__ void InitColSSD(int x_tex, int y_tex, int im_pitch, unsigned char* im
col_ssd[7 * SHARED_MEM_SIZE] = diffa[7]; col_ssd[7 * SHARED_MEM_SIZE] = diffa[7];
} }
extern "C" __global__ void stereoKernel(uchar *left, uchar *right, size_t img_step, uchar* disp, size_t disp_pitch, int maxdisp) extern "C" __global__ void stereoKernel(unsigned char *left, unsigned char *right, size_t img_step, unsigned char* disp, size_t disp_pitch, int maxdisp)
{ {
extern __shared__ unsigned int col_ssd_cache[]; extern __shared__ unsigned int col_ssd_cache[];
unsigned int *col_ssd = col_ssd_cache + BLOCK_W + threadIdx.x; unsigned int *col_ssd = col_ssd_cache + BLOCK_W + threadIdx.x;
@ -231,7 +227,7 @@ extern "C" __global__ void stereoKernel(uchar *left, uchar *right, size_t img_st
//int Y = blockIdx.y * ROWSperTHREAD + RADIUS; //int Y = blockIdx.y * ROWSperTHREAD + RADIUS;
unsigned int* minSSDImage = cminSSDImage + X + Y * cminSSD_step; unsigned int* minSSDImage = cminSSDImage + X + Y * cminSSD_step;
uchar* disparImage = disp + X + Y * disp_pitch; unsigned char* disparImage = disp + X + Y * disp_pitch;
/* if (X < cwidth) /* if (X < cwidth)
{ {
unsigned int *minSSDImage_end = minSSDImage + min(ROWSperTHREAD, cheight - Y) * minssd_step; unsigned int *minSSDImage_end = minSSDImage + min(ROWSperTHREAD, cheight - Y) * minssd_step;
@ -301,6 +297,7 @@ extern "C" void cv::gpu::impl::stereoBM_GPU(const DevMem2D& left, const DevMem2D
size_t smem_size = (BLOCK_W + N_DISPARITIES * SHARED_MEM_SIZE) * sizeof(unsigned int); size_t smem_size = (BLOCK_W + N_DISPARITIES * SHARED_MEM_SIZE) * sizeof(unsigned int);
#define cudaSafeCall
cudaSafeCall( cudaMemset2D(disp.ptr, disp.step, 0, disp.cols, disp. rows) ); cudaSafeCall( cudaMemset2D(disp.ptr, disp.step, 0, disp.cols, disp. rows) );
cudaSafeCall( cudaMemset2D(minSSD_buf.ptr, minSSD_buf.step, 0xFF, minSSD_buf.cols * minSSD_buf.elemSize(), disp. rows) ); cudaSafeCall( cudaMemset2D(minSSD_buf.ptr, minSSD_buf.step, 0xFF, minSSD_buf.cols * minSSD_buf.elemSize(), disp. rows) );

View File

@ -56,9 +56,7 @@ namespace cv
CV_EXPORTS string getDeviceName(int device); CV_EXPORTS string getDeviceName(int device);
CV_EXPORTS void setDevice(int device); CV_EXPORTS void setDevice(int device);
enum { CV_GPU_CC_10, CV_GPU_CC_11, CV_GPU_CC_12, CV_GPU_CC_13, CV_GPU_CC_20 }; CV_EXPORTS void getComputeCapability(int device, int* major, int* minor);
CV_EXPORTS int getComputeCapability(int device);
CV_EXPORTS int getNumberOfSMs(int device); CV_EXPORTS int getNumberOfSMs(int device);
//////////////////////////////// GpuMat //////////////////////////////// //////////////////////////////// GpuMat ////////////////////////////////

View File

@ -64,27 +64,15 @@ CV_EXPORTS void cv::gpu::setDevice(int device)
cudaSafeCall( cudaSetDevice( device ) ); cudaSafeCall( cudaSetDevice( device ) );
} }
CV_EXPORTS int cv::gpu::getComputeCapability(int device) CV_EXPORTS void cv::gpu::getComputeCapability(int device, int* major, int* minor)
{ {
cudaDeviceProp prop; cudaDeviceProp prop;
cudaSafeCall( cudaGetDeviceProperties( &prop, device) ); cudaSafeCall( cudaGetDeviceProperties( &prop, device) );
if (prop.major == 2) *major = prop.major;
return CV_GPU_CC_20; *minor = prop.minor;
if (prop.major == 1)
switch (prop.minor)
{
case 0: return CV_GPU_CC_10;
case 1: return CV_GPU_CC_11;
case 2: return CV_GPU_CC_12;
case 3: return CV_GPU_CC_13;
}
return -1;
} }
CV_EXPORTS int cv::gpu::getNumberOfSMs(int device) CV_EXPORTS int cv::gpu::getNumberOfSMs(int device)
{ {
cudaDeviceProp prop; cudaDeviceProp prop;

View File

@ -55,7 +55,11 @@
#include "cuda_shared.hpp" #include "cuda_shared.hpp"
#include "cuda_runtime.h" #if _MSC_VER >= 1200
#pragma warning (disable : 4100 4211 4201 4408)
#endif
#include "cuda_runtime_api.h"
#define cudaSafeCall(err) __cudaSafeCall(err, __FILE__, __LINE__) #define cudaSafeCall(err) __cudaSafeCall(err, __FILE__, __LINE__)

View File

@ -49,7 +49,8 @@ using namespace cv::gpu;
StereoBM_GPU::StereoBM_GPU() : preset(BASIC_PRESET), ndisp(64) {} StereoBM_GPU::StereoBM_GPU() : preset(BASIC_PRESET), ndisp(64) {}
StereoBM_GPU::StereoBM_GPU(int preset_, int ndisparities_) : preset(preset_), ndisp(ndisparities_) StereoBM_GPU::StereoBM_GPU(int preset_, int ndisparities_) : preset(preset_), ndisp(ndisparities_)
{ {
CV_Assert(ndisp <= std::numeric_limits<unsigned char>::max()); const int max_supported_ndisp = 1 << (sizeof(unsigned char) * 8);
CV_Assert(ndisp <= max_supported_ndisp);
} }
void StereoBM_GPU::operator() ( const GpuMat& left, const GpuMat& right, GpuMat& disparity) const void StereoBM_GPU::operator() ( const GpuMat& left, const GpuMat& right, GpuMat& disparity) const