diff --git a/modules/gpu/src/cuda/matrix_operations.cu b/modules/gpu/src/cuda/matrix_operations.cu index ac7b08fc07..533c780c04 100644 --- a/modules/gpu/src/cuda/matrix_operations.cu +++ b/modules/gpu/src/cuda/matrix_operations.cu @@ -76,19 +76,21 @@ namespace mat_operators }; template - __device__ size_t GetIndex(size_t i, int cols, int rows, int step) + __device__ size_t GetIndex(size_t i, int cols, int step) { - return ((i / static_cast(cols))*static_cast(step) / static_cast(sizeof(T))) + - (i % static_cast(rows))*static_cast(channels) ; + size_t ret = (i / static_cast(cols))*static_cast(step) / static_cast(sizeof(T)) + + (i % static_cast(cols))*static_cast(channels); + return ret; } template __global__ void kernel_set_to_without_mask(T * mat, int cols, int rows, int step) { size_t i = (blockIdx.x * blockDim.x + threadIdx.x); + if (i < cols * rows) { - unroll::unroll_set(mat, GetIndex(i, cols, rows, step)); + unroll::unroll_set(mat, GetIndex(i, cols, step)); } } @@ -97,7 +99,7 @@ namespace mat_operators { size_t i = (blockIdx.x * blockDim.x + threadIdx.x); if (i < cols * rows) - unroll::unroll_set_with_mask(mat, mask[i], GetIndex(i, cols, rows, step)); + unroll::unroll_set_with_mask(mat, mask[i], GetIndex(i, cols, step)); } } @@ -105,10 +107,10 @@ extern "C" void cv::gpu::impl::set_to_with_mask(const DevMem2D& mat, const doubl { // download scalar to constant memory float data[4]; - data[0] = scalar[0]; - data[1] = scalar[1]; - data[2] = scalar[2]; - data[3] = scalar[3]; + data[0] = static_cast(scalar[0]); + data[1] = static_cast(scalar[1]); + data[2] = static_cast(scalar[2]); + data[3] = static_cast(scalar[3]); cudaSafeCall( cudaMemcpyToSymbol(scalar_d, &data, sizeof(data))); dim3 threadsPerBlock(256,1,1); @@ -144,10 +146,10 @@ extern "C" void cv::gpu::impl::set_to_with_mask(const DevMem2D& mat, const doubl extern "C" void cv::gpu::impl::set_to_without_mask(const DevMem2D& mat, const double * scalar, int elemSize1, int channels) { float data[4]; - data[0] = scalar[0]; - data[1] = scalar[1]; - data[2] = scalar[2]; - data[3] = scalar[3]; + data[0] = static_cast(scalar[0]); + data[1] = static_cast(scalar[1]); + data[2] = static_cast(scalar[2]); + data[3] = static_cast(scalar[3]); cudaSafeCall( cudaMemcpyToSymbol(scalar_d, &data, sizeof(data))); dim3 threadsPerBlock(256, 1, 1); diff --git a/tests/gpu/src/operator_set_to.cpp b/tests/gpu/src/operator_set_to.cpp index 73d7cc004e..f5b985d6ca 100644 --- a/tests/gpu/src/operator_set_to.cpp +++ b/tests/gpu/src/operator_set_to.cpp @@ -21,6 +21,8 @@ class CV_GpuMatOpSetTo : public CvTest void print_mat(gpu::GpuMat & mat, std::string name = "gpu mat"); void run(int); + bool compare_matrix(cv::Mat & cpumat, gpu::GpuMat & gpumat); + bool test_cv_8u_c1(); bool test_cv_8u_c2(); bool test_cv_8u_c3(); @@ -34,15 +36,15 @@ class CV_GpuMatOpSetTo : public CvTest bool test_cv_32f_c4(); private: - int w; - int h; + int rows; + int cols; Scalar s; }; CV_GpuMatOpSetTo::CV_GpuMatOpSetTo(): CvTest( "GpuMatOperatorSetTo", "setTo" ) { - w = 100; - h = 100; + rows = 127; + cols = 129; s.val[0] = 128.0; s.val[1] = 128.0; @@ -66,13 +68,16 @@ void CV_GpuMatOpSetTo::print_mat(gpu::GpuMat & mat, std::string name) print_mat(newmat, name); } -bool CV_GpuMatOpSetTo::test_cv_8u_c1() +bool CV_GpuMatOpSetTo::compare_matrix(cv::Mat & cpumat, gpu::GpuMat & gpumat) { - Mat cpumat(w, h, CV_8U, Scalar::all(0)); - GpuMat gpumat(cpumat); - + //int64 time = getTickCount(); cpumat.setTo(s); + //int64 time1 = getTickCount(); gpumat.setTo(s); + //int64 time2 = getTickCount(); + //std::cout << "\ntime cpu:" << double((time1 - time) / getTickFrequency()); + //std::cout << "\ntime gpu:" << double((time2 - time1) / getTickFrequency()); + //std::cout << "\n"; #ifdef PRINT_MATRIX print_mat(cpumat); @@ -82,7 +87,7 @@ bool CV_GpuMatOpSetTo::test_cv_8u_c1() double ret = norm(cpumat, gpumat); - if (ret < 0.1) + if (ret < 1.0) return true; else { @@ -91,205 +96,78 @@ bool CV_GpuMatOpSetTo::test_cv_8u_c1() } } + +bool CV_GpuMatOpSetTo::test_cv_8u_c1() +{ + Mat cpumat(rows, cols, CV_8U, Scalar::all(0)); + GpuMat gpumat(cpumat); + + return compare_matrix(cpumat, gpumat); +} + bool CV_GpuMatOpSetTo::test_cv_8u_c2() { - Mat cpumat(w, h, CV_8UC2, Scalar::all(0)); + Mat cpumat(rows, cols, CV_8UC2, Scalar::all(0)); GpuMat gpumat(cpumat); - cpumat.setTo(s); - gpumat.setTo(s); - -#ifdef PRINT_MATRIX - print_mat(cpumat); - print_mat(gpumat); - cv::waitKey(0); -#endif - - double ret = norm(cpumat, gpumat); - - if (ret < 0.1) - return true; - else - { - std::cout << "return : " << ret << "\n"; - return false; - } + return compare_matrix(cpumat, gpumat); } bool CV_GpuMatOpSetTo::test_cv_8u_c3() { - Mat cpumat(w, h, CV_8UC3, Scalar::all(0)); + Mat cpumat(rows, cols, CV_8UC3, Scalar::all(0)); GpuMat gpumat(cpumat); - cpumat.setTo(s); - gpumat.setTo(s); - -#ifdef PRINT_MATRIX - print_mat(cpumat); - print_mat(gpumat); - cv::waitKey(0); -#endif - - double ret = norm(cpumat, gpumat); - - if (ret < 0.1) - return true; - else - { - std::cout << "return : " << ret << "\n"; - return false; - } + return compare_matrix(cpumat, gpumat); } bool CV_GpuMatOpSetTo::test_cv_8u_c4() { - Mat cpumat(w, h, CV_8UC4, Scalar::all(0)); + Mat cpumat(rows, cols, CV_8UC4, Scalar::all(0)); GpuMat gpumat(cpumat); - cpumat.setTo(s); - gpumat.setTo(s); - -#ifdef PRINT_MATRIX - print_mat(cpumat); - print_mat(gpumat); - cv::waitKey(0); -#endif - - double ret = norm(cpumat, gpumat); - - if (ret < 0.1) - return true; - else - { - std::cout << "return : " << ret << "\n"; - return false; - } + return compare_matrix(cpumat, gpumat); } bool CV_GpuMatOpSetTo::test_cv_16u_c4() { - Mat cpumat(w, h, CV_16UC4, Scalar::all(0)); + Mat cpumat(rows, cols, CV_16UC4, Scalar::all(0)); GpuMat gpumat(cpumat); - cpumat.setTo(s); - gpumat.setTo(s); - -#ifdef PRINT_MATRIX - print_mat(cpumat); - print_mat(gpumat); - cv::waitKey(0); -#endif - - double ret = norm(cpumat, gpumat); - - if (ret < 0.1) - return true; - else - { - std::cout << "return : " << ret << "\n"; - return false; - } + return compare_matrix(cpumat, gpumat); } bool CV_GpuMatOpSetTo::test_cv_32f_c1() { - Mat cpumat(w, h, CV_32F, Scalar::all(0)); + Mat cpumat(rows, cols, CV_32F, Scalar::all(0)); GpuMat gpumat(cpumat); - cpumat.setTo(s); - gpumat.setTo(s); - -#ifdef PRINT_MATRIX - print_mat(cpumat); - print_mat(gpumat); - cv::waitKey(0); -#endif - - double ret = norm(cpumat, gpumat); - - if (ret < 0.1) - return true; - else - { - std::cout << "return : " << ret << "\n"; - return false; - } + return compare_matrix(cpumat, gpumat); } bool CV_GpuMatOpSetTo::test_cv_32f_c2() { - Mat cpumat(w, h, CV_32FC2, Scalar::all(0)); + Mat cpumat(rows, cols, CV_32FC2, Scalar::all(0)); GpuMat gpumat(cpumat); - cpumat.setTo(s); - gpumat.setTo(s); - -#ifdef PRINT_MATRIX - print_mat(cpumat); - print_mat(gpumat); - cv::waitKey(0); -#endif - - double ret = norm(cpumat, gpumat); - - if (ret < 0.1) - return true; - else - { - std::cout << "return : " << ret; - return false; - } + return compare_matrix(cpumat, gpumat); } bool CV_GpuMatOpSetTo::test_cv_32f_c3() { - Mat cpumat(w, h, CV_32FC3, Scalar::all(0)); + Mat cpumat(rows, cols, CV_32FC3, Scalar::all(0)); GpuMat gpumat(cpumat); - cpumat.setTo(s); - gpumat.setTo(s); - -#ifdef PRINT_MATRIX - print_mat(cpumat); - print_mat(gpumat); - cv::waitKey(0); -#endif - - double ret = norm(cpumat, gpumat); - - if (ret < 0.1) - return true; - else - { - std::cout << "return : " << ret; - return false; - } + return compare_matrix(cpumat, gpumat); } bool CV_GpuMatOpSetTo::test_cv_32f_c4() { - Mat cpumat(w, h, CV_32FC4, Scalar::all(0)); + Mat cpumat(rows, cols, CV_32FC4, Scalar::all(0)); GpuMat gpumat(cpumat); - cpumat.setTo(s); - gpumat.setTo(s); - -#ifdef PRINT_MATRIX - print_mat(cpumat); - print_mat(gpumat); - cv::waitKey(0); -#endif - - double ret = norm(cpumat, gpumat); - - if (ret < 0.1) - return true; - else - { - std::cout << "return : " << ret << "\n"; - return false; - } + return compare_matrix(cpumat, gpumat); } void CV_GpuMatOpSetTo::run( int /* start_from */)