From 3940b6163b8370312d2a7fe66c96d43ab6d12efd Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Mon, 17 Mar 2014 18:52:28 +0400 Subject: [PATCH] remove intel guard since the code is 2 times faster on AMD too --- modules/ocl/perf/perf_filters.cpp | 6 +++--- modules/ocl/src/filtering.cpp | 6 +++--- modules/ocl/src/opencl/filtering_sep_filter_singlepass.cl | 2 ++ 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/modules/ocl/perf/perf_filters.cpp b/modules/ocl/perf/perf_filters.cpp index b3ffc51b30..c542d647cf 100644 --- a/modules/ocl/perf/perf_filters.cpp +++ b/modules/ocl/perf/perf_filters.cpp @@ -262,13 +262,13 @@ OCL_PERF_TEST_P(SobelFixture, Sobel, oclDst.download(dst); - SANITY_CHECK(dst); + SANITY_CHECK(dst, 1e-3); } else if (RUN_PLAIN_IMPL) { TEST_CYCLE() cv::Sobel(src, dst, -1, dx, dy); - SANITY_CHECK(dst); + SANITY_CHECK(dst, 1e-3); } else OCL_PERF_ELSE @@ -326,7 +326,7 @@ OCL_PERF_TEST_P(GaussianBlurFixture, GaussianBlur, Mat src(srcSize, type), dst(srcSize, type); declare.in(src, WARMUP_RNG).out(dst); - const double eps = src.depth() == CV_8U ? 1 + DBL_EPSILON : 3e-4; + const double eps = src.depth() == CV_8U ? 1 + DBL_EPSILON : 5e-4; if (RUN_OCL_IMPL) { diff --git a/modules/ocl/src/filtering.cpp b/modules/ocl/src/filtering.cpp index 35aa226de6..77052ffbf3 100644 --- a/modules/ocl/src/filtering.cpp +++ b/modules/ocl/src/filtering.cpp @@ -774,12 +774,12 @@ static void sepFilter2D_SinglePass(const oclMat &src, oclMat &dst, option += " -D KERNEL_MATRIX_X="; for(int i=0; i( &row_kernel.at(i) ) ); + option += cv::format("DIG(0x%x)", *reinterpret_cast( &row_kernel.at(i) ) ); option += "0x0"; option += " -D KERNEL_MATRIX_Y="; for(int i=0; i( &col_kernel.at(i) ) ); + option += cv::format("DIG(0x%x)", *reinterpret_cast( &col_kernel.at(i) ) ); option += "0x0"; switch(src.type()) @@ -1410,7 +1410,7 @@ Ptr cv::ocl::createSeparableLinearFilter_GPU(int srcType, int //if image size is non-degenerate and large enough //and if filter support is reasonable to satisfy larger local memory requirements, //then we can use single pass routine to avoid extra runtime calls overhead - if( clCxt && clCxt->supportsFeature(FEATURE_CL_INTEL_DEVICE) && + if( clCxt && rowKernel.rows <= 21 && columnKernel.rows <= 21 && (rowKernel.rows & 1) == 1 && (columnKernel.rows & 1) == 1 && imgSize.width > optimizedSepFilterLocalSize + (rowKernel.rows>>1) && diff --git a/modules/ocl/src/opencl/filtering_sep_filter_singlepass.cl b/modules/ocl/src/opencl/filtering_sep_filter_singlepass.cl index c6555bff0f..c5f490284e 100644 --- a/modules/ocl/src/opencl/filtering_sep_filter_singlepass.cl +++ b/modules/ocl/src/opencl/filtering_sep_filter_singlepass.cl @@ -84,6 +84,8 @@ #define DST(_x,_y) (((global DSTTYPE*)(Dst+DstOffset+(_y)*DstPitch))[_x]) +#define DIG(a) a, + //horizontal and vertical filter kernels //should be defined on host during compile time to avoid overhead __constant uint mat_kernelX[] = {KERNEL_MATRIX_X};