From 3a4d4080f4a7a682c0a584a693f3be438900ae4b Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Wed, 1 Jan 2014 23:46:19 +0400 Subject: [PATCH] fixed overflow for cv::norm NORM_L2 --- modules/core/perf/opencl/perf_arithm.cpp | 8 ++++---- modules/core/src/opencl/reduce.cl | 2 +- modules/core/src/stat.cpp | 10 ++++++---- modules/core/test/ocl/test_arithm.cpp | 2 +- 4 files changed, 12 insertions(+), 10 deletions(-) diff --git a/modules/core/perf/opencl/perf_arithm.cpp b/modules/core/perf/opencl/perf_arithm.cpp index f6e62da69c..cb06ac4793 100644 --- a/modules/core/perf/opencl/perf_arithm.cpp +++ b/modules/core/perf/opencl/perf_arithm.cpp @@ -651,13 +651,13 @@ OCL_PERF_TEST_P(SetIdentityFixture, SetIdentity, typedef Size_MatType MeanStdDevFixture; -OCL_PERF_TEST_P(MeanStdDevFixture, DISABLED_MeanStdDev, +OCL_PERF_TEST_P(MeanStdDevFixture, MeanStdDev, ::testing::Combine(OCL_PERF_ENUM(OCL_SIZE_1, OCL_SIZE_2, OCL_SIZE_3), OCL_TEST_TYPES)) { const Size_MatType_t params = GetParam(); const Size srcSize = get<0>(params); const int type = get<1>(params); - const double eps = 1e-5; + const double eps = 2e-5; checkDeviceMaxMemoryAllocSize(srcSize, type); @@ -687,7 +687,7 @@ CV_ENUM(NormType, NORM_INF, NORM_L1, NORM_L2) typedef std::tr1::tuple NormParams; typedef TestBaseWithParam NormFixture; -OCL_PERF_TEST_P(NormFixture, DISABLED_Norm, +OCL_PERF_TEST_P(NormFixture, Norm, ::testing::Combine(OCL_PERF_ENUM(OCL_SIZE_1, OCL_SIZE_2, OCL_SIZE_3), OCL_TEST_TYPES, NormType::all())) { const NormParams params = GetParam(); @@ -703,7 +703,7 @@ OCL_PERF_TEST_P(NormFixture, DISABLED_Norm, OCL_TEST_CYCLE() res = cv::norm(src1, src2, normType); - SANITY_CHECK(res, 1e-6, ERROR_RELATIVE); + SANITY_CHECK(res, 1e-5, ERROR_RELATIVE); } ///////////// Repeat //////////////////////// diff --git a/modules/core/src/opencl/reduce.cl b/modules/core/src/opencl/reduce.cl index 4f0d806708..0f148f3859 100644 --- a/modules/core/src/opencl/reduce.cl +++ b/modules/core/src/opencl/reduce.cl @@ -88,7 +88,7 @@ #define REDUCE_GLOBAL \ accumulator += src[0] == zero ? zero : one #define SET_LOCAL_1 \ - localmem[lid] = accumulator + localmem[lid] = accumulator #define REDUCE_LOCAL_1 \ localmem[lid - WGS2_ALIGNED] += accumulator #define REDUCE_LOCAL_2 \ diff --git a/modules/core/src/stat.cpp b/modules/core/src/stat.cpp index 2806efeb31..932cad682e 100644 --- a/modules/core/src/stat.cpp +++ b/modules/core/src/stat.cpp @@ -479,7 +479,8 @@ static bool ocl_sum( InputArray _src, Scalar & res, int sum_op ) int dbsize = ocl::Device::getDefault().maxComputeUnits(); size_t wgs = ocl::Device::getDefault().maxWorkGroupSize(); - int ddepth = std::max(CV_32S, depth), dtype = CV_MAKE_TYPE(ddepth, cn); + int ddepth = std::max(sum_op == OCL_OP_SUM_SQR ? CV_32F : CV_32S, depth), + dtype = CV_MAKE_TYPE(ddepth, cn); int wgs2_aligned = 1; while (wgs2_aligned < (int)wgs) @@ -501,7 +502,7 @@ static bool ocl_sum( InputArray _src, Scalar & res, int sum_op ) dbsize, ocl::KernelArg::PtrWriteOnly(db)); size_t globalsize = dbsize * wgs; - if (k.run(1, &globalsize, &wgs, true)) + if (k.run(1, &globalsize, &wgs, false)) { typedef Scalar (*part_sum)(Mat m); part_sum funcs[3] = { ocl_part_sum, ocl_part_sum, ocl_part_sum }, @@ -1927,8 +1928,9 @@ static bool ocl_norm( InputArray _src, int normType, double & result ) Scalar s; bool unstype = depth == CV_8U || depth == CV_16U; - ocl_sum(src.reshape(1), s, normType == NORM_L2 ? - OCL_OP_SUM_SQR : (unstype ? OCL_OP_SUM : OCL_OP_SUM_ABS) ); + if ( !ocl_sum(src.reshape(1), s, normType == NORM_L2 ? + OCL_OP_SUM_SQR : (unstype ? OCL_OP_SUM : OCL_OP_SUM_ABS)) ) + return false; result = normType == NORM_L1 ? s[0] : std::sqrt(s[0]); } diff --git a/modules/core/test/ocl/test_arithm.cpp b/modules/core/test/ocl/test_arithm.cpp index 03d8422182..607e906bd2 100644 --- a/modules/core/test/ocl/test_arithm.cpp +++ b/modules/core/test/ocl/test_arithm.cpp @@ -1355,7 +1355,7 @@ OCL_TEST_P(ScaleAdd, Mat) OCL_OFF(cv::scaleAdd(src1_roi, val[0], src2_roi, dst1_roi)); OCL_ON(cv::scaleAdd(usrc1_roi, val[0], usrc2_roi, udst1_roi)); - Near(depth <= CV_32S ? 1 : 1e-6); + Near(depth <= CV_32S ? 1 : 1e-3); } }