From 1c9f956b88f7dca4ed21bbf9b2db4515d101019f Mon Sep 17 00:00:00 2001 From: Yan Wang Date: Thu, 4 Dec 2014 14:35:54 +0800 Subject: [PATCH] Remove unnecessary operations and calculations in loop body. Signed-off-by: Yan Wang --- .../src/opencl/brute_force_match.cl | 42 ++++++++++++------- 1 file changed, 27 insertions(+), 15 deletions(-) diff --git a/modules/features2d/src/opencl/brute_force_match.cl b/modules/features2d/src/opencl/brute_force_match.cl index 7805e4767b..8f0e183799 100644 --- a/modules/features2d/src/opencl/brute_force_match.cl +++ b/modules/features2d/src/opencl/brute_force_match.cl @@ -210,6 +210,8 @@ __kernel void BruteForceMatch_Match( } #else __local value_type *s_train = (__local value_type *)sharebuffer + BLOCK_SIZE_ODD * BLOCK_SIZE; + const int s_query_i = mad24(BLOCK_SIZE_ODD, lidy, lidx); + const int s_train_i = mad24(BLOCK_SIZE_ODD, lidx, lidy); #endif float myBestDistance = MAX_FLOAT; @@ -242,13 +244,15 @@ __kernel void BruteForceMatch_Match( { const int loadx = mad24(i, BLOCK_SIZE, lidx); //load query and train into local memory - s_query[mad24(BLOCK_SIZE_ODD, lidy, lidx)] = 0; - s_train[mad24(BLOCK_SIZE_ODD, lidx, lidy)] = 0; - if (loadx < query_cols) { - s_query[mad24(BLOCK_SIZE_ODD, lidy, lidx)] = query_vec[loadx]; - s_train[mad24(BLOCK_SIZE_ODD, lidx, lidy)] = train_vec[loadx]; + s_query[s_query_i] = query_vec[loadx]; + s_train[s_train_i] = train_vec[loadx]; + } + else + { + s_query[s_query_i] = 0; + s_train[s_train_i] = 0; } barrier(CLK_LOCAL_MEM_FENCE); @@ -337,18 +341,22 @@ __kernel void BruteForceMatch_RadiusMatch( __local value_type *s_train = (__local value_type *)sharebuffer + BLOCK_SIZE_ODD * BLOCK_SIZE; result_type result = 0; + const int s_query_i = mad24(BLOCK_SIZE_ODD, lidy, lidx); + const int s_train_i = mad24(BLOCK_SIZE_ODD, lidx, lidy); for (int i = 0 ; i < (query_cols + BLOCK_SIZE - 1) / BLOCK_SIZE ; ++i) { //load a BLOCK_SIZE * BLOCK_SIZE block into local train. 
const int loadx = mad24(BLOCK_SIZE, i, lidx); - s_query[mad24(BLOCK_SIZE_ODD, lidy, lidx)] = 0; - s_train[mad24(BLOCK_SIZE_ODD, lidx, lidy)] = 0; - if (loadx < query_cols) { - s_query[mad24(BLOCK_SIZE_ODD, lidy, lidx)] = query_vec[loadx]; - s_train[mad24(BLOCK_SIZE_ODD, lidx, lidy)] = train_vec[loadx]; + s_query[s_query_i] = query_vec[loadx]; + s_train[s_train_i] = train_vec[loadx]; + } + else + { + s_query[s_query_i] = 0; + s_train[s_train_i] = 0; } //synchronize to make sure each elem for reduceIteration in share memory is written already. @@ -405,6 +413,8 @@ __kernel void BruteForceMatch_knnMatch( } #else __local value_type *s_train = (__local value_type *)sharebuffer + BLOCK_SIZE_ODD * BLOCK_SIZE; + const int s_query_i = mad24(BLOCK_SIZE_ODD, lidy, lidx); + const int s_train_i = mad24(BLOCK_SIZE_ODD, lidx, lidy); #endif float myBestDistance1 = MAX_FLOAT; @@ -438,13 +448,15 @@ __kernel void BruteForceMatch_knnMatch( { const int loadx = mad24(BLOCK_SIZE, i, lidx); //load query and train into local memory - s_query[mad24(BLOCK_SIZE_ODD, lidy, lidx)] = 0; - s_train[mad24(BLOCK_SIZE_ODD, lidx, lidy)] = 0; - if (loadx < query_cols) { - s_query[mad24(BLOCK_SIZE_ODD, lidy, lidx)] = query_vec[loadx]; - s_train[mad24(BLOCK_SIZE_ODD, lidx, lidy)] = train_vec[loadx]; + s_query[s_query_i] = query_vec[loadx]; + s_train[s_train_i] = train_vec[loadx]; + } + else + { + s_query[s_query_i] = 0; + s_train[s_train_i] = 0; } barrier(CLK_LOCAL_MEM_FENCE);