From d88fdd0378c280763172652931b8d06abe3ebbb5 Mon Sep 17 00:00:00 2001
From: ElenaGvozdeva
Date: Tue, 28 Oct 2014 15:18:31 +0300
Subject: [PATCH] use LOCAL_SIZE+1

---
 modules/core/src/opencl/gemm.cl | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/modules/core/src/opencl/gemm.cl b/modules/core/src/opencl/gemm.cl
index 0961628a49..fc050547be 100644
--- a/modules/core/src/opencl/gemm.cl
+++ b/modules/core/src/opencl/gemm.cl
@@ -20,6 +20,8 @@
 #define IND_B mad24(x, WTSIZE, B_offset)
 #define STEP_B B_step / WTSIZE
 
+#define LOCAL_SIZE_ODD (LOCAL_SIZE + 1)
+
 #if cn==2
 #if kercn==2
 #define MUL(a, b)\
@@ -65,8 +67,8 @@ __kernel void gemm(__global const uchar * A_ptr, int A_step, int A_offset,
             MUL(A[i], B[i*STEP_B]);
 #else
 
-    __local T a_local[LOCAL_SIZE*LOCAL_SIZE];
-    __local WT b_local[LOCAL_SIZE*LOCAL_SIZE];
+    __local T a_local[LOCAL_SIZE_ODD*LOCAL_SIZE];
+    __local WT b_local[LOCAL_SIZE_ODD*LOCAL_SIZE];
 
     int reps;
 #if NO_MULT
@@ -78,9 +80,9 @@ __kernel void gemm(__global const uchar * A_ptr, int A_step, int A_offset,
     for (int p = 0; p < reps; ++p)
     {
         if (p * LOCAL_SIZE + lidx < n && y < D_rows)
-            a_local[mad24(lidy, LOCAL_SIZE, lidx)] = A[mad24(p, LOCAL_SIZE, lidx)];
+            a_local[mad24(lidy, LOCAL_SIZE_ODD, lidx)] = A[mad24(p, LOCAL_SIZE, lidx)];
         if (p * LOCAL_SIZE + lidy < n && x < D_cols)
-            b_local[mad24(lidy, LOCAL_SIZE, lidx)] = B[mad24(p, LOCAL_SIZE, lidy)*STEP_B];
+            b_local[mad24(lidy, LOCAL_SIZE_ODD, lidx)] = B[mad24(p, LOCAL_SIZE, lidy)*STEP_B];
 
         barrier(CLK_LOCAL_MEM_FENCE);
 
@@ -92,7 +94,7 @@ __kernel void gemm(__global const uchar * A_ptr, int A_step, int A_offset,
 #else
             for (int i = 0; i < LOCAL_SIZE; ++i)
 #endif
-                MUL(a_local[mad24(lidy, LOCAL_SIZE, i)], b_local[mad24(i, LOCAL_SIZE, lidx)]);
+                MUL(a_local[mad24(lidy, LOCAL_SIZE_ODD, i)], b_local[mad24(i, LOCAL_SIZE_ODD, lidx)]);
         }
         barrier(CLK_LOCAL_MEM_FENCE);
     }