mirror of
https://github.com/opencv/opencv.git
synced 2025-01-19 06:53:50 +08:00
Merge pull request #10754 from dkurt:dnn_ocl_gemv_min_globalsize
This commit is contained in:
commit
9d25bd583f
@ -451,23 +451,27 @@ bool ocl4dnnGEMV<float>(const CBLAS_TRANSPOSE TransA,
|
|||||||
|
|
||||||
uint row_size = M;
|
uint row_size = M;
|
||||||
uint col_size = N;
|
uint col_size = N;
|
||||||
size_t localsize[] = { 128 };
|
|
||||||
size_t globalsize[] = { row_size / 4 * localsize[0] };
|
|
||||||
|
|
||||||
uint argId = 0;
|
if (row_size >= 4)
|
||||||
k.set(argId++, ocl::KernelArg::PtrReadOnly(A));
|
{
|
||||||
k.set(argId++, offA);
|
size_t localsize[] = { 128 };
|
||||||
k.set(argId++, cl_uint(col_size));
|
size_t globalsize[] = { row_size / 4 * localsize[0] };
|
||||||
k.set(argId++, cl_uint(col_size%4));
|
|
||||||
k.set(argId++, ocl::KernelArg::PtrReadOnly(x));
|
|
||||||
k.set(argId++, offx);
|
|
||||||
k.set(argId++, alpha);
|
|
||||||
k.set(argId++, beta);
|
|
||||||
k.set(argId++, ocl::KernelArg::PtrWriteOnly(y));
|
|
||||||
k.set(argId++, offy);
|
|
||||||
k.set(argId++, NULL, localsize[0] * sizeof(cl_float4));
|
|
||||||
|
|
||||||
ret = k.run(1, globalsize, localsize, false);
|
uint argId = 0;
|
||||||
|
k.set(argId++, ocl::KernelArg::PtrReadOnly(A));
|
||||||
|
k.set(argId++, offA);
|
||||||
|
k.set(argId++, cl_uint(col_size));
|
||||||
|
k.set(argId++, cl_uint(col_size%4));
|
||||||
|
k.set(argId++, ocl::KernelArg::PtrReadOnly(x));
|
||||||
|
k.set(argId++, offx);
|
||||||
|
k.set(argId++, alpha);
|
||||||
|
k.set(argId++, beta);
|
||||||
|
k.set(argId++, ocl::KernelArg::PtrWriteOnly(y));
|
||||||
|
k.set(argId++, offy);
|
||||||
|
k.set(argId++, NULL, localsize[0] * sizeof(cl_float4));
|
||||||
|
|
||||||
|
ret = k.run(1, globalsize, localsize, false);
|
||||||
|
}
|
||||||
|
|
||||||
if ((row_size % 4) != 0 && ret)
|
if ((row_size % 4) != 0 && ret)
|
||||||
{
|
{
|
||||||
|
Loading…
Reference in New Issue
Block a user