mirror of
https://github.com/opencv/opencv.git
synced 2025-07-25 22:57:53 +08:00
Fix API compatibility error
This patch fixes an OCV API compatibility error. The error was reported because of an interface change to Kernel::run. To resolve the issue, an overloaded version of Kernel::run is added. It takes a flag indicating whether there is more work to be done with the kernel object without releasing the resources related to it. Signed-off-by: Woo, Insoo <insoo.woo@intel.com>
This commit is contained in:
parent
8f5b66ff45
commit
2ef427db91
@ -580,6 +580,20 @@ public:
|
|||||||
@param localsize work-group size for each dimension.
|
@param localsize work-group size for each dimension.
|
||||||
@param sync specify whether to wait for OpenCL computation to finish before return.
|
@param sync specify whether to wait for OpenCL computation to finish before return.
|
||||||
@param q command queue
|
@param q command queue
|
||||||
|
*/
|
||||||
|
bool run(int dims, size_t globalsize[],
|
||||||
|
size_t localsize[], bool sync, const Queue& q=Queue());
|
||||||
|
/**
|
||||||
|
Run the OpenCL kernel.
|
||||||
|
@param dims the work problem dimensions. It is the length of globalsize and localsize. It can be either 1, 2 or 3.
|
||||||
|
@param globalsize work items for each dimension.
|
||||||
|
It is not the final globalsize passed to OpenCL.
|
||||||
|
Each dimension will be adjusted to the nearest integer divisible by the corresponding value in localsize.
|
||||||
|
If localsize is NULL, it will still be adjusted depending on dims.
|
||||||
|
The adjusted values are greater than or equal to the original values.
|
||||||
|
@param localsize work-group size for each dimension.
|
||||||
|
@param sync specify whether to wait for OpenCL computation to finish before return.
|
||||||
|
@param q command queue
|
||||||
@param moreWorkDone specify whether there will be remaining work to be computed (more Kernel::run calls).
|
@param moreWorkDone specify whether there will be remaining work to be computed (more Kernel::run calls).
|
||||||
This applies when a computation requires multiple kernel executions, each with different input and output buffer offsets, to get
|
This applies when a computation requires multiple kernel executions, each with different input and output buffer offsets, to get
|
||||||
the final computation results.
|
the final computation results.
|
||||||
@ -590,7 +604,8 @@ public:
|
|||||||
kernel.run(..., q, false);
|
kernel.run(..., q, false);
|
||||||
*/
|
*/
|
||||||
bool run(int dims, size_t globalsize[],
|
bool run(int dims, size_t globalsize[],
|
||||||
size_t localsize[], bool sync, const Queue& q=Queue(), bool moreWorkDone = false);
|
size_t localsize[], bool sync, bool moreWorkDone, const Queue& q);
|
||||||
|
|
||||||
bool runTask(bool sync, const Queue& q=Queue());
|
bool runTask(bool sync, const Queue& q=Queue());
|
||||||
|
|
||||||
size_t workGroupSize() const;
|
size_t workGroupSize() const;
|
||||||
|
@ -33,6 +33,8 @@
|
|||||||
namespace cv
|
namespace cv
|
||||||
{
|
{
|
||||||
|
|
||||||
|
bool intel_gpu_gemm( UMat A, Size sizeA, UMat B, Size sizeB, UMat D, Size sizeD, double alpha, double beta, bool atrans, bool btrans);
|
||||||
|
|
||||||
bool intel_gpu_gemm(
|
bool intel_gpu_gemm(
|
||||||
UMat A, Size sizeA,
|
UMat A, Size sizeA,
|
||||||
UMat B, Size sizeB,
|
UMat B, Size sizeB,
|
||||||
@ -40,7 +42,7 @@ bool intel_gpu_gemm(
|
|||||||
double alpha, double beta,
|
double alpha, double beta,
|
||||||
bool atrans, bool btrans)
|
bool atrans, bool btrans)
|
||||||
{
|
{
|
||||||
sizeA; sizeB;
|
CV_UNUSED(sizeA); CV_UNUSED(sizeB);
|
||||||
|
|
||||||
int M = sizeD.height, N = sizeD.width, K = ((atrans)? sizeA.height : sizeA.width);
|
int M = sizeD.height, N = sizeD.width, K = ((atrans)? sizeA.height : sizeA.width);
|
||||||
|
|
||||||
@ -105,7 +107,7 @@ bool intel_gpu_gemm(
|
|||||||
ocl::Queue q;
|
ocl::Queue q;
|
||||||
if(!atrans && btrans)
|
if(!atrans && btrans)
|
||||||
{
|
{
|
||||||
ret = k.run(2, global, local, false, q, false);
|
ret = k.run(2, global, local, false, false, q);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@ -114,12 +116,12 @@ bool intel_gpu_gemm(
|
|||||||
k.set(14, &start_index, sizeof(start_index));
|
k.set(14, &start_index, sizeof(start_index));
|
||||||
if ((start_index + stride) < K)
|
if ((start_index + stride) < K)
|
||||||
{
|
{
|
||||||
ret = k.run(2, global, local, false, q, true);
|
ret = k.run(2, global, local, false, true, q);
|
||||||
if (!ret) return ret;
|
if (!ret) return ret;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
ret = k.run(2, global, local, false, q, false);
|
ret = k.run(2, global, local, false, false, q);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -3465,7 +3465,7 @@ int Kernel::set(int i, const KernelArg& arg)
|
|||||||
}
|
}
|
||||||
|
|
||||||
bool Kernel::run(int dims, size_t _globalsize[], size_t _localsize[],
|
bool Kernel::run(int dims, size_t _globalsize[], size_t _localsize[],
|
||||||
bool sync, const Queue& q, bool moreWorkDone)
|
bool sync, bool moreWorkDone, const Queue& q)
|
||||||
{
|
{
|
||||||
CV_INSTRUMENT_REGION_OPENCL_RUN(p->name.c_str());
|
CV_INSTRUMENT_REGION_OPENCL_RUN(p->name.c_str());
|
||||||
|
|
||||||
@ -3511,6 +3511,12 @@ bool Kernel::run(int dims, size_t _globalsize[], size_t _localsize[],
|
|||||||
return retval == CL_SUCCESS;
|
return retval == CL_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool Kernel::run(int dims, size_t _globalsize[], size_t _localsize[],
|
||||||
|
bool sync, const Queue& q)
|
||||||
|
{
|
||||||
|
return run(dims, _globalsize, _localsize, sync, false, q);
|
||||||
|
}
|
||||||
|
|
||||||
bool Kernel::runTask(bool sync, const Queue& q)
|
bool Kernel::runTask(bool sync, const Queue& q)
|
||||||
{
|
{
|
||||||
if(!p || !p->handle || p->e != 0)
|
if(!p || !p->handle || p->e != 0)
|
||||||
|
Loading…
Reference in New Issue
Block a user