Fix an issue with Kernel object reset/release on consecutive Kernel::run calls

Kernel::run launches OCL GPU kernels and sets an event callback function
to decrease the ref count of UMat or remove UMat when the launched workloads
are completed. However, some OCL kernels require multiple calls of the
Kernel::run function with some kernel parameter changes (e.g., input
and output buffer offset) to get the final computation result.
In that case, the current implementation performs unnecessary
synchronization and cleanupUMats calls.

This fix requires the user to specify whether there will be more work or not.
If there is no remaining computation, Kernel::run will reset the
kernel object.

Signed-off-by: Woo, Insoo <insoo.woo@intel.com>
This commit is contained in:
Woo, Insoo 2017-01-25 15:28:32 -08:00
parent 12569dc730
commit cc7f9f5469
2 changed files with 15 additions and 8 deletions

View File

@ -578,9 +578,17 @@ public:
@param localsize work-group size for each dimension.
@param sync specify whether to wait for OpenCL computation to finish before return.
@param q command queue
@param moreWorkDone specify whether there is remaining work to be computed (more Kernel::run calls will follow).
Use this when a computation requires multiple kernel executions, each with a changed input and output buffer offset,
to get the final computation result. For example:
kernel.setArg(0, ..);
kernel.setArg(1, offset);
kernel.run(..., q, true);
kernel.setArg(1, offset+256);
kernel.run(..., q, false);
*/
bool run(int dims, size_t globalsize[],
size_t localsize[], bool sync, const Queue& q=Queue());
size_t localsize[], bool sync, const Queue& q=Queue(), bool moreWorkDone = false);
bool runTask(bool sync, const Queue& q=Queue());
size_t workGroupSize() const;

View File

@ -3185,7 +3185,7 @@ struct Kernel::Impl
void cleanupUMats()
{
for( int i = 0; i < MAX_ARRS; i++ )
for( int i = 0; i < nu; i++ )
if( u[i] )
{
if( CV_XADD(&u[i]->urefcount, -1) == 1 )
@ -3446,9 +3446,8 @@ int Kernel::set(int i, const KernelArg& arg)
return i+1;
}
bool Kernel::run(int dims, size_t _globalsize[], size_t _localsize[],
bool sync, const Queue& q)
bool sync, const Queue& q, bool moreWorkDone)
{
CV_INSTRUMENT_REGION_OPENCL_RUN(p->name.c_str());
@ -3469,11 +3468,11 @@ bool Kernel::run(int dims, size_t _globalsize[], size_t _localsize[],
}
if( total == 0 )
return true;
if( p->haveTempDstUMats )
if( p->haveTempDstUMats && !moreWorkDone)
sync = true;
cl_int retval = clEnqueueNDRangeKernel(qq, p->handle, (cl_uint)dims,
offset, globalsize, _localsize, 0, 0,
sync ? 0 : &p->e);
sync ? 0 : (moreWorkDone? 0: &p->e ));
#if CV_OPENCL_SHOW_RUN_ERRORS
if (retval != CL_SUCCESS)
{
@ -3484,9 +3483,9 @@ bool Kernel::run(int dims, size_t _globalsize[], size_t _localsize[],
if( sync || retval != CL_SUCCESS )
{
CV_OclDbgAssert(clFinish(qq) == CL_SUCCESS);
p->cleanupUMats();
if (!moreWorkDone) p->cleanupUMats();
}
else
else if (!moreWorkDone)
{
p->addref();
CV_OclDbgAssert(clSetEventCallback(p->e, CL_COMPLETE, oclCleanupCallback, p) == CL_SUCCESS);