mirror of
https://github.com/opencv/opencv.git
synced 2024-11-28 13:10:12 +08:00
Fix an issue with Kernel object reset/release during consecutive Kernel::run calls
Kernel::run launches OpenCL GPU kernels and sets an event callback function to decrease the reference count of a UMat, or remove the UMat, when the launched workloads are completed. However, some OCL kernels require multiple calls to Kernel::run with some kernel parameters changed (e.g., input and output buffer offsets) to get the final computation result. In that case, the current implementation performs unnecessary synchronization and cleanupUMats calls. This fix requires the user to specify whether there will be more work or not. If there is no remaining computation, Kernel::run will reset the kernel object. Signed-off-by: Woo, Insoo <insoo.woo@intel.com>
This commit is contained in:
parent
12569dc730
commit
cc7f9f5469
@ -578,9 +578,17 @@ public:
|
||||
@param localsize work-group size for each dimension.
|
||||
@param sync specify whether to wait for OpenCL computation to finish before return.
|
||||
@param q command queue
|
||||
@param moreWorkDone specify whether there will be remaining work to compute (more Kernel::run calls).
|
||||
Use this when a computation requires multiple kernel executions, each with different input and output
|
||||
buffer offsets, to get the final computation result. For example:
|
||||
kernel.setArg(0, ..);
|
||||
kernel.setArg(1, offset);
|
||||
kernel.run(..., q, true);
|
||||
kernel.setArg(1, offset+256);
|
||||
kernel.run(..., q, false);
|
||||
*/
|
||||
bool run(int dims, size_t globalsize[],
|
||||
size_t localsize[], bool sync, const Queue& q=Queue());
|
||||
size_t localsize[], bool sync, const Queue& q=Queue(), bool moreWorkDone = false);
|
||||
bool runTask(bool sync, const Queue& q=Queue());
|
||||
|
||||
size_t workGroupSize() const;
|
||||
|
@ -3185,7 +3185,7 @@ struct Kernel::Impl
|
||||
|
||||
void cleanupUMats()
|
||||
{
|
||||
for( int i = 0; i < MAX_ARRS; i++ )
|
||||
for( int i = 0; i < nu; i++ )
|
||||
if( u[i] )
|
||||
{
|
||||
if( CV_XADD(&u[i]->urefcount, -1) == 1 )
|
||||
@ -3446,9 +3446,8 @@ int Kernel::set(int i, const KernelArg& arg)
|
||||
return i+1;
|
||||
}
|
||||
|
||||
|
||||
bool Kernel::run(int dims, size_t _globalsize[], size_t _localsize[],
|
||||
bool sync, const Queue& q)
|
||||
bool sync, const Queue& q, bool moreWorkDone)
|
||||
{
|
||||
CV_INSTRUMENT_REGION_OPENCL_RUN(p->name.c_str());
|
||||
|
||||
@ -3469,11 +3468,11 @@ bool Kernel::run(int dims, size_t _globalsize[], size_t _localsize[],
|
||||
}
|
||||
if( total == 0 )
|
||||
return true;
|
||||
if( p->haveTempDstUMats )
|
||||
if( p->haveTempDstUMats && !moreWorkDone)
|
||||
sync = true;
|
||||
cl_int retval = clEnqueueNDRangeKernel(qq, p->handle, (cl_uint)dims,
|
||||
offset, globalsize, _localsize, 0, 0,
|
||||
sync ? 0 : &p->e);
|
||||
sync ? 0 : (moreWorkDone? 0: &p->e ));
|
||||
#if CV_OPENCL_SHOW_RUN_ERRORS
|
||||
if (retval != CL_SUCCESS)
|
||||
{
|
||||
@ -3484,9 +3483,9 @@ bool Kernel::run(int dims, size_t _globalsize[], size_t _localsize[],
|
||||
if( sync || retval != CL_SUCCESS )
|
||||
{
|
||||
CV_OclDbgAssert(clFinish(qq) == CL_SUCCESS);
|
||||
p->cleanupUMats();
|
||||
if (!moreWorkDone) p->cleanupUMats();
|
||||
}
|
||||
else
|
||||
else if (!moreWorkDone)
|
||||
{
|
||||
p->addref();
|
||||
CV_OclDbgAssert(clSetEventCallback(p->e, CL_COMPLETE, oclCleanupCallback, p) == CL_SUCCESS);
|
||||
|
Loading…
Reference in New Issue
Block a user