mirror of
https://github.com/opencv/opencv.git
synced 2025-06-12 12:22:51 +08:00
ocl: avoid rescheduling of async kernels
This commit is contained in:
parent
3e3787ecb6
commit
4fa82809df
@ -2755,7 +2755,7 @@ KernelArg KernelArg::Constant(const Mat& m)
|
|||||||
struct Kernel::Impl
|
struct Kernel::Impl
|
||||||
{
|
{
|
||||||
Impl(const char* kname, const Program& prog) :
|
Impl(const char* kname, const Program& prog) :
|
||||||
refcount(1), handle(NULL), isInProgress(false), nu(0)
|
refcount(1), handle(NULL), isInProgress(false), isAsyncRun(false), nu(0)
|
||||||
{
|
{
|
||||||
cl_program ph = (cl_program)prog.ptr();
|
cl_program ph = (cl_program)prog.ptr();
|
||||||
cl_int retval = 0;
|
cl_int retval = 0;
|
||||||
@ -2832,6 +2832,7 @@ struct Kernel::Impl
|
|||||||
enum { MAX_ARRS = 16 };
|
enum { MAX_ARRS = 16 };
|
||||||
UMatData* u[MAX_ARRS];
|
UMatData* u[MAX_ARRS];
|
||||||
bool isInProgress;
|
bool isInProgress;
|
||||||
|
bool isAsyncRun; // true if kernel was scheduled in async mode
|
||||||
int nu;
|
int nu;
|
||||||
std::list<Image2D> images;
|
std::list<Image2D> images;
|
||||||
bool haveTempDstUMats;
|
bool haveTempDstUMats;
|
||||||
@ -3111,13 +3112,45 @@ bool Kernel::run(int dims, size_t _globalsize[], size_t _localsize[],
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static bool isRaiseErrorOnReuseAsyncKernel()
|
||||||
|
{
|
||||||
|
static bool initialized = false;
|
||||||
|
static bool value = false;
|
||||||
|
if (!initialized)
|
||||||
|
{
|
||||||
|
value = cv::utils::getConfigurationParameterBool("OPENCV_OPENCL_RAISE_ERROR_REUSE_ASYNC_KERNEL", false);
|
||||||
|
initialized = true;
|
||||||
|
}
|
||||||
|
return value;
|
||||||
|
}
|
||||||
|
|
||||||
bool Kernel::Impl::run(int dims, size_t globalsize[], size_t localsize[],
|
bool Kernel::Impl::run(int dims, size_t globalsize[], size_t localsize[],
|
||||||
bool sync, int64* timeNS, const Queue& q)
|
bool sync, int64* timeNS, const Queue& q)
|
||||||
{
|
{
|
||||||
CV_INSTRUMENT_REGION_OPENCL_RUN(name.c_str());
|
CV_INSTRUMENT_REGION_OPENCL_RUN(name.c_str());
|
||||||
|
|
||||||
if (!handle || isInProgress)
|
if (!handle)
|
||||||
|
{
|
||||||
|
CV_LOG_ERROR(NULL, "OpenCL kernel has zero handle: " << name);
|
||||||
return false;
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (isAsyncRun)
|
||||||
|
{
|
||||||
|
CV_LOG_ERROR(NULL, "OpenCL kernel can't be reused in async mode: " << name);
|
||||||
|
if (isRaiseErrorOnReuseAsyncKernel())
|
||||||
|
CV_Assert(0);
|
||||||
|
return false; // OpenCV 5.0: raise error
|
||||||
|
}
|
||||||
|
isAsyncRun = !sync;
|
||||||
|
|
||||||
|
if (isInProgress)
|
||||||
|
{
|
||||||
|
CV_LOG_ERROR(NULL, "Previous OpenCL kernel launch is not finished: " << name);
|
||||||
|
if (isRaiseErrorOnReuseAsyncKernel())
|
||||||
|
CV_Assert(0);
|
||||||
|
return false; // OpenCV 5.0: raise error
|
||||||
|
}
|
||||||
|
|
||||||
cl_command_queue qq = getQueue(q);
|
cl_command_queue qq = getQueue(q);
|
||||||
if (haveTempDstUMats)
|
if (haveTempDstUMats)
|
||||||
|
@ -46,6 +46,8 @@
|
|||||||
#include <vector>
|
#include <vector>
|
||||||
#include "opencl_kernels_dnn.hpp"
|
#include "opencl_kernels_dnn.hpp"
|
||||||
|
|
||||||
|
#include "opencv2/core/utils/logger.hpp"
|
||||||
|
|
||||||
namespace cv { namespace dnn { namespace ocl4dnn {
|
namespace cv { namespace dnn { namespace ocl4dnn {
|
||||||
|
|
||||||
enum gemm_data_type_t
|
enum gemm_data_type_t
|
||||||
@ -238,10 +240,6 @@ static bool ocl4dnnFastImageGEMM(const CBLAS_TRANSPOSE TransA,
|
|||||||
kernel_name += "_float";
|
kernel_name += "_float";
|
||||||
}
|
}
|
||||||
|
|
||||||
ocl::Kernel oclk_gemm_float(kernel_name.c_str(), ocl::dnn::gemm_image_oclsrc, opts);
|
|
||||||
if (oclk_gemm_float.empty())
|
|
||||||
return false;
|
|
||||||
|
|
||||||
while (C_start_y < M)
|
while (C_start_y < M)
|
||||||
{
|
{
|
||||||
blockC_width = std::min(static_cast<int>(N) - C_start_x, blocksize);
|
blockC_width = std::min(static_cast<int>(N) - C_start_x, blocksize);
|
||||||
@ -348,6 +346,10 @@ static bool ocl4dnnFastImageGEMM(const CBLAS_TRANSPOSE TransA,
|
|||||||
}
|
}
|
||||||
local[1] = 1;
|
local[1] = 1;
|
||||||
|
|
||||||
|
ocl::Kernel oclk_gemm_float(kernel_name.c_str(), ocl::dnn::gemm_image_oclsrc, opts);
|
||||||
|
if (oclk_gemm_float.empty())
|
||||||
|
return false;
|
||||||
|
|
||||||
cl_uint arg_idx = 0;
|
cl_uint arg_idx = 0;
|
||||||
if (is_image_a)
|
if (is_image_a)
|
||||||
oclk_gemm_float.set(arg_idx++, ocl::KernelArg::PtrReadOnly(A));
|
oclk_gemm_float.set(arg_idx++, ocl::KernelArg::PtrReadOnly(A));
|
||||||
@ -378,7 +380,10 @@ static bool ocl4dnnFastImageGEMM(const CBLAS_TRANSPOSE TransA,
|
|||||||
oclk_gemm_float.set(arg_idx++, isFirstColBlock);
|
oclk_gemm_float.set(arg_idx++, isFirstColBlock);
|
||||||
|
|
||||||
if (!oclk_gemm_float.run(2, global, local, false))
|
if (!oclk_gemm_float.run(2, global, local, false))
|
||||||
|
{
|
||||||
|
CV_LOG_WARNING(NULL, "OpenCL kernel enqueue failed: " << kernel_name);
|
||||||
return false;
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
if (TransA == CblasNoTrans)
|
if (TransA == CblasNoTrans)
|
||||||
A_start_x += blockA_width;
|
A_start_x += blockA_width;
|
||||||
|
Loading…
Reference in New Issue
Block a user