mirror of
https://github.com/opencv/opencv.git
synced 2025-06-11 11:45:30 +08:00
dnn(ocl): fix conv DWCONV workgroup
This commit is contained in:
parent
4d587c341b
commit
f977d10a19
@ -1034,12 +1034,15 @@ bool OCL4DNNConvSpatial<float>::convolve(const UMat &bottom, UMat &top,
|
||||
kernel.set(argIdx++, (uint16_t)output_w_);
|
||||
kernel.set(argIdx++, (uint16_t)output_h_);
|
||||
|
||||
size_t global_size[3];
|
||||
global_size[0] = output_w_;
|
||||
global_size[1] = output_h_;
|
||||
global_size[2] = num_output_ * num_;
|
||||
|
||||
if (!kernel.run_(3, global_size, NULL, false))
|
||||
size_t wgs = kernel.workGroupSize();
|
||||
if (!wgs)
|
||||
{
|
||||
CV_LOG_ERROR(NULL, "DNN/OpenCL: Can't query workGroupSize of DWCONV kernel");
|
||||
return false;
|
||||
}
|
||||
size_t lws[1] = { wgs };
|
||||
size_t gws[1] = { roundUp((size_t)output_w_ * output_h_ * num_output_ * num_, (unsigned)lws[0]) };
|
||||
if (!kernel.run_(1, gws, lws, false))
|
||||
{
|
||||
CV_LOG_ERROR(NULL, "DNN/OpenCL: DWCONV kernel run failed");
|
||||
return false;
|
||||
|
@ -1850,10 +1850,13 @@ __kernel void DWCONV(
|
||||
const ushort output_width,
|
||||
const ushort output_height) {
|
||||
__global Dtype* convolved_image = convolved_image_base + convolved_image_offset;
|
||||
const int outputX = get_global_id(0);
|
||||
const int outputY = get_global_id(1);
|
||||
const int outputZ = get_global_id(2);
|
||||
if(outputX < output_width && outputY < output_height)
|
||||
const int out_idx = get_global_id(0); // 1D task layout: [output_width * output_height * OUTPUT_Z]
|
||||
const int plane_size = output_width * output_height;
|
||||
const int out_plane_idx = out_idx % plane_size;
|
||||
const int outputZ = out_idx / plane_size;
|
||||
const int outputY = out_plane_idx / output_width;
|
||||
const int outputX = out_plane_idx % output_width;
|
||||
if (outputZ < OUTPUT_Z)
|
||||
{
|
||||
Dtype sum = 0.;
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user