mirror of
https://github.com/opencv/opencv.git
synced 2025-06-16 14:50:52 +08:00
dnn(ocl): fix conv DWCONV workgroup
This commit is contained in:
parent
4d587c341b
commit
f977d10a19
@ -1034,12 +1034,15 @@ bool OCL4DNNConvSpatial<float>::convolve(const UMat &bottom, UMat &top,
|
|||||||
kernel.set(argIdx++, (uint16_t)output_w_);
|
kernel.set(argIdx++, (uint16_t)output_w_);
|
||||||
kernel.set(argIdx++, (uint16_t)output_h_);
|
kernel.set(argIdx++, (uint16_t)output_h_);
|
||||||
|
|
||||||
size_t global_size[3];
|
size_t wgs = kernel.workGroupSize();
|
||||||
global_size[0] = output_w_;
|
if (!wgs)
|
||||||
global_size[1] = output_h_;
|
{
|
||||||
global_size[2] = num_output_ * num_;
|
CV_LOG_ERROR(NULL, "DNN/OpenCL: Can't query workGroupSize of DWCONV kernel");
|
||||||
|
return false;
|
||||||
if (!kernel.run_(3, global_size, NULL, false))
|
}
|
||||||
|
size_t lws[1] = { wgs };
|
||||||
|
size_t gws[1] = { roundUp((size_t)output_w_ * output_h_ * num_output_ * num_, (unsigned)lws[0]) };
|
||||||
|
if (!kernel.run_(1, gws, lws, false))
|
||||||
{
|
{
|
||||||
CV_LOG_ERROR(NULL, "DNN/OpenCL: DWCONV kernel run failed");
|
CV_LOG_ERROR(NULL, "DNN/OpenCL: DWCONV kernel run failed");
|
||||||
return false;
|
return false;
|
||||||
|
@ -1850,10 +1850,13 @@ __kernel void DWCONV(
|
|||||||
const ushort output_width,
|
const ushort output_width,
|
||||||
const ushort output_height) {
|
const ushort output_height) {
|
||||||
__global Dtype* convolved_image = convolved_image_base + convolved_image_offset;
|
__global Dtype* convolved_image = convolved_image_base + convolved_image_offset;
|
||||||
const int outputX = get_global_id(0);
|
const int out_idx = get_global_id(0); // 1D task layout: [output_width * output_height * OUTPUT_Z]
|
||||||
const int outputY = get_global_id(1);
|
const int plane_size = output_width * output_height;
|
||||||
const int outputZ = get_global_id(2);
|
const int out_plane_idx = out_idx % plane_size;
|
||||||
if(outputX < output_width && outputY < output_height)
|
const int outputZ = out_idx / plane_size;
|
||||||
|
const int outputY = out_plane_idx / output_width;
|
||||||
|
const int outputX = out_plane_idx % output_width;
|
||||||
|
if (outputZ < OUTPUT_Z)
|
||||||
{
|
{
|
||||||
Dtype sum = 0.;
|
Dtype sum = 0.;
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user