Added integer and bool support to dnn OpenCL layers

This commit is contained in:
Alexander Lyulkov 2024-08-20 12:33:08 +03:00
parent 7e8f2a1bc4
commit a69cd7d6ba
10 changed files with 47 additions and 40 deletions

View File

@ -52,6 +52,7 @@
#ifdef HAVE_OPENCL
#include "opencl_kernels_dnn.hpp"
#include "../ocl4dnn/include/common.hpp"
#endif
#ifdef HAVE_CUDA
@ -235,8 +236,6 @@ public:
{
std::vector<UMat> inputs;
std::vector<UMat> outputs;
bool use_half = (inps.depth() == CV_16F);
inps.getUMatVector(inputs);
outs.getUMatVector(outputs);
@ -250,8 +249,9 @@ public:
int num_concats = total(shape(inputs[0]), 0, cAxis);
int offset_concat_axis = 0;
UMat& outMat = outputs[0];
String buildopt = format(" -DDtype=%s", (use_half) ? "half" : "float");
String kname = format("concat_%s", use_half ? "half" : "float");
String matType = matTypeToOclType(inputs[0].type());
String buildopt = " -DDtype=" + matType;
String kname = "concat_" + matType;
for (size_t i = 0; i < inputs.size(); i++)
{
@ -287,8 +287,7 @@ public:
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) &&
(inputs_arr.depth() == CV_32F || inputs_arr.depth() == CV_16F),
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
forward_ocl(inputs_arr, outputs_arr, internals_arr))
std::vector<Mat> inputs, outputs;

View File

@ -337,11 +337,13 @@ public:
mnew_stride.copyTo(unew_stride);
}
bool use_half = (inps.depth() == CV_16F);
String opts = format("-DDtype=%s", use_half ? "half" : "float");
for (size_t i = 0; i < inputs.size(); i++)
{
ocl::Kernel kernel("permute", ocl::dnn::permute_oclsrc, opts);
String matType = matTypeToOclType(inputs[0].type());
String opts = " -DDtype=" + matType;
String kname = "permute_" + matType;
ocl::Kernel kernel(kname.c_str(), ocl::dnn::permute_oclsrc, opts);
kernel.set(0, (int)_count);
kernel.set(1, ocl::KernelArg::PtrReadOnly(inputs[i]));
@ -364,9 +366,7 @@ public:
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) &&
inputs_arr.depth() != CV_8S && inputs_arr.depth() != CV_8U &&
inputs_arr.depth() != CV_Bool && inputs_arr.depth() != CV_64S,
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
forward_ocl(inputs_arr, outputs_arr, internals_arr))
std::vector<Mat> inputs, outputs;

View File

@ -315,25 +315,11 @@ public:
CV_Assert_N(inputs.size() == 1, !outputs.empty(), !computeMaxIdx || outputs.size() == 2);
UMat& inpMat = inputs[0];
UMat& outMat = outputs[0];
UMat maskMat;
if (computeMaxIdx)
maskMat.create(shape(outputs[1]), use_half ? CV_16F : CV_32F);
UMat maskMat = computeMaxIdx ? outputs[1] : UMat();
CV_Assert(inpMat.offset == 0 && outMat.offset == 0);
bool result = poolOp->Forward(inpMat, outMat, maskMat);
if (computeMaxIdx) {
if (use_half) {
UMat maskMat32F;
maskMat.convertTo(maskMat32F, CV_32F);
maskMat32F.convertTo(outputs[1], CV_64S);
}
else
maskMat.convertTo(outputs[1], CV_64S);
}
return result;
return poolOp->Forward(inpMat, outMat, maskMat);
}
#endif

View File

@ -195,7 +195,7 @@ public:
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) && inputs_arr.depth() != CV_32S && inputs_arr.depth() != CV_64S,
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
forward_ocl(inputs_arr, outputs_arr, internals_arr))
if (inputs_arr.depth() == CV_16F)

View File

@ -331,7 +331,7 @@ public:
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) && inputs_arr.depth() != CV_32S && inputs_arr.depth() != CV_64S,
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
forward_ocl(inputs_arr, outputs_arr, internals_arr))
std::vector<Mat> inputs, outputs;

View File

@ -621,8 +621,7 @@ public:
inputs_arr.getMatVector(inputs);
outputs_arr.getMatVector(outputs);
CV_OCL_RUN((IS_DNN_OPENCL_TARGET(preferableTarget) &&
(outputs[0].type() != CV_32S && outputs[0].type() != CV_64S)),
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
forward_ocl(inputs_arr, outputs_arr, internals_arr))
const Mat& inpMat = inputs[0];

View File

@ -55,4 +55,6 @@
bool clOptionSupport(cv::String option);
cv::String matTypeToOclType(int cvMatType);
#endif

View File

@ -52,3 +52,21 @@ bool clOptionSupport(cv::String option)
ocl::Program program = ocl::Context::getDefault().getProg(ocl::dnn::dummy_oclsrc, option, errmsg);
return program.ptr() ? true : false;
}
// Translate an OpenCV Mat depth constant into the matching OpenCL C scalar
// type name (used to build -DDtype=... kernel options and kernel name suffixes).
// Throws cv::Error::StsBadArg for depths that have no OpenCL equivalent.
cv::String matTypeToOclType(int cvMatType)
{
    cv::String typeName;
    if      (cvMatType == CV_16F)  typeName = "half";
    else if (cvMatType == CV_32F)  typeName = "float";
    else if (cvMatType == CV_Bool) typeName = "bool";
    else if (cvMatType == CV_8U)   typeName = "uchar";
    else if (cvMatType == CV_8S)   typeName = "char";
    else if (cvMatType == CV_32S)  typeName = "int";
    else if (cvMatType == CV_64S)  typeName = "long";
    else
        CV_Error(Error::StsBadArg, "Unsupported mat type");
    return typeName;
}

View File

@ -61,7 +61,7 @@ __kernel void
const int pooled_height, const int pooled_width,
__global Dtype* top_data
#ifdef HAVE_MASK
, __global Dtype* mask
, __global long* mask
#endif
)
{

View File

@ -44,13 +44,16 @@
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
#endif
__kernel void permute(const int nthreads,
__global Dtype* bottom_data,
global int* permute_order,
global int* oldStride,
global int* newStride,
const int num_axes,
__global Dtype* top_data)
#define CONCAT(A,B) A##_##B
#define TEMPLATE(name,type) CONCAT(name,type)
__kernel void TEMPLATE(permute, Dtype)(const int nthreads,
__global Dtype* bottom_data,
global int* permute_order,
global int* oldStride,
global int* newStride,
const int num_axes,
__global Dtype* top_data)
{
for (int i = get_global_id(0); i < nthreads; i += get_global_size(0))
{