mirror of
https://github.com/opencv/opencv.git
synced 2025-08-05 22:19:14 +08:00
cleanup ocl4dnn spatial convolution kernels
Remove unused macros and the half-type definition macros; also remove unused ocl::Queue variables.
Signed-off-by: Li Peng <peng.li@intel.com>
This commit is contained in:
parent
55260a8d3c
commit
04edc8fe3a
@ -65,8 +65,6 @@ ocl::Image2D ocl4dnnGEMMCopyBufferToImage(UMat buffer, int offset,
|
||||
int padded_width, int height,
|
||||
int width, int ld)
|
||||
{
|
||||
ocl::Context ctx = ocl::Context::getDefault();
|
||||
ocl::Queue queue = ocl::Queue::getDefault();
|
||||
ocl::Image2D image;
|
||||
|
||||
if (!is_matrix_a && transpose)
|
||||
@ -192,9 +190,6 @@ static bool ocl4dnnFastImageGEMM(const CBLAS_TRANSPOSE TransA,
|
||||
// just padding one line is enough as the sub group block read
|
||||
// will clamp to edge according to the spec.
|
||||
|
||||
ocl::Context ctx = ocl::Context::getDefault();
|
||||
ocl::Queue queue = ocl::Queue::getDefault();
|
||||
|
||||
ocl::Image2D ImA;
|
||||
ocl::Image2D ImB;
|
||||
|
||||
@ -446,7 +441,6 @@ bool ocl4dnnGEMV<float>(const CBLAS_TRANSPOSE TransA,
|
||||
const int32_t offx, const float beta, UMat y,
|
||||
const int32_t offy)
|
||||
{
|
||||
ocl::Queue queue = ocl::Queue::getDefault();
|
||||
bool ret = false;
|
||||
|
||||
if (TransA == CblasNoTrans)
|
||||
@ -507,8 +501,6 @@ bool ocl4dnnAXPY(const int32_t N, const Dtype alpha,
|
||||
const UMat X, const int32_t offX, UMat Y,
|
||||
const int32_t offY)
|
||||
{
|
||||
ocl::Context ctx = ocl::Context::getDefault();
|
||||
|
||||
ocl::Kernel oclk_axpy(CL_KERNEL_SELECT("axpy"), cv::ocl::dnn::math_oclsrc);
|
||||
if (oclk_axpy.empty())
|
||||
return false;
|
||||
|
@ -184,8 +184,6 @@ void OCL4DNNConvSpatial<Dtype>::collectCommonInformation()
|
||||
addDef("as_Dtype2", "as_float2");
|
||||
addDef("as_Dtype4", "as_float4");
|
||||
addDef("as_Dtype8", "as_float8");
|
||||
addDef("Dtype_ID", (int)CV_32F);
|
||||
addDef("Dtype_SIZE", (int)sizeof(Dtype));
|
||||
}
|
||||
|
||||
typedef enum {
|
||||
|
@ -92,7 +92,6 @@ bool OCL4DNNLRN<Dtype>::Forward(const UMat& bottom, UMat& top)
|
||||
template<typename Dtype>
|
||||
bool OCL4DNNLRN<Dtype>::crossChannelForward(const UMat& bottom, UMat& top)
|
||||
{
|
||||
ocl::Queue queue = ocl::Queue::getDefault();
|
||||
CHECK_EQ(phase_test_, true) << "Only support forward inference.";
|
||||
|
||||
cl_uint argIdx = 0;
|
||||
|
@ -97,7 +97,6 @@ bool OCL4DNNPool<Dtype>::Forward(const UMat& bottom,
|
||||
UMat& top_mask)
|
||||
{
|
||||
bool ret = true;
|
||||
ocl::Queue queue = ocl::Queue::getDefault();
|
||||
size_t global[] = { 128 * 128 };
|
||||
size_t local[] = { 128 };
|
||||
cl_uint argIdx = 0;
|
||||
|
@ -83,7 +83,6 @@ template<typename Dtype>
|
||||
bool OCL4DNNSoftmax<Dtype>::Forward(const UMat& bottom, UMat& top)
|
||||
{
|
||||
bool ret = false;
|
||||
ocl::Queue queue = ocl::Queue::getDefault();
|
||||
bool intel_subgroup = ocl::Device::getDefault().intelSubgroupsSupport();
|
||||
if (intel_subgroup && inner_num_ < 128)
|
||||
{
|
||||
|
@ -82,7 +82,6 @@
|
||||
#define LOOP(N, VAR, STMT) CAT(LOOP, N)((VAR), (STMT))
|
||||
|
||||
#if defined(convolve_simd) || defined(Conv_Interleaved)
|
||||
#if Dtype_SIZE == 4
|
||||
#define INT_TYPE uint
|
||||
#define INT_TYPE2 uint2
|
||||
#define INT_TYPE4 uint4
|
||||
@ -91,9 +90,6 @@
|
||||
#define SUB_GROUP_BLOCK_READ4 intel_sub_group_block_read4
|
||||
#define SUB_GROUP_BLOCK_READ8 intel_sub_group_block_read8
|
||||
#define SUB_GROUP_BLOCK_READ intel_sub_group_block_read
|
||||
#else
|
||||
#error "Unsupported type"
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KERNEL_BASIC
|
||||
@ -176,11 +172,7 @@ __kernel void ConvolveBasic(
|
||||
|
||||
#elif defined KERNEL_IDLF
|
||||
|
||||
#if TYPE == TYPE_HALF
|
||||
#define VLOAD4(_v, _p) do { (_v).s0 = *(_p); (_v).s1 = *(_p + 1); (_v).s2 = *(_p + 2); (_v).s3 = *(_p + 3); } while(0)
|
||||
#else
|
||||
#define VLOAD4(_v, _p) do { _v = vload4(0, _p); } while(0)
|
||||
#endif
|
||||
|
||||
// Each work-item computes an OUT_BLOCK_WIDTH * OUT_BLOCK_HEIGHT region of one output map.
|
||||
// Each work-group (which will be mapped to 1 SIMD16/SIMD8 EU thread) will compute 16/8 different feature maps, but each feature map is for the same region of the input image.
|
||||
|
Loading…
Reference in New Issue
Block a user