mirror of
https://github.com/opencv/opencv.git
synced 2025-06-07 17:44:04 +08:00
Merge pull request #11927 from pengli:3.4
This commit is contained in:
commit
82c7ab0231
@ -502,15 +502,23 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
|
||||
const bool kernel_width_is_odd = KERNEL_WIDTH % 2 == 1;
|
||||
|
||||
#if INPUT_PAD_W == 0 && INPUT_PAD_H == 0 && DILATION_X == 1 && DILATION_Y == 1 && INPUT_PAD_BOTTOM == 0 && INPUT_PAD_RIGHT == 0
|
||||
#if KERNEL_WIDTH == 3
|
||||
Dtype_t blockA00 = vload3(0, src0_read);
|
||||
Dtype* pblockA00 = (Dtype*)(&blockA00);
|
||||
#else
|
||||
Dtype_t blockA00 = ( (const __global Dtype_t*)src0_read )[ 0 ];
|
||||
Dtype* pblockA00 = (Dtype*)(&blockA00);
|
||||
#endif
|
||||
#else
|
||||
Dtype_t blockA00;
|
||||
Dtype* pblockA00 = (Dtype*)(&blockA00);
|
||||
int pos = 0;
|
||||
LOOP(KERNEL_WIDTH, pos,
|
||||
{
|
||||
if (curr_y >= INPUT_PAD_H && curr_y < input_height + INPUT_PAD_H && curr_x + pos * DILATION_X >= INPUT_PAD_W && curr_x + pos * DILATION_X < input_width + INPUT_PAD_W)
|
||||
if (curr_y >= INPUT_PAD_H &&
|
||||
curr_y < input_height + INPUT_PAD_H &&
|
||||
curr_x + pos * DILATION_X >= INPUT_PAD_W &&
|
||||
curr_x + pos * DILATION_X < input_width + INPUT_PAD_W)
|
||||
pblockA00[pos] = src0_read[pos * DILATION_X];
|
||||
else
|
||||
pblockA00[pos] = 0;
|
||||
@ -564,7 +572,8 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
|
||||
//while( ++patch_row < 1 ); //debug
|
||||
while( ++patch_row < KERNEL_HEIGHT );
|
||||
|
||||
src0_read += slice_pitch - ( KERNEL_HEIGHT * ROW_PITCH * DILATION_Y); // reset to start of next slice of patch
|
||||
// reset to start of next slice of patch
|
||||
src0_read += slice_pitch - ( KERNEL_HEIGHT * ROW_PITCH * DILATION_Y);
|
||||
}
|
||||
//while ( ++patch_depth < 1 ); //debug
|
||||
while ( ++patch_depth < INPUT_DEPTH );
|
||||
@ -653,7 +662,10 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
|
||||
int pos = 0;
|
||||
LOOP(KERNEL_WIDTH, pos,
|
||||
{
|
||||
if (curr_y >= INPUT_PAD_H && curr_y < input_height + INPUT_PAD_H && curr_x + pos * DILATION_X >= INPUT_PAD_W && curr_x + pos * DILATION_X < input_width + INPUT_PAD_W)
|
||||
if (curr_y >= INPUT_PAD_H &&
|
||||
curr_y < input_height + INPUT_PAD_H &&
|
||||
curr_x + pos * DILATION_X >= INPUT_PAD_W &&
|
||||
curr_x + pos * DILATION_X < input_width + INPUT_PAD_W)
|
||||
pblockA00[pos] = src0_read[pos * DILATION_X];
|
||||
else
|
||||
pblockA00[pos] = 0;
|
||||
@ -730,7 +742,8 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
|
||||
//while( ++patch_row < 1 ); //debug
|
||||
while( ++patch_row < KERNEL_HEIGHT );
|
||||
|
||||
src0_read += slice_pitch - ( KERNEL_HEIGHT * ROW_PITCH * DILATION_Y ); // reset to start of next slice of patch
|
||||
// reset to start of next slice of patch
|
||||
src0_read += slice_pitch - ( KERNEL_HEIGHT * ROW_PITCH * DILATION_Y );
|
||||
}
|
||||
//while ( ++patch_depth < 1 ); //debug
|
||||
while ( ++patch_depth < INPUT_DEPTH );
|
||||
@ -883,17 +896,38 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
|
||||
// ...
|
||||
const bool kernel_width_is_odd = KERNEL_WIDTH % 2 == 1;
|
||||
#if INPUT_PAD_H == 0 && INPUT_PAD_W == 0 && DILATION_X == 1 && DILATION_Y == 1 && INPUT_PAD_BOTTOM == 0 && INPUT_PAD_RIGHT == 0
|
||||
Dtype_t blockA00 = ( (const __global Dtype_t*)src0_read0 )[ 0 ]; src0_read0 += ROW_PITCH;
|
||||
Dtype_t blockA01 = ( (const __global Dtype_t*)src0_read1 )[ 0 ]; src0_read1 += ROW_PITCH;
|
||||
#if KERNEL_WIDTH == 3
|
||||
Dtype_t blockA00 = vload3(0, src0_read0); src0_read0 += ROW_PITCH;
|
||||
Dtype_t blockA01 = vload3(0, src0_read1); src0_read1 += ROW_PITCH;
|
||||
Dtype* pblockA00 = (Dtype*)(&blockA00);
|
||||
Dtype* pblockA01 = (Dtype*)(&blockA01);
|
||||
#else
|
||||
Dtype_t blockA00 = { (Dtype)0.f };
|
||||
Dtype_t blockA01 = { (Dtype)0.f };
|
||||
Dtype* pblockA00 = (Dtype*)(&blockA00);
|
||||
Dtype* pblockA01 = (Dtype*)(&blockA01);
|
||||
int pos = 0;
|
||||
LOOP(KERNEL_WIDTH, pos,
|
||||
{
|
||||
if (curr_x0 + pos < input_width)
|
||||
pblockA00[pos] = src0_read0[pos];
|
||||
|
||||
if (curr_x1 + pos < input_width)
|
||||
pblockA01[pos] = src0_read1[pos];
|
||||
})
|
||||
src0_read0 += ROW_PITCH;
|
||||
src0_read1 += ROW_PITCH;
|
||||
#endif
|
||||
#else
|
||||
Dtype_t blockA00;
|
||||
Dtype* pblockA00 = (Dtype*)(&blockA00);
|
||||
int pos = 0;
|
||||
LOOP(KERNEL_WIDTH, pos,
|
||||
{
|
||||
if (curr_y0 >= INPUT_PAD_H && curr_y0 < input_height + INPUT_PAD_H && curr_x0 + pos * DILATION_X >= INPUT_PAD_W && curr_x0 + pos * DILATION_X < input_width + INPUT_PAD_W)
|
||||
if (curr_y0 >= INPUT_PAD_H &&
|
||||
curr_y0 < input_height + INPUT_PAD_H &&
|
||||
curr_x0 + pos * DILATION_X >= INPUT_PAD_W &&
|
||||
curr_x0 + pos * DILATION_X < input_width + INPUT_PAD_W)
|
||||
pblockA00[pos] = src0_read0[pos * DILATION_X];
|
||||
else
|
||||
pblockA00[pos] = 0;
|
||||
@ -904,7 +938,10 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
|
||||
pos = 0;
|
||||
LOOP(KERNEL_WIDTH, pos,
|
||||
{
|
||||
if (curr_y1 >= INPUT_PAD_H && curr_y1 < input_height + INPUT_PAD_H && curr_x1 + pos * DILATION_X >= INPUT_PAD_W && curr_x1 + pos * DILATION_X < input_width + INPUT_PAD_W)
|
||||
if (curr_y1 >= INPUT_PAD_H &&
|
||||
curr_y1 < input_height + INPUT_PAD_H &&
|
||||
curr_x1 + pos * DILATION_X >= INPUT_PAD_W &&
|
||||
curr_x1 + pos * DILATION_X < input_width + INPUT_PAD_W)
|
||||
pblockA01[pos] = src0_read1[pos * DILATION_X];
|
||||
else
|
||||
pblockA01[pos] = 0;
|
||||
@ -972,7 +1009,8 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
|
||||
curr_y0 = saved_y0;
|
||||
curr_y1 = saved_y1;
|
||||
#endif
|
||||
src0_read0 += slice_pitch - ( KERNEL_HEIGHT * ROW_PITCH * DILATION_Y ); // reset to start of next slice of patch
|
||||
// reset to start of next slice of patch
|
||||
src0_read0 += slice_pitch - ( KERNEL_HEIGHT * ROW_PITCH * DILATION_Y );
|
||||
src0_read1 += slice_pitch - ( KERNEL_HEIGHT * ROW_PITCH * DILATION_Y );
|
||||
}
|
||||
//while ( ++patch_depth < 1 ); //debug
|
||||
@ -1084,7 +1122,10 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
|
||||
int pos = 0;
|
||||
LOOP(KERNEL_WIDTH, pos,
|
||||
{
|
||||
if (curr_y0 >= INPUT_PAD_H && curr_y0 < input_height + INPUT_PAD_H && curr_x0 + pos * DILATION_X >= INPUT_PAD_W && curr_x0 + pos * DILATION_X < input_width + INPUT_PAD_W)
|
||||
if (curr_y0 >= INPUT_PAD_H &&
|
||||
curr_y0 < input_height + INPUT_PAD_H &&
|
||||
curr_x0 + pos * DILATION_X >= INPUT_PAD_W &&
|
||||
curr_x0 + pos * DILATION_X < input_width + INPUT_PAD_W)
|
||||
pblockA00[pos] = src0_read0[pos * DILATION_X];
|
||||
else
|
||||
pblockA00[pos] = 0;
|
||||
@ -1095,7 +1136,10 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
|
||||
pos = 0;
|
||||
LOOP(KERNEL_WIDTH, pos,
|
||||
{
|
||||
if (curr_y1 >= INPUT_PAD_H && curr_y1 < input_height + INPUT_PAD_H && curr_x1 + pos * DILATION_X >= INPUT_PAD_W && curr_x1 + pos * DILATION_X < input_width + INPUT_PAD_W)
|
||||
if (curr_y1 >= INPUT_PAD_H &&
|
||||
curr_y1 < input_height + INPUT_PAD_H &&
|
||||
curr_x1 + pos * DILATION_X >= INPUT_PAD_W &&
|
||||
curr_x1 + pos * DILATION_X < input_width + INPUT_PAD_W)
|
||||
pblockA01[pos] = src0_read1[pos * DILATION_X];
|
||||
else
|
||||
pblockA01[pos] = 0;
|
||||
@ -1185,7 +1229,8 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
|
||||
curr_y0 = saved_y0;
|
||||
curr_y1 = saved_y1;
|
||||
#endif
|
||||
src0_read0 += slice_pitch - ( KERNEL_HEIGHT * ROW_PITCH * DILATION_Y ); // reset to start of next slice of patch
|
||||
// reset to start of next slice of patch
|
||||
src0_read0 += slice_pitch - ( KERNEL_HEIGHT * ROW_PITCH * DILATION_Y );
|
||||
src0_read1 += slice_pitch - ( KERNEL_HEIGHT * ROW_PITCH * DILATION_Y );
|
||||
}
|
||||
//while ( ++patch_depth < 1 ); //debug
|
||||
@ -1409,15 +1454,23 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
|
||||
const bool kernel_width_is_odd = KERNEL_WIDTH % 2 == 1;
|
||||
|
||||
#if INPUT_PAD_W == 0 && INPUT_PAD_H == 0 && DILATION_X == 1 && DILATION_Y == 1 && INPUT_PAD_BOTTOM == 0 && INPUT_PAD_RIGHT == 0
|
||||
#if KERNEL_WIDTH == 3
|
||||
Dtype_t blockA00 = vload3(0, src0_read);
|
||||
Dtype* pblockA00 = (Dtype*)(&blockA00);
|
||||
#else
|
||||
Dtype_t blockA00 = ( (const __global Dtype_t*)src0_read )[ 0 ];
|
||||
Dtype* pblockA00 = (Dtype*)(&blockA00);
|
||||
#endif
|
||||
#else
|
||||
Dtype_t blockA00;
|
||||
Dtype* pblockA00 = (Dtype*)(&blockA00);
|
||||
int pos = 0;
|
||||
LOOP(KERNEL_WIDTH, pos,
|
||||
{
|
||||
if (curr_y >= INPUT_PAD_H && curr_y < input_height + INPUT_PAD_H && curr_x + pos * DILATION_X >= INPUT_PAD_W && curr_x + pos * DILATION_X < input_width + INPUT_PAD_W)
|
||||
if (curr_y >= INPUT_PAD_H &&
|
||||
curr_y < input_height + INPUT_PAD_H &&
|
||||
curr_x + pos * DILATION_X >= INPUT_PAD_W &&
|
||||
curr_x + pos * DILATION_X < input_width + INPUT_PAD_W)
|
||||
pblockA00[pos] = src0_read[pos * DILATION_X];
|
||||
else
|
||||
pblockA00[pos] = 0;
|
||||
@ -1463,7 +1516,8 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
|
||||
//while( ++patch_row < 1 ); //debug
|
||||
while( ++patch_row < KERNEL_HEIGHT );
|
||||
|
||||
src0_read += slice_pitch - ( KERNEL_HEIGHT * ROW_PITCH * DILATION_Y ); // reset to start of next slice of patch
|
||||
// reset to start of next slice of patch
|
||||
src0_read += slice_pitch - ( KERNEL_HEIGHT * ROW_PITCH * DILATION_Y );
|
||||
}
|
||||
//while ( ++patch_depth < 1 ); //debug
|
||||
while ( ++patch_depth < INPUT_DEPTH );
|
||||
@ -1600,7 +1654,10 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
|
||||
int pos = 0;
|
||||
LOOP(KERNEL_WIDTH, pos,
|
||||
{
|
||||
if (curr_y0 >= INPUT_PAD_H && curr_y0 < input_height + INPUT_PAD_H && curr_x0 + pos * DILATION_X >= INPUT_PAD_W && curr_x0 + pos * DILATION_X < input_width + INPUT_PAD_W)
|
||||
if (curr_y0 >= INPUT_PAD_H &&
|
||||
curr_y0 < input_height + INPUT_PAD_H &&
|
||||
curr_x0 + pos * DILATION_X >= INPUT_PAD_W &&
|
||||
curr_x0 + pos * DILATION_X < input_width + INPUT_PAD_W)
|
||||
pblockA00[pos] = src0_read0[pos * DILATION_X];
|
||||
else
|
||||
pblockA00[pos] = 0;
|
||||
@ -1611,7 +1668,10 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
|
||||
pos = 0;
|
||||
LOOP(KERNEL_WIDTH, pos,
|
||||
{
|
||||
if (curr_y1 >= INPUT_PAD_H && curr_y1 < input_height + INPUT_PAD_H && curr_x1 + pos * DILATION_X >= INPUT_PAD_W && curr_x1 + pos * DILATION_X < input_width + INPUT_PAD_W)
|
||||
if (curr_y1 >= INPUT_PAD_H &&
|
||||
curr_y1 < input_height + INPUT_PAD_H &&
|
||||
curr_x1 + pos * DILATION_X >= INPUT_PAD_W &&
|
||||
curr_x1 + pos * DILATION_X < input_width + INPUT_PAD_W)
|
||||
pblockA01[pos] = src0_read1[pos * DILATION_X];
|
||||
else
|
||||
pblockA01[pos] = 0;
|
||||
@ -1667,7 +1727,8 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
|
||||
curr_y0 = saved_y0;
|
||||
curr_y1 = saved_y1;
|
||||
#endif
|
||||
src0_read0 += slice_pitch - ( KERNEL_HEIGHT * ROW_PITCH * DILATION_Y); // reset to start of next slice of patch
|
||||
// reset to start of next slice of patch
|
||||
src0_read0 += slice_pitch - ( KERNEL_HEIGHT * ROW_PITCH * DILATION_Y);
|
||||
src0_read1 += slice_pitch - ( KERNEL_HEIGHT * ROW_PITCH * DILATION_Y);
|
||||
}
|
||||
//while ( ++patch_depth < 1 ); //debug
|
||||
|
Loading…
Reference in New Issue
Block a user