mirror of
https://github.com/opencv/opencv.git
synced 2025-06-10 11:03:03 +08:00
Merge pull request #11927 from pengli:3.4
This commit is contained in:
commit
82c7ab0231
@ -502,15 +502,23 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
|
|||||||
const bool kernel_width_is_odd = KERNEL_WIDTH % 2 == 1;
|
const bool kernel_width_is_odd = KERNEL_WIDTH % 2 == 1;
|
||||||
|
|
||||||
#if INPUT_PAD_W == 0 && INPUT_PAD_H == 0 && DILATION_X == 1 && DILATION_Y == 1 && INPUT_PAD_BOTTOM == 0 && INPUT_PAD_RIGHT == 0
|
#if INPUT_PAD_W == 0 && INPUT_PAD_H == 0 && DILATION_X == 1 && DILATION_Y == 1 && INPUT_PAD_BOTTOM == 0 && INPUT_PAD_RIGHT == 0
|
||||||
|
#if KERNEL_WIDTH == 3
|
||||||
|
Dtype_t blockA00 = vload3(0, src0_read);
|
||||||
|
Dtype* pblockA00 = (Dtype*)(&blockA00);
|
||||||
|
#else
|
||||||
Dtype_t blockA00 = ( (const __global Dtype_t*)src0_read )[ 0 ];
|
Dtype_t blockA00 = ( (const __global Dtype_t*)src0_read )[ 0 ];
|
||||||
Dtype* pblockA00 = (Dtype*)(&blockA00);
|
Dtype* pblockA00 = (Dtype*)(&blockA00);
|
||||||
|
#endif
|
||||||
#else
|
#else
|
||||||
Dtype_t blockA00;
|
Dtype_t blockA00;
|
||||||
Dtype* pblockA00 = (Dtype*)(&blockA00);
|
Dtype* pblockA00 = (Dtype*)(&blockA00);
|
||||||
int pos = 0;
|
int pos = 0;
|
||||||
LOOP(KERNEL_WIDTH, pos,
|
LOOP(KERNEL_WIDTH, pos,
|
||||||
{
|
{
|
||||||
if (curr_y >= INPUT_PAD_H && curr_y < input_height + INPUT_PAD_H && curr_x + pos * DILATION_X >= INPUT_PAD_W && curr_x + pos * DILATION_X < input_width + INPUT_PAD_W)
|
if (curr_y >= INPUT_PAD_H &&
|
||||||
|
curr_y < input_height + INPUT_PAD_H &&
|
||||||
|
curr_x + pos * DILATION_X >= INPUT_PAD_W &&
|
||||||
|
curr_x + pos * DILATION_X < input_width + INPUT_PAD_W)
|
||||||
pblockA00[pos] = src0_read[pos * DILATION_X];
|
pblockA00[pos] = src0_read[pos * DILATION_X];
|
||||||
else
|
else
|
||||||
pblockA00[pos] = 0;
|
pblockA00[pos] = 0;
|
||||||
@ -564,7 +572,8 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
|
|||||||
//while( ++patch_row < 1 ); //debug
|
//while( ++patch_row < 1 ); //debug
|
||||||
while( ++patch_row < KERNEL_HEIGHT );
|
while( ++patch_row < KERNEL_HEIGHT );
|
||||||
|
|
||||||
src0_read += slice_pitch - ( KERNEL_HEIGHT * ROW_PITCH * DILATION_Y); // reset to start of next slice of patch
|
// reset to start of next slice of patch
|
||||||
|
src0_read += slice_pitch - ( KERNEL_HEIGHT * ROW_PITCH * DILATION_Y);
|
||||||
}
|
}
|
||||||
//while ( ++patch_depth < 1 ); //debug
|
//while ( ++patch_depth < 1 ); //debug
|
||||||
while ( ++patch_depth < INPUT_DEPTH );
|
while ( ++patch_depth < INPUT_DEPTH );
|
||||||
@ -653,7 +662,10 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
|
|||||||
int pos = 0;
|
int pos = 0;
|
||||||
LOOP(KERNEL_WIDTH, pos,
|
LOOP(KERNEL_WIDTH, pos,
|
||||||
{
|
{
|
||||||
if (curr_y >= INPUT_PAD_H && curr_y < input_height + INPUT_PAD_H && curr_x + pos * DILATION_X >= INPUT_PAD_W && curr_x + pos * DILATION_X < input_width + INPUT_PAD_W)
|
if (curr_y >= INPUT_PAD_H &&
|
||||||
|
curr_y < input_height + INPUT_PAD_H &&
|
||||||
|
curr_x + pos * DILATION_X >= INPUT_PAD_W &&
|
||||||
|
curr_x + pos * DILATION_X < input_width + INPUT_PAD_W)
|
||||||
pblockA00[pos] = src0_read[pos * DILATION_X];
|
pblockA00[pos] = src0_read[pos * DILATION_X];
|
||||||
else
|
else
|
||||||
pblockA00[pos] = 0;
|
pblockA00[pos] = 0;
|
||||||
@ -730,7 +742,8 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
|
|||||||
//while( ++patch_row < 1 ); //debug
|
//while( ++patch_row < 1 ); //debug
|
||||||
while( ++patch_row < KERNEL_HEIGHT );
|
while( ++patch_row < KERNEL_HEIGHT );
|
||||||
|
|
||||||
src0_read += slice_pitch - ( KERNEL_HEIGHT * ROW_PITCH * DILATION_Y ); // reset to start of next slice of patch
|
// reset to start of next slice of patch
|
||||||
|
src0_read += slice_pitch - ( KERNEL_HEIGHT * ROW_PITCH * DILATION_Y );
|
||||||
}
|
}
|
||||||
//while ( ++patch_depth < 1 ); //debug
|
//while ( ++patch_depth < 1 ); //debug
|
||||||
while ( ++patch_depth < INPUT_DEPTH );
|
while ( ++patch_depth < INPUT_DEPTH );
|
||||||
@ -883,17 +896,38 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
|
|||||||
// ...
|
// ...
|
||||||
const bool kernel_width_is_odd = KERNEL_WIDTH % 2 == 1;
|
const bool kernel_width_is_odd = KERNEL_WIDTH % 2 == 1;
|
||||||
#if INPUT_PAD_H == 0 && INPUT_PAD_W == 0 && DILATION_X == 1 && DILATION_Y == 1 && INPUT_PAD_BOTTOM == 0 && INPUT_PAD_RIGHT == 0
|
#if INPUT_PAD_H == 0 && INPUT_PAD_W == 0 && DILATION_X == 1 && DILATION_Y == 1 && INPUT_PAD_BOTTOM == 0 && INPUT_PAD_RIGHT == 0
|
||||||
Dtype_t blockA00 = ( (const __global Dtype_t*)src0_read0 )[ 0 ]; src0_read0 += ROW_PITCH;
|
#if KERNEL_WIDTH == 3
|
||||||
Dtype_t blockA01 = ( (const __global Dtype_t*)src0_read1 )[ 0 ]; src0_read1 += ROW_PITCH;
|
Dtype_t blockA00 = vload3(0, src0_read0); src0_read0 += ROW_PITCH;
|
||||||
|
Dtype_t blockA01 = vload3(0, src0_read1); src0_read1 += ROW_PITCH;
|
||||||
Dtype* pblockA00 = (Dtype*)(&blockA00);
|
Dtype* pblockA00 = (Dtype*)(&blockA00);
|
||||||
Dtype* pblockA01 = (Dtype*)(&blockA01);
|
Dtype* pblockA01 = (Dtype*)(&blockA01);
|
||||||
|
#else
|
||||||
|
Dtype_t blockA00 = { (Dtype)0.f };
|
||||||
|
Dtype_t blockA01 = { (Dtype)0.f };
|
||||||
|
Dtype* pblockA00 = (Dtype*)(&blockA00);
|
||||||
|
Dtype* pblockA01 = (Dtype*)(&blockA01);
|
||||||
|
int pos = 0;
|
||||||
|
LOOP(KERNEL_WIDTH, pos,
|
||||||
|
{
|
||||||
|
if (curr_x0 + pos < input_width)
|
||||||
|
pblockA00[pos] = src0_read0[pos];
|
||||||
|
|
||||||
|
if (curr_x1 + pos < input_width)
|
||||||
|
pblockA01[pos] = src0_read1[pos];
|
||||||
|
})
|
||||||
|
src0_read0 += ROW_PITCH;
|
||||||
|
src0_read1 += ROW_PITCH;
|
||||||
|
#endif
|
||||||
#else
|
#else
|
||||||
Dtype_t blockA00;
|
Dtype_t blockA00;
|
||||||
Dtype* pblockA00 = (Dtype*)(&blockA00);
|
Dtype* pblockA00 = (Dtype*)(&blockA00);
|
||||||
int pos = 0;
|
int pos = 0;
|
||||||
LOOP(KERNEL_WIDTH, pos,
|
LOOP(KERNEL_WIDTH, pos,
|
||||||
{
|
{
|
||||||
if (curr_y0 >= INPUT_PAD_H && curr_y0 < input_height + INPUT_PAD_H && curr_x0 + pos * DILATION_X >= INPUT_PAD_W && curr_x0 + pos * DILATION_X < input_width + INPUT_PAD_W)
|
if (curr_y0 >= INPUT_PAD_H &&
|
||||||
|
curr_y0 < input_height + INPUT_PAD_H &&
|
||||||
|
curr_x0 + pos * DILATION_X >= INPUT_PAD_W &&
|
||||||
|
curr_x0 + pos * DILATION_X < input_width + INPUT_PAD_W)
|
||||||
pblockA00[pos] = src0_read0[pos * DILATION_X];
|
pblockA00[pos] = src0_read0[pos * DILATION_X];
|
||||||
else
|
else
|
||||||
pblockA00[pos] = 0;
|
pblockA00[pos] = 0;
|
||||||
@ -904,7 +938,10 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
|
|||||||
pos = 0;
|
pos = 0;
|
||||||
LOOP(KERNEL_WIDTH, pos,
|
LOOP(KERNEL_WIDTH, pos,
|
||||||
{
|
{
|
||||||
if (curr_y1 >= INPUT_PAD_H && curr_y1 < input_height + INPUT_PAD_H && curr_x1 + pos * DILATION_X >= INPUT_PAD_W && curr_x1 + pos * DILATION_X < input_width + INPUT_PAD_W)
|
if (curr_y1 >= INPUT_PAD_H &&
|
||||||
|
curr_y1 < input_height + INPUT_PAD_H &&
|
||||||
|
curr_x1 + pos * DILATION_X >= INPUT_PAD_W &&
|
||||||
|
curr_x1 + pos * DILATION_X < input_width + INPUT_PAD_W)
|
||||||
pblockA01[pos] = src0_read1[pos * DILATION_X];
|
pblockA01[pos] = src0_read1[pos * DILATION_X];
|
||||||
else
|
else
|
||||||
pblockA01[pos] = 0;
|
pblockA01[pos] = 0;
|
||||||
@ -972,7 +1009,8 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
|
|||||||
curr_y0 = saved_y0;
|
curr_y0 = saved_y0;
|
||||||
curr_y1 = saved_y1;
|
curr_y1 = saved_y1;
|
||||||
#endif
|
#endif
|
||||||
src0_read0 += slice_pitch - ( KERNEL_HEIGHT * ROW_PITCH * DILATION_Y ); // reset to start of next slice of patch
|
// reset to start of next slice of patch
|
||||||
|
src0_read0 += slice_pitch - ( KERNEL_HEIGHT * ROW_PITCH * DILATION_Y );
|
||||||
src0_read1 += slice_pitch - ( KERNEL_HEIGHT * ROW_PITCH * DILATION_Y );
|
src0_read1 += slice_pitch - ( KERNEL_HEIGHT * ROW_PITCH * DILATION_Y );
|
||||||
}
|
}
|
||||||
//while ( ++patch_depth < 1 ); //debug
|
//while ( ++patch_depth < 1 ); //debug
|
||||||
@ -1084,7 +1122,10 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
|
|||||||
int pos = 0;
|
int pos = 0;
|
||||||
LOOP(KERNEL_WIDTH, pos,
|
LOOP(KERNEL_WIDTH, pos,
|
||||||
{
|
{
|
||||||
if (curr_y0 >= INPUT_PAD_H && curr_y0 < input_height + INPUT_PAD_H && curr_x0 + pos * DILATION_X >= INPUT_PAD_W && curr_x0 + pos * DILATION_X < input_width + INPUT_PAD_W)
|
if (curr_y0 >= INPUT_PAD_H &&
|
||||||
|
curr_y0 < input_height + INPUT_PAD_H &&
|
||||||
|
curr_x0 + pos * DILATION_X >= INPUT_PAD_W &&
|
||||||
|
curr_x0 + pos * DILATION_X < input_width + INPUT_PAD_W)
|
||||||
pblockA00[pos] = src0_read0[pos * DILATION_X];
|
pblockA00[pos] = src0_read0[pos * DILATION_X];
|
||||||
else
|
else
|
||||||
pblockA00[pos] = 0;
|
pblockA00[pos] = 0;
|
||||||
@ -1095,7 +1136,10 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
|
|||||||
pos = 0;
|
pos = 0;
|
||||||
LOOP(KERNEL_WIDTH, pos,
|
LOOP(KERNEL_WIDTH, pos,
|
||||||
{
|
{
|
||||||
if (curr_y1 >= INPUT_PAD_H && curr_y1 < input_height + INPUT_PAD_H && curr_x1 + pos * DILATION_X >= INPUT_PAD_W && curr_x1 + pos * DILATION_X < input_width + INPUT_PAD_W)
|
if (curr_y1 >= INPUT_PAD_H &&
|
||||||
|
curr_y1 < input_height + INPUT_PAD_H &&
|
||||||
|
curr_x1 + pos * DILATION_X >= INPUT_PAD_W &&
|
||||||
|
curr_x1 + pos * DILATION_X < input_width + INPUT_PAD_W)
|
||||||
pblockA01[pos] = src0_read1[pos * DILATION_X];
|
pblockA01[pos] = src0_read1[pos * DILATION_X];
|
||||||
else
|
else
|
||||||
pblockA01[pos] = 0;
|
pblockA01[pos] = 0;
|
||||||
@ -1185,7 +1229,8 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
|
|||||||
curr_y0 = saved_y0;
|
curr_y0 = saved_y0;
|
||||||
curr_y1 = saved_y1;
|
curr_y1 = saved_y1;
|
||||||
#endif
|
#endif
|
||||||
src0_read0 += slice_pitch - ( KERNEL_HEIGHT * ROW_PITCH * DILATION_Y ); // reset to start of next slice of patch
|
// reset to start of next slice of patch
|
||||||
|
src0_read0 += slice_pitch - ( KERNEL_HEIGHT * ROW_PITCH * DILATION_Y );
|
||||||
src0_read1 += slice_pitch - ( KERNEL_HEIGHT * ROW_PITCH * DILATION_Y );
|
src0_read1 += slice_pitch - ( KERNEL_HEIGHT * ROW_PITCH * DILATION_Y );
|
||||||
}
|
}
|
||||||
//while ( ++patch_depth < 1 ); //debug
|
//while ( ++patch_depth < 1 ); //debug
|
||||||
@ -1409,15 +1454,23 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
|
|||||||
const bool kernel_width_is_odd = KERNEL_WIDTH % 2 == 1;
|
const bool kernel_width_is_odd = KERNEL_WIDTH % 2 == 1;
|
||||||
|
|
||||||
#if INPUT_PAD_W == 0 && INPUT_PAD_H == 0 && DILATION_X == 1 && DILATION_Y == 1 && INPUT_PAD_BOTTOM == 0 && INPUT_PAD_RIGHT == 0
|
#if INPUT_PAD_W == 0 && INPUT_PAD_H == 0 && DILATION_X == 1 && DILATION_Y == 1 && INPUT_PAD_BOTTOM == 0 && INPUT_PAD_RIGHT == 0
|
||||||
|
#if KERNEL_WIDTH == 3
|
||||||
|
Dtype_t blockA00 = vload3(0, src0_read);
|
||||||
|
Dtype* pblockA00 = (Dtype*)(&blockA00);
|
||||||
|
#else
|
||||||
Dtype_t blockA00 = ( (const __global Dtype_t*)src0_read )[ 0 ];
|
Dtype_t blockA00 = ( (const __global Dtype_t*)src0_read )[ 0 ];
|
||||||
Dtype* pblockA00 = (Dtype*)(&blockA00);
|
Dtype* pblockA00 = (Dtype*)(&blockA00);
|
||||||
|
#endif
|
||||||
#else
|
#else
|
||||||
Dtype_t blockA00;
|
Dtype_t blockA00;
|
||||||
Dtype* pblockA00 = (Dtype*)(&blockA00);
|
Dtype* pblockA00 = (Dtype*)(&blockA00);
|
||||||
int pos = 0;
|
int pos = 0;
|
||||||
LOOP(KERNEL_WIDTH, pos,
|
LOOP(KERNEL_WIDTH, pos,
|
||||||
{
|
{
|
||||||
if (curr_y >= INPUT_PAD_H && curr_y < input_height + INPUT_PAD_H && curr_x + pos * DILATION_X >= INPUT_PAD_W && curr_x + pos * DILATION_X < input_width + INPUT_PAD_W)
|
if (curr_y >= INPUT_PAD_H &&
|
||||||
|
curr_y < input_height + INPUT_PAD_H &&
|
||||||
|
curr_x + pos * DILATION_X >= INPUT_PAD_W &&
|
||||||
|
curr_x + pos * DILATION_X < input_width + INPUT_PAD_W)
|
||||||
pblockA00[pos] = src0_read[pos * DILATION_X];
|
pblockA00[pos] = src0_read[pos * DILATION_X];
|
||||||
else
|
else
|
||||||
pblockA00[pos] = 0;
|
pblockA00[pos] = 0;
|
||||||
@ -1463,7 +1516,8 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
|
|||||||
//while( ++patch_row < 1 ); //debug
|
//while( ++patch_row < 1 ); //debug
|
||||||
while( ++patch_row < KERNEL_HEIGHT );
|
while( ++patch_row < KERNEL_HEIGHT );
|
||||||
|
|
||||||
src0_read += slice_pitch - ( KERNEL_HEIGHT * ROW_PITCH * DILATION_Y ); // reset to start of next slice of patch
|
// reset to start of next slice of patch
|
||||||
|
src0_read += slice_pitch - ( KERNEL_HEIGHT * ROW_PITCH * DILATION_Y );
|
||||||
}
|
}
|
||||||
//while ( ++patch_depth < 1 ); //debug
|
//while ( ++patch_depth < 1 ); //debug
|
||||||
while ( ++patch_depth < INPUT_DEPTH );
|
while ( ++patch_depth < INPUT_DEPTH );
|
||||||
@ -1600,7 +1654,10 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
|
|||||||
int pos = 0;
|
int pos = 0;
|
||||||
LOOP(KERNEL_WIDTH, pos,
|
LOOP(KERNEL_WIDTH, pos,
|
||||||
{
|
{
|
||||||
if (curr_y0 >= INPUT_PAD_H && curr_y0 < input_height + INPUT_PAD_H && curr_x0 + pos * DILATION_X >= INPUT_PAD_W && curr_x0 + pos * DILATION_X < input_width + INPUT_PAD_W)
|
if (curr_y0 >= INPUT_PAD_H &&
|
||||||
|
curr_y0 < input_height + INPUT_PAD_H &&
|
||||||
|
curr_x0 + pos * DILATION_X >= INPUT_PAD_W &&
|
||||||
|
curr_x0 + pos * DILATION_X < input_width + INPUT_PAD_W)
|
||||||
pblockA00[pos] = src0_read0[pos * DILATION_X];
|
pblockA00[pos] = src0_read0[pos * DILATION_X];
|
||||||
else
|
else
|
||||||
pblockA00[pos] = 0;
|
pblockA00[pos] = 0;
|
||||||
@ -1611,7 +1668,10 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
|
|||||||
pos = 0;
|
pos = 0;
|
||||||
LOOP(KERNEL_WIDTH, pos,
|
LOOP(KERNEL_WIDTH, pos,
|
||||||
{
|
{
|
||||||
if (curr_y1 >= INPUT_PAD_H && curr_y1 < input_height + INPUT_PAD_H && curr_x1 + pos * DILATION_X >= INPUT_PAD_W && curr_x1 + pos * DILATION_X < input_width + INPUT_PAD_W)
|
if (curr_y1 >= INPUT_PAD_H &&
|
||||||
|
curr_y1 < input_height + INPUT_PAD_H &&
|
||||||
|
curr_x1 + pos * DILATION_X >= INPUT_PAD_W &&
|
||||||
|
curr_x1 + pos * DILATION_X < input_width + INPUT_PAD_W)
|
||||||
pblockA01[pos] = src0_read1[pos * DILATION_X];
|
pblockA01[pos] = src0_read1[pos * DILATION_X];
|
||||||
else
|
else
|
||||||
pblockA01[pos] = 0;
|
pblockA01[pos] = 0;
|
||||||
@ -1667,7 +1727,8 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
|
|||||||
curr_y0 = saved_y0;
|
curr_y0 = saved_y0;
|
||||||
curr_y1 = saved_y1;
|
curr_y1 = saved_y1;
|
||||||
#endif
|
#endif
|
||||||
src0_read0 += slice_pitch - ( KERNEL_HEIGHT * ROW_PITCH * DILATION_Y); // reset to start of next slice of patch
|
// reset to start of next slice of patch
|
||||||
|
src0_read0 += slice_pitch - ( KERNEL_HEIGHT * ROW_PITCH * DILATION_Y);
|
||||||
src0_read1 += slice_pitch - ( KERNEL_HEIGHT * ROW_PITCH * DILATION_Y);
|
src0_read1 += slice_pitch - ( KERNEL_HEIGHT * ROW_PITCH * DILATION_Y);
|
||||||
}
|
}
|
||||||
//while ( ++patch_depth < 1 ); //debug
|
//while ( ++patch_depth < 1 ); //debug
|
||||||
|
Loading…
Reference in New Issue
Block a user