mirror of
https://github.com/opencv/opencv.git
synced 2025-08-06 14:36:36 +08:00
Merge remote-tracking branch 'upstream/3.4' into merge-3.4
This commit is contained in:
commit
fa66c6b797
@ -1318,7 +1318,7 @@ public:
|
||||
/** @brief Returns a zero array of the specified size and type.
|
||||
|
||||
The method returns a Matlab-style zero array initializer. It can be used to quickly form a constant
|
||||
array as a function parameter, part of a matrix expression, or as a matrix initializer. :
|
||||
array as a function parameter, part of a matrix expression, or as a matrix initializer:
|
||||
@code
|
||||
Mat A;
|
||||
A = Mat::zeros(3, 3, CV_32F);
|
||||
@ -1354,6 +1354,8 @@ public:
|
||||
The above operation does not form a 100x100 matrix of 1's and then multiply it by 3. Instead, it
|
||||
just remembers the scale factor (3 in this case) and use it when actually invoking the matrix
|
||||
initializer.
|
||||
@note In case of multi-channels type, only the first channel will be initialized with 1's, the
|
||||
others will be set to 0's.
|
||||
@param rows Number of rows.
|
||||
@param cols Number of columns.
|
||||
@param type Created matrix type.
|
||||
@ -1381,6 +1383,8 @@ public:
|
||||
// make a 4x4 diagonal matrix with 0.1's on the diagonal.
|
||||
Mat A = Mat::eye(4, 4, CV_32F)*0.1;
|
||||
@endcode
|
||||
@note In case of multi-channels type, identity matrix will be initialized only for the first channel,
|
||||
the others will be set to 0's
|
||||
@param rows Number of rows.
|
||||
@param cols Number of columns.
|
||||
@param type Created matrix type.
|
||||
|
@ -467,7 +467,7 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
|
||||
int saved_y = curr_y;
|
||||
#endif
|
||||
const __global Dtype *src0_read = src0
|
||||
+ aligned_input_size * global_z // batch offset
|
||||
+ aligned_input_size * global_z // batch offset
|
||||
+ (curr_y - INPUT_PAD_H) * ROW_PITCH // y offset
|
||||
+ (curr_x - INPUT_PAD_W); // x offset
|
||||
|
||||
@ -502,15 +502,23 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
|
||||
const bool kernel_width_is_odd = KERNEL_WIDTH % 2 == 1;
|
||||
|
||||
#if INPUT_PAD_W == 0 && INPUT_PAD_H == 0 && DILATION_X == 1 && DILATION_Y == 1 && INPUT_PAD_BOTTOM == 0 && INPUT_PAD_RIGHT == 0
|
||||
#if KERNEL_WIDTH == 3
|
||||
Dtype_t blockA00 = vload3(0, src0_read);
|
||||
Dtype* pblockA00 = (Dtype*)(&blockA00);
|
||||
#else
|
||||
Dtype_t blockA00 = ( (const __global Dtype_t*)src0_read )[ 0 ];
|
||||
Dtype* pblockA00 = (Dtype*)(&blockA00);
|
||||
#endif
|
||||
#else
|
||||
Dtype_t blockA00;
|
||||
Dtype* pblockA00 = (Dtype*)(&blockA00);
|
||||
int pos = 0;
|
||||
LOOP(KERNEL_WIDTH, pos,
|
||||
{
|
||||
if (curr_y >= INPUT_PAD_H && curr_y < input_height + INPUT_PAD_H && curr_x + pos * DILATION_X >= INPUT_PAD_W && curr_x + pos * DILATION_X < input_width + INPUT_PAD_W)
|
||||
if (curr_y >= INPUT_PAD_H &&
|
||||
curr_y < input_height + INPUT_PAD_H &&
|
||||
curr_x + pos * DILATION_X >= INPUT_PAD_W &&
|
||||
curr_x + pos * DILATION_X < input_width + INPUT_PAD_W)
|
||||
pblockA00[pos] = src0_read[pos * DILATION_X];
|
||||
else
|
||||
pblockA00[pos] = 0;
|
||||
@ -564,17 +572,18 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
|
||||
//while( ++patch_row < 1 ); //debug
|
||||
while( ++patch_row < KERNEL_HEIGHT );
|
||||
|
||||
src0_read += slice_pitch - ( KERNEL_HEIGHT * ROW_PITCH * DILATION_Y); // reset to start of next slice of patch
|
||||
// reset to start of next slice of patch
|
||||
src0_read += slice_pitch - ( KERNEL_HEIGHT * ROW_PITCH * DILATION_Y);
|
||||
}
|
||||
//while ( ++patch_depth < 1 ); //debug
|
||||
while ( ++patch_depth < INPUT_DEPTH );
|
||||
|
||||
// Dst resembles a cube of width x height x (output channel * batches). Each tile writes:
|
||||
// (SIMD * TILE_M) x 1 x TILE_N. Partial writes most likely generated if padding used.
|
||||
int out_offset = global_z * out_pitch_z // batch offset
|
||||
+ ( group_x * TILE_N ) * out_pitch_y // channel offset
|
||||
int out_offset = global_z * out_pitch_z // batch offset
|
||||
+ ( group_x * TILE_N ) * out_pitch_y // channel offset
|
||||
+ ( ( global_y * TILE_M ) / output_width + OUT_PADDING_HEIGHT) * OUT_PITCH_X // y offset
|
||||
+ ( ( global_y * TILE_M ) % output_width ) + OUT_PADDING_LEFT; // x offset
|
||||
+ ( ( global_y * TILE_M ) % output_width ) + OUT_PADDING_LEFT; // x offset
|
||||
|
||||
__global Dtype *out = dst + out_offset;
|
||||
#if APPLY_BIAS
|
||||
@ -621,7 +630,7 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
|
||||
int saved_y = curr_y;
|
||||
#endif
|
||||
const __global Dtype *src0_read = src0
|
||||
+ aligned_input_size * global_z // batch offset
|
||||
+ aligned_input_size * global_z // batch offset
|
||||
+ (curr_y - INPUT_PAD_H) * ROW_PITCH // y offset
|
||||
+ (curr_x - INPUT_PAD_W); // x offset
|
||||
|
||||
@ -653,7 +662,10 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
|
||||
int pos = 0;
|
||||
LOOP(KERNEL_WIDTH, pos,
|
||||
{
|
||||
if (curr_y >= INPUT_PAD_H && curr_y < input_height + INPUT_PAD_H && curr_x + pos * DILATION_X >= INPUT_PAD_W && curr_x + pos * DILATION_X < input_width + INPUT_PAD_W)
|
||||
if (curr_y >= INPUT_PAD_H &&
|
||||
curr_y < input_height + INPUT_PAD_H &&
|
||||
curr_x + pos * DILATION_X >= INPUT_PAD_W &&
|
||||
curr_x + pos * DILATION_X < input_width + INPUT_PAD_W)
|
||||
pblockA00[pos] = src0_read[pos * DILATION_X];
|
||||
else
|
||||
pblockA00[pos] = 0;
|
||||
@ -730,17 +742,18 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
|
||||
//while( ++patch_row < 1 ); //debug
|
||||
while( ++patch_row < KERNEL_HEIGHT );
|
||||
|
||||
src0_read += slice_pitch - ( KERNEL_HEIGHT * ROW_PITCH * DILATION_Y ); // reset to start of next slice of patch
|
||||
// reset to start of next slice of patch
|
||||
src0_read += slice_pitch - ( KERNEL_HEIGHT * ROW_PITCH * DILATION_Y );
|
||||
}
|
||||
//while ( ++patch_depth < 1 ); //debug
|
||||
while ( ++patch_depth < INPUT_DEPTH );
|
||||
|
||||
// Dst resembles a cube of width x height x (output channel * batches). Each tile writes:
|
||||
// (SIMD * TILE_M) x 1 x TILE_N. Partial writes most likely generated if padding used.
|
||||
int out_offset = global_z * out_pitch_z // batch offset
|
||||
+ ( group_x * TILE_N ) * out_pitch_y // channel offset
|
||||
int out_offset = global_z * out_pitch_z // batch offset
|
||||
+ ( group_x * TILE_N ) * out_pitch_y // channel offset
|
||||
+ ( ( global_y * TILE_M ) / output_width + OUT_PADDING_HEIGHT) * OUT_PITCH_X // y offset
|
||||
+ ( ( global_y * TILE_M ) % output_width ) + OUT_PADDING_LEFT; // x offset
|
||||
+ ( ( global_y * TILE_M ) % output_width ) + OUT_PADDING_LEFT; // x offset
|
||||
__global Dtype *out = dst + out_offset;
|
||||
#if APPLY_BIAS
|
||||
Dtype bias[4];
|
||||
@ -849,11 +862,11 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
|
||||
int saved_y1 = curr_y1;
|
||||
#endif
|
||||
const __global Dtype *src0_read0 = src0
|
||||
+ aligned_input_size * global_z // batch offset
|
||||
+ aligned_input_size * global_z // batch offset
|
||||
+ (curr_y0 - INPUT_PAD_H) * ROW_PITCH // y offset
|
||||
+ curr_x0 - INPUT_PAD_W; // x offset
|
||||
const __global Dtype *src0_read1 = src0
|
||||
+ aligned_input_size * global_z // batch offset
|
||||
+ aligned_input_size * global_z // batch offset
|
||||
+ (curr_y1 - INPUT_PAD_H) * ROW_PITCH // y offset
|
||||
+ curr_x1 - INPUT_PAD_W; // x offset
|
||||
|
||||
@ -883,17 +896,38 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
|
||||
// ...
|
||||
const bool kernel_width_is_odd = KERNEL_WIDTH % 2 == 1;
|
||||
#if INPUT_PAD_H == 0 && INPUT_PAD_W == 0 && DILATION_X == 1 && DILATION_Y == 1 && INPUT_PAD_BOTTOM == 0 && INPUT_PAD_RIGHT == 0
|
||||
Dtype_t blockA00 = ( (const __global Dtype_t*)src0_read0 )[ 0 ]; src0_read0 += ROW_PITCH;
|
||||
Dtype_t blockA01 = ( (const __global Dtype_t*)src0_read1 )[ 0 ]; src0_read1 += ROW_PITCH;
|
||||
#if KERNEL_WIDTH == 3
|
||||
Dtype_t blockA00 = vload3(0, src0_read0); src0_read0 += ROW_PITCH;
|
||||
Dtype_t blockA01 = vload3(0, src0_read1); src0_read1 += ROW_PITCH;
|
||||
Dtype* pblockA00 = (Dtype*)(&blockA00);
|
||||
Dtype* pblockA01 = (Dtype*)(&blockA01);
|
||||
#else
|
||||
Dtype_t blockA00 = { (Dtype)0.f };
|
||||
Dtype_t blockA01 = { (Dtype)0.f };
|
||||
Dtype* pblockA00 = (Dtype*)(&blockA00);
|
||||
Dtype* pblockA01 = (Dtype*)(&blockA01);
|
||||
int pos = 0;
|
||||
LOOP(KERNEL_WIDTH, pos,
|
||||
{
|
||||
if (curr_x0 + pos < input_width)
|
||||
pblockA00[pos] = src0_read0[pos];
|
||||
|
||||
if (curr_x1 + pos < input_width)
|
||||
pblockA01[pos] = src0_read1[pos];
|
||||
})
|
||||
src0_read0 += ROW_PITCH;
|
||||
src0_read1 += ROW_PITCH;
|
||||
#endif
|
||||
#else
|
||||
Dtype_t blockA00;
|
||||
Dtype* pblockA00 = (Dtype*)(&blockA00);
|
||||
int pos = 0;
|
||||
LOOP(KERNEL_WIDTH, pos,
|
||||
{
|
||||
if (curr_y0 >= INPUT_PAD_H && curr_y0 < input_height + INPUT_PAD_H && curr_x0 + pos * DILATION_X >= INPUT_PAD_W && curr_x0 + pos * DILATION_X < input_width + INPUT_PAD_W)
|
||||
if (curr_y0 >= INPUT_PAD_H &&
|
||||
curr_y0 < input_height + INPUT_PAD_H &&
|
||||
curr_x0 + pos * DILATION_X >= INPUT_PAD_W &&
|
||||
curr_x0 + pos * DILATION_X < input_width + INPUT_PAD_W)
|
||||
pblockA00[pos] = src0_read0[pos * DILATION_X];
|
||||
else
|
||||
pblockA00[pos] = 0;
|
||||
@ -904,7 +938,10 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
|
||||
pos = 0;
|
||||
LOOP(KERNEL_WIDTH, pos,
|
||||
{
|
||||
if (curr_y1 >= INPUT_PAD_H && curr_y1 < input_height + INPUT_PAD_H && curr_x1 + pos * DILATION_X >= INPUT_PAD_W && curr_x1 + pos * DILATION_X < input_width + INPUT_PAD_W)
|
||||
if (curr_y1 >= INPUT_PAD_H &&
|
||||
curr_y1 < input_height + INPUT_PAD_H &&
|
||||
curr_x1 + pos * DILATION_X >= INPUT_PAD_W &&
|
||||
curr_x1 + pos * DILATION_X < input_width + INPUT_PAD_W)
|
||||
pblockA01[pos] = src0_read1[pos * DILATION_X];
|
||||
else
|
||||
pblockA01[pos] = 0;
|
||||
@ -972,7 +1009,8 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
|
||||
curr_y0 = saved_y0;
|
||||
curr_y1 = saved_y1;
|
||||
#endif
|
||||
src0_read0 += slice_pitch - ( KERNEL_HEIGHT * ROW_PITCH * DILATION_Y ); // reset to start of next slice of patch
|
||||
// reset to start of next slice of patch
|
||||
src0_read0 += slice_pitch - ( KERNEL_HEIGHT * ROW_PITCH * DILATION_Y );
|
||||
src0_read1 += slice_pitch - ( KERNEL_HEIGHT * ROW_PITCH * DILATION_Y );
|
||||
}
|
||||
//while ( ++patch_depth < 1 ); //debug
|
||||
@ -980,14 +1018,14 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
|
||||
|
||||
// Dst resembles a cube of width x height x (output channel * batches). Each tile writes:
|
||||
// (SIMD * TILE_M) x 1 x TILE_N. Partial writes most likely generated if padding used.
|
||||
int out0_offset = global_z * out_pitch_z // batch offset
|
||||
+ ( group_x * TILE_N ) * out_pitch_y // channel offset
|
||||
int out0_offset = global_z * out_pitch_z // batch offset
|
||||
+ ( group_x * TILE_N ) * out_pitch_y // channel offset
|
||||
+ ( ( global_y * TILE_M + 0 ) / output_width + OUT_PADDING_HEIGHT ) * OUT_PITCH_X // y offset
|
||||
+ ( ( global_y * TILE_M + 0 ) % output_width ) + OUT_PADDING_LEFT; // x offset
|
||||
int out1_offset = global_z * out_pitch_z // batch offset
|
||||
+ ( group_x * TILE_N ) * out_pitch_y // channel offset
|
||||
+ ( ( global_y * TILE_M + 0 ) % output_width ) + OUT_PADDING_LEFT; // x offset
|
||||
int out1_offset = global_z * out_pitch_z // batch offset
|
||||
+ ( group_x * TILE_N ) * out_pitch_y // channel offset
|
||||
+ ( ( global_y * TILE_M + 1 ) / output_width + OUT_PADDING_HEIGHT ) * OUT_PITCH_X // y offset
|
||||
+ ( ( global_y * TILE_M + 1 ) % output_width ) + OUT_PADDING_LEFT; // x offset
|
||||
+ ( ( global_y * TILE_M + 1 ) % output_width ) + OUT_PADDING_LEFT; // x offset
|
||||
|
||||
#if APPLY_BIAS
|
||||
Dtype bias[4];
|
||||
@ -1049,11 +1087,11 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
|
||||
int saved_y1 = curr_y1;
|
||||
#endif
|
||||
const __global Dtype *src0_read0 = src0
|
||||
+ aligned_input_size * global_z // batch offset
|
||||
+ aligned_input_size * global_z // batch offset
|
||||
+ (curr_y0 - INPUT_PAD_H) * ROW_PITCH // y offset
|
||||
+ curr_x0 - INPUT_PAD_W; // x offset
|
||||
const __global Dtype *src0_read1 = src0
|
||||
+ aligned_input_size * global_z // batch offset
|
||||
+ aligned_input_size * global_z // batch offset
|
||||
+ (curr_y1 - INPUT_PAD_H) * ROW_PITCH // y offset
|
||||
+ curr_x1 - INPUT_PAD_W; // x offset
|
||||
|
||||
@ -1084,7 +1122,10 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
|
||||
int pos = 0;
|
||||
LOOP(KERNEL_WIDTH, pos,
|
||||
{
|
||||
if (curr_y0 >= INPUT_PAD_H && curr_y0 < input_height + INPUT_PAD_H && curr_x0 + pos * DILATION_X >= INPUT_PAD_W && curr_x0 + pos * DILATION_X < input_width + INPUT_PAD_W)
|
||||
if (curr_y0 >= INPUT_PAD_H &&
|
||||
curr_y0 < input_height + INPUT_PAD_H &&
|
||||
curr_x0 + pos * DILATION_X >= INPUT_PAD_W &&
|
||||
curr_x0 + pos * DILATION_X < input_width + INPUT_PAD_W)
|
||||
pblockA00[pos] = src0_read0[pos * DILATION_X];
|
||||
else
|
||||
pblockA00[pos] = 0;
|
||||
@ -1095,7 +1136,10 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
|
||||
pos = 0;
|
||||
LOOP(KERNEL_WIDTH, pos,
|
||||
{
|
||||
if (curr_y1 >= INPUT_PAD_H && curr_y1 < input_height + INPUT_PAD_H && curr_x1 + pos * DILATION_X >= INPUT_PAD_W && curr_x1 + pos * DILATION_X < input_width + INPUT_PAD_W)
|
||||
if (curr_y1 >= INPUT_PAD_H &&
|
||||
curr_y1 < input_height + INPUT_PAD_H &&
|
||||
curr_x1 + pos * DILATION_X >= INPUT_PAD_W &&
|
||||
curr_x1 + pos * DILATION_X < input_width + INPUT_PAD_W)
|
||||
pblockA01[pos] = src0_read1[pos * DILATION_X];
|
||||
else
|
||||
pblockA01[pos] = 0;
|
||||
@ -1185,7 +1229,8 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
|
||||
curr_y0 = saved_y0;
|
||||
curr_y1 = saved_y1;
|
||||
#endif
|
||||
src0_read0 += slice_pitch - ( KERNEL_HEIGHT * ROW_PITCH * DILATION_Y ); // reset to start of next slice of patch
|
||||
// reset to start of next slice of patch
|
||||
src0_read0 += slice_pitch - ( KERNEL_HEIGHT * ROW_PITCH * DILATION_Y );
|
||||
src0_read1 += slice_pitch - ( KERNEL_HEIGHT * ROW_PITCH * DILATION_Y );
|
||||
}
|
||||
//while ( ++patch_depth < 1 ); //debug
|
||||
@ -1193,14 +1238,14 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
|
||||
|
||||
// Dst resembles a cube of width x height x (output channel * batches). Each tile writes:
|
||||
// (SIMD * TILE_M) x 1 x TILE_N. Partial writes most likely generated if padding used.
|
||||
int out0_offset = global_z * out_pitch_z // batch offset
|
||||
+ ( group_x * TILE_N ) * out_pitch_y // channel offset
|
||||
int out0_offset = global_z * out_pitch_z // batch offset
|
||||
+ ( group_x * TILE_N ) * out_pitch_y // channel offset
|
||||
+ ( ( global_y * TILE_M + 0 ) / output_width + OUT_PADDING_HEIGHT ) * OUT_PITCH_X // y offset
|
||||
+ ( ( global_y * TILE_M + 0 ) % output_width ) + OUT_PADDING_LEFT; // x offset
|
||||
int out1_offset = global_z * out_pitch_z // batch offset
|
||||
+ ( group_x * TILE_N ) * out_pitch_y // channel offset
|
||||
+ ( ( global_y * TILE_M + 0 ) % output_width ) + OUT_PADDING_LEFT; // x offset
|
||||
int out1_offset = global_z * out_pitch_z // batch offset
|
||||
+ ( group_x * TILE_N ) * out_pitch_y // channel offset
|
||||
+ ( ( global_y * TILE_M + 1 ) / output_width + OUT_PADDING_HEIGHT ) * OUT_PITCH_X // y offset
|
||||
+ ( ( global_y * TILE_M + 1 ) % output_width ) + OUT_PADDING_LEFT; // x offset
|
||||
+ ( ( global_y * TILE_M + 1 ) % output_width ) + OUT_PADDING_LEFT; // x offset
|
||||
__global Dtype *out1 = dst + out1_offset;
|
||||
|
||||
#if APPLY_BIAS
|
||||
@ -1352,9 +1397,9 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
|
||||
int saved_y = curr_y;
|
||||
#endif
|
||||
const __global Dtype *src0_read = src0
|
||||
+ aligned_input_size * global_z // batch offset
|
||||
+ aligned_input_size * global_z // batch offset
|
||||
+ (curr_y - INPUT_PAD_H) * ROW_PITCH // y offset
|
||||
+ curr_x - INPUT_PAD_W; // x offset
|
||||
+ curr_x - INPUT_PAD_W; // x offset
|
||||
const __global Dtype *src0_read_orig = src0_read;
|
||||
|
||||
// Src1 (filter) is directly used as btile.
|
||||
@ -1409,15 +1454,23 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
|
||||
const bool kernel_width_is_odd = KERNEL_WIDTH % 2 == 1;
|
||||
|
||||
#if INPUT_PAD_W == 0 && INPUT_PAD_H == 0 && DILATION_X == 1 && DILATION_Y == 1 && INPUT_PAD_BOTTOM == 0 && INPUT_PAD_RIGHT == 0
|
||||
#if KERNEL_WIDTH == 3
|
||||
Dtype_t blockA00 = vload3(0, src0_read);
|
||||
Dtype* pblockA00 = (Dtype*)(&blockA00);
|
||||
#else
|
||||
Dtype_t blockA00 = ( (const __global Dtype_t*)src0_read )[ 0 ];
|
||||
Dtype* pblockA00 = (Dtype*)(&blockA00);
|
||||
#endif
|
||||
#else
|
||||
Dtype_t blockA00;
|
||||
Dtype* pblockA00 = (Dtype*)(&blockA00);
|
||||
int pos = 0;
|
||||
LOOP(KERNEL_WIDTH, pos,
|
||||
{
|
||||
if (curr_y >= INPUT_PAD_H && curr_y < input_height + INPUT_PAD_H && curr_x + pos * DILATION_X >= INPUT_PAD_W && curr_x + pos * DILATION_X < input_width + INPUT_PAD_W)
|
||||
if (curr_y >= INPUT_PAD_H &&
|
||||
curr_y < input_height + INPUT_PAD_H &&
|
||||
curr_x + pos * DILATION_X >= INPUT_PAD_W &&
|
||||
curr_x + pos * DILATION_X < input_width + INPUT_PAD_W)
|
||||
pblockA00[pos] = src0_read[pos * DILATION_X];
|
||||
else
|
||||
pblockA00[pos] = 0;
|
||||
@ -1463,17 +1516,18 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
|
||||
//while( ++patch_row < 1 ); //debug
|
||||
while( ++patch_row < KERNEL_HEIGHT );
|
||||
|
||||
src0_read += slice_pitch - ( KERNEL_HEIGHT * ROW_PITCH * DILATION_Y ); // reset to start of next slice of patch
|
||||
// reset to start of next slice of patch
|
||||
src0_read += slice_pitch - ( KERNEL_HEIGHT * ROW_PITCH * DILATION_Y );
|
||||
}
|
||||
//while ( ++patch_depth < 1 ); //debug
|
||||
while ( ++patch_depth < INPUT_DEPTH );
|
||||
|
||||
// Dst resembles a cube of width x height x (output channel * batches). Each tile writes:
|
||||
// (SIMD * TILE_M) x 1 x TILE_N. Partial writes most likely generated if padding used.
|
||||
int out_offset = global_z * out_pitch_z // batch offset
|
||||
+ ( group_x * TILE_N ) * out_pitch_y // channel offset
|
||||
int out_offset = global_z * out_pitch_z // batch offset
|
||||
+ ( group_x * TILE_N ) * out_pitch_y // channel offset
|
||||
+ ( ( global_y * TILE_M ) / output_width + OUT_PADDING_HEIGHT) * OUT_PITCH_X // y offset
|
||||
+ ( ( global_y * TILE_M ) % output_width ) + OUT_PADDING_LEFT; // x offset
|
||||
+ ( ( global_y * TILE_M ) % output_width ) + OUT_PADDING_LEFT; // x offset
|
||||
__global Dtype *out = dst + out_offset;
|
||||
|
||||
#if APPLY_BIAS
|
||||
@ -1556,11 +1610,11 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
|
||||
int saved_y1 = curr_y1;
|
||||
#endif
|
||||
const __global Dtype *src0_read0 = src0
|
||||
+ aligned_input_size * global_z // batch offset
|
||||
+ aligned_input_size * global_z // batch offset
|
||||
+ (curr_y0 - INPUT_PAD_H) * ROW_PITCH // y offset
|
||||
+ curr_x0 - INPUT_PAD_W; // x offset
|
||||
const __global Dtype *src0_read1 = src0
|
||||
+ aligned_input_size * global_z // batch offset
|
||||
+ aligned_input_size * global_z // batch offset
|
||||
+ (curr_y1 - INPUT_PAD_H) * ROW_PITCH // y offset
|
||||
+ curr_x1 - INPUT_PAD_W; // x offset
|
||||
|
||||
@ -1600,7 +1654,10 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
|
||||
int pos = 0;
|
||||
LOOP(KERNEL_WIDTH, pos,
|
||||
{
|
||||
if (curr_y0 >= INPUT_PAD_H && curr_y0 < input_height + INPUT_PAD_H && curr_x0 + pos * DILATION_X >= INPUT_PAD_W && curr_x0 + pos * DILATION_X < input_width + INPUT_PAD_W)
|
||||
if (curr_y0 >= INPUT_PAD_H &&
|
||||
curr_y0 < input_height + INPUT_PAD_H &&
|
||||
curr_x0 + pos * DILATION_X >= INPUT_PAD_W &&
|
||||
curr_x0 + pos * DILATION_X < input_width + INPUT_PAD_W)
|
||||
pblockA00[pos] = src0_read0[pos * DILATION_X];
|
||||
else
|
||||
pblockA00[pos] = 0;
|
||||
@ -1611,7 +1668,10 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
|
||||
pos = 0;
|
||||
LOOP(KERNEL_WIDTH, pos,
|
||||
{
|
||||
if (curr_y1 >= INPUT_PAD_H && curr_y1 < input_height + INPUT_PAD_H && curr_x1 + pos * DILATION_X >= INPUT_PAD_W && curr_x1 + pos * DILATION_X < input_width + INPUT_PAD_W)
|
||||
if (curr_y1 >= INPUT_PAD_H &&
|
||||
curr_y1 < input_height + INPUT_PAD_H &&
|
||||
curr_x1 + pos * DILATION_X >= INPUT_PAD_W &&
|
||||
curr_x1 + pos * DILATION_X < input_width + INPUT_PAD_W)
|
||||
pblockA01[pos] = src0_read1[pos * DILATION_X];
|
||||
else
|
||||
pblockA01[pos] = 0;
|
||||
@ -1667,7 +1727,8 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
|
||||
curr_y0 = saved_y0;
|
||||
curr_y1 = saved_y1;
|
||||
#endif
|
||||
src0_read0 += slice_pitch - ( KERNEL_HEIGHT * ROW_PITCH * DILATION_Y); // reset to start of next slice of patch
|
||||
// reset to start of next slice of patch
|
||||
src0_read0 += slice_pitch - ( KERNEL_HEIGHT * ROW_PITCH * DILATION_Y);
|
||||
src0_read1 += slice_pitch - ( KERNEL_HEIGHT * ROW_PITCH * DILATION_Y);
|
||||
}
|
||||
//while ( ++patch_depth < 1 ); //debug
|
||||
@ -1675,14 +1736,14 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
|
||||
|
||||
// Dst resembles a cube of width x height x (output channel * batches). Each tile writes:
|
||||
// (SIMD * TILE_M) x 1 x TILE_N. Partial writes most likely generated if padding used.
|
||||
int out0_offset = global_z * out_pitch_z // batch offset
|
||||
+ ( group_x * TILE_N ) * out_pitch_y // channel offset
|
||||
int out0_offset = global_z * out_pitch_z // batch offset
|
||||
+ ( group_x * TILE_N ) * out_pitch_y // channel offset
|
||||
+ ( ( global_y * TILE_M + 0 ) / output_width + OUT_PADDING_HEIGHT ) * OUT_PITCH_X // y offset
|
||||
+ ( ( global_y * TILE_M + 0 ) % output_width ) + OUT_PADDING_LEFT; // x offset
|
||||
int out1_offset = global_z * out_pitch_z // batch offset
|
||||
+ ( group_x * TILE_N ) * out_pitch_y // channel offset
|
||||
+ ( ( global_y * TILE_M + 0 ) % output_width ) + OUT_PADDING_LEFT; // x offset
|
||||
int out1_offset = global_z * out_pitch_z // batch offset
|
||||
+ ( group_x * TILE_N ) * out_pitch_y // channel offset
|
||||
+ ( ( global_y * TILE_M + 1 ) / output_width + OUT_PADDING_HEIGHT ) * OUT_PITCH_X // y offset
|
||||
+ ( ( global_y * TILE_M + 1 ) % output_width ) + OUT_PADDING_LEFT; // x offset
|
||||
+ ( ( global_y * TILE_M + 1 ) % output_width ) + OUT_PADDING_LEFT; // x offset
|
||||
|
||||
#if APPLY_BIAS
|
||||
Dtype bias[2];
|
||||
|
@ -278,19 +278,19 @@ TEST_P(DNNTestNetwork, FastNeuralStyle_eccv16)
|
||||
processNet("dnn/fast_neural_style_eccv16_starry_night.t7", "", inp, "", "", l1, lInf);
|
||||
}
|
||||
|
||||
const tuple<DNNBackend, DNNTarget> testCases[] = {
|
||||
const tuple<Backend, Target> testCases[] = {
|
||||
#ifdef HAVE_HALIDE
|
||||
tuple<DNNBackend, DNNTarget>(DNN_BACKEND_HALIDE, DNN_TARGET_CPU),
|
||||
tuple<DNNBackend, DNNTarget>(DNN_BACKEND_HALIDE, DNN_TARGET_OPENCL),
|
||||
tuple<Backend, Target>(DNN_BACKEND_HALIDE, DNN_TARGET_CPU),
|
||||
tuple<Backend, Target>(DNN_BACKEND_HALIDE, DNN_TARGET_OPENCL),
|
||||
#endif
|
||||
#ifdef HAVE_INF_ENGINE
|
||||
tuple<DNNBackend, DNNTarget>(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_CPU),
|
||||
tuple<DNNBackend, DNNTarget>(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_OPENCL),
|
||||
tuple<DNNBackend, DNNTarget>(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_OPENCL_FP16),
|
||||
tuple<DNNBackend, DNNTarget>(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_MYRIAD),
|
||||
tuple<Backend, Target>(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_CPU),
|
||||
tuple<Backend, Target>(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_OPENCL),
|
||||
tuple<Backend, Target>(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_OPENCL_FP16),
|
||||
tuple<Backend, Target>(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_MYRIAD),
|
||||
#endif
|
||||
tuple<DNNBackend, DNNTarget>(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL),
|
||||
tuple<DNNBackend, DNNTarget>(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL_FP16)
|
||||
tuple<Backend, Target>(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL),
|
||||
tuple<Backend, Target>(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL_FP16)
|
||||
};
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(/*nothing*/, DNNTestNetwork, testing::ValuesIn(testCases));
|
||||
|
@ -82,7 +82,7 @@ TEST(Test_Caffe, read_googlenet)
|
||||
ASSERT_FALSE(net.empty());
|
||||
}
|
||||
|
||||
typedef testing::TestWithParam<tuple<bool, DNNTarget> > Reproducibility_AlexNet;
|
||||
typedef testing::TestWithParam<tuple<bool, Target> > Reproducibility_AlexNet;
|
||||
TEST_P(Reproducibility_AlexNet, Accuracy)
|
||||
{
|
||||
bool readFromMemory = get<0>(GetParam());
|
||||
@ -179,7 +179,7 @@ TEST(Reproducibility_SSD, Accuracy)
|
||||
normAssertDetections(ref, out);
|
||||
}
|
||||
|
||||
typedef testing::TestWithParam<DNNTarget> Reproducibility_MobileNet_SSD;
|
||||
typedef testing::TestWithParam<Target> Reproducibility_MobileNet_SSD;
|
||||
TEST_P(Reproducibility_MobileNet_SSD, Accuracy)
|
||||
{
|
||||
const string proto = findDataFile("dnn/MobileNetSSD_deploy.prototxt", false);
|
||||
@ -234,7 +234,7 @@ TEST_P(Reproducibility_MobileNet_SSD, Accuracy)
|
||||
INSTANTIATE_TEST_CASE_P(/**/, Reproducibility_MobileNet_SSD,
|
||||
Values(DNN_TARGET_CPU, DNN_TARGET_OPENCL, DNN_TARGET_OPENCL_FP16));
|
||||
|
||||
typedef testing::TestWithParam<DNNTarget> Reproducibility_ResNet50;
|
||||
typedef testing::TestWithParam<Target> Reproducibility_ResNet50;
|
||||
TEST_P(Reproducibility_ResNet50, Accuracy)
|
||||
{
|
||||
Net net = readNetFromCaffe(findDataFile("dnn/ResNet-50-deploy.prototxt", false),
|
||||
@ -270,7 +270,7 @@ TEST_P(Reproducibility_ResNet50, Accuracy)
|
||||
INSTANTIATE_TEST_CASE_P(/**/, Reproducibility_ResNet50,
|
||||
Values(DNN_TARGET_CPU, DNN_TARGET_OPENCL, DNN_TARGET_OPENCL_FP16));
|
||||
|
||||
typedef testing::TestWithParam<DNNTarget> Reproducibility_SqueezeNet_v1_1;
|
||||
typedef testing::TestWithParam<Target> Reproducibility_SqueezeNet_v1_1;
|
||||
TEST_P(Reproducibility_SqueezeNet_v1_1, Accuracy)
|
||||
{
|
||||
Net net = readNetFromCaffe(findDataFile("dnn/squeezenet_v1.1.prototxt", false),
|
||||
@ -413,7 +413,7 @@ TEST(Test_Caffe, multiple_inputs)
|
||||
normAssert(out, first_image + second_image);
|
||||
}
|
||||
|
||||
typedef testing::TestWithParam<tuple<std::string, DNNTarget> > opencv_face_detector;
|
||||
typedef testing::TestWithParam<tuple<std::string, Target> > opencv_face_detector;
|
||||
TEST_P(opencv_face_detector, Accuracy)
|
||||
{
|
||||
std::string proto = findDataFile("dnn/opencv_face_detector.prototxt", false);
|
||||
|
@ -52,7 +52,7 @@ static std::string _tf(TString filename)
|
||||
return (getOpenCVExtraDir() + "/dnn/") + filename;
|
||||
}
|
||||
|
||||
typedef testing::TestWithParam<DNNTarget> Reproducibility_GoogLeNet;
|
||||
typedef testing::TestWithParam<Target> Reproducibility_GoogLeNet;
|
||||
TEST_P(Reproducibility_GoogLeNet, Batching)
|
||||
{
|
||||
Net net = readNetFromCaffe(findDataFile("dnn/bvlc_googlenet.prototxt", false),
|
||||
|
@ -41,21 +41,21 @@ static void test(LayerParams& params, Mat& input, int backendId, int targetId)
|
||||
test(input, net, backendId, targetId);
|
||||
}
|
||||
|
||||
static testing::internal::ParamGenerator<tuple<DNNBackend, DNNTarget> > dnnBackendsAndTargetsWithHalide()
|
||||
static testing::internal::ParamGenerator<tuple<Backend, Target> > dnnBackendsAndTargetsWithHalide()
|
||||
{
|
||||
static const tuple<DNNBackend, DNNTarget> testCases[] = {
|
||||
static const tuple<Backend, Target> testCases[] = {
|
||||
#ifdef HAVE_HALIDE
|
||||
tuple<DNNBackend, DNNTarget>(DNN_BACKEND_HALIDE, DNN_TARGET_CPU),
|
||||
tuple<DNNBackend, DNNTarget>(DNN_BACKEND_HALIDE, DNN_TARGET_OPENCL),
|
||||
tuple<Backend, Target>(DNN_BACKEND_HALIDE, DNN_TARGET_CPU),
|
||||
tuple<Backend, Target>(DNN_BACKEND_HALIDE, DNN_TARGET_OPENCL),
|
||||
#endif
|
||||
#ifdef HAVE_INF_ENGINE
|
||||
tuple<DNNBackend, DNNTarget>(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_CPU),
|
||||
tuple<DNNBackend, DNNTarget>(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_OPENCL),
|
||||
tuple<DNNBackend, DNNTarget>(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_OPENCL_FP16),
|
||||
tuple<DNNBackend, DNNTarget>(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_MYRIAD),
|
||||
tuple<Backend, Target>(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_CPU),
|
||||
tuple<Backend, Target>(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_OPENCL),
|
||||
tuple<Backend, Target>(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_OPENCL_FP16),
|
||||
tuple<Backend, Target>(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_MYRIAD),
|
||||
#endif
|
||||
tuple<DNNBackend, DNNTarget>(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL),
|
||||
tuple<DNNBackend, DNNTarget>(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL_FP16)
|
||||
tuple<Backend, Target>(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL),
|
||||
tuple<Backend, Target>(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL_FP16)
|
||||
};
|
||||
return testing::ValuesIn(testCases);
|
||||
}
|
||||
@ -89,7 +89,7 @@ TEST_P(Test_Halide_layers, Padding)
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Convolution
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
typedef TestWithParam<tuple<Vec3i, Size, Size, Size, Size, Size, bool, tuple<DNNBackend, DNNTarget> > > Convolution;
|
||||
typedef TestWithParam<tuple<Vec3i, Size, Size, Size, Size, Size, bool, tuple<Backend, Target> > > Convolution;
|
||||
TEST_P(Convolution, Accuracy)
|
||||
{
|
||||
int inChannels = get<0>(GetParam())[0];
|
||||
@ -154,7 +154,7 @@ INSTANTIATE_TEST_CASE_P(Layer_Test_Halide, Convolution, Combine(
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Deconvolution
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
typedef TestWithParam<tuple<Vec3i, Size, Size, Size, Size, Vec4i, bool, tuple<DNNBackend, DNNTarget> > > Deconvolution;
|
||||
typedef TestWithParam<tuple<Vec3i, Size, Size, Size, Size, Vec4i, bool, tuple<Backend, Target> > > Deconvolution;
|
||||
TEST_P(Deconvolution, Accuracy)
|
||||
{
|
||||
int inChannels = get<0>(GetParam())[0];
|
||||
@ -220,7 +220,7 @@ INSTANTIATE_TEST_CASE_P(Layer_Test_Halide, Deconvolution, Combine(
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// LRN
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
typedef TestWithParam<tuple<Vec3i, int, Vec3f, bool, std::string, tuple<DNNBackend, DNNTarget> > > LRN;
|
||||
typedef TestWithParam<tuple<Vec3i, int, Vec3f, bool, std::string, tuple<Backend, Target> > > LRN;
|
||||
TEST_P(LRN, Accuracy)
|
||||
{
|
||||
int inChannels = get<0>(GetParam())[0];
|
||||
@ -265,7 +265,7 @@ INSTANTIATE_TEST_CASE_P(Layer_Test_Halide, LRN, Combine(
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Average pooling
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
typedef TestWithParam<tuple<int, Size, Size, Size, tuple<DNNBackend, DNNTarget> > > AvePooling;
|
||||
typedef TestWithParam<tuple<int, Size, Size, Size, tuple<Backend, Target> > > AvePooling;
|
||||
TEST_P(AvePooling, Accuracy)
|
||||
{
|
||||
int inChannels = get<0>(GetParam());
|
||||
@ -305,7 +305,7 @@ INSTANTIATE_TEST_CASE_P(Layer_Test_Halide, AvePooling, Combine(
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Maximum pooling
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
typedef TestWithParam<tuple<int, Size, Size, Size, Size, tuple<DNNBackend, DNNTarget> > > MaxPooling;
|
||||
typedef TestWithParam<tuple<int, Size, Size, Size, Size, tuple<Backend, Target> > > MaxPooling;
|
||||
TEST_P(MaxPooling, Accuracy)
|
||||
{
|
||||
int inChannels = get<0>(GetParam());
|
||||
@ -344,7 +344,7 @@ INSTANTIATE_TEST_CASE_P(Layer_Test_Halide, MaxPooling, Combine(
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Fully-connected
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
typedef TestWithParam<tuple<int, Size, int, bool, tuple<DNNBackend, DNNTarget> > > FullyConnected;
|
||||
typedef TestWithParam<tuple<int, Size, int, bool, tuple<Backend, Target> > > FullyConnected;
|
||||
TEST_P(FullyConnected, Accuracy)
|
||||
{
|
||||
int inChannels = get<0>(GetParam());
|
||||
@ -387,7 +387,7 @@ INSTANTIATE_TEST_CASE_P(Layer_Test_Halide, FullyConnected, Combine(
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// SoftMax
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
typedef TestWithParam<tuple<int, tuple<DNNBackend, DNNTarget> > > SoftMax;
|
||||
typedef TestWithParam<tuple<int, tuple<Backend, Target> > > SoftMax;
|
||||
TEST_P(SoftMax, Accuracy)
|
||||
{
|
||||
int inChannels = get<0>(GetParam());
|
||||
@ -476,7 +476,7 @@ void testInPlaceActivation(LayerParams& lp, int backendId, int targetId)
|
||||
test(input, net, backendId, targetId);
|
||||
}
|
||||
|
||||
typedef TestWithParam<tuple<bool, bool, float, tuple<DNNBackend, DNNTarget> > > BatchNorm;
|
||||
typedef TestWithParam<tuple<bool, bool, float, tuple<Backend, Target> > > BatchNorm;
|
||||
TEST_P(BatchNorm, Accuracy)
|
||||
{
|
||||
bool hasWeights = get<0>(GetParam());
|
||||
@ -511,7 +511,7 @@ INSTANTIATE_TEST_CASE_P(Layer_Test_Halide, BatchNorm, Combine(
|
||||
dnnBackendsAndTargetsWithHalide()
|
||||
));
|
||||
|
||||
typedef TestWithParam<tuple<float, tuple<DNNBackend, DNNTarget> > > ReLU;
|
||||
typedef TestWithParam<tuple<float, tuple<Backend, Target> > > ReLU;
|
||||
TEST_P(ReLU, Accuracy)
|
||||
{
|
||||
float negativeSlope = get<0>(GetParam());
|
||||
@ -530,7 +530,7 @@ INSTANTIATE_TEST_CASE_P(Layer_Test_Halide, ReLU, Combine(
|
||||
dnnBackendsAndTargetsWithHalide()
|
||||
));
|
||||
|
||||
typedef TestWithParam<tuple<std::string, tuple<DNNBackend, DNNTarget> > > NoParamActivation;
|
||||
typedef TestWithParam<tuple<std::string, tuple<Backend, Target> > > NoParamActivation;
|
||||
TEST_P(NoParamActivation, Accuracy)
|
||||
{
|
||||
int backendId = get<0>(get<1>(GetParam()));
|
||||
@ -546,7 +546,7 @@ INSTANTIATE_TEST_CASE_P(Layer_Test_Halide, NoParamActivation, Combine(
|
||||
dnnBackendsAndTargetsWithHalide()
|
||||
));
|
||||
|
||||
typedef TestWithParam<tuple<Vec3f, tuple<DNNBackend, DNNTarget> > > Power;
|
||||
typedef TestWithParam<tuple<Vec3f, tuple<Backend, Target> > > Power;
|
||||
TEST_P(Power, Accuracy)
|
||||
{
|
||||
float power = get<0>(GetParam())[0];
|
||||
@ -582,7 +582,7 @@ TEST_P(Test_Halide_layers, ChannelsPReLU)
|
||||
testInPlaceActivation(lp, backend, target);
|
||||
}
|
||||
|
||||
typedef TestWithParam<tuple<bool, tuple<DNNBackend, DNNTarget> > > Scale;
|
||||
typedef TestWithParam<tuple<bool, tuple<Backend, Target> > > Scale;
|
||||
TEST_P(Scale, Accuracy)
|
||||
{
|
||||
bool hasBias = get<0>(GetParam());
|
||||
@ -616,7 +616,7 @@ INSTANTIATE_TEST_CASE_P(Layer_Test_Halide, Scale, Combine(
|
||||
// `--- conv ----^ ^ ^
|
||||
// `---- ... ------' '
|
||||
// `-----------------'
|
||||
typedef TestWithParam<tuple<Vec3i, Vec3i, tuple<DNNBackend, DNNTarget> > > Concat;
|
||||
typedef TestWithParam<tuple<Vec3i, Vec3i, tuple<Backend, Target> > > Concat;
|
||||
TEST_P(Concat, Accuracy)
|
||||
{
|
||||
Vec3i inSize = get<0>(GetParam());
|
||||
@ -682,7 +682,7 @@ INSTANTIATE_TEST_CASE_P(Layer_Test_Halide, Concat, Combine(
|
||||
// `--- conv ----^ ^ ^
|
||||
// `---- ... ------' '
|
||||
// `-----------------'
|
||||
typedef TestWithParam<tuple<Vec3i, std::string, int, bool, tuple<DNNBackend, DNNTarget> > > Eltwise;
|
||||
typedef TestWithParam<tuple<Vec3i, std::string, int, bool, tuple<Backend, Target> > > Eltwise;
|
||||
TEST_P(Eltwise, Accuracy)
|
||||
{
|
||||
Vec3i inSize = get<0>(GetParam());
|
||||
|
@ -49,15 +49,41 @@
|
||||
#include "opencv2/dnn.hpp"
|
||||
#include "test_common.hpp"
|
||||
|
||||
namespace opencv_test { namespace {
|
||||
namespace cv {
|
||||
namespace dnn {
|
||||
CV__DNN_EXPERIMENTAL_NS_BEGIN
|
||||
|
||||
static inline void PrintTo(const cv::dnn::Backend& v, std::ostream* os)
|
||||
{
|
||||
switch (v) {
|
||||
case DNN_BACKEND_DEFAULT: *os << "DNN_BACKEND_DEFAULT"; return;
|
||||
case DNN_BACKEND_HALIDE: *os << "DNN_BACKEND_HALIDE"; return;
|
||||
case DNN_BACKEND_INFERENCE_ENGINE: *os << "DNN_BACKEND_INFERENCE_ENGINE"; return;
|
||||
case DNN_BACKEND_OPENCV: *os << "DNN_BACKEND_OPENCV"; return;
|
||||
} // don't use "default:" to emit compiler warnings
|
||||
*os << "DNN_BACKEND_UNKNOWN(" << v << ")";
|
||||
}
|
||||
|
||||
static inline void PrintTo(const cv::dnn::Target& v, std::ostream* os)
|
||||
{
|
||||
switch (v) {
|
||||
case DNN_TARGET_CPU: *os << "DNN_TARGET_CPU"; return;
|
||||
case DNN_TARGET_OPENCL: *os << "DNN_TARGET_OPENCL"; return;
|
||||
case DNN_TARGET_OPENCL_FP16: *os << "DNN_TARGET_OPENCL_FP16"; return;
|
||||
case DNN_TARGET_MYRIAD: *os << "DNN_TARGET_MYRIAD"; return;
|
||||
} // don't use "default:" to emit compiler warnings
|
||||
*os << "DNN_TARGET_UNKNOWN(" << v << ")";
|
||||
}
|
||||
|
||||
CV__DNN_EXPERIMENTAL_NS_END
|
||||
}} // namespace
|
||||
|
||||
namespace opencv_test {
|
||||
using namespace cv::dnn;
|
||||
|
||||
CV_ENUM(DNNBackend, DNN_BACKEND_DEFAULT, DNN_BACKEND_HALIDE, DNN_BACKEND_INFERENCE_ENGINE, DNN_BACKEND_OPENCV)
|
||||
CV_ENUM(DNNTarget, DNN_TARGET_CPU, DNN_TARGET_OPENCL, DNN_TARGET_OPENCL_FP16, DNN_TARGET_MYRIAD)
|
||||
|
||||
static testing::internal::ParamGenerator<DNNTarget> availableDnnTargets()
|
||||
static testing::internal::ParamGenerator<Target> availableDnnTargets()
|
||||
{
|
||||
static std::vector<DNNTarget> targets;
|
||||
static std::vector<Target> targets;
|
||||
if (targets.empty())
|
||||
{
|
||||
targets.push_back(DNN_TARGET_CPU);
|
||||
@ -69,23 +95,23 @@ static testing::internal::ParamGenerator<DNNTarget> availableDnnTargets()
|
||||
return testing::ValuesIn(targets);
|
||||
}
|
||||
|
||||
static testing::internal::ParamGenerator<tuple<DNNBackend, DNNTarget> > dnnBackendsAndTargets()
|
||||
static testing::internal::ParamGenerator<tuple<Backend, Target> > dnnBackendsAndTargets()
|
||||
{
|
||||
static const tuple<DNNBackend, DNNTarget> testCases[] = {
|
||||
static const tuple<Backend, Target> testCases[] = {
|
||||
#ifdef HAVE_INF_ENGINE
|
||||
tuple<DNNBackend, DNNTarget>(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_CPU),
|
||||
tuple<DNNBackend, DNNTarget>(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_OPENCL),
|
||||
tuple<DNNBackend, DNNTarget>(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_OPENCL_FP16),
|
||||
tuple<DNNBackend, DNNTarget>(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_MYRIAD),
|
||||
tuple<Backend, Target>(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_CPU),
|
||||
tuple<Backend, Target>(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_OPENCL),
|
||||
tuple<Backend, Target>(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_OPENCL_FP16),
|
||||
tuple<Backend, Target>(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_MYRIAD),
|
||||
#endif
|
||||
tuple<DNNBackend, DNNTarget>(DNN_BACKEND_OPENCV, DNN_TARGET_CPU),
|
||||
tuple<DNNBackend, DNNTarget>(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL),
|
||||
tuple<DNNBackend, DNNTarget>(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL_FP16)
|
||||
tuple<Backend, Target>(DNN_BACKEND_OPENCV, DNN_TARGET_CPU),
|
||||
tuple<Backend, Target>(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL),
|
||||
tuple<Backend, Target>(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL_FP16)
|
||||
};
|
||||
return testing::ValuesIn(testCases);
|
||||
}
|
||||
|
||||
class DNNTestLayer : public TestWithParam <tuple<DNNBackend, DNNTarget> >
|
||||
class DNNTestLayer : public TestWithParam<tuple<Backend, Target> >
|
||||
{
|
||||
public:
|
||||
dnn::Backend backend;
|
||||
@ -156,6 +182,5 @@ protected:
|
||||
}
|
||||
};
|
||||
|
||||
}}
|
||||
|
||||
} // namespace
|
||||
#endif
|
||||
|
@ -243,7 +243,7 @@ TEST_P(Test_TensorFlow_layers, l2_normalize_3d)
|
||||
runTensorFlowNet("l2_normalize_3d");
|
||||
}
|
||||
|
||||
typedef testing::TestWithParam<DNNTarget> Test_TensorFlow_nets;
|
||||
typedef testing::TestWithParam<Target> Test_TensorFlow_nets;
|
||||
|
||||
TEST_P(Test_TensorFlow_nets, MobileNet_SSD)
|
||||
{
|
||||
|
@ -100,7 +100,7 @@ static void runTorchNet(String prefix, int targetId = DNN_TARGET_CPU, String out
|
||||
}
|
||||
}
|
||||
|
||||
typedef testing::TestWithParam<DNNTarget> Test_Torch_layers;
|
||||
typedef testing::TestWithParam<Target> Test_Torch_layers;
|
||||
|
||||
TEST_P(Test_Torch_layers, run_convolution)
|
||||
{
|
||||
@ -208,7 +208,7 @@ TEST_P(Test_Torch_layers, net_non_spatial)
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(/**/, Test_Torch_layers, availableDnnTargets());
|
||||
|
||||
typedef testing::TestWithParam<DNNTarget> Test_Torch_nets;
|
||||
typedef testing::TestWithParam<Target> Test_Torch_nets;
|
||||
|
||||
TEST_P(Test_Torch_nets, OpenFace_accuracy)
|
||||
{
|
||||
|
Loading…
Reference in New Issue
Block a user