// Source: opencv/modules/dnn/test/test_onnx_conformance.cpp
// Web-view header: 1280 lines, 49 KiB, C++ ("Raw" / "Normal View" / "History")

// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#include "test_precomp.hpp"
#include <set>
#include <string>
#include "npy_blob.hpp"
#include <opencv2/dnn/shape_utils.hpp>
#if defined(_MSC_VER) // workaround for 32-bit MSVC compiler
#pragma optimize("", off)
#endif
// String tags for conformance test cases.
// NOTE(review): judging by the names, these classify the stage at which a case
// is expected to fail (parser, net setup, forward), mark a layer fallback, or
// disable the accuracy check. The code that applies them is outside this
// excerpt, so confirm against the use sites.
#define CV_TEST_TAG_DNN_ERROR_PARSER "dnn_error_parser"
#define CV_TEST_TAG_DNN_ERROR_NET_SETUP "dnn_error_net_setup"
#define CV_TEST_TAG_DNN_ERROR_FORWARD "dnn_error_forward"
#define CV_TEST_TAG_DNN_LAYER_FALLBACK "dnn_layer_fallback"
#define CV_TEST_TAG_DNN_NO_ACCURACY_CHECK "dnn_no_accuracy_check"
namespace opencv_test {
// One row of the conformance-test table.
//
// Rows are written as brace-initialised aggregates, e.g. {"test_abs", 1, 1},
// so the member order (name, inputs, outputs) and the aggregate nature of the
// struct must be preserved.
struct TestCase
{
    // Test-case identifier, e.g. "test_abs".
    const char* name;

    // Second column of each table row.
    // Presumably the number of input blobs of the test model; confirm against
    // the code that consumes the table.
    uint32_t inputs;

    // Third column of each table row.
    // Presumably the number of reference outputs of the test model; confirm
    // against the code that consumes the table.
    uint32_t outputs;
};
static const TestCase testConformanceConfig[] = {
{"test_abs", 1, 1},
{"test_acos", 1, 1},
{"test_acos_example", 1, 1},
{"test_acosh", 1, 1},
{"test_acosh_example", 1, 1},
{"test_adagrad", 5, 2},
{"test_adagrad_multiple", 8, 4},
{"test_adam", 6, 3},
{"test_adam_multiple", 10, 6},
{"test_add", 2, 1},
{"test_add_bcast", 2, 1},
{"test_add_uint8", 2, 1},
{"test_and2d", 2, 1},
{"test_and3d", 2, 1},
{"test_and4d", 2, 1},
{"test_and_bcast3v1d", 2, 1},
{"test_and_bcast3v2d", 2, 1},
{"test_and_bcast4v2d", 2, 1},
{"test_and_bcast4v3d", 2, 1},
{"test_and_bcast4v4d", 2, 1},
{"test_argmax_default_axis_example", 1, 1},
{"test_argmax_default_axis_example_select_last_index", 1, 1},
{"test_argmax_default_axis_random", 1, 1},
{"test_argmax_default_axis_random_select_last_index", 1, 1},
{"test_argmax_keepdims_example", 1, 1},
{"test_argmax_keepdims_example_select_last_index", 1, 1},
{"test_argmax_keepdims_random", 1, 1},
{"test_argmax_keepdims_random_select_last_index", 1, 1},
{"test_argmax_negative_axis_keepdims_example", 1, 1},
{"test_argmax_negative_axis_keepdims_example_select_last_index", 1, 1},
{"test_argmax_negative_axis_keepdims_random", 1, 1},
{"test_argmax_negative_axis_keepdims_random_select_last_index", 1, 1},
{"test_argmax_no_keepdims_example", 1, 1},
{"test_argmax_no_keepdims_example_select_last_index", 1, 1},
{"test_argmax_no_keepdims_random", 1, 1},
{"test_argmax_no_keepdims_random_select_last_index", 1, 1},
{"test_argmin_default_axis_example", 1, 1},
{"test_argmin_default_axis_example_select_last_index", 1, 1},
{"test_argmin_default_axis_random", 1, 1},
{"test_argmin_default_axis_random_select_last_index", 1, 1},
{"test_argmin_keepdims_example", 1, 1},
{"test_argmin_keepdims_example_select_last_index", 1, 1},
{"test_argmin_keepdims_random", 1, 1},
{"test_argmin_keepdims_random_select_last_index", 1, 1},
{"test_argmin_negative_axis_keepdims_example", 1, 1},
{"test_argmin_negative_axis_keepdims_example_select_last_index", 1, 1},
{"test_argmin_negative_axis_keepdims_random", 1, 1},
{"test_argmin_negative_axis_keepdims_random_select_last_index", 1, 1},
{"test_argmin_no_keepdims_example", 1, 1},
{"test_argmin_no_keepdims_example_select_last_index", 1, 1},
{"test_argmin_no_keepdims_random", 1, 1},
{"test_argmin_no_keepdims_random_select_last_index", 1, 1},
{"test_asin", 1, 1},
{"test_asin_example", 1, 1},
{"test_asinh", 1, 1},
{"test_asinh_example", 1, 1},
{"test_atan", 1, 1},
{"test_atan_example", 1, 1},
{"test_atanh", 1, 1},
{"test_atanh_example", 1, 1},
{"test_averagepool_1d_default", 1, 1},
{"test_averagepool_2d_ceil", 1, 1},
{"test_averagepool_2d_default", 1, 1},
{"test_averagepool_2d_pads", 1, 1},
{"test_averagepool_2d_pads_count_include_pad", 1, 1},
{"test_averagepool_2d_precomputed_pads", 1, 1},
{"test_averagepool_2d_precomputed_pads_count_include_pad", 1, 1},
{"test_averagepool_2d_precomputed_same_upper", 1, 1},
{"test_averagepool_2d_precomputed_strides", 1, 1},
{"test_averagepool_2d_same_lower", 1, 1},
{"test_averagepool_2d_same_upper", 1, 1},
{"test_averagepool_2d_strides", 1, 1},
{"test_averagepool_3d_default", 1, 1},
{"test_basic_conv_with_padding", 2, 1},
{"test_basic_conv_without_padding", 2, 1},
{"test_basic_convinteger", 3, 1},
{"test_batchnorm_epsilon", 5, 1},
{"test_batchnorm_epsilon_training_mode", 5, 3},
{"test_batchnorm_example", 5, 1},
{"test_batchnorm_example_training_mode", 5, 3},
{"test_bernoulli", 1, 1},
{"test_bernoulli_double", 1, 1},
{"test_bernoulli_double_expanded", 1, 1},
{"test_bernoulli_expanded", 1, 1},
{"test_bernoulli_seed", 1, 1},
{"test_bernoulli_seed_expanded", 1, 1},
{"test_bitshift_left_uint16", 2, 1},
{"test_bitshift_left_uint32", 2, 1},
{"test_bitshift_left_uint64", 2, 1},
{"test_bitshift_left_uint8", 2, 1},
{"test_bitshift_right_uint16", 2, 1},
{"test_bitshift_right_uint32", 2, 1},
{"test_bitshift_right_uint64", 2, 1},
{"test_bitshift_right_uint8", 2, 1},
{"test_cast_BFLOAT16_to_FLOAT", 1, 1},
{"test_cast_DOUBLE_to_FLOAT", 1, 1},
{"test_cast_DOUBLE_to_FLOAT16", 1, 1},
{"test_cast_FLOAT16_to_DOUBLE", 1, 1},
{"test_cast_FLOAT16_to_FLOAT", 1, 1},
{"test_cast_FLOAT_to_BFLOAT16", 1, 1},
{"test_cast_FLOAT_to_DOUBLE", 1, 1},
{"test_cast_FLOAT_to_FLOAT16", 1, 1},
{"test_cast_FLOAT_to_STRING", 1, 1},
{"test_cast_STRING_to_FLOAT", 1, 1},
{"test_castlike_BFLOAT16_to_FLOAT", 2, 1},
{"test_castlike_BFLOAT16_to_FLOAT_expanded", 2, 1},
{"test_castlike_DOUBLE_to_FLOAT", 2, 1},
{"test_castlike_DOUBLE_to_FLOAT16", 2, 1},
{"test_castlike_DOUBLE_to_FLOAT16_expanded", 2, 1},
{"test_castlike_DOUBLE_to_FLOAT_expanded", 2, 1},
{"test_castlike_FLOAT16_to_DOUBLE", 2, 1},
{"test_castlike_FLOAT16_to_DOUBLE_expanded", 2, 1},
{"test_castlike_FLOAT16_to_FLOAT", 2, 1},
{"test_castlike_FLOAT16_to_FLOAT_expanded", 2, 1},
{"test_castlike_FLOAT_to_BFLOAT16", 2, 1},
{"test_castlike_FLOAT_to_BFLOAT16_expanded", 2, 1},
{"test_castlike_FLOAT_to_DOUBLE", 2, 1},
{"test_castlike_FLOAT_to_DOUBLE_expanded", 2, 1},
{"test_castlike_FLOAT_to_FLOAT16", 2, 1},
{"test_castlike_FLOAT_to_FLOAT16_expanded", 2, 1},
{"test_castlike_FLOAT_to_STRING", 2, 1},
{"test_castlike_FLOAT_to_STRING_expanded", 2, 1},
{"test_castlike_STRING_to_FLOAT", 2, 1},
{"test_castlike_STRING_to_FLOAT_expanded", 2, 1},
{"test_ceil", 1, 1},
{"test_ceil_example", 1, 1},
{"test_celu", 1, 1},
{"test_celu_expanded", 1, 1},
{"test_clip", 3, 1},
{"test_clip_default_inbounds", 1, 1},
{"test_clip_default_int8_inbounds", 1, 1},
{"test_clip_default_int8_max", 2, 1},
{"test_clip_default_int8_min", 2, 1},
{"test_clip_default_max", 2, 1},
{"test_clip_default_min", 2, 1},
{"test_clip_example", 3, 1},
{"test_clip_inbounds", 3, 1},
{"test_clip_outbounds", 3, 1},
{"test_clip_splitbounds", 3, 1},
{"test_compress_0", 2, 1},
{"test_compress_1", 2, 1},
{"test_compress_default_axis", 2, 1},
{"test_compress_negative_axis", 2, 1},
{"test_concat_1d_axis_0", 2, 1},
{"test_concat_1d_axis_negative_1", 2, 1},
{"test_concat_2d_axis_0", 2, 1},
{"test_concat_2d_axis_1", 2, 1},
{"test_concat_2d_axis_negative_1", 2, 1},
{"test_concat_2d_axis_negative_2", 2, 1},
{"test_concat_3d_axis_0", 2, 1},
{"test_concat_3d_axis_1", 2, 1},
{"test_concat_3d_axis_2", 2, 1},
{"test_concat_3d_axis_negative_1", 2, 1},
{"test_concat_3d_axis_negative_2", 2, 1},
{"test_concat_3d_axis_negative_3", 2, 1},
{"test_constant", 0, 1},
{"test_constant_pad", 3, 1},
{"test_constantofshape_float_ones", 1, 1},
{"test_constantofshape_int_shape_zero", 1, 1},
{"test_constantofshape_int_zeros", 1, 1},
{"test_conv_with_autopad_same", 2, 1},
{"test_conv_with_strides_and_asymmetric_padding", 2, 1},
{"test_conv_with_strides_no_padding", 2, 1},
{"test_conv_with_strides_padding", 2, 1},
{"test_convinteger_with_padding", 3, 1},
{"test_convinteger_without_padding", 3, 1},
{"test_convtranspose", 2, 1},
{"test_convtranspose_1d", 2, 1},
{"test_convtranspose_3d", 2, 1},
{"test_convtranspose_autopad_same", 2, 1},
{"test_convtranspose_dilations", 2, 1},
{"test_convtranspose_kernel_shape", 2, 1},
{"test_convtranspose_output_shape", 2, 1},
{"test_convtranspose_pad", 2, 1},
{"test_convtranspose_pads", 2, 1},
{"test_convtranspose_with_kernel", 2, 1},
{"test_cos", 1, 1},
{"test_cos_example", 1, 1},
{"test_cosh", 1, 1},
{"test_cosh_example", 1, 1},
{"test_cumsum_1d", 2, 1},
{"test_cumsum_1d_exclusive", 2, 1},
{"test_cumsum_1d_reverse", 2, 1},
{"test_cumsum_1d_reverse_exclusive", 2, 1},
{"test_cumsum_2d_axis_0", 2, 1},
{"test_cumsum_2d_axis_1", 2, 1},
{"test_cumsum_2d_negative_axis", 2, 1},
{"test_depthtospace_crd_mode", 1, 1},
{"test_depthtospace_crd_mode_example", 1, 1},
{"test_depthtospace_dcr_mode", 1, 1},
{"test_depthtospace_example", 1, 1},
{"test_dequantizelinear", 3, 1},
{"test_dequantizelinear_axis", 3, 1},
{"test_det_2d", 1, 1},
{"test_det_nd", 1, 1},
{"test_div", 2, 1},
{"test_div_bcast", 2, 1},
{"test_div_example", 2, 1},
{"test_div_uint8", 2, 1},
{"test_dropout_default", 1, 1},
{"test_dropout_default_mask", 1, 2},
{"test_dropout_default_mask_ratio", 2, 2},
{"test_dropout_default_old", 1, 1},
{"test_dropout_default_ratio", 2, 1},
{"test_dropout_random_old", 1, 1},
{"test_dynamicquantizelinear", 1, 3},
{"test_dynamicquantizelinear_expanded", 1, 3},
{"test_dynamicquantizelinear_max_adjusted", 1, 3},
{"test_dynamicquantizelinear_max_adjusted_expanded", 1, 3},
{"test_dynamicquantizelinear_min_adjusted", 1, 3},
{"test_dynamicquantizelinear_min_adjusted_expanded", 1, 3},
{"test_edge_pad", 2, 1},
{"test_einsum_batch_diagonal", 1, 1},
{"test_einsum_batch_matmul", 2, 1},
{"test_einsum_inner_prod", 2, 1},
{"test_einsum_sum", 1, 1},
{"test_einsum_transpose", 1, 1},
{"test_elu", 1, 1},
{"test_elu_default", 1, 1},
{"test_elu_example", 1, 1},
{"test_equal", 2, 1},
{"test_equal_bcast", 2, 1},
{"test_erf", 1, 1},
{"test_exp", 1, 1},
{"test_exp_example", 1, 1},
{"test_expand_dim_changed", 2, 1},
{"test_expand_dim_unchanged", 2, 1},
{"test_eyelike_populate_off_main_diagonal", 1, 1},
{"test_eyelike_with_dtype", 1, 1},
{"test_eyelike_without_dtype", 1, 1},
{"test_flatten_axis0", 1, 1},
{"test_flatten_axis1", 1, 1},
{"test_flatten_axis2", 1, 1},
{"test_flatten_axis3", 1, 1},
{"test_flatten_default_axis", 1, 1},
{"test_flatten_negative_axis1", 1, 1},
{"test_flatten_negative_axis2", 1, 1},
{"test_flatten_negative_axis3", 1, 1},
{"test_flatten_negative_axis4", 1, 1},
{"test_floor", 1, 1},
{"test_floor_example", 1, 1},
{"test_gather_0", 2, 1},
{"test_gather_1", 2, 1},
{"test_gather_2d_indices", 2, 1},
{"test_gather_elements_0", 2, 1},
{"test_gather_elements_1", 2, 1},
{"test_gather_elements_negative_indices", 2, 1},
{"test_gather_negative_indices", 2, 1},
{"test_gathernd_example_float32", 2, 1},
{"test_gathernd_example_int32", 2, 1},
{"test_gathernd_example_int32_batch_dim1", 2, 1},
{"test_gemm_all_attributes", 3, 1},
{"test_gemm_alpha", 3, 1},
{"test_gemm_beta", 3, 1},
{"test_gemm_default_matrix_bias", 3, 1},
{"test_gemm_default_no_bias", 2, 1},
{"test_gemm_default_scalar_bias", 3, 1},
{"test_gemm_default_single_elem_vector_bias", 3, 1},
{"test_gemm_default_vector_bias", 3, 1},
{"test_gemm_default_zero_bias", 3, 1},
{"test_gemm_transposeA", 3, 1},
{"test_gemm_transposeB", 3, 1},
{"test_globalaveragepool", 1, 1},
{"test_globalaveragepool_precomputed", 1, 1},
{"test_globalmaxpool", 1, 1},
{"test_globalmaxpool_precomputed", 1, 1},
{"test_greater", 2, 1},
{"test_greater_bcast", 2, 1},
{"test_greater_equal", 2, 1},
{"test_greater_equal_bcast", 2, 1},
{"test_greater_equal_bcast_expanded", 2, 1},
{"test_greater_equal_expanded", 2, 1},
{"test_gridsample", 2, 1},
{"test_gridsample_aligncorners_true", 2, 1},
{"test_gridsample_bicubic", 2, 1},
{"test_gridsample_bilinear", 2, 1},
{"test_gridsample_border_padding", 2, 1},
{"test_gridsample_nearest", 2, 1},
{"test_gridsample_reflection_padding", 2, 1},
{"test_gridsample_zeros_padding", 2, 1},
{"test_gru_batchwise", 3, 2},
{"test_gru_defaults", 3, 1},
{"test_gru_seq_length", 4, 1},
{"test_gru_with_initial_bias", 4, 1},
{"test_hardmax_axis_0", 1, 1},
{"test_hardmax_axis_1", 1, 1},
{"test_hardmax_axis_2", 1, 1},
{"test_hardmax_default_axis", 1, 1},
{"test_hardmax_example", 1, 1},
{"test_hardmax_negative_axis", 1, 1},
{"test_hardmax_one_hot", 1, 1},
{"test_hardsigmoid", 1, 1},
{"test_hardsigmoid_default", 1, 1},
{"test_hardsigmoid_example", 1, 1},
{"test_hardswish", 1, 1},
{"test_hardswish_expanded", 1, 1},
{"test_identity", 1, 1},
{"test_identity_opt", 1, 1},
{"test_identity_sequence", 1, 1},
{"test_if", 1, 1},
{"test_if_opt", 1, 1},
{"test_if_seq", 1, 1},
{"test_instancenorm_epsilon", 3, 1},
{"test_instancenorm_example", 3, 1},
{"test_isinf", 1, 1},
{"test_isinf_negative", 1, 1},
{"test_isinf_positive", 1, 1},
{"test_isnan", 1, 1},
/*
Merge pull request #24544 from fengyuentau:layernorm_conformance

dnn test: move layer norm tests into conformance tests #24544

Merge with https://github.com/opencv/opencv_extra/pull/1122

## Motivation

Some ONNX operators, such as `LayerNormalization`, `BatchNormalization` and so
on, produce outputs for training (mean, stdev). So they have reference outputs
of conformance tests for those training outputs as well. However, when it comes
to inference, we neither need nor produce those training outputs here in dnn.
Hence, the output size does not match if we use dnn to infer those conformance
models. This has become a barrier when we want to test these operators using
their conformance tests.

<!--
| Operator                | Inference needed                    | Outputs (required - total) | Optional outputs for training? |
| ----------------------- | ----------------------------------- | -------------------------- | ------------------------------ |
| BatchNormalization      | Yes                                 | 1 - 3                      | Yes                            |
| Dropout                 | Maybe, can be eliminated via fusion | 1 - 2                      | Yes                            |
| GRU                     | Yes                                 | 0 - 2                      | No                             |
| LSTM                    | Yes                                 | 0 - 3                      | No                             |
| LayerNormalization      | Yes                                 | 1 - 3                      | Yes                            |
| MaxPool                 | Yes                                 | 1 - 2                      | Yes                            |
| RNN                     | Yes                                 | 0 - 2                      | No                             |
| SoftmaxCrossEntropyLoss | No                                  | 1 - 2                      | --                             |
-->

**I checked all ONNX operators with optional outputs. It turns out that only
`BatchNormalization`, `Dropout`, `LayerNormalization` and `MaxPool` have
optional outputs for training. All except `LayerNormalization` have models set
for training mode and eval mode. Blame ONNX for that.**

## Solution

In this pull request, we remove graph outputs if the graph looks like the
following:

```
[X] [Scale] [Bias]                     [X] [Scale] [Bias]
   \    |    /         this patch         \    |    /
 LayerNormalization   ----------->    LayerNormalization
   /    |    \                                 |
[Y] [Mean] [Stdev]                            [Y]
```

We can update conformance tests and turn on some cases as well if extending to
more layers.

Notes:
1. This workaround does not solve expanded function operators if they are fused
   into a single operator, such as
   `$onnx/onnx/backend/test/data/node/test_layer_normalization_2d_axis1_expanded`,
   but they can be run without fusion. Note that neither dnn nor onnxruntime
   fuses those expanded function operators.

### Pull Request Readiness Checklist

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [x] I agree to contribute to the project under Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on code under GPL or another license that is incompatible with OpenCV
- [x] The PR is proposed to the proper branch
- [x] There is a reference to the original bug report and related work
- [x] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
      Patch to opencv_extra has the same branch name.
- [x] The feature is well documented and sample code can be built with the project CMake

2023-11-20 16:19:24 +08:00
*/
{"test_layer_normalization_2d_axis0", 3, 1},
{"test_layer_normalization_2d_axis1", 3, 1},
{"test_layer_normalization_2d_axis_negative_1", 3, 1},
{"test_layer_normalization_2d_axis_negative_2", 3, 1},
{"test_layer_normalization_3d_axis0_epsilon", 3, 1},
{"test_layer_normalization_3d_axis1_epsilon", 3, 1},
{"test_layer_normalization_3d_axis2_epsilon", 3, 1},
{"test_layer_normalization_3d_axis_negative_1_epsilon", 3, 1},
{"test_layer_normalization_3d_axis_negative_2_epsilon", 3, 1},
{"test_layer_normalization_3d_axis_negative_3_epsilon", 3, 1},
{"test_layer_normalization_4d_axis0", 3, 1},
{"test_layer_normalization_4d_axis1", 3, 1},
{"test_layer_normalization_4d_axis2", 3, 1},
{"test_layer_normalization_4d_axis3", 3, 1},
{"test_layer_normalization_4d_axis_negative_1", 3, 1},
{"test_layer_normalization_4d_axis_negative_2", 3, 1},
{"test_layer_normalization_4d_axis_negative_3", 3, 1},
{"test_layer_normalization_4d_axis_negative_4", 3, 1},
{"test_layer_normalization_default_axis", 3, 1},
{"test_leakyrelu", 1, 1},
{"test_leakyrelu_default", 1, 1},
{"test_leakyrelu_example", 1, 1},
{"test_less", 2, 1},
{"test_less_bcast", 2, 1},
{"test_less_equal", 2, 1},
{"test_less_equal_bcast", 2, 1},
{"test_less_equal_bcast_expanded", 2, 1},
{"test_less_equal_expanded", 2, 1},
{"test_log", 1, 1},
{"test_log_example", 1, 1},
{"test_logsoftmax_axis_0", 1, 1},
{"test_logsoftmax_axis_0_expanded", 1, 1},
{"test_logsoftmax_axis_1", 1, 1},
{"test_logsoftmax_axis_1_expanded", 1, 1},
{"test_logsoftmax_axis_2", 1, 1},
{"test_logsoftmax_axis_2_expanded", 1, 1},
{"test_logsoftmax_default_axis", 1, 1},
{"test_logsoftmax_default_axis_expanded", 1, 1},
{"test_logsoftmax_example_1", 1, 1},
{"test_logsoftmax_example_1_expanded", 1, 1},
{"test_logsoftmax_large_number", 1, 1},
{"test_logsoftmax_large_number_expanded", 1, 1},
{"test_logsoftmax_negative_axis", 1, 1},
{"test_logsoftmax_negative_axis_expanded", 1, 1},
{"test_loop11", 3, 2},
{"test_loop13_seq", 3, 1},
{"test_loop16_seq_none", 3, 1},
{"test_lrn", 1, 1},
{"test_lrn_default", 1, 1},
{"test_lstm_batchwise", 3, 2},
{"test_lstm_defaults", 3, 1},
{"test_lstm_with_initial_bias", 4, 1},
{"test_lstm_with_peepholes", 8, 1},
{"test_matmul_2d", 2, 1},
{"test_matmul_3d", 2, 1},
{"test_matmul_4d", 2, 1},
{"test_matmulinteger", 4, 1},
{"test_max_example", 3, 1},
{"test_max_float16", 2, 1},
{"test_max_float32", 2, 1},
{"test_max_float64", 2, 1},
{"test_max_int16", 2, 1},
{"test_max_int32", 2, 1},
{"test_max_int64", 2, 1},
{"test_max_int8", 2, 1},
{"test_max_one_input", 1, 1},
{"test_max_two_inputs", 2, 1},
{"test_max_uint16", 2, 1},
{"test_max_uint32", 2, 1},
{"test_max_uint64", 2, 1},
{"test_max_uint8", 2, 1},
{"test_maxpool_1d_default", 1, 1},
{"test_maxpool_2d_ceil", 1, 1},
{"test_maxpool_2d_default", 1, 1},
{"test_maxpool_2d_dilations", 1, 1},
{"test_maxpool_2d_pads", 1, 1},
{"test_maxpool_2d_precomputed_pads", 1, 1},
{"test_maxpool_2d_precomputed_same_upper", 1, 1},
{"test_maxpool_2d_precomputed_strides", 1, 1},
{"test_maxpool_2d_same_lower", 1, 1},
{"test_maxpool_2d_same_upper", 1, 1},
{"test_maxpool_2d_strides", 1, 1},
{"test_maxpool_2d_uint8", 1, 1},
{"test_maxpool_3d_default", 1, 1},
{"test_maxpool_with_argmax_2d_precomputed_pads", 1, 2},
{"test_maxpool_with_argmax_2d_precomputed_strides", 1, 2},
{"test_maxunpool_export_with_output_shape", 3, 1},
{"test_maxunpool_export_without_output_shape", 2, 1},
{"test_mean_example", 3, 1},
{"test_mean_one_input", 1, 1},
{"test_mean_two_inputs", 2, 1},
{"test_min_example", 3, 1},
{"test_min_float16", 2, 1},
{"test_min_float32", 2, 1},
{"test_min_float64", 2, 1},
{"test_min_int16", 2, 1},
{"test_min_int32", 2, 1},
{"test_min_int64", 2, 1},
{"test_min_int8", 2, 1},
{"test_min_one_input", 1, 1},
{"test_min_two_inputs", 2, 1},
{"test_min_uint16", 2, 1},
{"test_min_uint32", 2, 1},
{"test_min_uint64", 2, 1},
{"test_min_uint8", 2, 1},
{"test_mod_broadcast", 2, 1},
{"test_mod_int64_fmod", 2, 1},
{"test_mod_mixed_sign_float16", 2, 1},
{"test_mod_mixed_sign_float32", 2, 1},
{"test_mod_mixed_sign_float64", 2, 1},
{"test_mod_mixed_sign_int16", 2, 1},
{"test_mod_mixed_sign_int32", 2, 1},
{"test_mod_mixed_sign_int64", 2, 1},
{"test_mod_mixed_sign_int8", 2, 1},
{"test_mod_uint16", 2, 1},
{"test_mod_uint32", 2, 1},
{"test_mod_uint64", 2, 1},
{"test_mod_uint8", 2, 1},
{"test_momentum", 5, 2},
{"test_momentum_multiple", 8, 4},
{"test_mul", 2, 1},
{"test_mul_bcast", 2, 1},
{"test_mul_example", 2, 1},
{"test_mul_uint8", 2, 1},
{"test_mvn", 1, 1},
{"test_mvn_expanded", 1, 1},
{"test_neg", 1, 1},
{"test_neg_example", 1, 1},
{"test_nesterov_momentum", 5, 2},
{"test_nllloss_NC", 2, 1},
{"test_nllloss_NC_expanded", 2, 1},
{"test_nllloss_NCd1", 2, 1},
{"test_nllloss_NCd1_expanded", 2, 1},
{"test_nllloss_NCd1_ii", 2, 1},
{"test_nllloss_NCd1_ii_expanded", 2, 1},
{"test_nllloss_NCd1_mean_weight_negative_ii", 3, 1},
{"test_nllloss_NCd1_mean_weight_negative_ii_expanded", 3, 1},
{"test_nllloss_NCd1_weight", 3, 1},
{"test_nllloss_NCd1_weight_expanded", 3, 1},
{"test_nllloss_NCd1_weight_ii", 3, 1},
{"test_nllloss_NCd1_weight_ii_expanded", 3, 1},
{"test_nllloss_NCd1d2", 2, 1},
{"test_nllloss_NCd1d2_expanded", 2, 1},
{"test_nllloss_NCd1d2_no_weight_reduction_mean_ii", 2, 1},
{"test_nllloss_NCd1d2_no_weight_reduction_mean_ii_expanded", 2, 1},
{"test_nllloss_NCd1d2_reduction_mean", 2, 1},
{"test_nllloss_NCd1d2_reduction_mean_expanded", 2, 1},
{"test_nllloss_NCd1d2_reduction_sum", 2, 1},
{"test_nllloss_NCd1d2_reduction_sum_expanded", 2, 1},
{"test_nllloss_NCd1d2_with_weight", 3, 1},
{"test_nllloss_NCd1d2_with_weight_expanded", 3, 1},
{"test_nllloss_NCd1d2_with_weight_reduction_mean", 3, 1},
{"test_nllloss_NCd1d2_with_weight_reduction_mean_expanded", 3, 1},
{"test_nllloss_NCd1d2_with_weight_reduction_sum", 3, 1},
{"test_nllloss_NCd1d2_with_weight_reduction_sum_expanded", 3, 1},
{"test_nllloss_NCd1d2_with_weight_reduction_sum_ii", 3, 1},
{"test_nllloss_NCd1d2_with_weight_reduction_sum_ii_expanded", 3, 1},
{"test_nllloss_NCd1d2d3_none_no_weight_negative_ii", 2, 1},
{"test_nllloss_NCd1d2d3_none_no_weight_negative_ii_expanded", 2, 1},
{"test_nllloss_NCd1d2d3_sum_weight_high_ii", 3, 1},
{"test_nllloss_NCd1d2d3_sum_weight_high_ii_expanded", 3, 1},
{"test_nllloss_NCd1d2d3d4d5_mean_weight", 3, 1},
{"test_nllloss_NCd1d2d3d4d5_mean_weight_expanded", 3, 1},
{"test_nllloss_NCd1d2d3d4d5_none_no_weight", 2, 1},
{"test_nllloss_NCd1d2d3d4d5_none_no_weight_expanded", 2, 1},
{"test_nonmaxsuppression_center_point_box_format", 5, 1},
{"test_nonmaxsuppression_flipped_coordinates", 5, 1},
{"test_nonmaxsuppression_identical_boxes", 5, 1},
{"test_nonmaxsuppression_limit_output_size", 5, 1},
{"test_nonmaxsuppression_single_box", 5, 1},
{"test_nonmaxsuppression_suppress_by_IOU", 5, 1},
{"test_nonmaxsuppression_suppress_by_IOU_and_scores", 5, 1},
{"test_nonmaxsuppression_two_batches", 5, 1},
{"test_nonmaxsuppression_two_classes", 5, 1},
{"test_nonzero_example", 1, 1},
{"test_not_2d", 1, 1},
{"test_not_3d", 1, 1},
{"test_not_4d", 1, 1},
{"test_onehot_negative_indices", 3, 1},
{"test_onehot_with_axis", 3, 1},
{"test_onehot_with_negative_axis", 3, 1},
{"test_onehot_without_axis", 3, 1},
{"test_optional_get_element", 1, 1},
{"test_optional_get_element_sequence", 1, 1},
{"test_optional_has_element", 1, 1},
{"test_optional_has_element_empty", 1, 1},
{"test_or2d", 2, 1},
{"test_or3d", 2, 1},
{"test_or4d", 2, 1},
{"test_or_bcast3v1d", 2, 1},
{"test_or_bcast3v2d", 2, 1},
{"test_or_bcast4v2d", 2, 1},
{"test_or_bcast4v3d", 2, 1},
{"test_or_bcast4v4d", 2, 1},
{"test_pow", 2, 1},
{"test_pow_bcast_array", 2, 1},
{"test_pow_bcast_scalar", 2, 1},
{"test_pow_example", 2, 1},
{"test_pow_types_float", 2, 1},
{"test_pow_types_float32_int32", 2, 1},
{"test_pow_types_float32_int64", 2, 1},
{"test_pow_types_float32_uint32", 2, 1},
{"test_pow_types_float32_uint64", 2, 1},
{"test_pow_types_int", 2, 1},
{"test_pow_types_int32_float32", 2, 1},
{"test_pow_types_int32_int32", 2, 1},
{"test_pow_types_int64_float32", 2, 1},
{"test_pow_types_int64_int64", 2, 1},
{"test_prelu_broadcast", 2, 1},
{"test_prelu_example", 2, 1},
{"test_qlinearconv", 8, 1},
{"test_qlinearmatmul_2D", 8, 1},
{"test_qlinearmatmul_3D", 8, 1},
{"test_quantizelinear", 3, 1},
{"test_quantizelinear_axis", 3, 1},
{"test_range_float_type_positive_delta", 3, 1},
{"test_range_float_type_positive_delta_expanded", 3, 1},
{"test_range_int32_type_negative_delta", 3, 1},
{"test_range_int32_type_negative_delta_expanded", 3, 1},
{"test_reciprocal", 1, 1},
{"test_reciprocal_example", 1, 1},
{"test_reduce_l1_default_axes_keepdims_example", 1, 1},
{"test_reduce_l1_default_axes_keepdims_random", 1, 1},
{"test_reduce_l1_do_not_keepdims_example", 1, 1},
{"test_reduce_l1_do_not_keepdims_random", 1, 1},
{"test_reduce_l1_keep_dims_example", 1, 1},
{"test_reduce_l1_keep_dims_random", 1, 1},
{"test_reduce_l1_negative_axes_keep_dims_example", 1, 1},
{"test_reduce_l1_negative_axes_keep_dims_random", 1, 1},
{"test_reduce_l2_default_axes_keepdims_example", 1, 1},
{"test_reduce_l2_default_axes_keepdims_random", 1, 1},
{"test_reduce_l2_do_not_keepdims_example", 1, 1},
{"test_reduce_l2_do_not_keepdims_random", 1, 1},
{"test_reduce_l2_keep_dims_example", 1, 1},
{"test_reduce_l2_keep_dims_random", 1, 1},
{"test_reduce_l2_negative_axes_keep_dims_example", 1, 1},
{"test_reduce_l2_negative_axes_keep_dims_random", 1, 1},
{"test_reduce_log_sum", 1, 1},
{"test_reduce_log_sum_asc_axes", 1, 1},
{"test_reduce_log_sum_default", 1, 1},
{"test_reduce_log_sum_desc_axes", 1, 1},
{"test_reduce_log_sum_exp_default_axes_keepdims_example", 1, 1},
{"test_reduce_log_sum_exp_default_axes_keepdims_random", 1, 1},
{"test_reduce_log_sum_exp_do_not_keepdims_example", 1, 1},
{"test_reduce_log_sum_exp_do_not_keepdims_random", 1, 1},
{"test_reduce_log_sum_exp_keepdims_example", 1, 1},
{"test_reduce_log_sum_exp_keepdims_random", 1, 1},
{"test_reduce_log_sum_exp_negative_axes_keepdims_example", 1, 1},
{"test_reduce_log_sum_exp_negative_axes_keepdims_random", 1, 1},
{"test_reduce_log_sum_negative_axes", 1, 1},
{"test_reduce_max_default_axes_keepdim_example", 1, 1},
{"test_reduce_max_default_axes_keepdims_random", 1, 1},
{"test_reduce_max_do_not_keepdims_example", 1, 1},
{"test_reduce_max_do_not_keepdims_random", 1, 1},
{"test_reduce_max_keepdims_example", 1, 1},
{"test_reduce_max_keepdims_random", 1, 1},
{"test_reduce_max_negative_axes_keepdims_example", 1, 1},
{"test_reduce_max_negative_axes_keepdims_random", 1, 1},
{"test_reduce_mean_default_axes_keepdims_example", 1, 1},
{"test_reduce_mean_default_axes_keepdims_random", 1, 1},
{"test_reduce_mean_do_not_keepdims_example", 1, 1},
{"test_reduce_mean_do_not_keepdims_random", 1, 1},
{"test_reduce_mean_keepdims_example", 1, 1},
{"test_reduce_mean_keepdims_random", 1, 1},
{"test_reduce_mean_negative_axes_keepdims_example", 1, 1},
{"test_reduce_mean_negative_axes_keepdims_random", 1, 1},
{"test_reduce_min_default_axes_keepdims_example", 1, 1},
{"test_reduce_min_default_axes_keepdims_random", 1, 1},
{"test_reduce_min_do_not_keepdims_example", 1, 1},
{"test_reduce_min_do_not_keepdims_random", 1, 1},
{"test_reduce_min_keepdims_example", 1, 1},
{"test_reduce_min_keepdims_random", 1, 1},
{"test_reduce_min_negative_axes_keepdims_example", 1, 1},
{"test_reduce_min_negative_axes_keepdims_random", 1, 1},
{"test_reduce_prod_default_axes_keepdims_example", 1, 1},
{"test_reduce_prod_default_axes_keepdims_random", 1, 1},
{"test_reduce_prod_do_not_keepdims_example", 1, 1},
{"test_reduce_prod_do_not_keepdims_random", 1, 1},
{"test_reduce_prod_keepdims_example", 1, 1},
{"test_reduce_prod_keepdims_random", 1, 1},
{"test_reduce_prod_negative_axes_keepdims_example", 1, 1},
{"test_reduce_prod_negative_axes_keepdims_random", 1, 1},
{"test_reduce_sum_default_axes_keepdims_example", 2, 1},
{"test_reduce_sum_default_axes_keepdims_random", 2, 1},
{"test_reduce_sum_do_not_keepdims_example", 2, 1},
{"test_reduce_sum_do_not_keepdims_random", 2, 1},
{"test_reduce_sum_empty_axes_input_noop_example", 2, 1},
{"test_reduce_sum_empty_axes_input_noop_random", 2, 1},
{"test_reduce_sum_keepdims_example", 2, 1},
{"test_reduce_sum_keepdims_random", 2, 1},
{"test_reduce_sum_negative_axes_keepdims_example", 2, 1},
{"test_reduce_sum_negative_axes_keepdims_random", 2, 1},
{"test_reduce_sum_square_default_axes_keepdims_example", 1, 1},
{"test_reduce_sum_square_default_axes_keepdims_random", 1, 1},
{"test_reduce_sum_square_do_not_keepdims_example", 1, 1},
{"test_reduce_sum_square_do_not_keepdims_random", 1, 1},
{"test_reduce_sum_square_keepdims_example", 1, 1},
{"test_reduce_sum_square_keepdims_random", 1, 1},
{"test_reduce_sum_square_negative_axes_keepdims_example", 1, 1},
{"test_reduce_sum_square_negative_axes_keepdims_random", 1, 1},
{"test_reflect_pad", 2, 1},
{"test_relu", 1, 1},
{"test_reshape_allowzero_reordered", 2, 1},
{"test_reshape_extended_dims", 2, 1},
{"test_reshape_negative_dim", 2, 1},
{"test_reshape_negative_extended_dims", 2, 1},
{"test_reshape_one_dim", 2, 1},
{"test_reshape_reduced_dims", 2, 1},
{"test_reshape_reordered_all_dims", 2, 1},
{"test_reshape_reordered_last_dims", 2, 1},
{"test_reshape_zero_and_negative_dim", 2, 1},
{"test_reshape_zero_dim", 2, 1},
{"test_resize_downsample_scales_cubic", 2, 1},
{"test_resize_downsample_scales_cubic_A_n0p5_exclude_outside", 2, 1},
{"test_resize_downsample_scales_cubic_align_corners", 2, 1},
{"test_resize_downsample_scales_linear", 2, 1},
{"test_resize_downsample_scales_linear_align_corners", 2, 1},
{"test_resize_downsample_scales_nearest", 2, 1},
{"test_resize_downsample_sizes_cubic", 2, 1},
{"test_resize_downsample_sizes_linear_pytorch_half_pixel", 2, 1},
{"test_resize_downsample_sizes_nearest", 2, 1},
{"test_resize_downsample_sizes_nearest_tf_half_pixel_for_nn", 2, 1},
{"test_resize_tf_crop_and_resize", 3, 1},
{"test_resize_upsample_scales_cubic", 2, 1},
{"test_resize_upsample_scales_cubic_A_n0p5_exclude_outside", 2, 1},
{"test_resize_upsample_scales_cubic_align_corners", 2, 1},
{"test_resize_upsample_scales_cubic_asymmetric", 2, 1},
{"test_resize_upsample_scales_linear", 2, 1},
{"test_resize_upsample_scales_linear_align_corners", 2, 1},
{"test_resize_upsample_scales_nearest", 2, 1},
{"test_resize_upsample_sizes_cubic", 2, 1},
{"test_resize_upsample_sizes_nearest", 2, 1},
{"test_resize_upsample_sizes_nearest_ceil_half_pixel", 2, 1},
{"test_resize_upsample_sizes_nearest_floor_align_corners", 2, 1},
{"test_resize_upsample_sizes_nearest_round_prefer_ceil_asymmetric", 2, 1},
{"test_reversesequence_batch", 2, 1},
{"test_reversesequence_time", 2, 1},
{"test_rnn_seq_length", 4, 1},
{"test_roialign_aligned_false", 3, 1},
{"test_roialign_aligned_true", 3, 1},
{"test_round", 1, 1},
{"test_scan9_sum", 2, 2},
{"test_scan_sum", 2, 2},
{"test_scatter_elements_with_axis", 3, 1},
{"test_scatter_elements_with_duplicate_indices", 3, 1},
{"test_scatter_elements_with_negative_indices", 3, 1},
{"test_scatter_elements_with_reduction_max", 3, 1},
{"test_scatter_elements_with_reduction_min", 3, 1},
{"test_scatter_elements_without_axis", 3, 1},
{"test_scatter_with_axis", 3, 1},
{"test_scatter_without_axis", 3, 1},
{"test_scatternd", 3, 1},
{"test_scatternd_add", 3, 1},
{"test_scatternd_max", 3, 1},
{"test_scatternd_min", 3, 1},
{"test_scatternd_multiply", 3, 1},
{"test_sce_NCd1_mean_weight_negative_ii", 3, 1},
{"test_sce_NCd1_mean_weight_negative_ii_expanded", 3, 1},
{"test_sce_NCd1_mean_weight_negative_ii_log_prob", 3, 2},
{"test_sce_NCd1_mean_weight_negative_ii_log_prob_expanded", 3, 2},
{"test_sce_NCd1d2d3_none_no_weight_negative_ii", 2, 1},
{"test_sce_NCd1d2d3_none_no_weight_negative_ii_expanded", 2, 1},
{"test_sce_NCd1d2d3_none_no_weight_negative_ii_log_prob", 2, 2},
{"test_sce_NCd1d2d3_none_no_weight_negative_ii_log_prob_expanded", 2, 2},
{"test_sce_NCd1d2d3_sum_weight_high_ii", 3, 1},
{"test_sce_NCd1d2d3_sum_weight_high_ii_expanded", 3, 1},
{"test_sce_NCd1d2d3_sum_weight_high_ii_log_prob", 3, 2},
{"test_sce_NCd1d2d3_sum_weight_high_ii_log_prob_expanded", 3, 2},
{"test_sce_NCd1d2d3d4d5_mean_weight", 3, 1},
{"test_sce_NCd1d2d3d4d5_mean_weight_expanded", 3, 1},
{"test_sce_NCd1d2d3d4d5_mean_weight_log_prob", 3, 2},
{"test_sce_NCd1d2d3d4d5_mean_weight_log_prob_expanded", 3, 2},
{"test_sce_NCd1d2d3d4d5_none_no_weight", 2, 1},
{"test_sce_NCd1d2d3d4d5_none_no_weight_expanded", 2, 1},
{"test_sce_NCd1d2d3d4d5_none_no_weight_log_prob", 2, 2},
{"test_sce_NCd1d2d3d4d5_none_no_weight_log_prob_expanded", 2, 2},
{"test_sce_mean", 2, 1},
{"test_sce_mean_3d", 2, 1},
{"test_sce_mean_3d_expanded", 2, 1},
{"test_sce_mean_3d_log_prob", 2, 2},
{"test_sce_mean_3d_log_prob_expanded", 2, 2},
{"test_sce_mean_expanded", 2, 1},
{"test_sce_mean_log_prob", 2, 2},
{"test_sce_mean_log_prob_expanded", 2, 2},
{"test_sce_mean_no_weight_ii", 2, 1},
{"test_sce_mean_no_weight_ii_3d", 2, 1},
{"test_sce_mean_no_weight_ii_3d_expanded", 2, 1},
{"test_sce_mean_no_weight_ii_3d_log_prob", 2, 2},
{"test_sce_mean_no_weight_ii_3d_log_prob_expanded", 2, 2},
{"test_sce_mean_no_weight_ii_4d", 2, 1},
{"test_sce_mean_no_weight_ii_4d_expanded", 2, 1},
{"test_sce_mean_no_weight_ii_4d_log_prob", 2, 2},
{"test_sce_mean_no_weight_ii_4d_log_prob_expanded", 2, 2},
{"test_sce_mean_no_weight_ii_expanded", 2, 1},
{"test_sce_mean_no_weight_ii_log_prob", 2, 2},
{"test_sce_mean_no_weight_ii_log_prob_expanded", 2, 2},
{"test_sce_mean_weight", 3, 1},
{"test_sce_mean_weight_expanded", 3, 1},
{"test_sce_mean_weight_ii", 3, 1},
{"test_sce_mean_weight_ii_3d", 3, 1},
{"test_sce_mean_weight_ii_3d_expanded", 3, 1},
{"test_sce_mean_weight_ii_3d_log_prob", 3, 2},
{"test_sce_mean_weight_ii_3d_log_prob_expanded", 3, 2},
{"test_sce_mean_weight_ii_4d", 3, 1},
{"test_sce_mean_weight_ii_4d_expanded", 3, 1},
{"test_sce_mean_weight_ii_4d_log_prob", 3, 2},
{"test_sce_mean_weight_ii_4d_log_prob_expanded", 3, 2},
{"test_sce_mean_weight_ii_expanded", 3, 1},
{"test_sce_mean_weight_ii_log_prob", 3, 2},
{"test_sce_mean_weight_ii_log_prob_expanded", 3, 2},
{"test_sce_mean_weight_log_prob", 3, 2},
{"test_sce_mean_weight_log_prob_expanded", 3, 2},
{"test_sce_none", 2, 1},
{"test_sce_none_expanded", 2, 1},
{"test_sce_none_log_prob", 2, 2},
{"test_sce_none_log_prob_expanded", 2, 2},
{"test_sce_none_weights", 3, 1},
{"test_sce_none_weights_expanded", 3, 1},
{"test_sce_none_weights_log_prob", 3, 2},
{"test_sce_none_weights_log_prob_expanded", 3, 2},
{"test_sce_sum", 2, 1},
{"test_sce_sum_expanded", 2, 1},
{"test_sce_sum_log_prob", 2, 2},
{"test_sce_sum_log_prob_expanded", 2, 2},
{"test_selu", 1, 1},
{"test_selu_default", 1, 1},
{"test_selu_example", 1, 1},
{"test_sequence_insert_at_back", 2, 1},
{"test_sequence_insert_at_front", 3, 1},
{"test_shape", 1, 1},
{"test_shape_clip_end", 1, 1},
{"test_shape_clip_start", 1, 1},
{"test_shape_end_1", 1, 1},
{"test_shape_end_negative_1", 1, 1},
{"test_shape_example", 1, 1},
{"test_shape_start_1", 1, 1},
{"test_shape_start_1_end_2", 1, 1},
{"test_shape_start_1_end_negative_1", 1, 1},
{"test_shape_start_negative_1", 1, 1},
{"test_shrink_hard", 1, 1},
{"test_shrink_soft", 1, 1},
{"test_sigmoid", 1, 1},
{"test_sigmoid_example", 1, 1},
{"test_sign", 1, 1},
{"test_simple_rnn_batchwise", 3, 2},
{"test_simple_rnn_defaults", 3, 1},
{"test_simple_rnn_with_initial_bias", 4, 1},
{"test_sin", 1, 1},
{"test_sin_example", 1, 1},
{"test_sinh", 1, 1},
{"test_sinh_example", 1, 1},
{"test_size", 1, 1},
{"test_size_example", 1, 1},
{"test_slice", 5, 1},
{"test_slice_default_axes", 3, 1},
{"test_slice_default_steps", 4, 1},
{"test_slice_end_out_of_bounds", 5, 1},
{"test_slice_neg", 5, 1},
{"test_slice_neg_steps", 5, 1},
{"test_slice_negative_axes", 4, 1},
{"test_slice_start_out_of_bounds", 5, 1},
{"test_softmax_axis_0", 1, 1},
{"test_softmax_axis_0_expanded", 1, 1},
{"test_softmax_axis_1", 1, 1},
{"test_softmax_axis_1_expanded", 1, 1},
{"test_softmax_axis_2", 1, 1},
{"test_softmax_axis_2_expanded", 1, 1},
{"test_softmax_default_axis", 1, 1},
{"test_softmax_default_axis_expanded", 1, 1},
{"test_softmax_example", 1, 1},
{"test_softmax_example_expanded", 1, 1},
{"test_softmax_large_number", 1, 1},
{"test_softmax_large_number_expanded", 1, 1},
{"test_softmax_negative_axis", 1, 1},
{"test_softmax_negative_axis_expanded", 1, 1},
{"test_softplus", 1, 1},
{"test_softplus_example", 1, 1},
{"test_softsign", 1, 1},
{"test_softsign_example", 1, 1},
{"test_spacetodepth", 1, 1},
{"test_spacetodepth_example", 1, 1},
{"test_split_equal_parts_1d", 1, 3},
{"test_split_equal_parts_2d", 1, 2},
{"test_split_equal_parts_default_axis", 1, 3},
{"test_split_variable_parts_1d", 2, 2},
{"test_split_variable_parts_2d", 2, 2},
{"test_split_variable_parts_default_axis", 2, 2},
{"test_split_zero_size_splits", 2, 3},
{"test_sqrt", 1, 1},
{"test_sqrt_example", 1, 1},
{"test_squeeze", 2, 1},
{"test_squeeze_negative_axes", 2, 1},
{"test_strnormalizer_export_monday_casesensintive_lower", 1, 1},
{"test_strnormalizer_export_monday_casesensintive_nochangecase", 1, 1},
{"test_strnormalizer_export_monday_casesensintive_upper", 1, 1},
{"test_strnormalizer_export_monday_empty_output", 1, 1},
{"test_strnormalizer_export_monday_insensintive_upper_twodim", 1, 1},
{"test_strnormalizer_nostopwords_nochangecase", 1, 1},
{"test_sub", 2, 1},
{"test_sub_bcast", 2, 1},
{"test_sub_example", 2, 1},
{"test_sub_uint8", 2, 1},
{"test_sum_example", 3, 1},
{"test_sum_one_input", 1, 1},
{"test_sum_two_inputs", 2, 1},
{"test_tan", 1, 1},
{"test_tan_example", 1, 1},
{"test_tanh", 1, 1},
{"test_tanh_example", 1, 1},
{"test_tfidfvectorizer_tf_batch_onlybigrams_skip0", 1, 1},
{"test_tfidfvectorizer_tf_batch_onlybigrams_skip5", 1, 1},
{"test_tfidfvectorizer_tf_batch_uniandbigrams_skip5", 1, 1},
{"test_tfidfvectorizer_tf_only_bigrams_skip0", 1, 1},
{"test_tfidfvectorizer_tf_onlybigrams_levelempty", 1, 1},
{"test_tfidfvectorizer_tf_onlybigrams_skip5", 1, 1},
{"test_tfidfvectorizer_tf_uniandbigrams_skip5", 1, 1},
{"test_thresholdedrelu", 1, 1},
{"test_thresholdedrelu_default", 1, 1},
{"test_thresholdedrelu_example", 1, 1},
{"test_tile", 2, 1},
{"test_tile_precomputed", 2, 1},
{"test_top_k", 2, 2},
{"test_top_k_negative_axis", 2, 2},
{"test_top_k_smallest", 2, 2},
{"test_training_dropout", 3, 1},
{"test_training_dropout_default", 3, 1},
{"test_training_dropout_default_mask", 3, 2},
{"test_training_dropout_mask", 3, 2},
{"test_training_dropout_zero_ratio", 3, 1},
{"test_training_dropout_zero_ratio_mask", 3, 2},
{"test_transpose_all_permutations_0", 1, 1},
{"test_transpose_all_permutations_1", 1, 1},
{"test_transpose_all_permutations_2", 1, 1},
{"test_transpose_all_permutations_3", 1, 1},
{"test_transpose_all_permutations_4", 1, 1},
{"test_transpose_all_permutations_5", 1, 1},
{"test_transpose_default", 1, 1},
{"test_tril", 1, 1},
{"test_tril_neg", 2, 1},
{"test_tril_one_row_neg", 1, 1},
{"test_tril_out_neg", 2, 1},
{"test_tril_out_pos", 2, 1},
{"test_tril_pos", 2, 1},
{"test_tril_square", 1, 1},
{"test_tril_square_neg", 2, 1},
{"test_tril_zero", 2, 1},
{"test_triu", 1, 1},
{"test_triu_neg", 2, 1},
{"test_triu_one_row", 2, 1},
{"test_triu_out_neg_out", 2, 1},
{"test_triu_out_pos", 2, 1},
{"test_triu_pos", 2, 1},
{"test_triu_square", 1, 1},
{"test_triu_square_neg", 2, 1},
{"test_triu_zero", 2, 1},
{"test_unique_not_sorted_without_axis", 1, 4},
{"test_unique_sorted_with_axis", 1, 4},
{"test_unique_sorted_with_axis_3d", 1, 4},
{"test_unique_sorted_with_negative_axis", 1, 4},
{"test_unique_sorted_without_axis", 1, 4},
{"test_unsqueeze_axis_0", 2, 1},
{"test_unsqueeze_axis_1", 2, 1},
{"test_unsqueeze_axis_2", 2, 1},
{"test_unsqueeze_axis_3", 1, 1},
{"test_unsqueeze_negative_axes", 2, 1},
{"test_unsqueeze_three_axes", 2, 1},
{"test_unsqueeze_two_axes", 2, 1},
{"test_unsqueeze_unsorted_axes", 2, 1},
{"test_upsample_nearest", 2, 1},
{"test_where_example", 3, 1},
{"test_where_long_example", 3, 1},
{"test_xor2d", 2, 1},
{"test_xor3d", 2, 1},
{"test_xor4d", 2, 1},
{"test_xor_bcast3v1d", 2, 1},
{"test_xor_bcast3v2d", 2, 1},
{"test_xor_bcast4v2d", 2, 1},
{"test_xor_bcast4v3d", 2, 1},
{"test_xor_bcast4v4d", 2, 1},
};
// Writes the test case's name to the stream and hands the same stream back,
// so the call can be chained with further insertions.
std::ostream& operator<<(std::ostream& os, const TestCase& test_case)
{
    os << test_case.name;
    return os;
}
// Parameter pack for the conformance tests: element 0 is the TestCase,
// element 1 is a nested (Backend, Target) tuple.
using ONNXConfParams = tuple<TestCase, tuple<Backend, Target> >;
// Builds a string of the form "<test case name>_<backend>_<target>" from the
// given parameter info. The backend and target parts are whatever their
// PrintTo overloads write into the stream.
std::string printOnnxConfParams(const testing::TestParamInfo<ONNXConfParams>& params)
{
    const ONNXConfParams& conf = params.param;
    const tuple<Backend, Target>& backend_target = get<1>(conf);

    std::stringstream name;
    name << get<0>(conf).name << "_";
    PrintTo(get<0>(backend_target), &name);
    name << "_";
    PrintTo(get<1>(backend_target), &name);
    return name.str();
}
class Test_ONNX_conformance : public TestWithParam<ONNXConfParams>
{
public:
TestCase test_case;
Backend backend;
Target target;
double default_l1;
double default_lInf;
static std::set<std::string> parser_deny_list;
static std::set<std::string> global_deny_list;
static std::set<std::string> opencv_deny_list;
static std::set<std::string> opencl_fp16_deny_list;
static std::set<std::string> opencl_deny_list;
static std::set<std::string> cpu_deny_list;
#ifdef HAVE_HALIDE
static std::set<std::string> halide_deny_list;
#endif
#ifdef HAVE_VULKAN
static std::set<std::string> vulkan_deny_list;
#endif
#ifdef HAVE_CUDA
static std::set<std::string> cuda_deny_list;
#endif
Test_ONNX_conformance()
{
test_case = get<0>(GetParam());
backend = get<0>(get<1>(GetParam()));
target = get<1>(get<1>(GetParam()));
Merge pull request #22275 from zihaomu:fp16_support_conv DNN: FP16 support on Convolution 2D #22275 ## FP16 support on ARM platform This PR proposes to support FP16 backend in Convolution. For now, we only support FP16 on ARM aarch64. In addition to adding fp16, I also added `seperateIm2col` optimization in this patch. ## How to use FP16 to speed up convolution? ``` Net net = readNet(modelPath); net.setPreferableTarget(DNN_TARGET_CPU_FP16); net.setInput(blob); Mat output = net.forward(); ``` ### TODO List | Task | Status | Remarks | |:-------:|:--------:|:------------:| | Convolution 2D FP16 | :heavy_check_mark: | Done | | Winograd FP16 | Because the current modification has reached 2k lines, winograd fp16 will be completed in the next PR. | | | Accuracy Test | :heavy_check_mark: | Done | | Performance Test | :heavy_check_mark: | Done | | Compiler bug | :heavy_check_mark: | Done | ### Speed Test for FP 16. **Test on M1 chip, 4 threads.** | Model Name | FP32 (Conv+Wino) | Conv(FP16) + Wino(FP 32) | |:-------:|:--------:|:------------:| | ResNet 50 | 26.0 ms | **18.05 ms** (25% speed up)| | MobileNet V2 | 4.17 ms | **3.09 ms (29% speed up)** | ### Speed Test for `seperateIm2col` trick on X86. 
**Test on AMD 5600x, 12 threads.** | Model Name | 4.x | Patch | |:-------:|:--------:|:------------:| | MobileNet V2 | 5.6 ms | **3.0 ms (46% speed up)** | ### Performance Test #### Performance Test of X86 platform: AMD 5600X, with `-perf_threas=1` |Name of Test|4.x|patch|patch vs 4.x (x-factor)| |---|:-:|:-:|:-:| |Name of Test|4.x 0|fp16pr final|fp16pr final vs 4.x 0 (x-factor)| |---|:-:|:-:|:-:| |conv1d::Conv1D::(GFLOPS=0.000, K=[3], IN={1, 2, 19}, OCN=2, G=2, S=2, P=(1, 1), BIAS, OCV/CPU)|0.001|0.001|1.00| |conv1d::Conv1D::(GFLOPS=0.000, K=[3], IN={1, 2, 25}, OCN=2, G=2, P=(2, 2), PM=SAME, OCV/CPU)|0.001|0.001|1.03| |conv1d::Conv1D::(GFLOPS=0.000, K=[3], IN={1, 6, 10}, OCN=6, PM=VALID, BIAS, OCV/CPU)|0.001|0.001|0.92| |conv3d::Conv3D::(GFLOPS=0.000, K=[1 x 1 x 1], IN={1, 4, 9, 10, 10}, OCN=4, S=[1 x 1 x 2], P=(1, 1) x (1, 1) x (1, 1), PM=VALID, OCV/CPU)|0.002|0.003|0.95| |conv3d::Conv3D::(GFLOPS=0.000, K=[1 x 1 x 1], IN={1, 8, 1, 10, 10}, OCN=8, G=8, P=(1, 1) x (1, 1) x (1, 1), BIAS, OCV/CPU)|0.006|0.006|1.00| |conv3d::Conv3D::(GFLOPS=0.000, K=[3 x 3 x 3], IN={1, 2, 19, 19, 19}, OCN=2, G=2, S=[2 x 2 x 2], P=(1, 1) x (1, 1) x (1, 1), BIAS, OCV/CPU)|0.045|0.033|1.39| |conv3d::Conv3D::(GFLOPS=0.000, K=[3 x 4 x 2], IN={1, 4, 8, 10, 10}, OCN=4, G=4, S=[1 x 2 x 1], BIAS, OCV/CPU)|0.011|0.009|1.17| |conv3d::Conv3D::(GFLOPS=0.001, K=[3 x 3 x 3], IN={1, 2, 25, 19, 19}, OCN=2, G=2, S=[1 x 2 x 2], P=(2, 2) x (2, 2) x (2, 2), PM=SAME, OCV/CPU)|0.109|0.078|1.39| |conv3d::Conv3D::(GFLOPS=0.002, K=[3 x 1 x 4], IN={1, 14, 5, 10, 10}, OCN=14, PM=SAME, OCV/CPU)|0.040|0.042|0.94| |conv3d::Conv3D::(GFLOPS=0.006, K=[5 x 5 x 5], IN={1, 4, 50, 19, 19}, OCN=4, S=[2 x 2 x 2], P=(1, 1) x (1, 1) x (1, 1), PM=VALID, OCV/CPU)|0.326|0.342|0.95| |conv3d::Conv3D::(GFLOPS=0.027, K=[3 x 3 x 3], IN={1, 6, 10, 38, 50}, OCN=6, PM=VALID, BIAS, OCV/CPU)|0.580|0.589|0.99| |conv3d::Conv3D::(GFLOPS=0.030, K=[5 x 5 x 5], IN={1, 6, 19, 19, 19}, OCN=6, G=2, OCV/CPU)|1.293|1.382|0.94| 
|conv3d::Conv3D::(GFLOPS=0.045, K=[7 x 7 x 7], IN={1, 2, 38, 38, 38}, OCN=2, S=[1 x 2 x 1], OCV/CPU)|3.590|3.710|0.97| |conv3d::Conv3D::(GFLOPS=0.053, K=[3 x 3 x 3], IN={1, 10, 98, 10, 10}, OCN=10, PM=SAME, OCV/CPU)|1.120|1.191|0.94| |conv3d::Conv3D::(GFLOPS=0.071, K=[7 x 7 x 7], IN={1, 6, 15, 19, 19}, OCN=6, S=[2 x 1 x 1], P=(3, 3) x (3, 3) x (3, 3), PM=SAME, BIAS, OCV/CPU)|2.576|2.872|0.90| |conv3d::Conv3D::(GFLOPS=0.093, K=[5 x 5 x 5], IN={1, 4, 40, 75, 75}, OCN=4, S=[2 x 2 x 2], OCV/CPU)|4.599|4.670|0.98| |conv3d::Conv3D::(GFLOPS=0.116, K=[5 x 5 x 5], IN={1, 2, 21, 75, 100}, OCN=2, BIAS, OCV/CPU)|9.230|9.582|0.96| |conv3d::Conv3D::(GFLOPS=1.267, K=[5 x 5 x 5], IN={1, 3, 75, 75, 100}, OCN=3, PM=SAME, BIAS, OCV/CPU)|65.946|69.381|0.95| |conv3d::Conv3D::(GFLOPS=1.343, K=[3 x 3 x 3], IN={1, 11, 9, 150, 200}, OCN=11, PM=VALID, BIAS, OCV/CPU)|18.915|19.289|0.98| |conv::Conv::(GFLOPS=0.177, K=[1 x 1], IN={1, 512, 26, 26}, OCN=256, OCV/CPU)|1.404|1.457|0.96| |conv::Conv::(GFLOPS=0.177, K=[1 x 1], IN={1, 1024, 13, 13}, OCN=512, OCV/CPU)|2.060|1.501|1.37| |conv::Conv::(GFLOPS=0.178, K=[1 x 1], IN={1, 256, 52, 52}, OCN=128, OCV/CPU)|1.409|1.464|0.96| |conv::Conv::(GFLOPS=0.210, K=[1 x 1], IN={1, 576, 38, 50}, OCN=96, PM=SAME, BIAS, OCV/CPU)|1.793|1.838|0.98| |conv::Conv::(GFLOPS=0.231, K=[3 x 3], IN={1, 128, 56, 56}, OCN=32, P=[1 x 1], OCV/CPU)|1.207|1.199|1.01| |conv::Conv::(GFLOPS=0.231, K=[3 x 3], IN={1, 256, 14, 14}, OCN=256, P=[1 x 1], OCV/CPU)|1.277|1.275|1.00| |conv::Conv::(GFLOPS=0.280, K=[1 x 1], IN={1, 576, 38, 50}, OCN=128, PM=SAME, BIAS, OCV/CPU)|2.319|2.370|0.98| |conv::Conv::(GFLOPS=0.302, K=[3 x 3], IN={1, 64, 64, 64}, OCN=64, PM=SAME, OCV/CPU)|1.351|1.346|1.00| |conv::Conv::(GFLOPS=0.357, K=[1 x 1], IN={1, 64, 208, 208}, OCN=64, OCV/CPU)|3.520|3.612|0.97| |conv::Conv::(GFLOPS=0.420, K=[3 x 3], IN={1, 96, 38, 50}, OCN=128, PM=SAME, BIAS, OCV/CPU)|1.876|1.880|1.00| |conv::Conv::(GFLOPS=0.472, K=[3 x 3], IN={1, 128, 40, 40}, OCN=128, PM=SAME, 
OCV/CPU)|1.981|1.995|0.99| |conv::Conv::(GFLOPS=0.472, K=[3 x 3], IN={1, 256, 20, 20}, OCN=256, PM=SAME, OCV/CPU)|2.620|2.627|1.00| |conv::Conv::(GFLOPS=0.472, K=[3 x 3], IN={1, 512, 10, 10}, OCN=512, PM=SAME, OCV/CPU)|4.202|4.123|1.02| |conv::Conv::(GFLOPS=0.561, K=[3 x 3], IN={1, 128, 38, 50}, OCN=128, PM=SAME, BIAS, OCV/CPU)|2.429|2.445|0.99| |conv::Conv::(GFLOPS=0.624, K=[3 x 3], IN={1, 128, 46, 46}, OCN=128, P=[1 x 1], BIAS, OCV/CPU)|2.591|2.576|1.01| |conv::Conv::(GFLOPS=0.701, K=[3 x 3], IN={1, 128, 38, 50}, OCN=160, PM=SAME, BIAS, OCV/CPU)|3.005|2.998|1.00| |conv::Conv::(GFLOPS=0.798, K=[3 x 3], IN={1, 64, 104, 104}, OCN=64, P=[1 x 1], OCV/CPU)|3.515|3.532|1.00| |conv::Conv::(GFLOPS=0.798, K=[3 x 3], IN={1, 128, 52, 52}, OCN=128, P=[1 x 1], OCV/CPU)|3.115|3.134|0.99| |conv::Conv::(GFLOPS=0.798, K=[3 x 3], IN={1, 256, 26, 26}, OCN=256, P=[1 x 1], OCV/CPU)|3.937|3.899|1.01| |conv::Conv::(GFLOPS=0.798, K=[3 x 3], IN={1, 512, 13, 13}, OCN=512, P=[1 x 1], OCV/CPU)|5.533|5.471|1.01| |conv::Conv::(GFLOPS=0.830, K=[3 x 3], IN={1, 64, 75, 100}, OCN=96, PM=SAME, BIAS, OCV/CPU)|3.472|3.464|1.00| |conv::Conv::(GFLOPS=0.958, K=[3 x 3], IN={1, 192, 38, 38}, OCN=192, PM=SAME, OCV/CPU)|4.302|4.322|1.00| |conv::Conv::(GFLOPS=0.958, K=[3 x 3], IN={1, 384, 19, 19}, OCN=384, PM=SAME, OCV/CPU)|6.100|6.035|1.01| |conv::Conv::(GFLOPS=1.022, K=[3 x 3], IN={1, 576, 19, 19}, OCN=273, PM=SAME, BIAS, OCV/CPU)|6.580|6.484|1.01| |conv::Conv::(GFLOPS=1.112, K=[3 x 3], IN={1, 512, 10, 10}, OCN=1206, P=[1 x 1], BIAS, OCV/CPU)|9.741|9.634|1.01| |conv::Conv::(GFLOPS=1.181, K=[3 x 3], IN={1, 64, 160, 200}, OCN=128, S=[2 x 2], P=[1 x 1], BIAS, OCV/CPU)|10.131|10.156|1.00| |conv::Conv::(GFLOPS=1.182, K=[3 x 3], IN={1, 32, 320, 400}, OCN=64, S=[2 x 2], P=[1 x 1], BIAS, OCV/CPU)|12.391|12.350|1.00| |conv::Conv::(GFLOPS=1.195, K=[9 x 9], IN={1, 32, 240, 320}, OCN=3, P=[4 x 4], BIAS, OCV/CPU)|91.074|87.893|1.04| |conv::Conv::(GFLOPS=1.196, K=[3 x 3], IN={1, 384, 26, 26}, OCN=256, P=[1 x 1], 
OCV/CPU)|5.903|5.903|1.00| |conv::Conv::(GFLOPS=1.210, K=[3 x 3], IN={1, 32, 256, 256}, OCN=32, PM=SAME, OCV/CPU)|6.890|6.794|1.01| |conv::Conv::(GFLOPS=1.245, K=[3 x 3], IN={1, 64, 75, 75}, OCN=192, PM=SAME, BIAS, OCV/CPU)|5.160|5.131|1.01| |conv::Conv::(GFLOPS=1.245, K=[3 x 3], IN={1, 96, 75, 100}, OCN=96, PM=SAME, BIAS, OCV/CPU)|4.970|5.036|0.99| |conv::Conv::(GFLOPS=1.248, K=[3 x 3], IN={1, 256, 46, 46}, OCN=128, P=[1 x 1], BIAS, OCV/CPU)|5.045|5.015|1.01| |conv::Conv::(GFLOPS=1.258, K=[3 x 3], IN={1, 1280, 10, 10}, OCN=546, PM=SAME, BIAS, OCV/CPU)|11.583|11.343|1.02| |conv::Conv::(GFLOPS=1.261, K=[3 x 3], IN={1, 192, 38, 50}, OCN=192, PM=SAME, BIAS, OCV/CPU)|5.348|5.320|1.01| |conv::Conv::(GFLOPS=1.416, K=[3 x 3], IN={1, 128, 62, 82}, OCN=128, BIAS, OCV/CPU)|5.357|5.396|0.99| |conv::Conv::(GFLOPS=1.500, K=[3 x 3], IN={1, 128, 64, 84}, OCN=128, BIAS, OCV/CPU)|6.050|6.006|1.01| |conv::Conv::(GFLOPS=1.586, K=[3 x 3], IN={1, 128, 66, 86}, OCN=128, BIAS, OCV/CPU)|5.952|5.953|1.00| |conv::Conv::(GFLOPS=1.595, K=[3 x 3], IN={1, 256, 26, 26}, OCN=512, P=[1 x 1], OCV/CPU)|8.014|8.014|1.00| |conv::Conv::(GFLOPS=1.595, K=[3 x 3], IN={1, 256, 52, 52}, OCN=512, S=[2 x 2], P=[1 x 1], OCV/CPU)|12.472|12.577|0.99| |conv::Conv::(GFLOPS=1.595, K=[3 x 3], IN={1, 512, 13, 13}, OCN=1024, P=[1 x 1], OCV/CPU)|10.803|10.655|1.01| |conv::Conv::(GFLOPS=1.595, K=[3 x 3], IN={1, 512, 26, 26}, OCN=1024, S=[2 x 2], P=[1 x 1], OCV/CPU)|18.429|13.405|1.37| |conv::Conv::(GFLOPS=1.596, K=[3 x 3], IN={1, 64, 104, 104}, OCN=128, P=[1 x 1], OCV/CPU)|6.659|6.647|1.00| |conv::Conv::(GFLOPS=1.596, K=[3 x 3], IN={1, 64, 208, 208}, OCN=128, S=[2 x 2], P=[1 x 1], OCV/CPU)|14.192|13.819|1.03| |conv::Conv::(GFLOPS=1.596, K=[3 x 3], IN={1, 128, 52, 52}, OCN=256, P=[1 x 1], OCV/CPU)|6.045|6.068|1.00| |conv::Conv::(GFLOPS=1.596, K=[3 x 3], IN={1, 128, 104, 104}, OCN=256, S=[2 x 2], P=[1 x 1], OCV/CPU)|12.742|12.828|0.99| |conv::Conv::(GFLOPS=1.598, K=[3 x 3], IN={1, 32, 208, 208}, OCN=64, P=[1 x 1], 
OCV/CPU)|8.046|7.773|1.04| |conv::Conv::(GFLOPS=1.598, K=[3 x 3], IN={1, 32, 416, 416}, OCN=64, S=[2 x 2], P=[1 x 1], OCV/CPU)|17.440|17.192|1.01| |conv::Conv::(GFLOPS=1.659, K=[3 x 3], IN={1, 960, 10, 10}, OCN=960, PM=SAME, OCV/CPU)|15.418|14.972|1.03| |conv::Conv::(GFLOPS=1.660, K=[3 x 3], IN={1, 128, 75, 75}, OCN=128, G=128, P=[1 x 1], BIAS, OCV/CPU)|0.430|0.430|1.00| |conv::Conv::(GFLOPS=1.660, K=[3 x 3], IN={1, 128, 75, 75}, OCN=128, PM=SAME, OCV/CPU)|6.692|6.663|1.00| |conv::Conv::(GFLOPS=1.675, K=[3 x 3], IN={1, 128, 68, 88}, OCN=128, BIAS, OCV/CPU)|6.350|6.347|1.00| |conv::Conv::(GFLOPS=1.704, K=[3 x 3], IN={1, 256, 38, 38}, OCN=256, G=256, P=[1 x 1], BIAS, OCV/CPU)|0.267|0.265|1.01| |conv::Conv::(GFLOPS=1.704, K=[3 x 3], IN={1, 256, 38, 38}, OCN=256, PM=SAME, OCV/CPU)|7.755|7.558|1.03| |conv::Conv::(GFLOPS=1.704, K=[3 x 3], IN={1, 512, 19, 19}, OCN=512, G=512, P=[1 x 1], BIAS, OCV/CPU)|0.203|0.202|1.00| |conv::Conv::(GFLOPS=1.704, K=[3 x 3], IN={1, 512, 19, 19}, OCN=512, P=[1 x 1], BIAS, OCV/CPU)|10.663|10.576|1.01| |conv::Conv::(GFLOPS=1.704, K=[3 x 3], IN={1, 512, 19, 19}, OCN=512, PM=SAME, OCV/CPU)|10.827|10.614|1.02| |conv::Conv::(GFLOPS=1.766, K=[3 x 3], IN={1, 128, 70, 90}, OCN=128, BIAS, OCV/CPU)|7.049|6.947|1.01| |conv::Conv::(GFLOPS=1.859, K=[3 x 3], IN={1, 128, 72, 92}, OCN=128, BIAS, OCV/CPU)|6.900|6.901|1.00| |conv::Conv::(GFLOPS=1.888, K=[3 x 3], IN={1, 1024, 10, 10}, OCN=1024, G=1024, P=[1 x 1], BIAS, OCV/CPU)|0.165|0.165|1.00| |conv::Conv::(GFLOPS=1.888, K=[3 x 3], IN={1, 1024, 10, 10}, OCN=1024, PM=SAME, OCV/CPU)|17.953|17.251|1.04| |conv::Conv::(GFLOPS=1.954, K=[3 x 3], IN={1, 128, 74, 94}, OCN=128, BIAS, OCV/CPU)|7.430|7.320|1.01| |conv::Conv::(GFLOPS=1.995, K=[9 x 9], IN={1, 3, 320, 400}, OCN=32, P=[4 x 4], BIAS, OCV/CPU)|22.187|21.705|1.02| |conv::Conv::(GFLOPS=2.052, K=[3 x 3], IN={1, 128, 76, 96}, OCN=128, BIAS, OCV/CPU)|8.349|8.126|1.03| |conv::Conv::(GFLOPS=2.100, K=[3 x 3], IN={1, 144, 75, 75}, OCN=144, PM=SAME, 
OCV/CPU)|8.273|8.297|1.00| |conv::Conv::(GFLOPS=2.153, K=[3 x 3], IN={1, 128, 78, 98}, OCN=128, BIAS, OCV/CPU)|8.169|8.094|1.01| |conv::Conv::(GFLOPS=2.156, K=[3 x 3], IN={1, 576, 19, 19}, OCN=576, PM=SAME, OCV/CPU)|13.602|13.359|1.02| |conv::Conv::(GFLOPS=2.255, K=[3 x 3], IN={1, 128, 80, 100}, OCN=128, BIAS, OCV/CPU)|8.633|8.584|1.01| |conv::Conv::(GFLOPS=2.719, K=[3 x 3], IN={1, 96, 256, 256}, OCN=96, S=[2 x 2], PM=SAME, OCV/CPU)|29.339|28.897|1.02| |conv::Conv::(GFLOPS=3.319, K=[3 x 3], IN={1, 128, 75, 75}, OCN=256, P=[1 x 1], BIAS, OCV/CPU)|13.000|12.920|1.01| |conv::Conv::(GFLOPS=3.321, K=[3 x 3], IN={1, 64, 150, 150}, OCN=128, P=[1 x 1], BIAS, OCV/CPU)|14.262|13.319|1.07| |conv::Conv::(GFLOPS=3.398, K=[7 x 7], IN={1, 128, 46, 46}, OCN=128, P=[3 x 3], BIAS, OCV/CPU)|27.453|27.253|1.01| |conv::Conv::(GFLOPS=3.407, K=[3 x 3], IN={1, 512, 19, 19}, OCN=1024, D=[6 x 6], P=[6 x 6], BIAS, OCV/CPU)|32.052|27.269|1.18| |conv::Conv::(GFLOPS=3.408, K=[3 x 3], IN={1, 256, 38, 38}, OCN=512, P=[1 x 1], BIAS, OCV/CPU)|15.363|15.208|1.01| |conv::Conv::(GFLOPS=4.247, K=[3 x 3], IN={1, 480, 32, 32}, OCN=480, PM=SAME, OCV/CPU)|18.543|18.434|1.01| |conv::Conv::(GFLOPS=4.247, K=[5 x 5], IN={1, 144, 128, 128}, OCN=144, S=[2 x 2], PM=SAME, OCV/CPU)|39.114|37.954|1.03| |conv::Conv::(GFLOPS=4.566, K=[7 x 7], IN={1, 172, 46, 46}, OCN=128, P=[3 x 3], BIAS, OCV/CPU)|36.271|36.972|0.98| |conv::Conv::(GFLOPS=4.993, K=[3 x 3], IN={1, 256, 46, 46}, OCN=512, P=[1 x 1], BIAS, OCV/CPU)|19.262|19.427|0.99| |conv::Conv::(GFLOPS=4.993, K=[3 x 3], IN={1, 512, 46, 46}, OCN=256, P=[1 x 1], BIAS, OCV/CPU)|19.298|19.349|1.00| |conv::Conv::(GFLOPS=4.994, K=[3 x 3], IN={1, 128, 92, 92}, OCN=256, P=[1 x 1], BIAS, OCV/CPU)|20.261|19.847|1.02| |conv::Conv::(GFLOPS=4.997, K=[3 x 3], IN={1, 64, 184, 184}, OCN=128, P=[1 x 1], BIAS, OCV/CPU)|21.867|21.525|1.02| |conv::Conv::(GFLOPS=5.780, K=[5 x 5], IN={1, 672, 32, 32}, OCN=672, S=[2 x 2], PM=SAME, OCV/CPU)|51.756|49.979|1.04| |conv::Conv::(GFLOPS=6.116, K=[3 
x 3], IN={1, 1152, 16, 16}, OCN=1152, PM=SAME, OCV/CPU)|28.133|27.060|1.04| |conv::Conv::(GFLOPS=6.118, K=[3 x 3], IN={1, 144, 128, 128}, OCN=144, PM=SAME, OCV/CPU)|25.035|24.980|1.00| |conv::Conv::(GFLOPS=6.637, K=[3 x 3], IN={1, 256, 75, 75}, OCN=256, P=[1 x 1], BIAS, OCV/CPU)|25.858|25.821|1.00| |conv::Conv::(GFLOPS=6.638, K=[3 x 3], IN={1, 128, 150, 150}, OCN=128, P=[1 x 1], BIAS, OCV/CPU)|27.313|27.149|1.01| |conv::Conv::(GFLOPS=6.641, K=[3 x 3], IN={1, 64, 150, 200}, OCN=192, PM=SAME, BIAS, OCV/CPU)|28.219|28.111|1.00| |conv::Conv::(GFLOPS=6.641, K=[3 x 3], IN={1, 64, 300, 300}, OCN=64, P=[1 x 1], BIAS, OCV/CPU)|46.025|46.674|0.99| |conv::Conv::(GFLOPS=6.814, K=[3 x 3], IN={1, 512, 38, 38}, OCN=512, P=[1 x 1], BIAS, OCV/CPU)|30.220|29.446|1.03| |conv::Conv::(GFLOPS=8.025, K=[3 x 3], IN={1, 1024, 19, 19}, OCN=1206, P=[1 x 1], BIAS, OCV/CPU)|49.410|48.708|1.01| |conv::Conv::(GFLOPS=9.986, K=[3 x 3], IN={1, 512, 46, 46}, OCN=512, P=[1 x 1], BIAS, OCV/CPU)|38.203|38.001|1.01| |conv::Conv::(GFLOPS=9.987, K=[3 x 3], IN={1, 256, 92, 92}, OCN=256, P=[1 x 1], BIAS, OCV/CPU)|39.961|39.021|1.02| |conv::Conv::(GFLOPS=9.989, K=[3 x 3], IN={1, 128, 184, 184}, OCN=128, P=[1 x 1], BIAS, OCV/CPU)|48.685|47.075|1.03| |conv::Conv::(GFLOPS=9.993, K=[3 x 3], IN={1, 64, 368, 368}, OCN=64, P=[1 x 1], BIAS, OCV/CPU)|75.114|72.586|1.03| |conv::Conv::(GFLOPS=10.087, K=[3 x 3], IN={1, 576, 38, 50}, OCN=512, PM=SAME, BIAS, OCV/CPU)|41.222|41.144|1.00| |conv::Conv::(GFLOPS=10.701, K=[3 x 3], IN={1, 512, 38, 38}, OCN=804, P=[1 x 1], BIAS, OCV/CPU)|46.220|46.353|1.00| |conv::Conv::(GFLOPS=11.797, K=[5 x 5], IN={1, 240, 64, 64}, OCN=240, PM=SAME, OCV/CPU)|98.201|98.771|0.99| |conv::Conv::(GFLOPS=11.797, K=[5 x 5], IN={1, 480, 32, 32}, OCN=480, PM=SAME, OCV/CPU)|100.106|96.971|1.03| |conv::Conv::(GFLOPS=16.987, K=[5 x 5], IN={1, 1152, 16, 16}, OCN=1152, PM=SAME, OCV/CPU)|146.977|140.445|1.05| |conv::Conv::(GFLOPS=23.122, K=[5 x 5], IN={1, 672, 32, 32}, OCN=672, PM=SAME, 
OCV/CPU)|198.618|194.665|1.02| #### Performance Test of ARM platform: apple M1, with `-perf_threas=1` Min (ms) |Name of Test|4.x|patch|4.x vs patch (x-factor)| |---|:-:|:-:|:-:| |conv1d::Conv1D::(GFLOPS=0.000, K=[3], IN={1, 2, 19}, OCN=2, G=2, S=2, P=(1, 1), BIAS, OCV/CPU)|0.001|0.001|1.07| |conv1d::Conv1D::(GFLOPS=0.000, K=[3], IN={1, 2, 25}, OCN=2, G=2, P=(2, 2), PM=SAME, OCV/CPU)|0.001|0.001|1.10| |conv1d::Conv1D::(GFLOPS=0.000, K=[3], IN={1, 6, 10}, OCN=6, PM=VALID, BIAS, OCV/CPU)|0.002|0.002|0.97| |conv3d::Conv3D::(GFLOPS=0.000, K=[1 x 1 x 1], IN={1, 4, 9, 10, 10}, OCN=4, S=[1 x 1 x 2], P=(1, 1) x (1, 1) x (1, 1), PM=VALID, OCV/CPU)|0.003|0.003|0.84| |conv3d::Conv3D::(GFLOPS=0.000, K=[1 x 1 x 1], IN={1, 8, 1, 10, 10}, OCN=8, G=8, P=(1, 1) x (1, 1) x (1, 1), BIAS, OCV/CPU)|0.009|0.009|1.00| |conv3d::Conv3D::(GFLOPS=0.000, K=[3 x 3 x 3], IN={1, 2, 19, 19, 19}, OCN=2, G=2, S=[2 x 2 x 2], P=(1, 1) x (1, 1) x (1, 1), BIAS, OCV/CPU)|0.027|0.030|0.90| |conv3d::Conv3D::(GFLOPS=0.000, K=[3 x 4 x 2], IN={1, 4, 8, 10, 10}, OCN=4, G=4, S=[1 x 2 x 1], BIAS, OCV/CPU)|0.008|0.007|1.07| |conv3d::Conv3D::(GFLOPS=0.001, K=[3 x 3 x 3], IN={1, 2, 25, 19, 19}, OCN=2, G=2, S=[1 x 2 x 2], P=(2, 2) x (2, 2) x (2, 2), PM=SAME, OCV/CPU)|0.066|0.072|0.91| |conv3d::Conv3D::(GFLOPS=0.002, K=[3 x 1 x 4], IN={1, 14, 5, 10, 10}, OCN=14, PM=SAME, OCV/CPU)|0.090|0.054|1.68| |conv3d::Conv3D::(GFLOPS=0.006, K=[5 x 5 x 5], IN={1, 4, 50, 19, 19}, OCN=4, S=[2 x 2 x 2], P=(1, 1) x (1, 1) x (1, 1), PM=VALID, OCV/CPU)|0.328|0.409|0.80| |conv3d::Conv3D::(GFLOPS=0.027, K=[3 x 3 x 3], IN={1, 6, 10, 38, 50}, OCN=6, PM=VALID, BIAS, OCV/CPU)|0.659|0.697|0.95| |conv3d::Conv3D::(GFLOPS=0.030, K=[5 x 5 x 5], IN={1, 6, 19, 19, 19}, OCN=6, G=2, OCV/CPU)|1.266|1.403|0.90| |conv3d::Conv3D::(GFLOPS=0.045, K=[7 x 7 x 7], IN={1, 2, 38, 38, 38}, OCN=2, S=[1 x 2 x 1], OCV/CPU)|3.550|4.145|0.86| |conv3d::Conv3D::(GFLOPS=0.053, K=[3 x 3 x 3], IN={1, 10, 98, 10, 10}, OCN=10, PM=SAME, OCV/CPU)|1.188|1.375|0.86| 
|conv3d::Conv3D::(GFLOPS=0.071, K=[7 x 7 x 7], IN={1, 6, 15, 19, 19}, OCN=6, S=[2 x 1 x 1], P=(3, 3) x (3, 3) x (3, 3), PM=SAME, BIAS, OCV/CPU)|2.683|3.236|0.83| |conv3d::Conv3D::(GFLOPS=0.093, K=[5 x 5 x 5], IN={1, 4, 40, 75, 75}, OCN=4, S=[2 x 2 x 2], OCV/CPU)|4.491|5.501|0.82| |conv3d::Conv3D::(GFLOPS=0.116, K=[5 x 5 x 5], IN={1, 2, 21, 75, 100}, OCN=2, BIAS, OCV/CPU)|8.916|10.181|0.88| |conv3d::Conv3D::(GFLOPS=1.267, K=[5 x 5 x 5], IN={1, 3, 75, 75, 100}, OCN=3, PM=SAME, BIAS, OCV/CPU)|69.995|72.296|0.97| |conv3d::Conv3D::(GFLOPS=1.343, K=[3 x 3 x 3], IN={1, 11, 9, 150, 200}, OCN=11, PM=VALID, BIAS, OCV/CPU)|22.531|23.139|0.97| |conv::Conv::(GFLOPS=0.177, K=[1 x 1], IN={1, 512, 26, 26}, OCN=256, OCV/CPU)|2.239|1.933|1.16| |conv::Conv::(GFLOPS=0.177, K=[1 x 1], IN={1, 512, 26, 26}, OCN=256, OCV/CPU_FP16)|-|1.010|-| |conv::Conv::(GFLOPS=0.177, K=[1 x 1], IN={1, 1024, 13, 13}, OCN=512, OCV/CPU)|3.134|2.068|1.52| |conv::Conv::(GFLOPS=0.177, K=[1 x 1], IN={1, 1024, 13, 13}, OCN=512, OCV/CPU_FP16)|-|1.062|-| |conv::Conv::(GFLOPS=0.178, K=[1 x 1], IN={1, 256, 52, 52}, OCN=128, OCV/CPU)|1.918|1.920|1.00| |conv::Conv::(GFLOPS=0.178, K=[1 x 1], IN={1, 256, 52, 52}, OCN=128, OCV/CPU_FP16)|-|1.014|-| |conv::Conv::(GFLOPS=0.210, K=[1 x 1], IN={1, 576, 38, 50}, OCN=96, PM=SAME, BIAS, OCV/CPU)|2.340|2.352|0.99| |conv::Conv::(GFLOPS=0.210, K=[1 x 1], IN={1, 576, 38, 50}, OCN=96, PM=SAME, BIAS, OCV/CPU_FP16)|-|1.247|-| |conv::Conv::(GFLOPS=0.231, K=[3 x 3], IN={1, 128, 56, 56}, OCN=32, P=[1 x 1], OCV/CPU)|1.116|1.111|1.00| |conv::Conv::(GFLOPS=0.231, K=[3 x 3], IN={1, 128, 56, 56}, OCN=32, P=[1 x 1], OCV/CPU_FP16)|-|1.114|-| |conv::Conv::(GFLOPS=0.231, K=[3 x 3], IN={1, 256, 14, 14}, OCN=256, P=[1 x 1], OCV/CPU)|1.116|1.112|1.00| |conv::Conv::(GFLOPS=0.231, K=[3 x 3], IN={1, 256, 14, 14}, OCN=256, P=[1 x 1], OCV/CPU_FP16)|-|1.113|-| |conv::Conv::(GFLOPS=0.280, K=[1 x 1], IN={1, 576, 38, 50}, OCN=128, PM=SAME, BIAS, OCV/CPU)|3.067|3.085|0.99| |conv::Conv::(GFLOPS=0.280, K=[1 x 
1], IN={1, 576, 38, 50}, OCN=128, PM=SAME, BIAS, OCV/CPU_FP16)|-|1.622|-| |conv::Conv::(GFLOPS=0.302, K=[3 x 3], IN={1, 64, 64, 64}, OCN=64, PM=SAME, OCV/CPU)|1.153|1.187|0.97| |conv::Conv::(GFLOPS=0.302, K=[3 x 3], IN={1, 64, 64, 64}, OCN=64, PM=SAME, OCV/CPU_FP16)|-|1.150|-| |conv::Conv::(GFLOPS=0.357, K=[1 x 1], IN={1, 64, 208, 208}, OCN=64, OCV/CPU)|4.804|4.849|0.99| |conv::Conv::(GFLOPS=0.357, K=[1 x 1], IN={1, 64, 208, 208}, OCN=64, OCV/CPU_FP16)|-|2.922|-| |conv::Conv::(GFLOPS=0.420, K=[3 x 3], IN={1, 96, 38, 50}, OCN=128, PM=SAME, BIAS, OCV/CPU)|1.463|1.469|1.00| |conv::Conv::(GFLOPS=0.420, K=[3 x 3], IN={1, 96, 38, 50}, OCN=128, PM=SAME, BIAS, OCV/CPU_FP16)|-|1.459|-| |conv::Conv::(GFLOPS=0.472, K=[3 x 3], IN={1, 128, 40, 40}, OCN=128, PM=SAME, OCV/CPU)|1.577|1.580|1.00| |conv::Conv::(GFLOPS=0.472, K=[3 x 3], IN={1, 128, 40, 40}, OCN=128, PM=SAME, OCV/CPU_FP16)|-|1.580|-| |conv::Conv::(GFLOPS=0.472, K=[3 x 3], IN={1, 256, 20, 20}, OCN=256, PM=SAME, OCV/CPU)|1.826|1.818|1.00| |conv::Conv::(GFLOPS=0.472, K=[3 x 3], IN={1, 256, 20, 20}, OCN=256, PM=SAME, OCV/CPU_FP16)|-|1.817|-| |conv::Conv::(GFLOPS=0.472, K=[3 x 3], IN={1, 512, 10, 10}, OCN=512, PM=SAME, OCV/CPU)|6.541|5.081|1.29| |conv::Conv::(GFLOPS=0.472, K=[3 x 3], IN={1, 512, 10, 10}, OCN=512, PM=SAME, OCV/CPU_FP16)|-|2.809|-| |conv::Conv::(GFLOPS=0.561, K=[3 x 3], IN={1, 128, 38, 50}, OCN=128, PM=SAME, BIAS, OCV/CPU)|1.912|1.919|1.00| |conv::Conv::(GFLOPS=0.561, K=[3 x 3], IN={1, 128, 38, 50}, OCN=128, PM=SAME, BIAS, OCV/CPU_FP16)|-|1.919|-| |conv::Conv::(GFLOPS=0.624, K=[3 x 3], IN={1, 128, 46, 46}, OCN=128, P=[1 x 1], BIAS, OCV/CPU)|1.961|1.971|0.99| |conv::Conv::(GFLOPS=0.624, K=[3 x 3], IN={1, 128, 46, 46}, OCN=128, P=[1 x 1], BIAS, OCV/CPU_FP16)|-|1.961|-| |conv::Conv::(GFLOPS=0.701, K=[3 x 3], IN={1, 128, 38, 50}, OCN=160, PM=SAME, BIAS, OCV/CPU)|2.317|2.329|0.99| |conv::Conv::(GFLOPS=0.701, K=[3 x 3], IN={1, 128, 38, 50}, OCN=160, PM=SAME, BIAS, OCV/CPU_FP16)|-|2.322|-| 
|conv::Conv::(GFLOPS=0.798, K=[3 x 3], IN={1, 64, 104, 104}, OCN=64, P=[1 x 1], OCV/CPU)|2.920|2.947|0.99| |conv::Conv::(GFLOPS=0.798, K=[3 x 3], IN={1, 64, 104, 104}, OCN=64, P=[1 x 1], OCV/CPU_FP16)|-|2.924|-| |conv::Conv::(GFLOPS=0.798, K=[3 x 3], IN={1, 128, 52, 52}, OCN=128, P=[1 x 1], OCV/CPU)|2.467|2.466|1.00| |conv::Conv::(GFLOPS=0.798, K=[3 x 3], IN={1, 128, 52, 52}, OCN=128, P=[1 x 1], OCV/CPU_FP16)|-|2.496|-| |conv::Conv::(GFLOPS=0.798, K=[3 x 3], IN={1, 256, 26, 26}, OCN=256, P=[1 x 1], OCV/CPU)|3.028|2.997|1.01| |conv::Conv::(GFLOPS=0.798, K=[3 x 3], IN={1, 256, 26, 26}, OCN=256, P=[1 x 1], OCV/CPU_FP16)|-|2.986|-| |conv::Conv::(GFLOPS=0.798, K=[3 x 3], IN={1, 512, 13, 13}, OCN=512, P=[1 x 1], OCV/CPU)|4.353|4.355|1.00| |conv::Conv::(GFLOPS=0.798, K=[3 x 3], IN={1, 512, 13, 13}, OCN=512, P=[1 x 1], OCV/CPU_FP16)|-|4.355|-| |conv::Conv::(GFLOPS=0.830, K=[3 x 3], IN={1, 64, 75, 100}, OCN=96, PM=SAME, BIAS, OCV/CPU)|2.762|2.793|0.99| |conv::Conv::(GFLOPS=0.830, K=[3 x 3], IN={1, 64, 75, 100}, OCN=96, PM=SAME, BIAS, OCV/CPU_FP16)|-|2.797|-| |conv::Conv::(GFLOPS=0.958, K=[3 x 3], IN={1, 192, 38, 38}, OCN=192, PM=SAME, OCV/CPU)|3.428|3.226|1.06| |conv::Conv::(GFLOPS=0.958, K=[3 x 3], IN={1, 192, 38, 38}, OCN=192, PM=SAME, OCV/CPU_FP16)|-|3.223|-| |conv::Conv::(GFLOPS=0.958, K=[3 x 3], IN={1, 384, 19, 19}, OCN=384, PM=SAME, OCV/CPU)|3.967|3.957|1.00| |conv::Conv::(GFLOPS=0.958, K=[3 x 3], IN={1, 384, 19, 19}, OCN=384, PM=SAME, OCV/CPU_FP16)|-|3.960|-| |conv::Conv::(GFLOPS=1.022, K=[3 x 3], IN={1, 576, 19, 19}, OCN=273, PM=SAME, BIAS, OCV/CPU)|4.806|4.387|1.10| |conv::Conv::(GFLOPS=1.022, K=[3 x 3], IN={1, 576, 19, 19}, OCN=273, PM=SAME, BIAS, OCV/CPU_FP16)|-|4.366|-| |conv::Conv::(GFLOPS=1.112, K=[3 x 3], IN={1, 512, 10, 10}, OCN=1206, P=[1 x 1], BIAS, OCV/CPU)|14.509|11.756|1.23| |conv::Conv::(GFLOPS=1.112, K=[3 x 3], IN={1, 512, 10, 10}, OCN=1206, P=[1 x 1], BIAS, OCV/CPU_FP16)|-|6.510|-| |conv::Conv::(GFLOPS=1.181, K=[3 x 3], IN={1, 64, 160, 200}, OCN=128, 
S=[2 x 2], P=[1 x 1], BIAS, OCV/CPU)|13.718|13.287|1.03| |conv::Conv::(GFLOPS=1.181, K=[3 x 3], IN={1, 64, 160, 200}, OCN=128, S=[2 x 2], P=[1 x 1], BIAS, OCV/CPU_FP16)|-|7.190|-| |conv::Conv::(GFLOPS=1.182, K=[3 x 3], IN={1, 32, 320, 400}, OCN=64, S=[2 x 2], P=[1 x 1], BIAS, OCV/CPU)|15.133|14.853|1.02| |conv::Conv::(GFLOPS=1.182, K=[3 x 3], IN={1, 32, 320, 400}, OCN=64, S=[2 x 2], P=[1 x 1], BIAS, OCV/CPU_FP16)|-|8.671|-| |conv::Conv::(GFLOPS=1.195, K=[9 x 9], IN={1, 32, 240, 320}, OCN=3, P=[4 x 4], BIAS, OCV/CPU)|41.928|43.328|0.97| |conv::Conv::(GFLOPS=1.195, K=[9 x 9], IN={1, 32, 240, 320}, OCN=3, P=[4 x 4], BIAS, OCV/CPU_FP16)|-|38.072|-| |conv::Conv::(GFLOPS=1.196, K=[3 x 3], IN={1, 384, 26, 26}, OCN=256, P=[1 x 1], OCV/CPU)|4.409|4.428|1.00| |conv::Conv::(GFLOPS=1.196, K=[3 x 3], IN={1, 384, 26, 26}, OCN=256, P=[1 x 1], OCV/CPU_FP16)|-|4.427|-| |conv::Conv::(GFLOPS=1.210, K=[3 x 3], IN={1, 32, 256, 256}, OCN=32, PM=SAME, OCV/CPU)|6.144|5.363|1.15| |conv::Conv::(GFLOPS=1.210, K=[3 x 3], IN={1, 32, 256, 256}, OCN=32, PM=SAME, OCV/CPU_FP16)|-|5.368|-| |conv::Conv::(GFLOPS=1.245, K=[3 x 3], IN={1, 64, 75, 75}, OCN=192, PM=SAME, BIAS, OCV/CPU)|3.926|3.932|1.00| |conv::Conv::(GFLOPS=1.245, K=[3 x 3], IN={1, 64, 75, 75}, OCN=192, PM=SAME, BIAS, OCV/CPU_FP16)|-|3.938|-| |conv::Conv::(GFLOPS=1.245, K=[3 x 3], IN={1, 96, 75, 100}, OCN=96, PM=SAME, BIAS, OCV/CPU)|3.920|3.915|1.00| |conv::Conv::(GFLOPS=1.245, K=[3 x 3], IN={1, 96, 75, 100}, OCN=96, PM=SAME, BIAS, OCV/CPU_FP16)|-|3.950|-| |conv::Conv::(GFLOPS=1.248, K=[3 x 3], IN={1, 256, 46, 46}, OCN=128, P=[1 x 1], BIAS, OCV/CPU)|3.767|3.764|1.00| |conv::Conv::(GFLOPS=1.248, K=[3 x 3], IN={1, 256, 46, 46}, OCN=128, P=[1 x 1], BIAS, OCV/CPU_FP16)|-|3.762|-| |conv::Conv::(GFLOPS=1.258, K=[3 x 3], IN={1, 1280, 10, 10}, OCN=546, PM=SAME, BIAS, OCV/CPU)|19.959|13.875|1.44| |conv::Conv::(GFLOPS=1.258, K=[3 x 3], IN={1, 1280, 10, 10}, OCN=546, PM=SAME, BIAS, OCV/CPU_FP16)|-|7.781|-| |conv::Conv::(GFLOPS=1.261, K=[3 x 3], 
IN={1, 192, 38, 50}, OCN=192, PM=SAME, BIAS, OCV/CPU)|3.951|3.955|1.00| |conv::Conv::(GFLOPS=1.261, K=[3 x 3], IN={1, 192, 38, 50}, OCN=192, PM=SAME, BIAS, OCV/CPU_FP16)|-|3.969|-| |conv::Conv::(GFLOPS=1.416, K=[3 x 3], IN={1, 128, 62, 82}, OCN=128, BIAS, OCV/CPU)|4.050|4.034|1.00| |conv::Conv::(GFLOPS=1.416, K=[3 x 3], IN={1, 128, 62, 82}, OCN=128, BIAS, OCV/CPU_FP16)|-|4.093|-| |conv::Conv::(GFLOPS=1.500, K=[3 x 3], IN={1, 128, 64, 84}, OCN=128, BIAS, OCV/CPU)|4.923|4.506|1.09| |conv::Conv::(GFLOPS=1.500, K=[3 x 3], IN={1, 128, 64, 84}, OCN=128, BIAS, OCV/CPU_FP16)|-|4.509|-| |conv::Conv::(GFLOPS=1.586, K=[3 x 3], IN={1, 128, 66, 86}, OCN=128, BIAS, OCV/CPU)|4.759|4.476|1.06| |conv::Conv::(GFLOPS=1.586, K=[3 x 3], IN={1, 128, 66, 86}, OCN=128, BIAS, OCV/CPU_FP16)|-|4.447|-| |conv::Conv::(GFLOPS=1.595, K=[3 x 3], IN={1, 256, 26, 26}, OCN=512, P=[1 x 1], OCV/CPU)|6.079|5.628|1.08| |conv::Conv::(GFLOPS=1.595, K=[3 x 3], IN={1, 256, 26, 26}, OCN=512, P=[1 x 1], OCV/CPU_FP16)|-|5.625|-| |conv::Conv::(GFLOPS=1.595, K=[3 x 3], IN={1, 256, 52, 52}, OCN=512, S=[2 x 2], P=[1 x 1], OCV/CPU)|19.843|17.523|1.13| |conv::Conv::(GFLOPS=1.595, K=[3 x 3], IN={1, 256, 52, 52}, OCN=512, S=[2 x 2], P=[1 x 1], OCV/CPU_FP16)|-|8.917|-| |conv::Conv::(GFLOPS=1.595, K=[3 x 3], IN={1, 512, 13, 13}, OCN=1024, P=[1 x 1], OCV/CPU)|8.334|8.247|1.01| |conv::Conv::(GFLOPS=1.595, K=[3 x 3], IN={1, 512, 13, 13}, OCN=1024, P=[1 x 1], OCV/CPU_FP16)|-|8.246|-| |conv::Conv::(GFLOPS=1.595, K=[3 x 3], IN={1, 512, 26, 26}, OCN=1024, S=[2 x 2], P=[1 x 1], OCV/CPU)|23.164|18.199|1.27| |conv::Conv::(GFLOPS=1.595, K=[3 x 3], IN={1, 512, 26, 26}, OCN=1024, S=[2 x 2], P=[1 x 1], OCV/CPU_FP16)|-|9.305|-| |conv::Conv::(GFLOPS=1.596, K=[3 x 3], IN={1, 64, 104, 104}, OCN=128, P=[1 x 1], OCV/CPU)|5.184|5.178|1.00| |conv::Conv::(GFLOPS=1.596, K=[3 x 3], IN={1, 64, 104, 104}, OCN=128, P=[1 x 1], OCV/CPU_FP16)|-|5.149|-| |conv::Conv::(GFLOPS=1.596, K=[3 x 3], IN={1, 64, 208, 208}, OCN=128, S=[2 x 2], P=[1 x 1], 
OCV/CPU)|17.990|18.103|0.99| |conv::Conv::(GFLOPS=1.596, K=[3 x 3], IN={1, 64, 208, 208}, OCN=128, S=[2 x 2], P=[1 x 1], OCV/CPU_FP16)|-|9.777|-| |conv::Conv::(GFLOPS=1.596, K=[3 x 3], IN={1, 128, 52, 52}, OCN=256, P=[1 x 1], OCV/CPU)|4.831|4.522|1.07| |conv::Conv::(GFLOPS=1.596, K=[3 x 3], IN={1, 128, 52, 52}, OCN=256, P=[1 x 1], OCV/CPU_FP16)|-|4.523|-| |conv::Conv::(GFLOPS=1.596, K=[3 x 3], IN={1, 128, 104, 104}, OCN=256, S=[2 x 2], P=[1 x 1], OCV/CPU)|17.328|17.319|1.00| |conv::Conv::(GFLOPS=1.596, K=[3 x 3], IN={1, 128, 104, 104}, OCN=256, S=[2 x 2], P=[1 x 1], OCV/CPU_FP16)|-|8.948|-| |conv::Conv::(GFLOPS=1.598, K=[3 x 3], IN={1, 32, 208, 208}, OCN=64, P=[1 x 1], OCV/CPU)|5.944|5.961|1.00| |conv::Conv::(GFLOPS=1.598, K=[3 x 3], IN={1, 32, 208, 208}, OCN=64, P=[1 x 1], OCV/CPU_FP16)|-|5.936|-| |conv::Conv::(GFLOPS=1.598, K=[3 x 3], IN={1, 32, 416, 416}, OCN=64, S=[2 x 2], P=[1 x 1], OCV/CPU)|19.811|20.064|0.99| |conv::Conv::(GFLOPS=1.598, K=[3 x 3], IN={1, 32, 416, 416}, OCN=64, S=[2 x 2], P=[1 x 1], OCV/CPU_FP16)|-|11.705|-| |conv::Conv::(GFLOPS=1.659, K=[3 x 3], IN={1, 960, 10, 10}, OCN=960, PM=SAME, OCV/CPU)|22.398|17.686|1.27| |conv::Conv::(GFLOPS=1.659, K=[3 x 3], IN={1, 960, 10, 10}, OCN=960, PM=SAME, OCV/CPU_FP16)|-|9.859|-| |conv::Conv::(GFLOPS=1.660, K=[3 x 3], IN={1, 128, 75, 75}, OCN=128, G=128, P=[1 x 1], BIAS, OCV/CPU)|0.416|0.416|1.00| |conv::Conv::(GFLOPS=1.660, K=[3 x 3], IN={1, 128, 75, 75}, OCN=128, G=128, P=[1 x 1], BIAS, OCV/CPU_FP16)|-|0.417|-| |conv::Conv::(GFLOPS=1.660, K=[3 x 3], IN={1, 128, 75, 75}, OCN=128, PM=SAME, OCV/CPU)|5.356|5.110|1.05| |conv::Conv::(GFLOPS=1.660, K=[3 x 3], IN={1, 128, 75, 75}, OCN=128, PM=SAME, OCV/CPU_FP16)|-|5.114|-| |conv::Conv::(GFLOPS=1.675, K=[3 x 3], IN={1, 128, 68, 88}, OCN=128, BIAS, OCV/CPU)|5.092|4.748|1.07| |conv::Conv::(GFLOPS=1.675, K=[3 x 3], IN={1, 128, 68, 88}, OCN=128, BIAS, OCV/CPU_FP16)|-|4.754|-| |conv::Conv::(GFLOPS=1.704, K=[3 x 3], IN={1, 256, 38, 38}, OCN=256, G=256, P=[1 x 1], BIAS, 
OCV/CPU)|0.260|0.229|1.13| |conv::Conv::(GFLOPS=1.704, K=[3 x 3], IN={1, 256, 38, 38}, OCN=256, G=256, P=[1 x 1], BIAS, OCV/CPU_FP16)|-|0.229|-| |conv::Conv::(GFLOPS=1.704, K=[3 x 3], IN={1, 256, 38, 38}, OCN=256, PM=SAME, OCV/CPU)|5.872|5.460|1.08| |conv::Conv::(GFLOPS=1.704, K=[3 x 3], IN={1, 256, 38, 38}, OCN=256, PM=SAME, OCV/CPU_FP16)|-|5.460|-| |conv::Conv::(GFLOPS=1.704, K=[3 x 3], IN={1, 512, 19, 19}, OCN=512, G=512, P=[1 x 1], BIAS, OCV/CPU)|0.161|0.161|1.00| |conv::Conv::(GFLOPS=1.704, K=[3 x 3], IN={1, 512, 19, 19}, OCN=512, G=512, P=[1 x 1], BIAS, OCV/CPU_FP16)|-|0.161|-| |conv::Conv::(GFLOPS=1.704, K=[3 x 3], IN={1, 512, 19, 19}, OCN=512, P=[1 x 1], BIAS, OCV/CPU)|7.176|7.175|1.00| |conv::Conv::(GFLOPS=1.704, K=[3 x 3], IN={1, 512, 19, 19}, OCN=512, P=[1 x 1], BIAS, OCV/CPU_FP16)|-|7.162|-| |conv::Conv::(GFLOPS=1.704, K=[3 x 3], IN={1, 512, 19, 19}, OCN=512, PM=SAME, OCV/CPU)|7.174|7.185|1.00| |conv::Conv::(GFLOPS=1.704, K=[3 x 3], IN={1, 512, 19, 19}, OCN=512, PM=SAME, OCV/CPU_FP16)|-|7.157|-| |conv::Conv::(GFLOPS=1.766, K=[3 x 3], IN={1, 128, 70, 90}, OCN=128, BIAS, OCV/CPU)|5.400|5.180|1.04| |conv::Conv::(GFLOPS=1.766, K=[3 x 3], IN={1, 128, 70, 90}, OCN=128, BIAS, OCV/CPU_FP16)|-|5.201|-| |conv::Conv::(GFLOPS=1.859, K=[3 x 3], IN={1, 128, 72, 92}, OCN=128, BIAS, OCV/CPU)|5.330|5.188|1.03| |conv::Conv::(GFLOPS=1.859, K=[3 x 3], IN={1, 128, 72, 92}, OCN=128, BIAS, OCV/CPU_FP16)|-|5.177|-| |conv::Conv::(GFLOPS=1.888, K=[3 x 3], IN={1, 1024, 10, 10}, OCN=1024, G=1024, P=[1 x 1], BIAS, OCV/CPU)|0.115|0.115|1.00| |conv::Conv::(GFLOPS=1.888, K=[3 x 3], IN={1, 1024, 10, 10}, OCN=1024, G=1024, P=[1 x 1], BIAS, OCV/CPU_FP16)|-|0.115|-| |conv::Conv::(GFLOPS=1.888, K=[3 x 3], IN={1, 1024, 10, 10}, OCN=1024, PM=SAME, OCV/CPU)|26.156|20.222|1.29| |conv::Conv::(GFLOPS=1.888, K=[3 x 3], IN={1, 1024, 10, 10}, OCN=1024, PM=SAME, OCV/CPU_FP16)|-|11.203|-| |conv::Conv::(GFLOPS=1.954, K=[3 x 3], IN={1, 128, 74, 94}, OCN=128, BIAS, OCV/CPU)|5.627|5.543|1.02| 
|conv::Conv::(GFLOPS=1.954, K=[3 x 3], IN={1, 128, 74, 94}, OCN=128, BIAS, OCV/CPU_FP16)|-|5.506|-| |conv::Conv::(GFLOPS=1.995, K=[9 x 9], IN={1, 3, 320, 400}, OCN=32, P=[4 x 4], BIAS, OCV/CPU)|27.925|27.741|1.01| |conv::Conv::(GFLOPS=1.995, K=[9 x 9], IN={1, 3, 320, 400}, OCN=32, P=[4 x 4], BIAS, OCV/CPU_FP16)|-|17.217|-| |conv::Conv::(GFLOPS=2.052, K=[3 x 3], IN={1, 128, 76, 96}, OCN=128, BIAS, OCV/CPU)|6.359|6.062|1.05| |conv::Conv::(GFLOPS=2.052, K=[3 x 3], IN={1, 128, 76, 96}, OCN=128, BIAS, OCV/CPU_FP16)|-|6.048|-| |conv::Conv::(GFLOPS=2.100, K=[3 x 3], IN={1, 144, 75, 75}, OCN=144, PM=SAME, OCV/CPU)|6.559|6.322|1.04| |conv::Conv::(GFLOPS=2.100, K=[3 x 3], IN={1, 144, 75, 75}, OCN=144, PM=SAME, OCV/CPU_FP16)|-|6.280|-| |conv::Conv::(GFLOPS=2.153, K=[3 x 3], IN={1, 128, 78, 98}, OCN=128, BIAS, OCV/CPU)|6.412|6.200|1.03| |conv::Conv::(GFLOPS=2.153, K=[3 x 3], IN={1, 128, 78, 98}, OCN=128, BIAS, OCV/CPU_FP16)|-|6.197|-| |conv::Conv::(GFLOPS=2.156, K=[3 x 3], IN={1, 576, 19, 19}, OCN=576, PM=SAME, OCV/CPU)|9.167|8.624|1.06| |conv::Conv::(GFLOPS=2.156, K=[3 x 3], IN={1, 576, 19, 19}, OCN=576, PM=SAME, OCV/CPU_FP16)|-|8.626|-| |conv::Conv::(GFLOPS=2.255, K=[3 x 3], IN={1, 128, 80, 100}, OCN=128, BIAS, OCV/CPU)|6.755|6.491|1.04| |conv::Conv::(GFLOPS=2.255, K=[3 x 3], IN={1, 128, 80, 100}, OCN=128, BIAS, OCV/CPU_FP16)|-|6.520|-| |conv::Conv::(GFLOPS=2.719, K=[3 x 3], IN={1, 96, 256, 256}, OCN=96, S=[2 x 2], PM=SAME, OCV/CPU)|35.664|34.752|1.03| |conv::Conv::(GFLOPS=2.719, K=[3 x 3], IN={1, 96, 256, 256}, OCN=96, S=[2 x 2], PM=SAME, OCV/CPU_FP16)|-|20.260|-| |conv::Conv::(GFLOPS=3.319, K=[3 x 3], IN={1, 128, 75, 75}, OCN=256, P=[1 x 1], BIAS, OCV/CPU)|9.514|9.414|1.01| |conv::Conv::(GFLOPS=3.319, K=[3 x 3], IN={1, 128, 75, 75}, OCN=256, P=[1 x 1], BIAS, OCV/CPU_FP16)|-|9.462|-| |conv::Conv::(GFLOPS=3.321, K=[3 x 3], IN={1, 64, 150, 150}, OCN=128, P=[1 x 1], BIAS, OCV/CPU)|10.631|9.963|1.07| |conv::Conv::(GFLOPS=3.321, K=[3 x 3], IN={1, 64, 150, 150}, OCN=128, P=[1 x 
1], BIAS, OCV/CPU_FP16)|-|9.935|-| |conv::Conv::(GFLOPS=3.398, K=[7 x 7], IN={1, 128, 46, 46}, OCN=128, P=[3 x 3], BIAS, OCV/CPU)|37.465|36.798|1.02| |conv::Conv::(GFLOPS=3.398, K=[7 x 7], IN={1, 128, 46, 46}, OCN=128, P=[3 x 3], BIAS, OCV/CPU_FP16)|-|19.569|-| |conv::Conv::(GFLOPS=3.407, K=[3 x 3], IN={1, 512, 19, 19}, OCN=1024, D=[6 x 6], P=[6 x 6], BIAS, OCV/CPU)|38.157|36.157|1.06| |conv::Conv::(GFLOPS=3.407, K=[3 x 3], IN={1, 512, 19, 19}, OCN=1024, D=[6 x 6], P=[6 x 6], BIAS, OCV/CPU_FP16)|-|18.902|-| |conv::Conv::(GFLOPS=3.408, K=[3 x 3], IN={1, 256, 38, 38}, OCN=512, P=[1 x 1], BIAS, OCV/CPU)|10.356|10.401|1.00| |conv::Conv::(GFLOPS=3.408, K=[3 x 3], IN={1, 256, 38, 38}, OCN=512, P=[1 x 1], BIAS, OCV/CPU_FP16)|-|10.360|-| |conv::Conv::(GFLOPS=4.247, K=[3 x 3], IN={1, 480, 32, 32}, OCN=480, PM=SAME, OCV/CPU)|12.641|12.150|1.04| |conv::Conv::(GFLOPS=4.247, K=[3 x 3], IN={1, 480, 32, 32}, OCN=480, PM=SAME, OCV/CPU_FP16)|-|12.162|-| |conv::Conv::(GFLOPS=4.247, K=[5 x 5], IN={1, 144, 128, 128}, OCN=144, S=[2 x 2], PM=SAME, OCV/CPU)|50.545|50.505|1.00| |conv::Conv::(GFLOPS=4.247, K=[5 x 5], IN={1, 144, 128, 128}, OCN=144, S=[2 x 2], PM=SAME, OCV/CPU_FP16)|-|27.950|-| |conv::Conv::(GFLOPS=4.566, K=[7 x 7], IN={1, 172, 46, 46}, OCN=128, P=[3 x 3], BIAS, OCV/CPU)|54.233|49.603|1.09| |conv::Conv::(GFLOPS=4.566, K=[7 x 7], IN={1, 172, 46, 46}, OCN=128, P=[3 x 3], BIAS, OCV/CPU_FP16)|-|26.515|-| |conv::Conv::(GFLOPS=4.993, K=[3 x 3], IN={1, 256, 46, 46}, OCN=512, P=[1 x 1], BIAS, OCV/CPU)|13.779|12.968|1.06| |conv::Conv::(GFLOPS=4.993, K=[3 x 3], IN={1, 256, 46, 46}, OCN=512, P=[1 x 1], BIAS, OCV/CPU_FP16)|-|12.984|-| |conv::Conv::(GFLOPS=4.993, K=[3 x 3], IN={1, 512, 46, 46}, OCN=256, P=[1 x 1], BIAS, OCV/CPU)|15.809|15.329|1.03| |conv::Conv::(GFLOPS=4.993, K=[3 x 3], IN={1, 512, 46, 46}, OCN=256, P=[1 x 1], BIAS, OCV/CPU_FP16)|-|15.433|-| |conv::Conv::(GFLOPS=4.994, K=[3 x 3], IN={1, 128, 92, 92}, OCN=256, P=[1 x 1], BIAS, OCV/CPU)|14.563|14.527|1.00| 
|conv::Conv::(GFLOPS=4.994, K=[3 x 3], IN={1, 128, 92, 92}, OCN=256, P=[1 x 1], BIAS, OCV/CPU_FP16)|-|14.480|-| |conv::Conv::(GFLOPS=4.997, K=[3 x 3], IN={1, 64, 184, 184}, OCN=128, P=[1 x 1], BIAS, OCV/CPU)|16.714|16.484|1.01| |conv::Conv::(GFLOPS=4.997, K=[3 x 3], IN={1, 64, 184, 184}, OCN=128, P=[1 x 1], BIAS, OCV/CPU_FP16)|-|16.362|-| |conv::Conv::(GFLOPS=5.780, K=[5 x 5], IN={1, 672, 32, 32}, OCN=672, S=[2 x 2], PM=SAME, OCV/CPU)|77.832|65.729|1.18| |conv::Conv::(GFLOPS=5.780, K=[5 x 5], IN={1, 672, 32, 32}, OCN=672, S=[2 x 2], PM=SAME, OCV/CPU_FP16)|-|32.065|-| |conv::Conv::(GFLOPS=6.116, K=[3 x 3], IN={1, 1152, 16, 16}, OCN=1152, PM=SAME, OCV/CPU)|21.903|20.386|1.07| |conv::Conv::(GFLOPS=6.116, K=[3 x 3], IN={1, 1152, 16, 16}, OCN=1152, PM=SAME, OCV/CPU_FP16)|-|20.416|-| |conv::Conv::(GFLOPS=6.118, K=[3 x 3], IN={1, 144, 128, 128}, OCN=144, PM=SAME, OCV/CPU)|20.405|18.148|1.12| |conv::Conv::(GFLOPS=6.118, K=[3 x 3], IN={1, 144, 128, 128}, OCN=144, PM=SAME, OCV/CPU_FP16)|-|18.128|-| |conv::Conv::(GFLOPS=6.637, K=[3 x 3], IN={1, 256, 75, 75}, OCN=256, P=[1 x 1], BIAS, OCV/CPU)|20.334|18.521|1.10| |conv::Conv::(GFLOPS=6.637, K=[3 x 3], IN={1, 256, 75, 75}, OCN=256, P=[1 x 1], BIAS, OCV/CPU_FP16)|-|18.495|-| |conv::Conv::(GFLOPS=6.638, K=[3 x 3], IN={1, 128, 150, 150}, OCN=128, P=[1 x 1], BIAS, OCV/CPU)|21.527|19.584|1.10| |conv::Conv::(GFLOPS=6.638, K=[3 x 3], IN={1, 128, 150, 150}, OCN=128, P=[1 x 1], BIAS, OCV/CPU_FP16)|-|19.630|-| |conv::Conv::(GFLOPS=6.641, K=[3 x 3], IN={1, 64, 150, 200}, OCN=192, PM=SAME, BIAS, OCV/CPU)|22.715|20.057|1.13| |conv::Conv::(GFLOPS=6.641, K=[3 x 3], IN={1, 64, 150, 200}, OCN=192, PM=SAME, BIAS, OCV/CPU_FP16)|-|20.068|-| |conv::Conv::(GFLOPS=6.641, K=[3 x 3], IN={1, 64, 300, 300}, OCN=64, P=[1 x 1], BIAS, OCV/CPU)|26.228|24.992|1.05| |conv::Conv::(GFLOPS=6.641, K=[3 x 3], IN={1, 64, 300, 300}, OCN=64, P=[1 x 1], BIAS, OCV/CPU_FP16)|-|24.957|-| |conv::Conv::(GFLOPS=6.814, K=[3 x 3], IN={1, 512, 38, 38}, OCN=512, P=[1 x 1], BIAS, 
OCV/CPU)|21.524|21.581|1.00| |conv::Conv::(GFLOPS=6.814, K=[3 x 3], IN={1, 512, 38, 38}, OCN=512, P=[1 x 1], BIAS, OCV/CPU_FP16)|-|21.782|-| |conv::Conv::(GFLOPS=8.025, K=[3 x 3], IN={1, 1024, 19, 19}, OCN=1206, P=[1 x 1], BIAS, OCV/CPU)|34.094|31.964|1.07| |conv::Conv::(GFLOPS=8.025, K=[3 x 3], IN={1, 1024, 19, 19}, OCN=1206, P=[1 x 1], BIAS, OCV/CPU_FP16)|-|31.925|-| |conv::Conv::(GFLOPS=9.986, K=[3 x 3], IN={1, 512, 46, 46}, OCN=512, P=[1 x 1], BIAS, OCV/CPU)|28.677|27.813|1.03| |conv::Conv::(GFLOPS=9.986, K=[3 x 3], IN={1, 512, 46, 46}, OCN=512, P=[1 x 1], BIAS, OCV/CPU_FP16)|-|27.808|-| |conv::Conv::(GFLOPS=9.987, K=[3 x 3], IN={1, 256, 92, 92}, OCN=256, P=[1 x 1], BIAS, OCV/CPU)|31.274|27.892|1.12| |conv::Conv::(GFLOPS=9.987, K=[3 x 3], IN={1, 256, 92, 92}, OCN=256, P=[1 x 1], BIAS, OCV/CPU_FP16)|-|27.910|-| |conv::Conv::(GFLOPS=9.989, K=[3 x 3], IN={1, 128, 184, 184}, OCN=128, P=[1 x 1], BIAS, OCV/CPU)|30.533|30.007|1.02| |conv::Conv::(GFLOPS=9.989, K=[3 x 3], IN={1, 128, 184, 184}, OCN=128, P=[1 x 1], BIAS, OCV/CPU_FP16)|-|30.089|-| |conv::Conv::(GFLOPS=9.993, K=[3 x 3], IN={1, 64, 368, 368}, OCN=64, P=[1 x 1], BIAS, OCV/CPU)|39.837|38.312|1.04| |conv::Conv::(GFLOPS=9.993, K=[3 x 3], IN={1, 64, 368, 368}, OCN=64, P=[1 x 1], BIAS, OCV/CPU_FP16)|-|38.477|-| |conv::Conv::(GFLOPS=10.087, K=[3 x 3], IN={1, 576, 38, 50}, OCN=512, PM=SAME, BIAS, OCV/CPU)|32.480|29.237|1.11| |conv::Conv::(GFLOPS=10.087, K=[3 x 3], IN={1, 576, 38, 50}, OCN=512, PM=SAME, BIAS, OCV/CPU_FP16)|-|29.452|-| |conv::Conv::(GFLOPS=10.701, K=[3 x 3], IN={1, 512, 38, 38}, OCN=804, P=[1 x 1], BIAS, OCV/CPU)|33.544|32.832|1.02| |conv::Conv::(GFLOPS=10.701, K=[3 x 3], IN={1, 512, 38, 38}, OCN=804, P=[1 x 1], BIAS, OCV/CPU_FP16)|-|32.784|-| |conv::Conv::(GFLOPS=11.797, K=[5 x 5], IN={1, 240, 64, 64}, OCN=240, PM=SAME, OCV/CPU)|134.481|130.678|1.03| |conv::Conv::(GFLOPS=11.797, K=[5 x 5], IN={1, 240, 64, 64}, OCN=240, PM=SAME, OCV/CPU_FP16)|-|70.134|-| |conv::Conv::(GFLOPS=11.797, K=[5 x 5], IN={1, 
480, 32, 32}, OCN=480, PM=SAME, OCV/CPU)|127.930|126.530|1.01| |conv::Conv::(GFLOPS=11.797, K=[5 x 5], IN={1, 480, 32, 32}, OCN=480, PM=SAME, OCV/CPU_FP16)|-|65.261|-| |conv::Conv::(GFLOPS=16.987, K=[5 x 5], IN={1, 1152, 16, 16}, OCN=1152, PM=SAME, OCV/CPU)|201.346|187.007|1.08| |conv::Conv::(GFLOPS=16.987, K=[5 x 5], IN={1, 1152, 16, 16}, OCN=1152, PM=SAME, OCV/CPU_FP16)|-|91.525|-| |conv::Conv::(GFLOPS=23.122, K=[5 x 5], IN={1, 672, 32, 32}, OCN=672, PM=SAME, OCV/CPU)|252.038|245.587|1.03| |conv::Conv::(GFLOPS=23.122, K=[5 x 5], IN={1, 672, 32, 32}, OCN=672, PM=SAME, OCV/CPU_FP16)|-|125.477|-| ### Pull Request Readiness Checklist See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [x] I agree to contribute to the project under Apache 2 License. - [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [x] The PR is proposed to the proper branch - [ ] There is a reference to the original bug report and related work - [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. - [ ] The feature is well documented and sample code can be built with the project CMake
2023-05-17 14:38:33 +08:00
if (target == DNN_TARGET_CUDA_FP16 || target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD || target == DNN_TARGET_CPU_FP16)
{
default_l1 = 7e-3;
default_lInf = 2e-2;
}
else
{
default_l1 = 1e-5;
default_lInf = 1e-4;
}
}
/**
 * Reports layers that were not executed by the requested backend/target.
 *
 * Reads the per-layer timings of the last run from @p net and, for every
 * layer with a non-zero timing, checks that the layer supports `backend`
 * and that its preferable target equals `target`. Each offending layer is
 * printed to stdout.
 *
 * @param net network to inspect; timings come from net.getPerfProfile().
 * @return true when at least one non-fused layer fell back, false otherwise.
 */
bool checkFallbacks(Net& net) const
{
    // Check if all the layers are supported with current backend and target.
    // Some layers might be fused so their timings equal to zero.
    std::vector<double> timings;
    net.getPerfProfile(timings);
    std::vector<std::string> names = net.getLayerNames();
    // Timings are indexed in parallel with the layer names below.
    CV_CheckEQ(names.size(), timings.size(), "DNN critical error");
    bool hasFallbacks = false;
    // size_t index: avoids the signed/unsigned comparison against names.size().
    for (size_t i = 0; i < names.size(); ++i)
    {
        Ptr<dnn::Layer> l = net.getLayer(net.getLayerId(names[i]));
        // A zero timing is treated as "fused into another layer" and is not reported.
        const bool fused = timings[i] == 0.;
        if ((!l->supportBackend(backend) || l->preferableTarget != target) && !fused)
        {
            hasFallbacks = true;
            std::cout << "FALLBACK: Layer [" << l->type << "]:[" << l->name << "] is expected to has backend implementation" << endl;
        }
    }
    return hasFallbacks;
}
// Static fixture setup: fills every deny-list set of the fixture.
// Each list is assigned from a braced initializer whose contents come from a
// textually included .inl.hpp file, so the included file presumably holds a
// comma-separated list of test-name string literals (TODO confirm against the
// .inl.hpp files). The #include lines must stay on their own lines inside the
// braces.
static void SetUpTestCase()
{
// Lists that are always populated, regardless of optional backends.
parser_deny_list = {
#include "test_onnx_conformance_layer_parser_denylist.inl.hpp"
};
global_deny_list = {
#include "test_onnx_conformance_layer_filter_opencv_all_denylist.inl.hpp"
};
opencv_deny_list = {
#include "test_onnx_conformance_layer_filter_opencv_denylist.inl.hpp"
};
opencl_fp16_deny_list = {
#include "test_onnx_conformance_layer_filter_opencv_ocl_fp16_denylist.inl.hpp"
};
opencl_deny_list = {
#include "test_onnx_conformance_layer_filter_opencv_ocl_fp32_denylist.inl.hpp"
};
cpu_deny_list = {
#include "test_onnx_conformance_layer_filter_opencv_cpu_denylist.inl.hpp"
};
// Lists for optional backends; each exists only when the matching HAVE_* macro is defined.
#ifdef HAVE_HALIDE
halide_deny_list = {
#include "test_onnx_conformance_layer_filter__halide_denylist.inl.hpp"
};
#endif
#ifdef HAVE_VULKAN
vulkan_deny_list = {
#include "test_onnx_conformance_layer_filter__vulkan_denylist.inl.hpp"
};
#endif
#ifdef HAVE_CUDA
cuda_deny_list = {
#include "test_onnx_conformance_layer_filter__cuda_denylist.inl.hpp"
};
#endif
}
};
// Out-of-class definitions of the fixture's static deny lists.
// They start empty and are filled by Test_ONNX_conformance::SetUpTestCase().
// Lists consulted for every backend / for the OpenCV backend and its targets:
std::set<std::string> Test_ONNX_conformance::parser_deny_list;
std::set<std::string> Test_ONNX_conformance::global_deny_list;
std::set<std::string> Test_ONNX_conformance::opencv_deny_list;
std::set<std::string> Test_ONNX_conformance::opencl_fp16_deny_list;
std::set<std::string> Test_ONNX_conformance::opencl_deny_list;
std::set<std::string> Test_ONNX_conformance::cpu_deny_list;
// Lists for optional backends; defined only when the matching HAVE_* macro is set,
// mirroring the guards used in SetUpTestCase() and in Layer_Test.
#ifdef HAVE_HALIDE
std::set<std::string> Test_ONNX_conformance::halide_deny_list;
#endif
#ifdef HAVE_VULKAN
std::set<std::string> Test_ONNX_conformance::vulkan_deny_list;
#endif
#ifdef HAVE_CUDA
std::set<std::string> Test_ONNX_conformance::cuda_deny_list;
#endif
// Runs one ONNX conformance node test (`test_case`) on the parameterized
// backend/target pair. Steps:
//   1. apply skip tags when the test name is in a matching deny list;
//   2. load model.onnx plus the input_N.pb / output_N.pb tensors of test_data_set_0;
//   3. configure the net (backend, target, inputs) and run forward();
//   4. tag layer fallbacks and compare outputs with the references using
//      default_l1 / default_lInf.
// Exceptions raised in steps 2-3 are tagged with the matching DNN_ERROR tag and rethrown.
TEST_P(Test_ONNX_conformance, Layer_Test)
{
    const std::string& name = test_case.name;
    ASSERT_FALSE(name.empty());

    // NOTE(review): neither flag is modified in the code visible here. The
    // textually included OpenVINO filter below may refer to these locals (and
    // to `name`), so their names are kept stable -- confirm against the .inl.hpp.
    bool checkLayersFallbacks = true;
    bool checkAccuracy = true;

    // SKIP when the test case is in the parser deny list.
    if (parser_deny_list.find(name) != parser_deny_list.end())
    {
        applyTestTag(CV_TEST_TAG_DNN_SKIP_PARSER, CV_TEST_TAG_DNN_SKIP_ONNX_CONFORMANCE);
    }

    // SKIP when the test case is in the global deny list.
    if (global_deny_list.find(name) != global_deny_list.end())
    {
        applyTestTag(CV_TEST_TAG_DNN_SKIP_GLOBAL, CV_TEST_TAG_DNN_SKIP_ONNX_CONFORMANCE);
    }

    // Backend-specific filters. The optional branches only exist when the
    // corresponding HAVE_* macro is defined; any other backend ends up in the
    // final `else` and runs unfiltered.
    if (backend == DNN_BACKEND_OPENCV)
    {
        if (opencv_deny_list.find(name) != opencv_deny_list.end())
        {
            applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCV_BACKEND, CV_TEST_TAG_DNN_SKIP_ONNX_CONFORMANCE);
        }
        if ((target == DNN_TARGET_OPENCL_FP16) && (opencl_fp16_deny_list.find(name) != opencl_fp16_deny_list.end()))
        {
            applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_OPENCV_BACKEND, CV_TEST_TAG_DNN_SKIP_ONNX_CONFORMANCE);
        }
        if ((target == DNN_TARGET_OPENCL) && (opencl_deny_list.find(name) != opencl_deny_list.end()))
        {
            applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL, CV_TEST_TAG_DNN_SKIP_OPENCV_BACKEND, CV_TEST_TAG_DNN_SKIP_ONNX_CONFORMANCE);
        }
        if ((target == DNN_TARGET_CPU) && (cpu_deny_list.find(name) != cpu_deny_list.end()))
        {
            applyTestTag(CV_TEST_TAG_DNN_SKIP_CPU, CV_TEST_TAG_DNN_SKIP_OPENCV_BACKEND, CV_TEST_TAG_DNN_SKIP_ONNX_CONFORMANCE);
        }
    }
#ifdef HAVE_HALIDE
    else if (backend == DNN_BACKEND_HALIDE)
    {
        if (halide_deny_list.find(name) != halide_deny_list.end())
        {
            applyTestTag(CV_TEST_TAG_DNN_SKIP_HALIDE, CV_TEST_TAG_DNN_SKIP_ONNX_CONFORMANCE);
        }
    }
#endif
#ifdef HAVE_INF_ENGINE
    else if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
    {
        // The OpenVINO filter is code, not a list: it is expanded in place here.
#include "test_onnx_conformance_layer_filter__openvino.inl.hpp"
    }
#endif
#ifdef HAVE_VULKAN
    else if (backend == DNN_BACKEND_VKCOM)
    {
        if (vulkan_deny_list.find(name) != vulkan_deny_list.end())
        {
            applyTestTag(CV_TEST_TAG_DNN_SKIP_VULKAN, CV_TEST_TAG_DNN_SKIP_ONNX_CONFORMANCE);
        }
    }
#endif
#ifdef HAVE_CUDA
    else if (backend == DNN_BACKEND_CUDA)
    {
        if (cuda_deny_list.find(name) != cuda_deny_list.end())
        {
            applyTestTag(CV_TEST_TAG_DNN_SKIP_CUDA, CV_TEST_TAG_DNN_SKIP_ONNX_CONFORMANCE);
        }
    }
#endif
    else
    {
        std::ostringstream ss;
        ss << "No test filter available for backend ";
        PrintTo(backend, &ss);
        ss << ". Run test by default";
        std::cout << ss.str() << std::endl;
    }

    std::vector<Mat> inputs;
    std::vector<Mat> ref_outputs;

    std::string prefix = cv::format("dnn/onnx/conformance/node/%s", test_case.name);

    Net net;
    try
    {
        std::string model_path = findDataFile(prefix + "/model.onnx");

        // The counts are uint32_t in TestCase; convert once so the int loop
        // index (needed for the "%d" format) is not compared against an
        // unsigned value.
        const int numInputs = static_cast<int>(test_case.inputs);
        const int numOutputs = static_cast<int>(test_case.outputs);

        //cout << "Read ONNX inputs..." << endl;
        for (int i = 0; i < numInputs; ++i)
        {
            Mat input = readTensorFromONNX(findDataFile(prefix + cv::format("/test_data_set_0/input_%d.pb", i)));
            inputs.push_back(input);
        }

        //cout << "Read ONNX reference outputs..." << endl;
        for (int i = 0; i < numOutputs; ++i)
        {
            Mat output = readTensorFromONNX(findDataFile(prefix + cv::format("/test_data_set_0/output_%d.pb", i)));
            ref_outputs.push_back(output);
        }

        //cout << "Parse model..." << endl;
        net = readNetFromONNX(model_path);
        if (net.empty())
        {
            applyTestTag(CV_TEST_TAG_DNN_ERROR_PARSER);
        }
    }
    catch (...)
    {
        cout << "Exception during ONNX model parse / loading input / loading reference data!" << endl;
        applyTestTag(CV_TEST_TAG_DNN_ERROR_PARSER);
        throw;
    }
    ASSERT_FALSE(net.empty());

    // Inputs are addressed by their index rendered as text: "0", "1", ...
    std::vector<std::string> inputNames;
    const int loadedInputs = static_cast<int>(inputs.size());
    for (int i = 0; i < loadedInputs; ++i)
        inputNames.push_back(cv::format("%d", i));
    net.setInputsNames(inputNames);

    try
    {
        net.setPreferableBackend(backend);
        net.setPreferableTarget(target);

        for (size_t i = 0; i < inputs.size(); ++i)
        {
            net.setInput(inputs[i], inputNames[i]);
        }
    }
    catch (...)
    {
        cout << "Exception during network configuration!" << endl;
        applyTestTag(CV_TEST_TAG_DNN_ERROR_NET_SETUP);
        throw;
    }

    std::vector<std::string> layerNames = net.getUnconnectedOutLayersNames();
    std::vector<Mat> outputs;
    try
    {
        net.forward(outputs, layerNames);
    }
    catch (...)
    {
        cout << "Exception during net.forward() call!" << endl;
        applyTestTag(CV_TEST_TAG_DNN_ERROR_FORWARD);
        throw;
    }
    // Unsigned literal keeps the comparison with size() free of sign mismatch.
    ASSERT_GE(outputs.size(), 1u);

    if (checkLayersFallbacks && checkFallbacks(net))
    {
        applyTestTag(CV_TEST_TAG_DNN_LAYER_FALLBACK);
    }

    if (checkAccuracy)
    {
        try
        {
            if (ref_outputs.size() == 1)
            {
                // probably we found random unconnected layers.
                // Only the first produced output is compared; extra outputs are ignored.
                normAssert(ref_outputs[0], outputs[0], "", default_l1, default_lInf);
            }
            else
            {
                ASSERT_EQ(outputs.size(), ref_outputs.size());
                for (size_t i = 0; i < ref_outputs.size(); ++i)
                {
                    normAssert(ref_outputs[i], outputs[i], "", default_l1, default_lInf);
                }
            }
        }
        catch (...)
        {
            cout << "Exception during accuracy check!" << endl;
            throw;
        }
    }
    else
    {
        applyTestTag(CV_TEST_TAG_DNN_NO_ACCURACY_CHECK);
    }

    if (!HasFailure())
        cout << "Test passed!" << endl;
}
// Instantiates Test_ONNX_conformance for the cross product of
//   - every entry of testConformanceConfig, and
//   - the backend/target pairs returned by dnnBackendsAndTargets(), requested
//     here with both the Inference Engine and Halide flags set to true.
// The instantiation prefix is intentionally empty (/**/); printOnnxConfParams
// is passed as the last argument, which GoogleTest uses to name each instance.
INSTANTIATE_TEST_CASE_P(/**/, Test_ONNX_conformance,
testing::Combine(
testing::ValuesIn(testConformanceConfig),
dnnBackendsAndTargets(/*withInferenceEngine=*/true, /*withHalide=*/true)
),
printOnnxConfParams
);
}