// opencv/modules/dnn/src/init.cpp

/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
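
// Registration of all built-in DNN layer types with the LayerFactory
// (see initializeLayerFactory() below).
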
#include "precomp.hpp"
#include <opencv2/dnn/layer.details.hpp>

#if defined(HAVE_PROTOBUF) && !defined(BUILD_PLUGIN)
#include <google/protobuf/stubs/common.h>
#endif

namespace cv {
namespace dnn {
CV__DNN_INLINE_NS_BEGIN
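
// The mutex is created on first use and intentionally never deleted, so it
// stays usable while static objects in other translation units are destroyed.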
static Mutex* __initialization_mutex = NULL;
Mutex& getInitializationMutex()
{
    if (__initialization_mutex == NULL)
        __initialization_mutex = new Mutex();
    return *__initialization_mutex;
}
// force initialization (single-threaded environment)
Mutex* __initialization_mutex_initializer = &getInitializationMutex();

#if defined(HAVE_PROTOBUF) && !defined(BUILD_PLUGIN)
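// RAII guard: a static instance (created in initializeLayerFactory below)
// calls ShutdownProtobufLibrary() at program exit to release protobuf's
// global allocations.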
namespace {
using namespace google::protobuf;
class ProtobufShutdown {
public:
    bool initialized;
    ProtobufShutdown() : initialized(true) {}
    ~ProtobufShutdown()
    {
        initialized = false;
        google::protobuf::ShutdownProtobufLibrary();
    }
};
} // namespace
#endif

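// Registers every built-in layer type with the LayerFactory. This is meant to
// run once; lazy initialization elsewhere is expected to hold
// getInitializationMutex() while calling it.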
void initializeLayerFactory()
{
    CV_TRACE_FUNCTION();

#if defined(HAVE_PROTOBUF) && !defined(BUILD_PLUGIN)
    static ProtobufShutdown protobufShutdown; CV_UNUSED(protobufShutdown);
#endif
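
    // Each CV_DNN_REGISTER_LAYER_CLASS(type, class) call maps the string
    // "type" to a constructor of "class" in the LayerFactory, so model
    // importers can instantiate layers by name. A minimal usage sketch
    // (hypothetical, assuming default-constructed LayerParams suffice for
    // the layer in question):
    //
    //     LayerParams params;
    //     Ptr<Layer> relu = LayerFactory::createLayerInstance("ReLU", params);
    //
    // Note that several type names below map to the same implementation class.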
    CV_DNN_REGISTER_LAYER_CLASS(Slice, SliceLayer);
    CV_DNN_REGISTER_LAYER_CLASS(Split, SplitLayer);
    CV_DNN_REGISTER_LAYER_CLASS(Concat, ConcatLayer);
    CV_DNN_REGISTER_LAYER_CLASS(Reshape, ReshapeLayer);
    CV_DNN_REGISTER_LAYER_CLASS(Flatten, FlattenLayer);
    CV_DNN_REGISTER_LAYER_CLASS(Resize, ResizeLayer);
    CV_DNN_REGISTER_LAYER_CLASS(Interp, InterpLayer);
    CV_DNN_REGISTER_LAYER_CLASS(CropAndResize, CropAndResizeLayer);

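    // Convolution, pooling, fully-connected and related layers.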
    CV_DNN_REGISTER_LAYER_CLASS(Convolution, ConvolutionLayer);
    CV_DNN_REGISTER_LAYER_CLASS(Deconvolution, DeconvolutionLayer);
    CV_DNN_REGISTER_LAYER_CLASS(Pooling, PoolingLayer);
    CV_DNN_REGISTER_LAYER_CLASS(ROIPooling, PoolingLayer);
    CV_DNN_REGISTER_LAYER_CLASS(PSROIPooling, PoolingLayer);
    CV_DNN_REGISTER_LAYER_CLASS(Reduce, ReduceLayer);
    CV_DNN_REGISTER_LAYER_CLASS(LRN, LRNLayer);
    CV_DNN_REGISTER_LAYER_CLASS(InnerProduct, InnerProductLayer);
    CV_DNN_REGISTER_LAYER_CLASS(Gemm, GemmLayer);
    CV_DNN_REGISTER_LAYER_CLASS(MatMul, MatMulLayer);
    CV_DNN_REGISTER_LAYER_CLASS(Softmax, SoftmaxLayer);
    CV_DNN_REGISTER_LAYER_CLASS(SoftMax, SoftmaxLayer); // For compatibility. See https://github.com/opencv/opencv/issues/16877
    CV_DNN_REGISTER_LAYER_CLASS(MVN, MVNLayer);

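    // Elementwise activation and unary math layers.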
    CV_DNN_REGISTER_LAYER_CLASS(ReLU, ReLULayer);
    CV_DNN_REGISTER_LAYER_CLASS(ReLU6, ReLU6Layer);
    CV_DNN_REGISTER_LAYER_CLASS(ChannelsPReLU, ChannelsPReLULayer);
    CV_DNN_REGISTER_LAYER_CLASS(PReLU, ChannelsPReLULayer);
    CV_DNN_REGISTER_LAYER_CLASS(Sigmoid, SigmoidLayer);
    CV_DNN_REGISTER_LAYER_CLASS(TanH, TanHLayer);
    CV_DNN_REGISTER_LAYER_CLASS(Swish, SwishLayer);
    CV_DNN_REGISTER_LAYER_CLASS(Mish, MishLayer);
    CV_DNN_REGISTER_LAYER_CLASS(ELU, ELULayer);
    CV_DNN_REGISTER_LAYER_CLASS(BNLL, BNLLLayer);
    CV_DNN_REGISTER_LAYER_CLASS(AbsVal, AbsLayer);
    CV_DNN_REGISTER_LAYER_CLASS(Power, PowerLayer);
    CV_DNN_REGISTER_LAYER_CLASS(Exp, ExpLayer);
    CV_DNN_REGISTER_LAYER_CLASS(Ceil, CeilLayer);
    CV_DNN_REGISTER_LAYER_CLASS(Floor, FloorLayer);
    CV_DNN_REGISTER_LAYER_CLASS(Log, LogLayer);
    CV_DNN_REGISTER_LAYER_CLASS(Round, RoundLayer);
    CV_DNN_REGISTER_LAYER_CLASS(Sqrt, SqrtLayer);
    CV_DNN_REGISTER_LAYER_CLASS(Not, NotLayer);
    CV_DNN_REGISTER_LAYER_CLASS(Acos, AcosLayer);
    CV_DNN_REGISTER_LAYER_CLASS(Acosh, AcoshLayer);
    CV_DNN_REGISTER_LAYER_CLASS(Asin, AsinLayer);
    CV_DNN_REGISTER_LAYER_CLASS(Asinh, AsinhLayer);
    CV_DNN_REGISTER_LAYER_CLASS(Atan, AtanLayer);
    CV_DNN_REGISTER_LAYER_CLASS(Atanh, AtanhLayer);
    CV_DNN_REGISTER_LAYER_CLASS(Cos, CosLayer);
    CV_DNN_REGISTER_LAYER_CLASS(Cosh, CoshLayer);
    CV_DNN_REGISTER_LAYER_CLASS(Erf, ErfLayer);
    CV_DNN_REGISTER_LAYER_CLASS(HardSwish, HardSwishLayer);
    CV_DNN_REGISTER_LAYER_CLASS(Sin, SinLayer);
    CV_DNN_REGISTER_LAYER_CLASS(Sinh, SinhLayer);
    CV_DNN_REGISTER_LAYER_CLASS(Sign, SignLayer);
    CV_DNN_REGISTER_LAYER_CLASS(Shrink, ShrinkLayer);
    CV_DNN_REGISTER_LAYER_CLASS(Softplus, SoftplusLayer);
    CV_DNN_REGISTER_LAYER_CLASS(Softsign, SoftsignLayer);
    CV_DNN_REGISTER_LAYER_CLASS(Tan, TanLayer);
    CV_DNN_REGISTER_LAYER_CLASS(Celu, CeluLayer);
    CV_DNN_REGISTER_LAYER_CLASS(HardSigmoid, HardSigmoidLayer);
    CV_DNN_REGISTER_LAYER_CLASS(Selu, SeluLayer);
    CV_DNN_REGISTER_LAYER_CLASS(ThresholdedRelu, ThresholdedReluLayer);
    CV_DNN_REGISTER_LAYER_CLASS(Gelu, GeluLayer);
    CV_DNN_REGISTER_LAYER_CLASS(GeluApproximation, GeluApproximationLayer);

    CV_DNN_REGISTER_LAYER_CLASS(BatchNorm, BatchNormLayer);
    CV_DNN_REGISTER_LAYER_CLASS(MaxUnpool, MaxUnpoolLayer);
    CV_DNN_REGISTER_LAYER_CLASS(Dropout, BlankLayer);
    CV_DNN_REGISTER_LAYER_CLASS(Identity, BlankLayer);
    CV_DNN_REGISTER_LAYER_CLASS(Silence, BlankLayer);
    CV_DNN_REGISTER_LAYER_CLASS(Const, ConstLayer);
    CV_DNN_REGISTER_LAYER_CLASS(Arg, ArgLayer);
    CV_DNN_REGISTER_LAYER_CLASS(Reciprocal, ReciprocalLayer);
    CV_DNN_REGISTER_LAYER_CLASS(Gather, GatherLayer);
    CV_DNN_REGISTER_LAYER_CLASS(GatherElements, GatherElementsLayer);
    CV_DNN_REGISTER_LAYER_CLASS(LayerNormalization, LayerNormLayer);
    CV_DNN_REGISTER_LAYER_CLASS(Expand, ExpandLayer);
    CV_DNN_REGISTER_LAYER_CLASS(InstanceNormalization, InstanceNormLayer);
    CV_DNN_REGISTER_LAYER_CLASS(Crop, CropLayer);
    CV_DNN_REGISTER_LAYER_CLASS(Eltwise, EltwiseLayer);
    CV_DNN_REGISTER_LAYER_CLASS(NaryEltwise, NaryEltwiseLayer);
    CV_DNN_REGISTER_LAYER_CLASS(Permute, PermuteLayer);
    CV_DNN_REGISTER_LAYER_CLASS(ShuffleChannel, ShuffleChannelLayer);

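    // Layers used mainly by detection networks (SSD, YOLO, Faster R-CNN).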
    CV_DNN_REGISTER_LAYER_CLASS(PriorBox, PriorBoxLayer);
    CV_DNN_REGISTER_LAYER_CLASS(PriorBoxClustered, PriorBoxLayer);
    CV_DNN_REGISTER_LAYER_CLASS(Reorg, ReorgLayer);
    CV_DNN_REGISTER_LAYER_CLASS(Region, RegionLayer);
    CV_DNN_REGISTER_LAYER_CLASS(DetectionOutput, DetectionOutputLayer);
    CV_DNN_REGISTER_LAYER_CLASS(NormalizeBBox, NormalizeBBoxLayer);
    CV_DNN_REGISTER_LAYER_CLASS(Normalize, NormalizeBBoxLayer);
    CV_DNN_REGISTER_LAYER_CLASS(Shift, ShiftLayer);
    CV_DNN_REGISTER_LAYER_CLASS(Padding, PaddingLayer);
    CV_DNN_REGISTER_LAYER_CLASS(Proposal, ProposalLayer);

    CV_DNN_REGISTER_LAYER_CLASS(Scale, ScaleLayer);
    CV_DNN_REGISTER_LAYER_CLASS(Compare, CompareLayer);
    CV_DNN_REGISTER_LAYER_CLASS(DataAugmentation, DataAugmentationLayer);
    CV_DNN_REGISTER_LAYER_CLASS(Correlation, CorrelationLayer);
    CV_DNN_REGISTER_LAYER_CLASS(Accum, AccumLayer);
    CV_DNN_REGISTER_LAYER_CLASS(FlowWarp, FlowWarpLayer);
    CV_DNN_REGISTER_LAYER_CLASS(LSTM, LSTMLayer);
    CV_DNN_REGISTER_LAYER_CLASS(GRU, GRULayer);
    CV_DNN_REGISTER_LAYER_CLASS(CumSum, CumSumLayer);
    CV_DNN_REGISTER_LAYER_CLASS(Einsum, EinsumLayer);
    CV_DNN_REGISTER_LAYER_CLASS(Scatter, ScatterLayer);
    CV_DNN_REGISTER_LAYER_CLASS(ScatterND, ScatterNDLayer);
    CV_DNN_REGISTER_LAYER_CLASS(Tile, TileLayer);

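    // Quantized (int8) inference: explicit (de/re)quantization layers plus
    // int8 counterparts of the float layers above.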
    CV_DNN_REGISTER_LAYER_CLASS(Quantize, QuantizeLayer);
    CV_DNN_REGISTER_LAYER_CLASS(Dequantize, DequantizeLayer);
    CV_DNN_REGISTER_LAYER_CLASS(Requantize, RequantizeLayer);
    CV_DNN_REGISTER_LAYER_CLASS(ConvolutionInt8, ConvolutionLayerInt8);
    CV_DNN_REGISTER_LAYER_CLASS(InnerProductInt8, InnerProductLayerInt8);
    CV_DNN_REGISTER_LAYER_CLASS(PoolingInt8, PoolingLayerInt8);
    CV_DNN_REGISTER_LAYER_CLASS(EltwiseInt8, EltwiseLayerInt8);
    CV_DNN_REGISTER_LAYER_CLASS(BatchNormInt8, BatchNormLayerInt8);
    CV_DNN_REGISTER_LAYER_CLASS(ScaleInt8, ScaleLayerInt8);
    CV_DNN_REGISTER_LAYER_CLASS(ShiftInt8, ShiftLayerInt8);
    CV_DNN_REGISTER_LAYER_CLASS(ReLUInt8, ActivationLayerInt8);
    CV_DNN_REGISTER_LAYER_CLASS(ReLU6Int8, ActivationLayerInt8);
    CV_DNN_REGISTER_LAYER_CLASS(SigmoidInt8, ActivationLayerInt8);
    CV_DNN_REGISTER_LAYER_CLASS(TanHInt8, ActivationLayerInt8);
    CV_DNN_REGISTER_LAYER_CLASS(SwishInt8, ActivationLayerInt8);
    CV_DNN_REGISTER_LAYER_CLASS(MishInt8, ActivationLayerInt8);
    CV_DNN_REGISTER_LAYER_CLASS(ELUInt8, ActivationLayerInt8);
    CV_DNN_REGISTER_LAYER_CLASS(BNLLInt8, ActivationLayerInt8);
    CV_DNN_REGISTER_LAYER_CLASS(AbsValInt8, ActivationLayerInt8);
    CV_DNN_REGISTER_LAYER_CLASS(SoftmaxInt8, SoftmaxLayerInt8);
    CV_DNN_REGISTER_LAYER_CLASS(SoftMaxInt8, SoftmaxLayerInt8);
    CV_DNN_REGISTER_LAYER_CLASS(ConcatInt8, ConcatLayer);
    CV_DNN_REGISTER_LAYER_CLASS(FlattenInt8, FlattenLayer);
    CV_DNN_REGISTER_LAYER_CLASS(PaddingInt8, PaddingLayer);
    CV_DNN_REGISTER_LAYER_CLASS(BlankInt8, BlankLayer);
    CV_DNN_REGISTER_LAYER_CLASS(DropoutInt8, BlankLayer);
    CV_DNN_REGISTER_LAYER_CLASS(IdentityInt8, BlankLayer);
    CV_DNN_REGISTER_LAYER_CLASS(SilenceInt8, BlankLayer);
    CV_DNN_REGISTER_LAYER_CLASS(ConstInt8, ConstLayer);
    CV_DNN_REGISTER_LAYER_CLASS(ReshapeInt8, ReshapeLayer);
    CV_DNN_REGISTER_LAYER_CLASS(ResizeInt8, ResizeLayer);
    CV_DNN_REGISTER_LAYER_CLASS(SplitInt8, SplitLayer);
    CV_DNN_REGISTER_LAYER_CLASS(SliceInt8, SliceLayer);
    CV_DNN_REGISTER_LAYER_CLASS(CropInt8, CropLayer);
    CV_DNN_REGISTER_LAYER_CLASS(PermuteInt8, PermuteLayer);
    CV_DNN_REGISTER_LAYER_CLASS(ReorgInt8, ReorgLayer);
    CV_DNN_REGISTER_LAYER_CLASS(ShuffleChannelInt8, ShuffleChannelLayer);
}
CV__DNN_INLINE_NS_END
}} // namespace