Mirror of https://github.com/opencv/opencv.git

Merge pull request #21190 from rogday:acts

This commit is contained in commit 30df77fa4c.
@@ -738,6 +738,40 @@ CV__DNN_INLINE_NS_BEGIN
        static Ptr<TanLayer> create(const LayerParams &params);
    };

    class CV_EXPORTS CeluLayer : public ActivationLayer
    {
    public:
        float alpha;

        static Ptr<CeluLayer> create(const LayerParams &params);
    };

    class CV_EXPORTS HardSigmoidLayer : public ActivationLayer
    {
    public:
        float alpha;
        float beta;

        static Ptr<HardSigmoidLayer> create(const LayerParams &params);
    };

    class CV_EXPORTS SeluLayer : public ActivationLayer
    {
    public:
        float alpha;
        float gamma;

        static Ptr<SeluLayer> create(const LayerParams &params);
    };

    class CV_EXPORTS ThresholdedReluLayer : public ActivationLayer
    {
    public:
        float alpha;

        static Ptr<ThresholdedReluLayer> create(const LayerParams &params);
    };

    class CV_EXPORTS ActivationLayerInt8 : public ActivationLayer
    {
    public:
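For reference, the elementwise formulas behind the four new layer classes declared above, written as a small host-side sketch (plain C++, not part of the patch; the parameter names mirror the class members, the helper names are illustrative):

#include <algorithm>
#include <cmath>

// Celu(x)            = max(0, x) + min(0, alpha * (exp(x / alpha) - 1))
float celu_ref(float x, float alpha)                     { return std::max(0.f, x) + std::min(0.f, alpha * std::expm1(x / alpha)); }

// HardSigmoid(x)     = clamp(alpha * x + beta, 0, 1)
float hard_sigmoid_ref(float x, float alpha, float beta) { return std::max(0.f, std::min(1.f, alpha * x + beta)); }

// Selu(x)            = gamma * x for x > 0, gamma * alpha * (exp(x) - 1) otherwise
float selu_ref(float x, float alpha, float gamma)        { return gamma * (x > 0.f ? x : alpha * std::expm1(x)); }

// ThresholdedRelu(x) = x for x > alpha, 0 otherwise
float thresholded_relu_ref(float x, float alpha)         { return x > alpha ? x : 0.f; }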
@@ -233,6 +233,26 @@ void tan(const Stream& stream, Span<T> output, View<T> input) {
    generic_op<T, TanFunctor<T>>(stream, output, input);
}

template <class T>
void celu(const Stream& stream, Span<T> output, View<T> input, T alpha) {
    generic_op<T, CeluFunctor<T>>(stream, output, input, {alpha});
}

template <class T>
void hardsigmoid(const Stream& stream, Span<T> output, View<T> input, T alpha, T beta) {
    generic_op<T, HardSigmoidFunctor<T>>(stream, output, input, {alpha, beta});
}

template <class T>
void selu(const Stream& stream, Span<T> output, View<T> input, T alpha, T gamma) {
    generic_op<T, SeluFunctor<T>>(stream, output, input, {alpha, gamma});
}

template <class T>
void thresholdedrelu(const Stream& stream, Span<T> output, View<T> input, T alpha) {
    generic_op<T, ThresholdedReluFunctor<T>>(stream, output, input, {alpha});
}

template <class T>
void abs(const Stream& stream, Span<T> output, View<T> input) {
    generic_op<T, AbsFunctor<T>>(stream, output, input);
@@ -286,6 +306,10 @@ template void sinh<__half>(const Stream&, Span<__half>, View<__half>);
template void softplus<__half>(const Stream&, Span<__half>, View<__half>);
template void softsign<__half>(const Stream&, Span<__half>, View<__half>);
template void tan<__half>(const Stream&, Span<__half>, View<__half>);
template void celu<__half>(const Stream&, Span<__half>, View<__half>, __half);
template void hardsigmoid<__half>(const Stream&, Span<__half>, View<__half>, __half, __half);
template void selu<__half>(const Stream&, Span<__half>, View<__half>, __half, __half);
template void thresholdedrelu<__half>(const Stream&, Span<__half>, View<__half>, __half);
template void power<__half>(const Stream&, Span<__half>, View<__half>, __half, __half, __half);
template void exp<__half>(const Stream&, Span<__half>, View<__half>, __half, __half);
#endif
@@ -321,6 +345,10 @@ template void sinh<float>(const Stream&, Span<float>, View<float>);
template void softplus<float>(const Stream&, Span<float>, View<float>);
template void softsign<float>(const Stream&, Span<float>, View<float>);
template void tan<float>(const Stream&, Span<float>, View<float>);
template void celu<float>(const Stream&, Span<float>, View<float>, float);
template void hardsigmoid<float>(const Stream&, Span<float>, View<float>, float, float);
template void selu<float>(const Stream&, Span<float>, View<float>, float, float);
template void thresholdedrelu<float>(const Stream&, Span<float>, View<float>, float);
template void power<float>(const Stream&, Span<float>, View<float>, float, float, float);
template void exp<float>(const Stream&, Span<float>, View<float>, float, float);
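The two blocks above follow the usual CUDA arrangement of defining the templates in the .cu file and explicitly instantiating them for __half and float, so the host side only needs the declarations from the kernels header further down. A minimal sketch of that pattern outside OpenCV (the file names and the scale() function are illustrative, not from the patch):

// lib.cu -- definition plus explicit instantiation; device code stays in this translation unit
template <class T>
void scale(T* data, int n, T s) { for (int i = 0; i < n; ++i) data[i] *= s; }
template void scale<float>(float*, int, float);

// caller.cpp -- only the declaration is visible; the linker resolves scale<float> from lib.cu
template <class T> void scale(T* data, int n, T s);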
@@ -528,6 +528,84 @@ struct TanFunctor {
    }
};

template <class T>
struct CeluFunctor {
    struct Params {
        CUDA4DNN_HOST_DEVICE Params() : alpha(1) { }
        CUDA4DNN_HOST_DEVICE Params(T alpha_) : alpha(alpha_) { }
        T alpha;
    };

    CUDA4DNN_DEVICE CeluFunctor() : CeluFunctor(Params{}) { }
    CUDA4DNN_DEVICE CeluFunctor(const Params& params) : alpha{params.alpha} { }

    CUDA4DNN_DEVICE T operator()(T value) {
        using csl::device::min;
        using csl::device::max;
        using csl::device::expm1;
        return max(T(0), value) + min(T(0), alpha * expm1(value / alpha));
    }

    T alpha;
};

template <class T>
struct HardSigmoidFunctor {
    struct Params {
        CUDA4DNN_HOST_DEVICE Params() : alpha(0.2), beta(0.5) { }
        CUDA4DNN_HOST_DEVICE Params(T alpha_, T beta_) : alpha(alpha_), beta(beta_) { }
        T alpha, beta;
    };

    CUDA4DNN_DEVICE HardSigmoidFunctor() : HardSigmoidFunctor(Params{}) { }
    CUDA4DNN_DEVICE HardSigmoidFunctor(const Params& params) : alpha{params.alpha}, beta{params.beta} { }

    CUDA4DNN_DEVICE T operator()(T value) {
        using csl::device::clamp;
        return clamp(alpha * value + beta, T(0), T(1));
    }

    T alpha, beta;
};

template <class T>
struct SeluFunctor {
    struct Params {
        CUDA4DNN_HOST_DEVICE Params() : alpha(1.6732632423543772848170429916717),
                                        gamma(1.0507009873554804934193349852946) { }
        CUDA4DNN_HOST_DEVICE Params(T alpha_, T gamma_) : alpha(alpha_), gamma(gamma_) { }
        T alpha, gamma;
    };

    CUDA4DNN_DEVICE SeluFunctor() : SeluFunctor(Params{}) { }
    CUDA4DNN_DEVICE SeluFunctor(const Params& params) : alpha{params.alpha}, gamma{params.gamma} { }

    CUDA4DNN_DEVICE T operator()(T value) {
        using csl::device::expm1;
        return gamma * (value > T(0) ? value : alpha * expm1(value));
    }

    T alpha, gamma;
};

template <class T>
struct ThresholdedReluFunctor {
    struct Params {
        CUDA4DNN_HOST_DEVICE Params() : alpha(1) { }
        CUDA4DNN_HOST_DEVICE Params(T alpha_) : alpha(alpha_) { }
        T alpha;
    };

    CUDA4DNN_DEVICE ThresholdedReluFunctor() : ThresholdedReluFunctor(Params{}) { }
    CUDA4DNN_DEVICE ThresholdedReluFunctor(const Params& params) : alpha{params.alpha} { }

    CUDA4DNN_DEVICE T operator()(T value) {
        return (value > alpha) ? value : T(0);
    }

    T alpha;
};

template <class T>
struct PowerFunctor {
    struct Params {
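One detail worth noting in the functors above: the CELU and SELU paths call expm1 rather than spelling out exp(x) - 1, which avoids catastrophic cancellation for inputs near zero. A small host-side illustration (plain C++, not part of the patch):

#include <cmath>
#include <cstdio>

int main()
{
    float x = 1e-7f;
    std::printf("expm1(x)   = %.9g\n", std::expm1(x));      // ~1.00000005e-07, accurate
    std::printf("exp(x) - 1 = %.9g\n", std::exp(x) - 1.0f); // ~1.19209290e-07, dominated by the ulp of 1.0f
    return 0;
}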
@@ -105,6 +105,18 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels {
    template <class T>
    void tan(const csl::Stream& stream, csl::Span<T> output, csl::View<T> input);

    template <class T>
    void celu(const csl::Stream& stream, csl::Span<T> output, csl::View<T> input, T alpha);

    template <class T>
    void hardsigmoid(const csl::Stream& stream, csl::Span<T> output, csl::View<T> input, T alpha, T beta);

    template <class T>
    void selu(const csl::Stream& stream, csl::Span<T> output, csl::View<T> input, T alpha, T gamma);

    template <class T>
    void thresholdedrelu(const csl::Stream& stream, csl::Span<T> output, csl::View<T> input, T alpha);

    template <class T>
    void power(const csl::Stream& stream, csl::Span<T> output, csl::View<T> input, T exp, T scale, T shift);
@@ -490,6 +490,68 @@ namespace cv { namespace dnn { namespace cuda4dnn {
        csl::Stream stream;
    };

    template <class T>
    class CeluOp final : public BaseOp<CeluOp, T> {
    public:
        CeluOp(csl::Stream stream_, T alpha_) : stream(std::move(stream_)), alpha{ alpha_ } { }

        void calculate(csl::TensorSpan<T> output, csl::TensorView<T> input) const
        {
            kernels::celu<T>(stream, output, input, alpha);
        }

    private:
        csl::Stream stream;
        const T alpha;
    };

    template <class T>
    class HardSigmoidOp final : public BaseOp<HardSigmoidOp, T> {
    public:
        HardSigmoidOp(csl::Stream stream_, T alpha_, T beta_)
            : stream(std::move(stream_)), alpha{ alpha_ }, beta{ beta_ } { }

        void calculate(csl::TensorSpan<T> output, csl::TensorView<T> input) const
        {
            kernels::hardsigmoid<T>(stream, output, input, alpha, beta);
        }

    private:
        csl::Stream stream;
        const T alpha, beta;
    };

    template <class T>
    class SeluOp final : public BaseOp<SeluOp, T> {
    public:
        SeluOp(csl::Stream stream_, T alpha_, T gamma_)
            : stream(std::move(stream_)), alpha{ alpha_ }, gamma{ gamma_ } { }

        void calculate(csl::TensorSpan<T> output, csl::TensorView<T> input) const
        {
            kernels::selu<T>(stream, output, input, alpha, gamma);
        }

    private:
        csl::Stream stream;
        const T alpha, gamma;
    };

    template <class T>
    class ThresholdedReluOp final : public BaseOp<ThresholdedReluOp, T> {
    public:
        ThresholdedReluOp(csl::Stream stream_, T alpha_) : stream(std::move(stream_)), alpha{ alpha_ } { }

        void calculate(csl::TensorSpan<T> output, csl::TensorView<T> input) const
        {
            kernels::thresholdedrelu<T>(stream, output, input, alpha);
        }

    private:
        csl::Stream stream;
        const T alpha;
    };

    template <class T>
    class PowerOp final : public BaseOp<PowerOp, T> {
    public:
@@ -132,6 +132,10 @@ void initializeLayerFactory()
    CV_DNN_REGISTER_LAYER_CLASS(Softplus, SoftplusLayer);
    CV_DNN_REGISTER_LAYER_CLASS(Softsign, SoftsignLayer);
    CV_DNN_REGISTER_LAYER_CLASS(Tan, TanLayer);
    CV_DNN_REGISTER_LAYER_CLASS(Celu, CeluLayer);
    CV_DNN_REGISTER_LAYER_CLASS(HardSigmoid, HardSigmoidLayer);
    CV_DNN_REGISTER_LAYER_CLASS(Selu, SeluLayer);
    CV_DNN_REGISTER_LAYER_CLASS(ThresholdedRelu, ThresholdedReluLayer);
    CV_DNN_REGISTER_LAYER_CLASS(BatchNorm, BatchNormLayer);
    CV_DNN_REGISTER_LAYER_CLASS(MaxUnpool, MaxUnpoolLayer);
    CV_DNN_REGISTER_LAYER_CLASS(Dropout, BlankLayer);
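With the four CV_DNN_REGISTER_LAYER_CLASS lines above in place, the new layers become reachable by their type strings through the layer factory. A minimal sketch (the alpha value is illustrative):

#include <opencv2/dnn.hpp>

int main()
{
    cv::dnn::LayerParams lp;
    lp.set("alpha", 1.0f);
    // Looks up the "ThresholdedRelu" registration added above
    cv::Ptr<cv::dnn::Layer> layer =
        cv::dnn::LayerFactory::createLayerInstance("ThresholdedRelu", lp);
    return layer.empty() ? 1 : 0;
}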
@@ -71,6 +71,7 @@ namespace dnn

using std::abs;
using std::exp;
using std::expm1;
using std::tanh;
using std::pow;
using std::ceil;
@@ -728,6 +729,20 @@ struct BaseDefaultFunctor : public BaseFunctor
        return true;
    }

#ifdef HAVE_CUDA
    Ptr<BackendNode> initCUDA(int target, csl::Stream stream)
    {
        CV_Error(Error::StsNotImplemented, "");
    }
#endif

#ifdef HAVE_HALIDE
    void attachHalide(const Halide::Expr& input, Halide::Func& top)
    {
        CV_Error(Error::StsNotImplemented, "");
    }
#endif // HAVE_HALIDE

#ifdef HAVE_DNN_IE_NN_BUILDER_2019
    InferenceEngine::Builder::Layer initInfEngineBuilderAPI()
    {
@@ -746,8 +761,6 @@ struct BaseDefaultFunctor : public BaseFunctor
    ml::Operand initWebnnAPI(const ml::GraphBuilder& builder, const ml::Operand& input)
    {
        CV_Error(Error::StsNotImplemented, "");
        ml::Operand operand;
        return operand;
    }
#endif
@@ -759,20 +772,6 @@ struct BaseDefaultFunctor : public BaseFunctor
    }
#endif // HAVE_VULKAN

#ifdef HAVE_CUDA
    Ptr<BackendNode> initCUDA(int target, csl::Stream stream)
    {
        CV_Error(Error::StsNotImplemented, "");
    }
#endif

#ifdef HAVE_HALIDE
    void attachHalide(const Halide::Expr& input, Halide::Func& top)
    {
        CV_Error(Error::StsNotImplemented, "");
    }
#endif // HAVE_HALIDE

private:
    static const char* const ocl_kernel_name;
};
@@ -823,15 +822,6 @@ struct TanHFunctor : public BaseDefaultFunctor<TanHFunctor>
    }
#endif // HAVE_DNN_NGRAPH

#ifdef HAVE_WEBNN
    ml::Operand initWebnnAPI(const ml::GraphBuilder& builder, const ml::Operand& input)
    {
        CV_Error(Error::StsNotImplemented, "");
        ml::Operand operand;
        return operand;
    }
#endif

    int64 getFLOPSPerElement() const { return 1; }
};
@@ -935,15 +925,6 @@ struct MishFunctor : public BaseDefaultFunctor<MishFunctor>
    }
#endif // HAVE_DNN_NGRAPH

#ifdef HAVE_WEBNN
    ml::Operand initWebnnAPI(const ml::GraphBuilder& builder, const ml::Operand& input)
    {
        CV_Error(Error::StsNotImplemented, "");
        ml::Operand operand;
        return operand;
    }
#endif

    int64 getFLOPSPerElement() const { return 3; }
};
@@ -996,15 +977,6 @@ struct SigmoidFunctor : public BaseDefaultFunctor<SigmoidFunctor>
    }
#endif // HAVE_DNN_NGRAPH

#ifdef HAVE_WEBNN
    ml::Operand initWebnnAPI(const ml::GraphBuilder& builder, const ml::Operand& input)
    {
        CV_Error(Error::StsNotImplemented, "");
        ml::Operand operand;
        return operand;
    }
#endif

    int64 getFLOPSPerElement() const { return 3; }
};
@@ -1123,15 +1095,6 @@ struct AbsValFunctor : public BaseDefaultFunctor<AbsValFunctor>
    }
#endif // HAVE_DNN_NGRAPH

#ifdef HAVE_WEBNN
    ml::Operand initWebnnAPI(const ml::GraphBuilder& builder, const ml::Operand& input)
    {
        CV_Error(Error::StsNotImplemented, "");
        ml::Operand operand;
        return operand;
    }
#endif

    int64 getFLOPSPerElement() const { return 1; }
};
@@ -1261,15 +1224,6 @@ struct LogFunctor : public BaseDefaultFunctor<LogFunctor>
        return log(x);
    }

#ifdef HAVE_WEBNN
    ml::Operand initWebnnAPI(const ml::GraphBuilder& builder, const ml::Operand& input)
    {
        CV_Error(Error::StsNotImplemented, "");
        ml::Operand operand;
        return operand;
    }
#endif

#ifdef HAVE_CUDA
    Ptr<BackendNode> initCUDA(int target, csl::Stream stream)
    {
@@ -1367,15 +1321,6 @@ struct SqrtFunctor : public BaseDefaultFunctor<SqrtFunctor>
    }
#endif // HAVE_DNN_NGRAPH

#ifdef HAVE_WEBNN
    ml::Operand initWebnnAPI(const ml::GraphBuilder& builder, const ml::Operand& input)
    {
        CV_Error(Error::StsNotImplemented, "");
        ml::Operand operand;
        return operand;
    }
#endif

    int64 getFLOPSPerElement() const { return 1; }
};
@@ -1822,6 +1767,156 @@ struct TanFunctor : public BaseDefaultFunctor<TanFunctor>
template<>
const char* const BaseDefaultFunctor<TanFunctor>::ocl_kernel_name = "TanForward";

struct CeluFunctor : public BaseDefaultFunctor<CeluFunctor>
{
    typedef CeluLayer Layer;

    float alpha;

    explicit CeluFunctor(float alpha_ = 1.f) : alpha(alpha_) {}

    bool supportBackend(int backendId, int)
    {
        return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_CUDA;
    }

    inline float calculate(float x) const
    {
        return max(0.f, x) + min(0.f, alpha * expm1(x / alpha));
    }

    inline void setKernelParams(ocl::Kernel& kernel) const
    {
        kernel.set(3, alpha);
    }

#ifdef HAVE_CUDA
    Ptr<BackendNode> initCUDA(int target, csl::Stream stream)
    {
        return make_cuda_node<cuda4dnn::CeluOp>(target, stream, alpha);
    }
#endif

    int64 getFLOPSPerElement() const { return 1; }
};

template<>
const char* const BaseDefaultFunctor<CeluFunctor>::ocl_kernel_name = "CeluForward";

struct HardSigmoidFunctor : public BaseDefaultFunctor<HardSigmoidFunctor>
{
    typedef HardSigmoidLayer Layer;

    float alpha;
    float beta;

    explicit HardSigmoidFunctor(float alpha_ = 0.2f, float beta_ = 0.5f) : alpha(alpha_), beta(beta_) {}

    bool supportBackend(int backendId, int)
    {
        return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_CUDA;
    }

    inline float calculate(float x) const
    {
        return max(0.f, min(1.f, alpha * x + beta));
    }

    inline void setKernelParams(ocl::Kernel& kernel) const
    {
        kernel.set(3, alpha);
        kernel.set(4, beta);
    }

#ifdef HAVE_CUDA
    Ptr<BackendNode> initCUDA(int target, csl::Stream stream)
    {
        return make_cuda_node<cuda4dnn::HardSigmoidOp>(target, stream, alpha, beta);
    }
#endif

    int64 getFLOPSPerElement() const { return 1; }
};

template<>
const char* const BaseDefaultFunctor<HardSigmoidFunctor>::ocl_kernel_name = "HardSigmoidForward";

struct SeluFunctor : public BaseDefaultFunctor<SeluFunctor>
{
    typedef SeluLayer Layer;

    float alpha;
    float gamma;

    explicit SeluFunctor(float alpha_ = 1.67326319217681884765625f,
                         float gamma_ = 1.05070102214813232421875f) : alpha(alpha_), gamma(gamma_) {}

    bool supportBackend(int backendId, int)
    {
        return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_CUDA;
    }

    inline float calculate(float x) const
    {
        return gamma * (x > 0.f ? x : alpha * expm1(x));
    }

    inline void setKernelParams(ocl::Kernel& kernel) const
    {
        kernel.set(3, alpha);
        kernel.set(4, gamma);
    }

#ifdef HAVE_CUDA
    Ptr<BackendNode> initCUDA(int target, csl::Stream stream)
    {
        return make_cuda_node<cuda4dnn::SeluOp>(target, stream, alpha, gamma);
    }
#endif

    int64 getFLOPSPerElement() const { return 1; }
};

template<>
const char* const BaseDefaultFunctor<SeluFunctor>::ocl_kernel_name = "SeluForward";

struct ThresholdedReluFunctor : public BaseDefaultFunctor<ThresholdedReluFunctor>
{
    typedef ThresholdedReluLayer Layer;

    float alpha;

    explicit ThresholdedReluFunctor(float alpha_ = 1.f) : alpha(alpha_) {}

    bool supportBackend(int backendId, int)
    {
        return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_CUDA;
    }

    inline float calculate(float x) const
    {
        return x > alpha ? x : 0.f;
    }

    inline void setKernelParams(ocl::Kernel& kernel) const
    {
        kernel.set(3, alpha);
    }

#ifdef HAVE_CUDA
    Ptr<BackendNode> initCUDA(int target, csl::Stream stream)
    {
        return make_cuda_node<cuda4dnn::ThresholdedReluOp>(target, stream, alpha);
    }
#endif

    int64 getFLOPSPerElement() const { return 1; }
};

template<>
const char* const BaseDefaultFunctor<ThresholdedReluFunctor>::ocl_kernel_name = "ThresholdedReluForward";

struct PowerFunctor : public BaseFunctor
{
    typedef PowerLayer Layer;
@@ -2074,15 +2169,6 @@ struct ExpFunctor : public BaseDefaultFunctor<ExpFunctor>
    }
#endif // HAVE_DNN_NGRAPH

#ifdef HAVE_WEBNN
    ml::Operand initWebnnAPI(const ml::GraphBuilder& builder, const ml::Operand& input)
    {
        CV_Error(Error::StsNotImplemented, "");
        ml::Operand operand;
        return operand;
    }
#endif

    int64 getFLOPSPerElement() const { return 3; }
};
@@ -2489,6 +2575,50 @@ Ptr<TanLayer> TanLayer::create(const LayerParams& params)
    return l;
}

Ptr<CeluLayer> CeluLayer::create(const LayerParams& params)
{
    float alpha = params.get<float>("alpha", 1.f);
    Ptr<CeluLayer> l(new ElementWiseLayer<CeluFunctor>(CeluFunctor(alpha)));
    l->setParamsFrom(params);
    l->alpha = alpha;

    return l;
}

Ptr<HardSigmoidLayer> HardSigmoidLayer::create(const LayerParams& params)
{
    float alpha = params.get<float>("alpha", 0.2f);
    float beta = params.get<float>("beta", 0.5f);
    Ptr<HardSigmoidLayer> l(new ElementWiseLayer<HardSigmoidFunctor>(HardSigmoidFunctor(alpha, beta)));
    l->setParamsFrom(params);
    l->alpha = alpha;
    l->beta = beta;

    return l;
}

Ptr<SeluLayer> SeluLayer::create(const LayerParams& params)
{
    float alpha = params.get<float>("alpha", 1.67326319217681884765625f);
    float gamma = params.get<float>("gamma", 1.05070102214813232421875f);
    Ptr<SeluLayer> l(new ElementWiseLayer<SeluFunctor>(SeluFunctor(alpha, gamma)));
    l->setParamsFrom(params);
    l->alpha = alpha;
    l->gamma = gamma;

    return l;
}

Ptr<ThresholdedReluLayer> ThresholdedReluLayer::create(const LayerParams& params)
{
    float alpha = params.get<float>("alpha", 1.f);
    Ptr<ThresholdedReluLayer> l(new ElementWiseLayer<ThresholdedReluFunctor>(ThresholdedReluFunctor(alpha)));
    l->setParamsFrom(params);
    l->alpha = alpha;

    return l;
}

Ptr<PowerLayer> PowerLayer::create(const LayerParams& params)
{
    float power = params.get<float>("power", 1.0f);
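The create() functions above also make the layers usable from a Net assembled in code. A small end-to-end sketch (layer name, alpha value, and input values are illustrative; the addLayerToPrev/setInput/forward pattern mirrors how OpenCV's own layer tests drive a single layer):

#include <opencv2/dnn.hpp>

int main()
{
    cv::dnn::Net net;
    cv::dnn::LayerParams lp;
    lp.set("alpha", 2.0f);
    net.addLayerToPrev("trelu", "ThresholdedRelu", lp);

    cv::Mat input = (cv::Mat_<float>(1, 4) << -1.f, 0.5f, 2.f, 3.f);
    net.setInput(input);
    cv::Mat out = net.forward();   // expected: 0, 0, 0, 3 with alpha = 2
    return 0;
}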
@@ -242,6 +242,70 @@ public:
    }
};

class CeluSubgraph : public Subgraph
{
public:
    CeluSubgraph() : alpha(1.f)
    {
        int input = addNodeToMatch("");
        int div = addNodeToMatch("Div", input, addNodeToMatch(""));
        int elu = addNodeToMatch("Elu", div);
        addNodeToMatch("Mul", addNodeToMatch(""), elu);
        setFusedNode("Celu", input);
    }

    static float extractAlpha(const Ptr<ImportGraphWrapper>& net, int node_id, int input_id)
    {
        const Ptr<ImportNodeWrapper> node = net->getNode(node_id);
        int const_id = getInputNodeId(net, node, input_id);
        Ptr<ImportNodeWrapper> alpha_ptr = net->getNode(const_id);
        opencv_onnx::NodeProto* alpha_node = alpha_ptr.dynamicCast<ONNXNodeWrapper>()->node;
        opencv_onnx::TensorProto alpha_proto = alpha_node->attribute(0).t();
        Mat alpha_mat = getMatFromTensor(alpha_proto);
        return *alpha_mat.ptr<float>();
    }

    virtual bool match(const Ptr<ImportGraphWrapper>& net, int nodeId,
                       std::vector<int>& matchedNodesIds,
                       std::vector<int>& targetNodesIds) CV_OVERRIDE
    {
        if (Subgraph::match(net, nodeId, matchedNodesIds, targetNodesIds))
        {
            float alpha_div = extractAlpha(net, matchedNodesIds[0], 1);
            float alpha_mul = extractAlpha(net, matchedNodesIds[2], 0);
            float alpha_elu = 1.f;

            Ptr<ImportNodeWrapper> elu_ptr = net->getNode(matchedNodesIds[1]);
            opencv_onnx::NodeProto* elu_node = elu_ptr.dynamicCast<ONNXNodeWrapper>()->node;

            for (int i = 0; i < elu_node->attribute_size(); i++)
            {
                opencv_onnx::AttributeProto attr = elu_node->attribute(i);
                if (attr.name() != "alpha")
                    continue;
                alpha_elu = attr.f();
            }

            alpha = alpha_div;
            return alpha_elu == 1.f && alpha_div == alpha_mul;
        }
        return false;
    }

    virtual void finalize(const Ptr<ImportGraphWrapper>&,
                          const Ptr<ImportNodeWrapper>& fusedNode,
                          std::vector<Ptr<ImportNodeWrapper> >&) CV_OVERRIDE
    {
        opencv_onnx::NodeProto* node = fusedNode.dynamicCast<ONNXNodeWrapper>()->node;
        opencv_onnx::AttributeProto* alpha_attr = node->add_attribute();
        alpha_attr->set_name("alpha");
        alpha_attr->set_f(alpha);
    }

protected:
    float alpha;
};

class NormalizeSubgraphBase : public Subgraph
{
public:
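The matcher above folds the ONNX decomposition Mul(alpha, Elu(Div(x, alpha))) into a single Celu node, which is valid exactly when the Elu alpha attribute is 1 and the Div and Mul constants agree, hence the checks in match(). A quick numeric illustration of that identity (plain C++, not part of the patch):

#include <algorithm>
#include <cassert>
#include <cmath>

int main()
{
    const float alpha = 0.75f;
    for (float x : {-3.f, -0.1f, 0.f, 0.4f, 2.f})
    {
        float elu   = (x / alpha) > 0.f ? (x / alpha) : std::expm1(x / alpha);   // Elu with alpha == 1
        float fused = alpha * elu;                                               // Mul(alpha, Elu(Div(x, alpha)))
        float celu  = std::max(0.f, x) + std::min(0.f, alpha * std::expm1(x / alpha));
        assert(std::fabs(fused - celu) < 1e-6f);
    }
    return 0;
}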
@@ -662,6 +726,7 @@ void simplifySubgraphs(opencv_onnx::GraphProto& net)
    subgraphs.push_back(makePtr<SoftMaxSubgraph2>());
    subgraphs.push_back(makePtr<LogSoftMaxSubgraph>());
    subgraphs.push_back(makePtr<HardSwishSubgraph>());
    subgraphs.push_back(makePtr<CeluSubgraph>());
    subgraphs.push_back(makePtr<NormalizeSubgraph1>());
    subgraphs.push_back(makePtr<NormalizeSubgraph2>());
    subgraphs.push_back(makePtr<NormalizeSubgraph2_2>());
@@ -272,3 +272,37 @@ __kernel void TanForward(const int n, __global T* in, __global T* out) {
    if(index < n)
        out[index] = tan(in[index]);
}

__kernel void CeluForward(const int n, __global T* in, __global T* out,
                          const KERNEL_ARG_DTYPE alpha)
{
    int index = get_global_id(0);
    if(index < n)
        out[index] = max(0.f, in[index]) + min(0.f, alpha * expm1(in[index] / alpha));
}

__kernel void HardSigmoidForward(const int n, __global T* in, __global T* out,
                                 const KERNEL_ARG_DTYPE alpha,
                                 const KERNEL_ARG_DTYPE beta)
{
    int index = get_global_id(0);
    if(index < n)
        out[index] = max(0.f, min(1.f, alpha * in[index] + beta));
}

__kernel void SeluForward(const int n, __global T* in, __global T* out,
                          const KERNEL_ARG_DTYPE alpha,
                          const KERNEL_ARG_DTYPE gamma)
{
    int index = get_global_id(0);
    if(index < n)
        out[index] = gamma * (in[index] > 0.f ? in[index] : alpha * expm1(in[index]));
}

__kernel void ThresholdedReluForward(const int n, __global T* in, __global T* out,
                                     const KERNEL_ARG_DTYPE alpha)
{
    int index = get_global_id(0);
    if(index < n)
        out[index] = (in[index] > alpha ? in[index] : 0.f);
}
@@ -57,7 +57,6 @@
"test_castlike_FLOAT_to_FLOAT16_expanded",
"test_castlike_FLOAT_to_STRING",
"test_castlike_STRING_to_FLOAT",
"test_celu",
"test_clip",
"test_clip_default_inbounds",
"test_clip_default_int8_inbounds",
@@ -173,9 +172,6 @@
"test_hardmax_example",
"test_hardmax_negative_axis",
"test_hardmax_one_hot",
"test_hardsigmoid",
"test_hardsigmoid_default",
"test_hardsigmoid_example",
"test_identity_opt",
"test_identity_sequence",
"test_if",
|
||||
"test_sce_sum_expanded",
|
||||
"test_sce_sum_log_prob",
|
||||
"test_sce_sum_log_prob_expanded",
|
||||
"test_selu",
|
||||
"test_selu_default",
|
||||
"test_selu_example",
|
||||
"test_sequence_insert_at_back",
|
||||
"test_sequence_insert_at_front",
|
||||
"test_shape",
|
||||
@@ -579,9 +572,6 @@
"test_tfidfvectorizer_tf_onlybigrams_levelempty",
"test_tfidfvectorizer_tf_onlybigrams_skip5",
"test_tfidfvectorizer_tf_uniandbigrams_skip5",
"test_thresholdedrelu",
"test_thresholdedrelu_default",
"test_thresholdedrelu_example",
"test_tile",
"test_tile_precomputed",
"test_top_k",