Add acos, acosh, asin, asinh, atan, atanh, cos, cosh, erf, hardswish, sin, sinh, softplus, softsign, tan layers
Commit 1bd382c1d0 (parent db4ab1c936)
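A minimal sketch (not part of this commit) of exercising one of the newly registered layer types through cv::dnn; the layer name "cosine" and the one-layer Net construction are illustrative, and assume OpenCV is built with the dnn module and this patch applied.

#include <opencv2/dnn.hpp>

int main()
{
    cv::dnn::Net net;
    cv::dnn::LayerParams params;                // the new unary layers take no parameters
    net.addLayerToPrev("cosine", "Cos", params); // connects to the net's input layer

    cv::Mat input(1, 4, CV_32F, cv::Scalar(0.f));
    net.setInput(input);
    cv::Mat output = net.forward();             // expect cos(0) == 1 elementwise
    return 0;
}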
@@ -648,6 +648,96 @@ CV__DNN_INLINE_NS_BEGIN
        static Ptr<NotLayer> create(const LayerParams &params);
    };

    class CV_EXPORTS AcosLayer : public ActivationLayer
    {
    public:
        static Ptr<AcosLayer> create(const LayerParams &params);
    };

    class CV_EXPORTS AcoshLayer : public ActivationLayer
    {
    public:
        static Ptr<AcoshLayer> create(const LayerParams &params);
    };

    class CV_EXPORTS AsinLayer : public ActivationLayer
    {
    public:
        static Ptr<AsinLayer> create(const LayerParams &params);
    };

    class CV_EXPORTS AsinhLayer : public ActivationLayer
    {
    public:
        static Ptr<AsinhLayer> create(const LayerParams &params);
    };

    class CV_EXPORTS AtanLayer : public ActivationLayer
    {
    public:
        static Ptr<AtanLayer> create(const LayerParams &params);
    };

    class CV_EXPORTS AtanhLayer : public ActivationLayer
    {
    public:
        static Ptr<AtanhLayer> create(const LayerParams &params);
    };

    class CV_EXPORTS CosLayer : public ActivationLayer
    {
    public:
        static Ptr<CosLayer> create(const LayerParams &params);
    };

    class CV_EXPORTS CoshLayer : public ActivationLayer
    {
    public:
        static Ptr<CoshLayer> create(const LayerParams &params);
    };

    class CV_EXPORTS ErfLayer : public ActivationLayer
    {
    public:
        static Ptr<ErfLayer> create(const LayerParams &params);
    };

    class CV_EXPORTS HardSwishLayer : public ActivationLayer
    {
    public:
        static Ptr<HardSwishLayer> create(const LayerParams &params);
    };

    class CV_EXPORTS SinLayer : public ActivationLayer
    {
    public:
        static Ptr<SinLayer> create(const LayerParams &params);
    };

    class CV_EXPORTS SinhLayer : public ActivationLayer
    {
    public:
        static Ptr<SinhLayer> create(const LayerParams &params);
    };

    class CV_EXPORTS SoftplusLayer : public ActivationLayer
    {
    public:
        static Ptr<SoftplusLayer> create(const LayerParams &params);
    };

    class CV_EXPORTS SoftsignLayer : public ActivationLayer
    {
    public:
        static Ptr<SoftsignLayer> create(const LayerParams &params);
    };

    class CV_EXPORTS TanLayer : public ActivationLayer
    {
    public:
        static Ptr<TanLayer> create(const LayerParams &params);
    };

    class CV_EXPORTS ActivationLayerInt8 : public ActivationLayer
    {
    public:
@@ -158,6 +158,81 @@ void not_k(const Stream& stream, Span<T> output, View<T> input) {
    generic_op<T, NotFunctor<T>>(stream, output, input);
}

template <class T>
void acos(const Stream& stream, Span<T> output, View<T> input) {
    generic_op<T, AcosFunctor<T>>(stream, output, input);
}

template <class T>
void acosh(const Stream& stream, Span<T> output, View<T> input) {
    generic_op<T, AcoshFunctor<T>>(stream, output, input);
}

template <class T>
void asin(const Stream& stream, Span<T> output, View<T> input) {
    generic_op<T, AsinFunctor<T>>(stream, output, input);
}

template <class T>
void asinh(const Stream& stream, Span<T> output, View<T> input) {
    generic_op<T, AsinhFunctor<T>>(stream, output, input);
}

template <class T>
void atan(const Stream& stream, Span<T> output, View<T> input) {
    generic_op<T, AtanFunctor<T>>(stream, output, input);
}

template <class T>
void atanh(const Stream& stream, Span<T> output, View<T> input) {
    generic_op<T, AtanhFunctor<T>>(stream, output, input);
}

template <class T>
void cos(const Stream& stream, Span<T> output, View<T> input) {
    generic_op<T, CosFunctor<T>>(stream, output, input);
}

template <class T>
void cosh(const Stream& stream, Span<T> output, View<T> input) {
    generic_op<T, CoshFunctor<T>>(stream, output, input);
}

template <class T>
void erf(const Stream& stream, Span<T> output, View<T> input) {
    generic_op<T, ErfFunctor<T>>(stream, output, input);
}

template <class T>
void hardswish(const Stream& stream, Span<T> output, View<T> input) {
    generic_op<T, HardSwishFunctor<T>>(stream, output, input);
}

template <class T>
void sin(const Stream& stream, Span<T> output, View<T> input) {
    generic_op<T, SinFunctor<T>>(stream, output, input);
}

template <class T>
void sinh(const Stream& stream, Span<T> output, View<T> input) {
    generic_op<T, SinhFunctor<T>>(stream, output, input);
}

template <class T>
void softplus(const Stream& stream, Span<T> output, View<T> input) {
    generic_op<T, SoftplusFunctor<T>>(stream, output, input);
}

template <class T>
void softsign(const Stream& stream, Span<T> output, View<T> input) {
    generic_op<T, SoftsignFunctor<T>>(stream, output, input);
}

template <class T>
void tan(const Stream& stream, Span<T> output, View<T> input) {
    generic_op<T, TanFunctor<T>>(stream, output, input);
}

template <class T>
void abs(const Stream& stream, Span<T> output, View<T> input) {
    generic_op<T, AbsFunctor<T>>(stream, output, input);
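The wrappers above all funnel into generic_op, which pairs one templated CUDA kernel with a per-op functor, so each new activation only needs a functor. The kernel below is a self-contained sketch of that dispatch shape; the name and grid-stride loop are illustrative, not the repository's actual generic_op.

#include <cstddef>

// Hypothetical, simplified version of the dispatch pattern: one grid-stride
// kernel applies any default-constructible functor elementwise.
template <class T, class Functor>
__global__ void elementwise_kernel(T* output, const T* input, std::size_t n)
{
    Functor functor; // stateless per-element operation, e.g. AcosFunctor<T>
    for (std::size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < n;
         i += static_cast<std::size_t>(gridDim.x) * blockDim.x)
    {
        output[i] = functor(input[i]);
    }
}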
@@ -196,6 +271,21 @@ template void log<__half>(const Stream&, Span<__half>, View<__half>);
template void rint<__half>(const Stream&, Span<__half>, View<__half>);
template void sqrt<__half>(const Stream&, Span<__half>, View<__half>);
template void not_k<__half>(const Stream&, Span<__half>, View<__half>);
template void acos<__half>(const Stream&, Span<__half>, View<__half>);
template void acosh<__half>(const Stream&, Span<__half>, View<__half>);
template void asin<__half>(const Stream&, Span<__half>, View<__half>);
template void asinh<__half>(const Stream&, Span<__half>, View<__half>);
template void atan<__half>(const Stream&, Span<__half>, View<__half>);
template void atanh<__half>(const Stream&, Span<__half>, View<__half>);
template void cos<__half>(const Stream&, Span<__half>, View<__half>);
template void cosh<__half>(const Stream&, Span<__half>, View<__half>);
template void erf<__half>(const Stream&, Span<__half>, View<__half>);
template void hardswish<__half>(const Stream&, Span<__half>, View<__half>);
template void sin<__half>(const Stream&, Span<__half>, View<__half>);
template void sinh<__half>(const Stream&, Span<__half>, View<__half>);
template void softplus<__half>(const Stream&, Span<__half>, View<__half>);
template void softsign<__half>(const Stream&, Span<__half>, View<__half>);
template void tan<__half>(const Stream&, Span<__half>, View<__half>);
template void power<__half>(const Stream&, Span<__half>, View<__half>, __half, __half, __half);
template void exp<__half>(const Stream&, Span<__half>, View<__half>, __half, __half);
#endif
@@ -216,6 +306,21 @@ template void log<float>(const Stream&, Span<float>, View<float>);
template void rint<float>(const Stream&, Span<float>, View<float>);
template void sqrt<float>(const Stream&, Span<float>, View<float>);
template void not_k<float>(const Stream&, Span<float>, View<float>);
template void acos<float>(const Stream&, Span<float>, View<float>);
template void acosh<float>(const Stream&, Span<float>, View<float>);
template void asin<float>(const Stream&, Span<float>, View<float>);
template void asinh<float>(const Stream&, Span<float>, View<float>);
template void atan<float>(const Stream&, Span<float>, View<float>);
template void atanh<float>(const Stream&, Span<float>, View<float>);
template void cos<float>(const Stream&, Span<float>, View<float>);
template void cosh<float>(const Stream&, Span<float>, View<float>);
template void erf<float>(const Stream&, Span<float>, View<float>);
template void hardswish<float>(const Stream&, Span<float>, View<float>);
template void sin<float>(const Stream&, Span<float>, View<float>);
template void sinh<float>(const Stream&, Span<float>, View<float>);
template void softplus<float>(const Stream&, Span<float>, View<float>);
template void softsign<float>(const Stream&, Span<float>, View<float>);
template void tan<float>(const Stream&, Span<float>, View<float>);
template void power<float>(const Stream&, Span<float>, View<float>, float, float, float);
template void exp<float>(const Stream&, Span<float>, View<float>, float, float);
@@ -303,6 +303,231 @@ struct NotFunctor {
    }
};

template <class T>
struct AcosFunctor {
    struct Params {
        CUDA4DNN_HOST_DEVICE Params() { }
    };

    CUDA4DNN_DEVICE AcosFunctor() { }
    CUDA4DNN_DEVICE AcosFunctor(const Params& params) { }

    CUDA4DNN_DEVICE T operator()(T value) {
        using csl::device::acos;
        return acos(value);
    }
};

template <class T>
struct AcoshFunctor {
    struct Params {
        CUDA4DNN_HOST_DEVICE Params() { }
    };

    CUDA4DNN_DEVICE AcoshFunctor() { }
    CUDA4DNN_DEVICE AcoshFunctor(const Params& params) { }

    CUDA4DNN_DEVICE T operator()(T value) {
        using csl::device::acosh;
        return acosh(value);
    }
};

template <class T>
struct AsinFunctor {
    struct Params {
        CUDA4DNN_HOST_DEVICE Params() { }
    };

    CUDA4DNN_DEVICE AsinFunctor() { }
    CUDA4DNN_DEVICE AsinFunctor(const Params& params) { }

    CUDA4DNN_DEVICE T operator()(T value) {
        using csl::device::asin;
        return asin(value);
    }
};

template <class T>
struct AsinhFunctor {
    struct Params {
        CUDA4DNN_HOST_DEVICE Params() { }
    };

    CUDA4DNN_DEVICE AsinhFunctor() { }
    CUDA4DNN_DEVICE AsinhFunctor(const Params& params) { }

    CUDA4DNN_DEVICE T operator()(T value) {
        using csl::device::asinh;
        return asinh(value);
    }
};

template <class T>
struct AtanFunctor {
    struct Params {
        CUDA4DNN_HOST_DEVICE Params() { }
    };

    CUDA4DNN_DEVICE AtanFunctor() { }
    CUDA4DNN_DEVICE AtanFunctor(const Params& params) { }

    CUDA4DNN_DEVICE T operator()(T value) {
        using csl::device::atan;
        return atan(value);
    }
};

template <class T>
struct AtanhFunctor {
    struct Params {
        CUDA4DNN_HOST_DEVICE Params() { }
    };

    CUDA4DNN_DEVICE AtanhFunctor() { }
    CUDA4DNN_DEVICE AtanhFunctor(const Params& params) { }

    CUDA4DNN_DEVICE T operator()(T value) {
        using csl::device::atanh;
        return atanh(value);
    }
};

template <class T>
struct CosFunctor {
    struct Params {
        CUDA4DNN_HOST_DEVICE Params() { }
    };

    CUDA4DNN_DEVICE CosFunctor() { }
    CUDA4DNN_DEVICE CosFunctor(const Params& params) { }

    CUDA4DNN_DEVICE T operator()(T value) {
        using csl::device::cos;
        return cos(value);
    }
};

template <class T>
struct CoshFunctor {
    struct Params {
        CUDA4DNN_HOST_DEVICE Params() { }
    };

    CUDA4DNN_DEVICE CoshFunctor() { }
    CUDA4DNN_DEVICE CoshFunctor(const Params& params) { }

    CUDA4DNN_DEVICE T operator()(T value) {
        using csl::device::cosh;
        return cosh(value);
    }
};

template <class T>
struct ErfFunctor {
    struct Params {
        CUDA4DNN_HOST_DEVICE Params() { }
    };

    CUDA4DNN_DEVICE ErfFunctor() { }
    CUDA4DNN_DEVICE ErfFunctor(const Params& params) { }

    CUDA4DNN_DEVICE T operator()(T value) {
        using csl::device::erf;
        return erf(value);
    }
};

template <class T>
struct HardSwishFunctor {
    struct Params {
        CUDA4DNN_HOST_DEVICE Params() { }
    };

    CUDA4DNN_DEVICE HardSwishFunctor() { }
    CUDA4DNN_DEVICE HardSwishFunctor(const Params& params) { }

    CUDA4DNN_DEVICE T operator()(T value) {
        using csl::device::clamp; // saturate?
        return value * clamp(value / static_cast<T>(6.f) + static_cast<T>(0.5f), static_cast<T>(0.f), static_cast<T>(1.f));
    }
};

template <class T>
struct SinFunctor {
    struct Params {
        CUDA4DNN_HOST_DEVICE Params() { }
    };

    CUDA4DNN_DEVICE SinFunctor() { }
    CUDA4DNN_DEVICE SinFunctor(const Params& params) { }

    CUDA4DNN_DEVICE T operator()(T value) {
        using csl::device::sin;
        return sin(value);
    }
};

template <class T>
struct SinhFunctor {
    struct Params {
        CUDA4DNN_HOST_DEVICE Params() { }
    };

    CUDA4DNN_DEVICE SinhFunctor() { }
    CUDA4DNN_DEVICE SinhFunctor(const Params& params) { }

    CUDA4DNN_DEVICE T operator()(T value) {
        using csl::device::sinh;
        return sinh(value);
    }
};

template <class T>
struct SoftplusFunctor {
    struct Params {
        CUDA4DNN_HOST_DEVICE Params() { }
    };

    CUDA4DNN_DEVICE SoftplusFunctor() { }
    CUDA4DNN_DEVICE SoftplusFunctor(const Params& params) { }

    CUDA4DNN_DEVICE T operator()(T value) {
        using csl::device::log1pexp;
        return log1pexp(value);
    }
};

template <class T>
struct SoftsignFunctor {
    struct Params {
        CUDA4DNN_HOST_DEVICE Params() { }
    };

    CUDA4DNN_DEVICE SoftsignFunctor() { }
    CUDA4DNN_DEVICE SoftsignFunctor(const Params& params) { }

    CUDA4DNN_DEVICE T operator()(T value) {
        using csl::device::abs;
        return value / (static_cast<T>(1.f) + abs(value));
    }
};

template <class T>
struct TanFunctor {
    struct Params {
        CUDA4DNN_HOST_DEVICE Params() { }
    };

    CUDA4DNN_DEVICE TanFunctor() { }
    CUDA4DNN_DEVICE TanFunctor(const Params& params) { }

    CUDA4DNN_DEVICE T operator()(T value) {
        using csl::device::tan;
        return tan(value);
    }
};

template <class T>
struct PowerFunctor {
    struct Params {
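SoftplusFunctor above delegates to csl::device::log1pexp rather than computing log(1 + exp(x)) directly; the point of that choice is numerical stability, sketched here in plain C++ (the helper name is illustrative, not the repository's implementation):

#include <cmath>

// Naive softplus overflows: exp(x) is inf for float x greater than ~88.
// A stable form splits out the linear part first:
//   log(1 + e^x) == max(x, 0) + log1p(e^(-|x|))
static float softplus_stable(float x)
{
    return std::fmax(x, 0.0f) + std::log1p(std::exp(-std::fabs(x)));
}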
@@ -140,6 +140,90 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace csl { namespace device {
template <> inline __device__ __half rint(__half value) { return hrint(value); }
#endif

template <class T> __device__ T acos(T value);
template <> inline __device__ double acos(double value) { return ::acos(value); }
template <> inline __device__ float acos(float value) { return acosf(value); }
#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 530)
template <> inline __device__ __half acos(__half value) { return acosf(value); }
#endif

template <class T> __device__ T acosh(T value);
template <> inline __device__ double acosh(double value) { return ::acosh(value); }
template <> inline __device__ float acosh(float value) { return acoshf(value); }
#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 530)
template <> inline __device__ __half acosh(__half value) { return acoshf(value); }
#endif

template <class T> __device__ T asin(T value);
template <> inline __device__ double asin(double value) { return ::asin(value); }
template <> inline __device__ float asin(float value) { return asinf(value); }
#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 530)
template <> inline __device__ __half asin(__half value) { return asinf(value); }
#endif

template <class T> __device__ T asinh(T value);
template <> inline __device__ double asinh(double value) { return ::asinh(value); }
template <> inline __device__ float asinh(float value) { return asinhf(value); }
#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 530)
template <> inline __device__ __half asinh(__half value) { return asinhf(value); }
#endif

template <class T> __device__ T atan(T value);
template <> inline __device__ double atan(double value) { return ::atan(value); }
template <> inline __device__ float atan(float value) { return atanf(value); }
#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 530)
template <> inline __device__ __half atan(__half value) { return atanf(value); }
#endif

template <class T> __device__ T atanh(T value);
template <> inline __device__ double atanh(double value) { return ::atanh(value); }
template <> inline __device__ float atanh(float value) { return atanhf(value); }
#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 530)
template <> inline __device__ __half atanh(__half value) { return atanhf(value); }
#endif

template <class T> __device__ T cos(T value);
template <> inline __device__ double cos(double value) { return ::cos(value); }
template <> inline __device__ float cos(float value) { return cosf(value); }
#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 530)
template <> inline __device__ __half cos(__half value) { return hcos(value); }
#endif

template <class T> __device__ T cosh(T value);
template <> inline __device__ double cosh(double value) { return ::cosh(value); }
template <> inline __device__ float cosh(float value) { return coshf(value); }
#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 530)
template <> inline __device__ __half cosh(__half value) { return coshf(value); }
#endif

template <class T> __device__ T erf(T value);
template <> inline __device__ double erf(double value) { return ::erf(value); }
template <> inline __device__ float erf(float value) { return erff(value); }
#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 530)
template <> inline __device__ __half erf(__half value) { return erff(value); }
#endif

template <class T> __device__ T sin(T value);
template <> inline __device__ double sin(double value) { return ::sin(value); }
template <> inline __device__ float sin(float value) { return sinf(value); }
#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 530)
template <> inline __device__ __half sin(__half value) { return hsin(value); }
#endif

template <class T> __device__ T sinh(T value);
template <> inline __device__ double sinh(double value) { return ::sinh(value); }
template <> inline __device__ float sinh(float value) { return sinhf(value); }
#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 530)
template <> inline __device__ __half sinh(__half value) { return sinhf(value); }
#endif

template <class T> __device__ T tan(T value);
template <> inline __device__ double tan(double value) { return ::tan(value); }
template <> inline __device__ float tan(float value) { return tanf(value); }
#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 530)
template <> inline __device__ __half tan(__half value) { return tanf(value); }
#endif

template <class T> __device__ T ceil(T value);
template <> inline __device__ double ceil(double value) { return ::ceil(value); }
template <> inline __device__ float ceil(float value) { return ceilf(value); }
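Note the asymmetry in the __half overloads above: cos and sin use native half intrinsics (hcos, hsin), while the rest promote to float and call the float routine through implicit conversions. Spelled out explicitly, that promotion path looks like the sketch below (the function name is illustrative):

#include <cuda_fp16.h>

// What e.g. the __half acos overload above does implicitly: compute in
// float precision, then convert the result back to half on return.
__device__ __half acos_via_float(__half value)
{
    float promoted = __half2float(value);
    return __float2half(acosf(promoted));
}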
@@ -60,6 +60,51 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels {
template <class T>
void not_k(const csl::Stream& stream, csl::Span<T> output, csl::View<T> input);

template <class T>
void acos(const csl::Stream& stream, csl::Span<T> output, csl::View<T> input);

template <class T>
void acosh(const csl::Stream& stream, csl::Span<T> output, csl::View<T> input);

template <class T>
void asin(const csl::Stream& stream, csl::Span<T> output, csl::View<T> input);

template <class T>
void asinh(const csl::Stream& stream, csl::Span<T> output, csl::View<T> input);

template <class T>
void atan(const csl::Stream& stream, csl::Span<T> output, csl::View<T> input);

template <class T>
void atanh(const csl::Stream& stream, csl::Span<T> output, csl::View<T> input);

template <class T>
void cos(const csl::Stream& stream, csl::Span<T> output, csl::View<T> input);

template <class T>
void cosh(const csl::Stream& stream, csl::Span<T> output, csl::View<T> input);

template <class T>
void erf(const csl::Stream& stream, csl::Span<T> output, csl::View<T> input);

template <class T>
void hardswish(const csl::Stream& stream, csl::Span<T> output, csl::View<T> input);

template <class T>
void sin(const csl::Stream& stream, csl::Span<T> output, csl::View<T> input);

template <class T>
void sinh(const csl::Stream& stream, csl::Span<T> output, csl::View<T> input);

template <class T>
void softplus(const csl::Stream& stream, csl::Span<T> output, csl::View<T> input);

template <class T>
void softsign(const csl::Stream& stream, csl::Span<T> output, csl::View<T> input);

template <class T>
void tan(const csl::Stream& stream, csl::Span<T> output, csl::View<T> input);

template <class T>
void power(const csl::Stream& stream, csl::Span<T> output, csl::View<T> input, T exp, T scale, T shift);
@@ -280,6 +280,216 @@ namespace cv { namespace dnn { namespace cuda4dnn {
    csl::Stream stream;
};

template <class T>
class AcosOp final : public BaseOp<AcosOp, T> {
public:
    AcosOp(csl::Stream stream_) : stream(std::move(stream_)) { }

    void calculate(csl::TensorSpan<T> output, csl::TensorView<T> input) const
    {
        kernels::acos<T>(stream, output, input);
    }

private:
    csl::Stream stream;
};

template <class T>
class AcoshOp final : public BaseOp<AcoshOp, T> {
public:
    AcoshOp(csl::Stream stream_) : stream(std::move(stream_)) { }

    void calculate(csl::TensorSpan<T> output, csl::TensorView<T> input) const
    {
        kernels::acosh<T>(stream, output, input);
    }

private:
    csl::Stream stream;
};

template <class T>
class AsinOp final : public BaseOp<AsinOp, T> {
public:
    AsinOp(csl::Stream stream_) : stream(std::move(stream_)) { }

    void calculate(csl::TensorSpan<T> output, csl::TensorView<T> input) const
    {
        kernels::asin<T>(stream, output, input);
    }

private:
    csl::Stream stream;
};

template <class T>
class AsinhOp final : public BaseOp<AsinhOp, T> {
public:
    AsinhOp(csl::Stream stream_) : stream(std::move(stream_)) { }

    void calculate(csl::TensorSpan<T> output, csl::TensorView<T> input) const
    {
        kernels::asinh<T>(stream, output, input);
    }

private:
    csl::Stream stream;
};

template <class T>
class AtanOp final : public BaseOp<AtanOp, T> {
public:
    AtanOp(csl::Stream stream_) : stream(std::move(stream_)) { }

    void calculate(csl::TensorSpan<T> output, csl::TensorView<T> input) const
    {
        kernels::atan<T>(stream, output, input);
    }

private:
    csl::Stream stream;
};

template <class T>
class AtanhOp final : public BaseOp<AtanhOp, T> {
public:
    AtanhOp(csl::Stream stream_) : stream(std::move(stream_)) { }

    void calculate(csl::TensorSpan<T> output, csl::TensorView<T> input) const
    {
        kernels::atanh<T>(stream, output, input);
    }

private:
    csl::Stream stream;
};

template <class T>
class CosOp final : public BaseOp<CosOp, T> {
public:
    CosOp(csl::Stream stream_) : stream(std::move(stream_)) { }

    void calculate(csl::TensorSpan<T> output, csl::TensorView<T> input) const
    {
        kernels::cos<T>(stream, output, input);
    }

private:
    csl::Stream stream;
};

template <class T>
class CoshOp final : public BaseOp<CoshOp, T> {
public:
    CoshOp(csl::Stream stream_) : stream(std::move(stream_)) { }

    void calculate(csl::TensorSpan<T> output, csl::TensorView<T> input) const
    {
        kernels::cosh<T>(stream, output, input);
    }

private:
    csl::Stream stream;
};

template <class T>
class ErfOp final : public BaseOp<ErfOp, T> {
public:
    ErfOp(csl::Stream stream_) : stream(std::move(stream_)) { }

    void calculate(csl::TensorSpan<T> output, csl::TensorView<T> input) const
    {
        kernels::erf<T>(stream, output, input);
    }

private:
    csl::Stream stream;
};

template <class T>
class HardSwishOp final : public BaseOp<HardSwishOp, T> {
public:
    HardSwishOp(csl::Stream stream_) : stream(std::move(stream_)) { }

    void calculate(csl::TensorSpan<T> output, csl::TensorView<T> input) const
    {
        kernels::hardswish<T>(stream, output, input);
    }

private:
    csl::Stream stream;
};

template <class T>
class SinOp final : public BaseOp<SinOp, T> {
public:
    SinOp(csl::Stream stream_) : stream(std::move(stream_)) { }

    void calculate(csl::TensorSpan<T> output, csl::TensorView<T> input) const
    {
        kernels::sin<T>(stream, output, input);
    }

private:
    csl::Stream stream;
};

template <class T>
class SinhOp final : public BaseOp<SinhOp, T> {
public:
    SinhOp(csl::Stream stream_) : stream(std::move(stream_)) { }

    void calculate(csl::TensorSpan<T> output, csl::TensorView<T> input) const
    {
        kernels::sinh<T>(stream, output, input);
    }

private:
    csl::Stream stream;
};

template <class T>
class SoftplusOp final : public BaseOp<SoftplusOp, T> {
public:
    SoftplusOp(csl::Stream stream_) : stream(std::move(stream_)) { }

    void calculate(csl::TensorSpan<T> output, csl::TensorView<T> input) const
    {
        kernels::softplus<T>(stream, output, input);
    }

private:
    csl::Stream stream;
};

template <class T>
class SoftsignOp final : public BaseOp<SoftsignOp, T> {
public:
    SoftsignOp(csl::Stream stream_) : stream(std::move(stream_)) { }

    void calculate(csl::TensorSpan<T> output, csl::TensorView<T> input) const
    {
        kernels::softsign<T>(stream, output, input);
    }

private:
    csl::Stream stream;
};

template <class T>
class TanOp final : public BaseOp<TanOp, T> {
public:
    TanOp(csl::Stream stream_) : stream(std::move(stream_)) { }

    void calculate(csl::TensorSpan<T> output, csl::TensorView<T> input) const
    {
        kernels::tan<T>(stream, output, input);
    }

private:
    csl::Stream stream;
};

template <class T>
class PowerOp final : public BaseOp<PowerOp, T> {
public:
@@ -117,6 +117,21 @@ void initializeLayerFactory()
    CV_DNN_REGISTER_LAYER_CLASS(Round, RoundLayer);
    CV_DNN_REGISTER_LAYER_CLASS(Sqrt, SqrtLayer);
    CV_DNN_REGISTER_LAYER_CLASS(Not, NotLayer);
    CV_DNN_REGISTER_LAYER_CLASS(Acos, AcosLayer);
    CV_DNN_REGISTER_LAYER_CLASS(Acosh, AcoshLayer);
    CV_DNN_REGISTER_LAYER_CLASS(Asin, AsinLayer);
    CV_DNN_REGISTER_LAYER_CLASS(Asinh, AsinhLayer);
    CV_DNN_REGISTER_LAYER_CLASS(Atan, AtanLayer);
    CV_DNN_REGISTER_LAYER_CLASS(Atanh, AtanhLayer);
    CV_DNN_REGISTER_LAYER_CLASS(Cos, CosLayer);
    CV_DNN_REGISTER_LAYER_CLASS(Cosh, CoshLayer);
    CV_DNN_REGISTER_LAYER_CLASS(Erf, ErfLayer);
    CV_DNN_REGISTER_LAYER_CLASS(HardSwish, HardSwishLayer);
    CV_DNN_REGISTER_LAYER_CLASS(Sin, SinLayer);
    CV_DNN_REGISTER_LAYER_CLASS(Sinh, SinhLayer);
    CV_DNN_REGISTER_LAYER_CLASS(Softplus, SoftplusLayer);
    CV_DNN_REGISTER_LAYER_CLASS(Softsign, SoftsignLayer);
    CV_DNN_REGISTER_LAYER_CLASS(Tan, TanLayer);
    CV_DNN_REGISTER_LAYER_CLASS(BatchNorm, BatchNormLayer);
    CV_DNN_REGISTER_LAYER_CLASS(MaxUnpool, MaxUnpoolLayer);
    CV_DNN_REGISTER_LAYER_CLASS(Dropout, BlankLayer);
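With these registrations in place, each type name resolves through the DNN layer factory. A minimal standalone check (illustrative, assuming this patch is applied) is:

#include <opencv2/dnn.hpp>

void check_tan_is_registered()
{
    cv::dnn::LayerParams params;
    cv::Ptr<cv::dnn::Layer> layer =
        cv::dnn::LayerFactory::createLayerInstance("Tan", params);
    CV_Assert(!layer.empty()); // would be empty before this commit
}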
@@ -76,8 +76,21 @@ using std::pow;
using std::ceil;
using std::floor;
using std::log;
using std::log1p;
using std::sqrt;
using std::round;
using std::acos;
using std::acosh;
using std::asin;
using std::asinh;
using std::atan;
using std::atanh;
using std::cos;
using std::cosh;
using std::erf;
using std::sin;
using std::sinh;
using std::tan;

template<typename Func>
class ElementWiseLayer : public Func::Layer
@@ -746,6 +759,20 @@ struct BaseDefaultFunctor : public BaseFunctor
    }
#endif // HAVE_VULKAN

#ifdef HAVE_CUDA
    Ptr<BackendNode> initCUDA(int target, csl::Stream stream)
    {
        CV_Error(Error::StsNotImplemented, "");
    }
#endif

#ifdef HAVE_HALIDE
    void attachHalide(const Halide::Expr& input, Halide::Func& top)
    {
        CV_Error(Error::StsNotImplemented, "");
    }
#endif // HAVE_HALIDE

private:
    static const char* const ocl_kernel_name;
};
@@ -1390,6 +1417,411 @@ struct NotFunctor : public BaseDefaultFunctor<NotFunctor>
template<>
const char* const BaseDefaultFunctor<NotFunctor>::ocl_kernel_name = "NotForward";

struct AcosFunctor : public BaseDefaultFunctor<AcosFunctor>
{
    typedef AcosLayer Layer;

    bool supportBackend(int backendId, int)
    {
        return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_CUDA;
    }

    inline float calculate(float x) const
    {
        return acos(x);
    }

#ifdef HAVE_CUDA
    Ptr<BackendNode> initCUDA(int target, csl::Stream stream)
    {
        return make_cuda_node<cuda4dnn::AcosOp>(target, stream);
    }
#endif

    int64 getFLOPSPerElement() const { return 1; }
};

template<>
const char* const BaseDefaultFunctor<AcosFunctor>::ocl_kernel_name = "AcosForward";

struct AcoshFunctor : public BaseDefaultFunctor<AcoshFunctor>
{
    typedef AcoshLayer Layer;

    bool supportBackend(int backendId, int)
    {
        return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_CUDA;
    }

    inline float calculate(float x) const
    {
        return acosh(x);
    }

#ifdef HAVE_CUDA
    Ptr<BackendNode> initCUDA(int target, csl::Stream stream)
    {
        return make_cuda_node<cuda4dnn::AcoshOp>(target, stream);
    }
#endif

    int64 getFLOPSPerElement() const { return 1; }
};

template<>
const char* const BaseDefaultFunctor<AcoshFunctor>::ocl_kernel_name = "AcoshForward";

struct AsinFunctor : public BaseDefaultFunctor<AsinFunctor>
{
    typedef AsinLayer Layer;

    bool supportBackend(int backendId, int)
    {
        return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_CUDA;
    }

    inline float calculate(float x) const
    {
        return asin(x);
    }

#ifdef HAVE_CUDA
    Ptr<BackendNode> initCUDA(int target, csl::Stream stream)
    {
        return make_cuda_node<cuda4dnn::AsinOp>(target, stream);
    }
#endif

    int64 getFLOPSPerElement() const { return 1; }
};

template<>
const char* const BaseDefaultFunctor<AsinFunctor>::ocl_kernel_name = "AsinForward";

struct AsinhFunctor : public BaseDefaultFunctor<AsinhFunctor>
{
    typedef AsinhLayer Layer;

    bool supportBackend(int backendId, int)
    {
        return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_CUDA;
    }

    inline float calculate(float x) const
    {
        return asinh(x);
    }

#ifdef HAVE_CUDA
    Ptr<BackendNode> initCUDA(int target, csl::Stream stream)
    {
        return make_cuda_node<cuda4dnn::AsinhOp>(target, stream);
    }
#endif

    int64 getFLOPSPerElement() const { return 1; }
};

template<>
const char* const BaseDefaultFunctor<AsinhFunctor>::ocl_kernel_name = "AsinhForward";

struct AtanFunctor : public BaseDefaultFunctor<AtanFunctor>
{
    typedef AtanLayer Layer;

    bool supportBackend(int backendId, int)
    {
        return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_CUDA;
    }

    inline float calculate(float x) const
    {
        return atan(x);
    }

#ifdef HAVE_CUDA
    Ptr<BackendNode> initCUDA(int target, csl::Stream stream)
    {
        return make_cuda_node<cuda4dnn::AtanOp>(target, stream);
    }
#endif

    int64 getFLOPSPerElement() const { return 1; }
};

template<>
const char* const BaseDefaultFunctor<AtanFunctor>::ocl_kernel_name = "AtanForward";

struct AtanhFunctor : public BaseDefaultFunctor<AtanhFunctor>
{
    typedef AtanhLayer Layer;

    bool supportBackend(int backendId, int)
    {
        return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_CUDA;
    }

    inline float calculate(float x) const
    {
        return atanh(x);
    }

#ifdef HAVE_CUDA
    Ptr<BackendNode> initCUDA(int target, csl::Stream stream)
    {
        return make_cuda_node<cuda4dnn::AtanhOp>(target, stream);
    }
#endif

    int64 getFLOPSPerElement() const { return 1; }
};

template<>
const char* const BaseDefaultFunctor<AtanhFunctor>::ocl_kernel_name = "AtanhForward";

struct CosFunctor : public BaseDefaultFunctor<CosFunctor>
{
    typedef CosLayer Layer;

    bool supportBackend(int backendId, int)
    {
        return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_CUDA;
    }

    inline float calculate(float x) const
    {
        return cos(x);
    }

#ifdef HAVE_CUDA
    Ptr<BackendNode> initCUDA(int target, csl::Stream stream)
    {
        return make_cuda_node<cuda4dnn::CosOp>(target, stream);
    }
#endif

    int64 getFLOPSPerElement() const { return 1; }
};

template<>
const char* const BaseDefaultFunctor<CosFunctor>::ocl_kernel_name = "CosForward";

struct CoshFunctor : public BaseDefaultFunctor<CoshFunctor>
{
    typedef CoshLayer Layer;

    bool supportBackend(int backendId, int)
    {
        return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_CUDA;
    }

    inline float calculate(float x) const
    {
        return cosh(x);
    }

#ifdef HAVE_CUDA
    Ptr<BackendNode> initCUDA(int target, csl::Stream stream)
    {
        return make_cuda_node<cuda4dnn::CoshOp>(target, stream);
    }
#endif

    int64 getFLOPSPerElement() const { return 1; }
};

template<>
const char* const BaseDefaultFunctor<CoshFunctor>::ocl_kernel_name = "CoshForward";

struct ErfFunctor : public BaseDefaultFunctor<ErfFunctor>
{
    typedef ErfLayer Layer;

    bool supportBackend(int backendId, int)
    {
        return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_CUDA;
    }

    inline float calculate(float x) const
    {
        return erf(x);
    }

#ifdef HAVE_CUDA
    Ptr<BackendNode> initCUDA(int target, csl::Stream stream)
    {
        return make_cuda_node<cuda4dnn::ErfOp>(target, stream);
    }
#endif

    int64 getFLOPSPerElement() const { return 1; }
};

template<>
const char* const BaseDefaultFunctor<ErfFunctor>::ocl_kernel_name = "ErfForward";

struct HardSwishFunctor : public BaseDefaultFunctor<HardSwishFunctor>
{
    typedef HardSwishLayer Layer;

    bool supportBackend(int backendId, int)
    {
        return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_CUDA;
    }

    inline float calculate(float x) const
    {
        return x * max(0.f, min(1.f, x / 6.f + 0.5f));
    }

#ifdef HAVE_CUDA
    Ptr<BackendNode> initCUDA(int target, csl::Stream stream)
    {
        return make_cuda_node<cuda4dnn::HardSwishOp>(target, stream);
    }
#endif

    int64 getFLOPSPerElement() const { return 1; }
};

template<>
const char* const BaseDefaultFunctor<HardSwishFunctor>::ocl_kernel_name = "HardSwishForward";

struct SinFunctor : public BaseDefaultFunctor<SinFunctor>
{
    typedef SinLayer Layer;

    bool supportBackend(int backendId, int)
    {
        return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_CUDA;
    }

    inline float calculate(float x) const
    {
        return sin(x);
    }

#ifdef HAVE_CUDA
    Ptr<BackendNode> initCUDA(int target, csl::Stream stream)
    {
        return make_cuda_node<cuda4dnn::SinOp>(target, stream);
    }
#endif

    int64 getFLOPSPerElement() const { return 1; }
};

template<>
const char* const BaseDefaultFunctor<SinFunctor>::ocl_kernel_name = "SinForward";

struct SinhFunctor : public BaseDefaultFunctor<SinhFunctor>
{
    typedef SinhLayer Layer;

    bool supportBackend(int backendId, int)
    {
        return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_CUDA;
    }

    inline float calculate(float x) const
    {
        return sinh(x);
    }

#ifdef HAVE_CUDA
    Ptr<BackendNode> initCUDA(int target, csl::Stream stream)
    {
        return make_cuda_node<cuda4dnn::SinhOp>(target, stream);
    }
#endif

    int64 getFLOPSPerElement() const { return 1; }
};

template<>
const char* const BaseDefaultFunctor<SinhFunctor>::ocl_kernel_name = "SinhForward";

struct SoftplusFunctor : public BaseDefaultFunctor<SoftplusFunctor>
{
    typedef SoftplusLayer Layer;

    bool supportBackend(int backendId, int)
    {
        return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_CUDA;
    }

    inline float calculate(float x) const
    {
        return log1p(exp(x));
    }

#ifdef HAVE_CUDA
    Ptr<BackendNode> initCUDA(int target, csl::Stream stream)
    {
        return make_cuda_node<cuda4dnn::SoftplusOp>(target, stream);
    }
#endif

    int64 getFLOPSPerElement() const { return 1; }
};

template<>
const char* const BaseDefaultFunctor<SoftplusFunctor>::ocl_kernel_name = "SoftplusForward";

struct SoftsignFunctor : public BaseDefaultFunctor<SoftsignFunctor>
{
    typedef SoftsignLayer Layer;

    bool supportBackend(int backendId, int)
    {
        return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_CUDA;
    }

    inline float calculate(float x) const
    {
        return x / (1.f + abs(x));
    }

#ifdef HAVE_CUDA
    Ptr<BackendNode> initCUDA(int target, csl::Stream stream)
    {
        return make_cuda_node<cuda4dnn::SoftsignOp>(target, stream);
    }
#endif

    int64 getFLOPSPerElement() const { return 1; }
};

template<>
const char* const BaseDefaultFunctor<SoftsignFunctor>::ocl_kernel_name = "SoftsignForward";

struct TanFunctor : public BaseDefaultFunctor<TanFunctor>
{
    typedef TanLayer Layer;

    bool supportBackend(int backendId, int)
    {
        return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_CUDA;
    }

    inline float calculate(float x) const
    {
        return tan(x);
    }

#ifdef HAVE_CUDA
    Ptr<BackendNode> initCUDA(int target, csl::Stream stream)
    {
        return make_cuda_node<cuda4dnn::TanOp>(target, stream);
    }
#endif

    int64 getFLOPSPerElement() const { return 1; }
};

template<>
const char* const BaseDefaultFunctor<TanFunctor>::ocl_kernel_name = "TanForward";

struct PowerFunctor : public BaseFunctor
{
    typedef PowerLayer Layer;
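Each functor above follows the same recipe: a backend gate, a scalar calculate(), a CUDA node factory, and a named OpenCL kernel. As a sketch of what one further unary activation would need, using the same in-file context (ErfcFunctor and ErfcLayer are hypothetical, not part of this commit; std::erfc is the standard complementary error function):

// Hypothetical functor following the BaseDefaultFunctor recipe above;
// a CPU-only variant, so only the OpenCV backend is advertised.
struct ErfcFunctor : public BaseDefaultFunctor<ErfcFunctor>
{
    typedef ErfcLayer Layer; // hypothetical layer class

    bool supportBackend(int backendId, int)
    {
        return backendId == DNN_BACKEND_OPENCV;
    }

    inline float calculate(float x) const
    {
        return std::erfc(x); // requires <cmath>
    }

    int64 getFLOPSPerElement() const { return 1; }
};

template<>
const char* const BaseDefaultFunctor<ErfcFunctor>::ocl_kernel_name = "ErfcForward";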
@@ -1937,6 +2369,126 @@ Ptr<NotLayer> NotLayer::create(const LayerParams& params)
    return l;
}

Ptr<AcosLayer> AcosLayer::create(const LayerParams& params)
{
    Ptr<AcosLayer> l(new ElementWiseLayer<AcosFunctor>());
    l->setParamsFrom(params);

    return l;
}

Ptr<AcoshLayer> AcoshLayer::create(const LayerParams& params)
{
    Ptr<AcoshLayer> l(new ElementWiseLayer<AcoshFunctor>());
    l->setParamsFrom(params);

    return l;
}

Ptr<AsinLayer> AsinLayer::create(const LayerParams& params)
{
    Ptr<AsinLayer> l(new ElementWiseLayer<AsinFunctor>());
    l->setParamsFrom(params);

    return l;
}

Ptr<AsinhLayer> AsinhLayer::create(const LayerParams& params)
{
    Ptr<AsinhLayer> l(new ElementWiseLayer<AsinhFunctor>());
    l->setParamsFrom(params);

    return l;
}

Ptr<AtanLayer> AtanLayer::create(const LayerParams& params)
{
    Ptr<AtanLayer> l(new ElementWiseLayer<AtanFunctor>());
    l->setParamsFrom(params);

    return l;
}

Ptr<AtanhLayer> AtanhLayer::create(const LayerParams& params)
{
    Ptr<AtanhLayer> l(new ElementWiseLayer<AtanhFunctor>());
    l->setParamsFrom(params);

    return l;
}

Ptr<CosLayer> CosLayer::create(const LayerParams& params)
{
    Ptr<CosLayer> l(new ElementWiseLayer<CosFunctor>());
    l->setParamsFrom(params);

    return l;
}

Ptr<CoshLayer> CoshLayer::create(const LayerParams& params)
{
    Ptr<CoshLayer> l(new ElementWiseLayer<CoshFunctor>());
    l->setParamsFrom(params);

    return l;
}

Ptr<ErfLayer> ErfLayer::create(const LayerParams& params)
{
    Ptr<ErfLayer> l(new ElementWiseLayer<ErfFunctor>());
    l->setParamsFrom(params);

    return l;
}

Ptr<HardSwishLayer> HardSwishLayer::create(const LayerParams& params)
{
    Ptr<HardSwishLayer> l(new ElementWiseLayer<HardSwishFunctor>());
    l->setParamsFrom(params);

    return l;
}

Ptr<SinLayer> SinLayer::create(const LayerParams& params)
{
    Ptr<SinLayer> l(new ElementWiseLayer<SinFunctor>());
    l->setParamsFrom(params);

    return l;
}

Ptr<SinhLayer> SinhLayer::create(const LayerParams& params)
{
    Ptr<SinhLayer> l(new ElementWiseLayer<SinhFunctor>());
    l->setParamsFrom(params);

    return l;
}

Ptr<SoftplusLayer> SoftplusLayer::create(const LayerParams& params)
{
    Ptr<SoftplusLayer> l(new ElementWiseLayer<SoftplusFunctor>());
    l->setParamsFrom(params);

    return l;
}

Ptr<SoftsignLayer> SoftsignLayer::create(const LayerParams& params)
{
    Ptr<SoftsignLayer> l(new ElementWiseLayer<SoftsignFunctor>());
    l->setParamsFrom(params);

    return l;
}

Ptr<TanLayer> TanLayer::create(const LayerParams& params)
{
    Ptr<TanLayer> l(new ElementWiseLayer<TanFunctor>());
    l->setParamsFrom(params);

    return l;
}

Ptr<PowerLayer> PowerLayer::create(const LayerParams& params)
{
    float power = params.get<float>("power", 1.0f);
@@ -206,6 +206,42 @@ public:
    }
};

class HardSwishSubgraph : public Subgraph
{
public:
    HardSwishSubgraph()
    {
        int input = addNodeToMatch("");
        int hardSigmoid = addNodeToMatch("HardSigmoid", input);
        addNodeToMatch("Mul", input, hardSigmoid);
        setFusedNode("HardSwish", input);
    }

    virtual bool match(const Ptr<ImportGraphWrapper>& net, int nodeId,
                       std::vector<int>& matchedNodesIds,
                       std::vector<int>& targetNodesIds) CV_OVERRIDE
    {
        if (Subgraph::match(net, nodeId, matchedNodesIds, targetNodesIds))
        {
            Ptr<ImportNodeWrapper> hardSigmoid = net->getNode(matchedNodesIds[0]);
            opencv_onnx::NodeProto* node = hardSigmoid.dynamicCast<ONNXNodeWrapper>()->node;

            uint8_t matched = 0;
            for (int i = 0; i < node->attribute_size(); i++)
            {
                opencv_onnx::AttributeProto attr = node->attribute(i);
                if ((attr.name() == "alpha" && attr.f() == 1.f / 6.f) ||
                    (attr.name() == "beta" && attr.f() == 0.5f))
                {
                    ++matched;
                }
            }
            return matched == 2;
        }
        return false;
    }
};

class NormalizeSubgraphBase : public Subgraph
{
public:

@@ -625,6 +661,7 @@ void simplifySubgraphs(opencv_onnx::GraphProto& net)
    subgraphs.push_back(makePtr<SoftMaxSubgraph>());
    subgraphs.push_back(makePtr<SoftMaxSubgraph2>());
    subgraphs.push_back(makePtr<LogSoftMaxSubgraph>());
    subgraphs.push_back(makePtr<HardSwishSubgraph>());
    subgraphs.push_back(makePtr<NormalizeSubgraph1>());
    subgraphs.push_back(makePtr<NormalizeSubgraph2>());
    subgraphs.push_back(makePtr<NormalizeSubgraph2_2>());
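The subgraph above only fuses when the HardSigmoid node carries exactly alpha = 1/6 and beta = 0.5, since that is what makes Mul(x, HardSigmoid(x)) coincide with HardSwish(x). A quick standalone check of that identity against the common ReLU6 formulation, x * min(max(x + 3, 0), 6) / 6 (the check itself is illustrative, not part of the commit):

#include <algorithm>
#include <cassert>
#include <cmath>

int main()
{
    for (float x = -10.f; x <= 10.f; x += 0.125f)
    {
        // x * HardSigmoid(x; alpha = 1/6, beta = 0.5)
        float via_hardsigmoid = x * std::max(0.f, std::min(1.f, x / 6.f + 0.5f));
        // hard-swish in its ReLU6 form
        float via_relu6 = x * std::min(std::max(x + 3.f, 0.f), 6.f) / 6.f;
        assert(std::fabs(via_hardsigmoid - via_relu6) < 1e-5f);
    }
    return 0;
}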
@@ -188,3 +188,87 @@ __kernel void NotForward(const int n, __global T* in, __global T* out) {
    if(index < n)
        out[index] = floor(1.0f - in[index]);
}

__kernel void AcosForward(const int n, __global T* in, __global T* out) {
    int index = get_global_id(0);
    if(index < n)
        out[index] = acos(in[index]);
}

__kernel void AcoshForward(const int n, __global T* in, __global T* out) {
    int index = get_global_id(0);
    if(index < n)
        out[index] = acosh(in[index]);
}

__kernel void AsinForward(const int n, __global T* in, __global T* out) {
    int index = get_global_id(0);
    if(index < n)
        out[index] = asin(in[index]);
}

__kernel void AsinhForward(const int n, __global T* in, __global T* out) {
    int index = get_global_id(0);
    if(index < n)
        out[index] = asinh(in[index]);
}

__kernel void AtanForward(const int n, __global T* in, __global T* out) {
    int index = get_global_id(0);
    if(index < n)
        out[index] = atan(in[index]);
}

__kernel void AtanhForward(const int n, __global T* in, __global T* out) {
    int index = get_global_id(0);
    if(index < n)
        out[index] = atanh(in[index]);
}

__kernel void CosForward(const int n, __global T* in, __global T* out) {
    int index = get_global_id(0);
    if(index < n)
        out[index] = cos(in[index]);
}

__kernel void CoshForward(const int n, __global T* in, __global T* out) {
    int index = get_global_id(0);
    if(index < n)
        out[index] = cosh(in[index]);
}

__kernel void HardSwishForward(const int n, __global T* in, __global T* out) {
    int index = get_global_id(0);
    if(index < n)
        out[index] = in[index] * max(0.f, min(1.f, in[index] / 6.f + 0.5f));
}

__kernel void SinForward(const int n, __global T* in, __global T* out) {
    int index = get_global_id(0);
    if(index < n)
        out[index] = sin(in[index]);
}

__kernel void SinhForward(const int n, __global T* in, __global T* out) {
    int index = get_global_id(0);
    if(index < n)
        out[index] = sinh(in[index]);
}

__kernel void SoftplusForward(const int n, __global T* in, __global T* out) {
    int index = get_global_id(0);
    if(index < n)
        out[index] = log1p(exp(in[index]));
}

__kernel void SoftsignForward(const int n, __global T* in, __global T* out) {
    int index = get_global_id(0);
    if(index < n)
        out[index] = in[index] / (1.f + fabs(in[index]));
}

__kernel void TanForward(const int n, __global T* in, __global T* out) {
    int index = get_global_id(0);
    if(index < n)
        out[index] = tan(in[index]);
}
@@ -71,4 +71,5 @@
"test_softmax_large_number_expanded", // FP16 only
"test_sub_bcast",
"test_sub_uint8",
"test_tan", // FP16 only
"test_upsample_nearest",
@@ -19,3 +19,4 @@
"test_split_equal_parts_1d",
"test_split_equal_parts_2d",
"test_split_equal_parts_default_axis",
"test_tan",
@@ -1,9 +1,5 @@
// The file is autogenerated
// Update note: execute <opencv_extra>/testdata/dnn/onnx/generate_conformance_list.py <gtest_xml_file>
"test_acos",
"test_acos_example",
"test_acosh",
"test_acosh_example",
"test_adagrad",
"test_adagrad_multiple",
"test_adam",

@@ -16,14 +12,6 @@
"test_and_bcast4v2d",
"test_and_bcast4v3d",
"test_and_bcast4v4d",
"test_asin",
"test_asin_example",
"test_asinh",
"test_asinh_example",
"test_atan",
"test_atan_example",
"test_atanh",
"test_atanh_example",
"test_basic_convinteger",
"test_batchnorm_epsilon",
"test_batchnorm_epsilon_training_mode",

@@ -102,10 +90,6 @@
"test_convtranspose_pad",
"test_convtranspose_pads",
"test_convtranspose_with_kernel",
"test_cos",
"test_cos_example",
"test_cosh",
"test_cosh_example",
"test_cumsum_1d",
"test_cumsum_1d_exclusive",
"test_cumsum_1d_reverse",

@@ -138,7 +122,6 @@
"test_einsum_transpose",
"test_equal",
"test_equal_bcast",
"test_erf",
"test_expand_dim_changed",
"test_expand_dim_unchanged",
"test_eyelike_populate_off_main_diagonal",

@@ -193,8 +176,6 @@
"test_hardsigmoid",
"test_hardsigmoid_default",
"test_hardsigmoid_example",
"test_hardswish",
"test_hardswish_expanded",
"test_identity_opt",
"test_identity_sequence",
"test_if",

@@ -564,10 +545,6 @@
"test_simple_rnn_batchwise",
"test_simple_rnn_defaults",
"test_simple_rnn_with_initial_bias",
"test_sin",
"test_sin_example",
"test_sinh",
"test_sinh_example",
"test_size",
"test_size_example",
"test_slice",

@@ -578,10 +555,6 @@
"test_slice_neg_steps",
"test_slice_negative_axes",
"test_slice_start_out_of_bounds",
"test_softplus",
"test_softplus_example",
"test_softsign",
"test_softsign_example",
"test_spacetodepth",
"test_spacetodepth_example",
"test_split_variable_parts_1d",

@@ -599,8 +572,6 @@
"test_sub_example",
"test_sum_example",
"test_sum_two_inputs",
"test_tan",
"test_tan_example",
"test_tfidfvectorizer_tf_batch_onlybigrams_skip0",
"test_tfidfvectorizer_tf_batch_onlybigrams_skip5",
"test_tfidfvectorizer_tf_batch_uniandbigrams_skip5",