mirror of
https://github.com/opencv/opencv.git
Merge pull request #15808 from thebhatman:Mish_swish
* Added Swish and Mish activations
* Fixed whitespace errors
* Kernel implementation done
* Added function for launching kernel
* Changed type of 1.0
* Attempt to add test for Swish and Mish
* Resolving type mismatch for log
* exp from device
* Use log1pexp instead of adding 1
* Added openCL kernels
This commit is contained in:
parent 01a28db949, commit 78c5e41c23
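For reference, the two activations added here compute swish(x) = x * sigmoid(x) = x / (1 + exp(-x)) and mish(x) = x * tanh(softplus(x)), with softplus(x) = log(1 + exp(x)). The scalar sketch below only illustrates that math; the helper names swish_ref/mish_ref are not part of the patch, and the actual CPU, CUDA and OpenCL implementations follow in the diff.

#include <cmath>

// Plain scalar reference forms of the activations introduced by this PR.
// swish(x) = x * sigmoid(x)
inline float swish_ref(float x) { return x / (1.0f + std::exp(-x)); }

// mish(x) = x * tanh(softplus(x)); naive form shown here — the CUDA kernels
// below use a stable log1pexp instead of log(1 + exp(x)) directly.
inline float mish_ref(float x) { return x * std::tanh(std::log1p(std::exp(x))); }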
@ -462,6 +462,18 @@ CV__DNN_INLINE_NS_BEGIN
    static Ptr<TanHLayer> create(const LayerParams &params);
};

class CV_EXPORTS SwishLayer : public ActivationLayer
{
public:
    static Ptr<SwishLayer> create(const LayerParams &params);
};

class CV_EXPORTS MishLayer : public ActivationLayer
{
public:
    static Ptr<MishLayer> create(const LayerParams &params);
};

class CV_EXPORTS SigmoidLayer : public ActivationLayer
{
public:
@ -62,6 +62,43 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels {
    }
}

template <class T, std::size_t N>
__global__ void swish_vec(Span<T> output, View<T> input) {
    using vector_type = get_vector_type_t<T, N>;

    auto output_vPtr = vector_type::get_pointer(output.data());
    auto input_vPtr = vector_type::get_pointer(input.data());

    for (auto i : grid_stride_range(output.size() / vector_type::size())) {
        vector_type vec;
        v_load(vec, input_vPtr[i]);
        for (int j = 0; j < vector_type::size(); j++) {
            using device::sigmoid;
            vec.data[j] = vec.data[j] * sigmoid(vec.data[j]);
        }
        v_store(output_vPtr[i], vec);
    }
}

template <class T, std::size_t N>
__global__ void mish_vec(Span<T> output, View<T> input) {
    using vector_type = get_vector_type_t<T, N>;

    auto output_vPtr = vector_type::get_pointer(output.data());
    auto input_vPtr = vector_type::get_pointer(input.data());

    for (auto i : grid_stride_range(output.size() / vector_type::size())) {
        vector_type vec;
        v_load(vec, input_vPtr[i]);
        for (int j = 0; j < vector_type::size(); j++) {
            using device::tanh;
            using device::log1pexp;
            vec.data[j] = vec.data[j] * tanh(log1pexp(vec.data[j]));
        }
        v_store(output_vPtr[i], vec);
    }
}

template <class T, std::size_t N>
__global__ void sigmoid_vec(Span<T> output, View<T> input) {
    using vector_type = get_vector_type_t<T, N>;
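The mish_vec kernel above calls device::log1pexp instead of evaluating log(1.0 + exp(x)) directly, which is what the commit message's "Use log1pexp instead of adding 1" refers to: for large x, exp(x) overflows even though log(1 + exp(x)) is simply close to x. A typical numerically stable formulation is sketched below; this is an assumption for illustration, not necessarily the exact definition of device::log1pexp in the CUDA backend.

#include <cmath>

// Stable softplus: log(1 + exp(x)) without overflowing exp() for large x.
inline float log1pexp(float x) {
    // x > 0:  log(1 + exp(x)) = x + log1p(exp(-x)), and exp(-x) cannot overflow
    // x <= 0: exp(x) <= 1, so log1p(exp(x)) is safe and accurate
    return x > 0.0f ? x + std::log1p(std::exp(-x)) : std::log1p(std::exp(x));
}
// mish(x) is then x * std::tanh(log1pexp(x)).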
@ -240,6 +277,58 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels {
template void tanh<__half>(const Stream&, Span<__half>, View<__half>);
template void tanh<float>(const Stream&, Span<float>, View<float>);

template <class T, std::size_t N>
void launch_vectorized_swish(const Stream& stream, Span<T> output, View<T> input) {
    CV_Assert(is_fully_aligned<T>(output, N));
    CV_Assert(is_fully_aligned<T>(input, N));

    auto kernel = raw::swish_vec<T, N>;
    auto policy = make_policy(kernel, output.size() / N, 0, stream);
    launch_kernel(kernel, policy, output, input);
}

template <class T>
void swish(const Stream& stream, Span<T> output, View<T> input) {
    CV_Assert(input.size() == output.size());

    if (is_fully_aligned<T>(output, 4) && is_fully_aligned<T>(input, 4)) {
        launch_vectorized_swish<T, 4>(stream, output, input);
    } else if (is_fully_aligned<T>(output, 2) && is_fully_aligned<T>(input, 2)) {
        launch_vectorized_swish<T, 2>(stream, output, input);
    } else {
        launch_vectorized_swish<T, 1>(stream, output, input);
    }
}

template void swish<__half>(const Stream&, Span<__half>, View<__half>);
template void swish<float>(const Stream&, Span<float>, View<float>);

template <class T, std::size_t N>
void launch_vectorized_mish(const Stream& stream, Span<T> output, View<T> input) {
    CV_Assert(is_fully_aligned<T>(output, N));
    CV_Assert(is_fully_aligned<T>(input, N));

    auto kernel = raw::mish_vec<T, N>;
    auto policy = make_policy(kernel, output.size() / N, 0, stream);
    launch_kernel(kernel, policy, output, input);
}

template <class T>
void mish(const Stream& stream, Span<T> output, View<T> input) {
    CV_Assert(input.size() == output.size());

    if (is_fully_aligned<T>(output, 4) && is_fully_aligned<T>(input, 4)) {
        launch_vectorized_mish<T, 4>(stream, output, input);
    } else if (is_fully_aligned<T>(output, 2) && is_fully_aligned<T>(input, 2)) {
        launch_vectorized_mish<T, 2>(stream, output, input);
    } else {
        launch_vectorized_mish<T, 1>(stream, output, input);
    }
}

template void mish<__half>(const Stream&, Span<__half>, View<__half>);
template void mish<float>(const Stream&, Span<float>, View<float>);

template <class T, std::size_t N>
void launch_vectorized_sigmoid(const Stream& stream, Span<T> output, View<T> input) {
    CV_Assert(is_fully_aligned<T>(output, N));
@ -18,6 +18,12 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels {
template <class T>
void tanh(const csl::Stream& stream, csl::Span<T> output, csl::View<T> input);

template <class T>
void swish(const csl::Stream& stream, csl::Span<T> output, csl::View<T> input);

template <class T>
void mish(const csl::Stream& stream, csl::Span<T> output, csl::View<T> input);

template <class T>
void sigmoid(const csl::Stream& stream, csl::Span<T> output, csl::View<T> input);
@ -143,6 +143,62 @@ namespace cv { namespace dnn { namespace cuda4dnn {
    csl::Stream stream;
};

template <class T>
class SwishOp final : public CUDABackendNode {
public:
    using wrapper_type = GetCUDABackendWrapperType<T>;

    SwishOp(csl::Stream stream_) : stream(std::move(stream_)) { }

    void forward(
        const std::vector<cv::Ptr<BackendWrapper>>& inputs,
        const std::vector<cv::Ptr<BackendWrapper>>& outputs,
        csl::Workspace& workspace) override
    {
        for (int i = 0; i < inputs.size(); i++)
        {
            auto input_wrapper = inputs[i].dynamicCast<wrapper_type>();
            auto input = input_wrapper->getView();

            auto output_wrapper = outputs[i].dynamicCast<wrapper_type>();
            auto output = output_wrapper->getSpan();

            kernels::swish<T>(stream, output, input);
        }
    }

private:
    csl::Stream stream;
};

template <class T>
class MishOp final : public CUDABackendNode {
public:
    using wrapper_type = GetCUDABackendWrapperType<T>;

    MishOp(csl::Stream stream_) : stream(std::move(stream_)) { }

    void forward(
        const std::vector<cv::Ptr<BackendWrapper>>& inputs,
        const std::vector<cv::Ptr<BackendWrapper>>& outputs,
        csl::Workspace& workspace) override
    {
        for (int i = 0; i < inputs.size(); i++)
        {
            auto input_wrapper = inputs[i].dynamicCast<wrapper_type>();
            auto input = input_wrapper->getView();

            auto output_wrapper = outputs[i].dynamicCast<wrapper_type>();
            auto output = output_wrapper->getSpan();

            kernels::mish<T>(stream, output, input);
        }
    }

private:
    csl::Stream stream;
};

template <class T>
class SigmoidOp final : public CUDABackendNode {
public:
@ -103,6 +103,8 @@ void initializeLayerFactory()
    CV_DNN_REGISTER_LAYER_CLASS(PReLU, ChannelsPReLULayer);
    CV_DNN_REGISTER_LAYER_CLASS(Sigmoid, SigmoidLayer);
    CV_DNN_REGISTER_LAYER_CLASS(TanH, TanHLayer);
    CV_DNN_REGISTER_LAYER_CLASS(Swish, SwishLayer);
    CV_DNN_REGISTER_LAYER_CLASS(Mish, MishLayer);
    CV_DNN_REGISTER_LAYER_CLASS(ELU, ELULayer);
    CV_DNN_REGISTER_LAYER_CLASS(BNLL, BNLLLayer);
    CV_DNN_REGISTER_LAYER_CLASS(AbsVal, AbsLayer);
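With the two CV_DNN_REGISTER_LAYER_CLASS entries above, "Swish" and "Mish" become resolvable type strings in the layer factory, which is how model importers instantiate them. A minimal sketch of that lookup (assuming the standard cv::dnn::LayerFactory API; the variable names are illustrative only):

#include <opencv2/dnn.hpp>

int main() {
    cv::dnn::LayerParams params;
    params.type = "Mish";
    params.name = "mish1";
    // Resolve the registered type string to a concrete layer instance,
    // the same lookup importers perform when they meet a "Mish" node.
    cv::Ptr<cv::dnn::Layer> layer =
        cv::dnn::LayerFactory::createLayerInstance(params.type, params);
    return layer.empty() ? 1 : 0;
}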
@ -613,6 +613,184 @@ struct TanHFunctor
    int64 getFLOPSPerElement() const { return 1; }
};

struct SwishFunctor
{
    typedef SwishLayer Layer;

    bool supportBackend(int backendId, int)
    {
        return backendId == DNN_BACKEND_OPENCV ||
               backendId == DNN_BACKEND_CUDA ||
               backendId == DNN_BACKEND_HALIDE;
    }

    void apply(const float* srcptr, float* dstptr, int len, size_t planeSize, int cn0, int cn1) const
    {
        for( int cn = cn0; cn < cn1; cn++, srcptr += planeSize, dstptr += planeSize )
        {
            for( int i = 0; i < len; i++ )
            {
                float x = srcptr[i];
                dstptr[i] = x / (1.0f + exp(-x));
            }
        }
    }

#ifdef HAVE_OPENCL
    bool applyOCL(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
    {
        std::vector<UMat> inputs;
        std::vector<UMat> outputs;

        inps.getUMatVector(inputs);
        outs.getUMatVector(outputs);
        String buildopt = oclGetTMacro(inputs[0]);

        for (size_t i = 0; i < inputs.size(); i++)
        {
            UMat& src = inputs[i];
            UMat& dst = outputs[i];

            ocl::Kernel kernel("SwishForward", ocl::dnn::activations_oclsrc, buildopt);
            kernel.set(0, (int)src.total());
            kernel.set(1, ocl::KernelArg::PtrReadOnly(src));
            kernel.set(2, ocl::KernelArg::PtrWriteOnly(dst));

            size_t gSize = src.total();
            CV_Assert(kernel.run(1, &gSize, NULL, false));
        }

        return true;
    }
#endif

#ifdef HAVE_CUDA
    Ptr<BackendNode> initCUDA(int target, csl::Stream stream)
    {
        return make_cuda_node<cuda4dnn::SwishOp>(target, stream);
    }
#endif

#ifdef HAVE_HALIDE
    void attachHalide(const Halide::Expr& input, Halide::Func& top)
    {
        Halide::Var x("x"), y("y"), c("c"), n("n");
        top(x, y, c, n) = input / (1.0f + exp(-input));
    }
#endif // HAVE_HALIDE

#ifdef HAVE_INF_ENGINE
    InferenceEngine::Builder::Layer initInfEngineBuilderAPI()
    {
        CV_Error(Error::StsNotImplemented, "");
    }
#endif // HAVE_INF_ENGINE

#ifdef HAVE_VULKAN
    std::shared_ptr<vkcom::OpBase> initVkCom()
    {
        // TODO: add vkcom implementation
        return std::shared_ptr<vkcom::OpBase>();
    }
#endif // HAVE_VULKAN

    bool tryFuse(Ptr<dnn::Layer>&) { return false; }

    void getScaleShift(Mat&, Mat&) const {}

    int64 getFLOPSPerElement() const { return 3; }
};

struct MishFunctor
{
    typedef MishLayer Layer;

    bool supportBackend(int backendId, int)
    {
        return backendId == DNN_BACKEND_OPENCV ||
               backendId == DNN_BACKEND_CUDA ||
               backendId == DNN_BACKEND_HALIDE;
    }

    void apply(const float* srcptr, float* dstptr, int len, size_t planeSize, int cn0, int cn1) const
    {
        for( int cn = cn0; cn < cn1; cn++, srcptr += planeSize, dstptr += planeSize )
        {
            for( int i = 0; i < len; i++ )
            {
                float x = srcptr[i];
                dstptr[i] = x * tanh(log(1.0f + exp(x)));
            }
        }
    }

#ifdef HAVE_OPENCL
    bool applyOCL(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
    {
        std::vector<UMat> inputs;
        std::vector<UMat> outputs;

        inps.getUMatVector(inputs);
        outs.getUMatVector(outputs);
        String buildopt = oclGetTMacro(inputs[0]);

        for (size_t i = 0; i < inputs.size(); i++)
        {
            UMat& src = inputs[i];
            UMat& dst = outputs[i];

            ocl::Kernel kernel("MishForward", ocl::dnn::activations_oclsrc, buildopt);
            kernel.set(0, (int)src.total());
            kernel.set(1, ocl::KernelArg::PtrReadOnly(src));
            kernel.set(2, ocl::KernelArg::PtrWriteOnly(dst));

            size_t gSize = src.total();
            CV_Assert(kernel.run(1, &gSize, NULL, false));
        }

        return true;
    }
#endif

#ifdef HAVE_CUDA
    Ptr<BackendNode> initCUDA(int target, csl::Stream stream)
    {
        return make_cuda_node<cuda4dnn::MishOp>(target, stream);
    }
#endif

#ifdef HAVE_HALIDE
    void attachHalide(const Halide::Expr& input, Halide::Func& top)
    {
        Halide::Var x("x"), y("y"), c("c"), n("n");
        top(x, y, c, n) = input * tanh(log(1.0f + exp(input)));
    }
#endif // HAVE_HALIDE

#ifdef HAVE_INF_ENGINE
    InferenceEngine::Builder::Layer initInfEngineBuilderAPI()
    {
        CV_Error(Error::StsNotImplemented, "");
    }
#endif // HAVE_INF_ENGINE

#ifdef HAVE_VULKAN
    std::shared_ptr<vkcom::OpBase> initVkCom()
    {
        // TODO: add vkcom implementation
        return std::shared_ptr<vkcom::OpBase>();
    }
#endif // HAVE_VULKAN

    bool tryFuse(Ptr<dnn::Layer>&) { return false; }

    void getScaleShift(Mat&, Mat&) const {}

    int64 getFLOPSPerElement() const { return 3; }
};

struct SigmoidFunctor
{
    typedef SigmoidLayer Layer;
@ -1292,6 +1470,22 @@ Ptr<TanHLayer> TanHLayer::create(const LayerParams& params)
    return l;
}

Ptr<SwishLayer> SwishLayer::create(const LayerParams& params)
{
    Ptr<SwishLayer> l(new ElementWiseLayer<SwishFunctor>());
    l->setParamsFrom(params);

    return l;
}

Ptr<MishLayer> MishLayer::create(const LayerParams& params)
{
    Ptr<MishLayer> l(new ElementWiseLayer<MishFunctor>());
    l->setParamsFrom(params);

    return l;
}

Ptr<SigmoidLayer> SigmoidLayer::create(const LayerParams& params)
{
    Ptr<SigmoidLayer> l(new ElementWiseLayer<SigmoidFunctor>());
@ -95,6 +95,18 @@ __kernel void SigmoidForward(const int count, __global const T* in, __global T*
    out[index] = 1.0f / (1.0f + exp(-in[index]));
}

__kernel void SwishForward(const int count, __global const T* in, __global T* out) {
    int index = get_global_id(0);
    if(index < count)
        out[index] = in[index] / (1.0f + exp(-in[index]));
}

__kernel void MishForward(const int count, __global const T* in, __global T* out) {
    int index = get_global_id(0);
    if(index < count)
        out[index] = in[index] * tanh(log(1.0f + exp(in[index])));
}

__kernel void BNLLForward(const int n, __global const T* in, __global T* out) {
    int index = get_global_id(0);
    if (index < n) {
@ -583,7 +583,7 @@ TEST_P(NoParamActivation, Accuracy)
    testInPlaceActivation(lp, backendId, targetId);
}
INSTANTIATE_TEST_CASE_P(Layer_Test_Halide, NoParamActivation, Combine(
-    /*type*/ Values("TanH", "Sigmoid", "AbsVal", "BNLL"),
+    /*type*/ Values("TanH", "Sigmoid", "AbsVal", "BNLL", "Swish", "Mish"),
    dnnBackendsAndTargetsWithHalide()
));
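Besides the parameterized Halide activation test above, the new layers can be exercised end to end by building a one-layer network by hand. A rough sketch, assuming the usual cv::dnn::Net API (the blob shape and fill value are arbitrary, chosen only to make the expected output easy to check):

#include <opencv2/dnn.hpp>
#include <iostream>
#include <vector>

int main() {
    cv::dnn::Net net;
    cv::dnn::LayerParams lp;
    lp.type = "Swish";   // or "Mish"
    lp.name = "swish1";
    net.addLayerToPrev(lp.name, lp.type, lp);

    // 1x3x4x4 blob filled with ones; swish(1) = 1 / (1 + e^-1) ~ 0.731
    cv::Mat input(std::vector<int>{1, 3, 4, 4}, CV_32F, cv::Scalar(1.0f));
    net.setInput(input);
    cv::Mat out = net.forward();
    std::cout << out.ptr<float>()[0] << std::endl;
    return 0;
}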