Merge pull request #19545 from SamFC10:exp

This commit is contained in:
Alexander Alekhin 2021-02-20 22:47:35 +00:00
commit 0f35412dcd
12 changed files with 253 additions and 3 deletions

View File

@ -499,6 +499,14 @@ CV__DNN_INLINE_NS_BEGIN
static Ptr<PowerLayer> create(const LayerParams &params);
};
/** @brief Element-wise exponential activation layer.
 *
 * As implemented by the ExpFunctor used for this layer, the output is
 * y = base^(scale * x + shift); a base of -1 selects the natural
 * exponential, i.e. y = exp(scale * x + shift).
 */
class CV_EXPORTS ExpLayer : public ActivationLayer
{
public:
// Layer parameters; create() reads them from the "base", "scale" and "shift"
// entries of LayerParams, defaulting to -1, 1 and 0 respectively.
float base, scale, shift;
// Factory: builds the layer from the given parameters.
static Ptr<ExpLayer> create(const LayerParams &params);
};
/* Layers used in semantic segmentation */
class CV_EXPORTS CropLayer : public Layer

View File

@ -145,6 +145,11 @@ void power(const Stream& stream, Span<T> output, View<T> input, T exp, T scale,
generic_op<T, PowerFunctor<T>>(stream, output, input, {exp, scale, shift});
}
// Runs ExpFunctor<T> through generic_op on `stream`, reading `input` and
// writing `output`. normScale and normShift are handed over as the functor's
// parameters; ExpFunctor evaluates fast_exp(normShift + normScale * value).
// NOTE(review): generic_op is defined outside this view - presumably it applies
// the functor to every element of `input`; confirm against its definition.
template <class T>
void exp(const Stream& stream, Span<T> output, View<T> input, T normScale, T normShift) {
generic_op<T, ExpFunctor<T>>(stream, output, input, {normScale, normShift});
}
#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 530)
template void relu<__half>(const Stream&, Span<__half>, View<__half>, __half);
template void clipped_relu<__half>(const Stream&, Span<__half>, View<__half>, __half, __half);
@ -156,6 +161,7 @@ template void elu<__half>(const Stream&, Span<__half>, View<__half>);
template void abs<__half>(const Stream& stream, Span<__half> output, View<__half> input);
template void bnll<__half>(const Stream&, Span<__half>, View<__half>);
template void power<__half>(const Stream&, Span<__half>, View<__half>, __half, __half, __half);
template void exp<__half>(const Stream&, Span<__half>, View<__half>, __half, __half);
#endif
@ -169,6 +175,7 @@ template void elu<float>(const Stream&, Span<float>, View<float>);
template void abs<float>(const Stream& stream, Span<float> output, View<float> input);
template void bnll<float>(const Stream&, Span<float>, View<float>);
template void power<float>(const Stream&, Span<float>, View<float>, float, float, float);
template void exp<float>(const Stream&, Span<float>, View<float>, float, float);
template <class T, std::size_t N> static
void launch_vectorized_axiswise_relu(const Stream& stream, Span<T> output, View<T> input, std::size_t inner_size, View<T> slope) {

View File

@ -228,6 +228,25 @@ struct PowerFunctor {
T exp, scale, shift;
};
/* Device functor computing fast_exp(normShift + normScale * value) for a single element. */
template <class T>
struct ExpFunctor {
    /* Coefficients of the affine transform applied to the input before exponentiation.
     * The default (scale 1, shift 0) leaves the input untouched.
     */
    struct Params {
        T normScale;
        T normShift;

        CUDA4DNN_HOST_DEVICE Params() : normScale(1), normShift(0) { }
        CUDA4DNN_HOST_DEVICE Params(T scale_in, T shift_in) : normScale(scale_in), normShift(shift_in) { }
    };

    T normScale;
    T normShift;

    /* Default construction delegates to the default-constructed parameter set. */
    CUDA4DNN_DEVICE ExpFunctor() : ExpFunctor(Params()) { }

    /* Copies both coefficients out of `cfg`. */
    CUDA4DNN_DEVICE ExpFunctor(const Params& cfg)
        : normScale(cfg.normScale), normShift(cfg.normShift) { }

    /* Returns fast_exp of the affinely transformed input. */
    CUDA4DNN_DEVICE T operator()(T value) {
        using csl::device::fast_exp;
        return fast_exp(normShift + normScale * value);
    }
};
template <class T>
struct MaxFunctor {
struct Params {
@ -297,4 +316,4 @@ struct DivFunctor {
}}}} /* namespace cv::dnn::cuda4dnn::kernels */
#endif /* OPENCV_DNN_SRC_CUDA_FUNCTORS_HPP */
#endif /* OPENCV_DNN_SRC_CUDA_FUNCTORS_HPP */

View File

@ -45,6 +45,9 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels {
template <class T>
void power(const csl::Stream& stream, csl::Span<T> output, csl::View<T> input, T exp, T scale, T shift);
// Element-wise exponential activation kernel entry point.
// Takes the stream to run on, the output span, the input view, and the
// normScale / normShift coefficients consumed by the exponential functor.
template <class T>
void exp(const csl::Stream& stream, csl::Span<T> output, csl::View<T> input, T normScale, T normShift);
}}}} /* namespace cv::dnn::cuda4dnn::kernels */
#endif /* OPENCV_DNN_SRC_CUDA4DNN_KERNELS_ACTIVATIONS_HPP */

View File

@ -341,6 +341,36 @@ namespace cv { namespace dnn { namespace cuda4dnn {
const T exp, scale, shift;
};
/* CUDA backend node for the exponential activation.
 *
 * Stores the stream and the two normalized coefficients at construction time and,
 * on forward(), invokes kernels::exp once per input/output pair.
 */
template <class T>
class ExpOp final : public CUDABackendNode {
public:
    using wrapper_type = GetCUDABackendWrapperType<T>;

    /* stream_ is moved into the node; nScale_ / nShift_ are kept as the
     * normScale / normShift arguments later passed to kernels::exp.
     */
    ExpOp(csl::Stream stream_, T nScale_, T nShift_)
        : stream(std::move(stream_)), normScale{ nScale_ }, normShift{ nShift_ } { }

    /* Applies the activation to every input, writing to the output with the same index.
     * `workspace` is not used by this node.
     */
    void forward(
        const std::vector<cv::Ptr<BackendWrapper>>& inputs,
        const std::vector<cv::Ptr<BackendWrapper>>& outputs,
        csl::Workspace& workspace) override
    {
        /* unsigned index: avoids the signed/unsigned comparison against inputs.size() */
        for (std::size_t i = 0; i < inputs.size(); i++)
        {
            auto input_wrapper = inputs[i].dynamicCast<wrapper_type>();
            auto input = input_wrapper->getView();

            auto output_wrapper = outputs[i].dynamicCast<wrapper_type>();
            auto output = output_wrapper->getSpan();

            kernels::exp<T>(stream, output, input, normScale, normShift);
        }
    }

private:
    csl::Stream stream;
    const T normScale, normShift;
};
}}} /* namespace cv::dnn::cuda4dnn */
#endif /* OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_ACTIVATION_HPP */

View File

@ -110,6 +110,7 @@ void initializeLayerFactory()
CV_DNN_REGISTER_LAYER_CLASS(BNLL, BNLLLayer);
CV_DNN_REGISTER_LAYER_CLASS(AbsVal, AbsLayer);
CV_DNN_REGISTER_LAYER_CLASS(Power, PowerLayer);
CV_DNN_REGISTER_LAYER_CLASS(Exp, ExpLayer);
CV_DNN_REGISTER_LAYER_CLASS(BatchNorm, BatchNormLayer);
CV_DNN_REGISTER_LAYER_CLASS(MaxUnpool, MaxUnpoolLayer);
CV_DNN_REGISTER_LAYER_CLASS(Dropout, BlankLayer);

View File

@ -1400,6 +1400,125 @@ struct PowerFunctor : public BaseFunctor
int64 getFLOPSPerElement() const { return power == 1 ? 2 : 10; }
};
// Element-wise functor computing y = base^(scale * x + shift).
// A base of -1 selects the natural exponential, y = exp(scale * x + shift).
// Every backend works on the normalized form y = exp(normScale * x + normShift).
struct ExpFunctor : public BaseFunctor
{
    typedef ExpLayer Layer;

    // Raw layer parameters.
    float base, scale, shift;
    // Parameters folded with ln(base); see finalize().
    float normScale, normShift;

    // Validates 'base' (must be -1 or strictly positive) and derives the
    // normalized coefficients immediately, so they are never read while
    // indeterminate even if a backend is initialized before finalize() runs.
    ExpFunctor(float base_ = -1.f, float scale_ = 1.f, float shift_ = 0.f)
        : base(base_), scale(scale_), shift(shift_), normScale(0.f), normShift(0.f)
    {
        CV_Check(base, base == -1.f || base > 0.f, "Unsupported 'base' value");
        finalize();
    }

    // Backends this functor reports as supported; targetId is not inspected.
    bool supportBackend(int backendId, int targetId)
    {
        return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_CUDA ||
               backendId == DNN_BACKEND_HALIDE || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH;
    }

    // Recomputes normScale / normShift from the current base, scale and shift.
    void finalize()
    {
        // For base > 0 :
        // y = base^(scale * input + shift)
        // ln(y) = ln(base)*(scale * input + shift)
        // y = exp((ln(base)*scale) * input + (ln(base)*shift))
        // y = exp(normalized_scale * input + normalized_shift)
        float ln_base = (base == -1.f) ? 1.f : log(base);
        normScale = scale * ln_base;
        normShift = shift * ln_base;
    }

    // CPU path: for each plane cn in [cn0, cn1) processes `len` elements,
    // advancing both pointers by planeSize between planes.
    void apply(const float* srcptr, float* dstptr, int len, size_t planeSize, int cn0, int cn1) const
    {
        float a = normScale, b = normShift;
        for( int cn = cn0; cn < cn1; cn++, srcptr += planeSize, dstptr += planeSize )
        {
            for( int i = 0; i < len; i++ )
            {
                float x = srcptr[i];
                dstptr[i] = exp(a*x + b);
            }
        }
    }

#ifdef HAVE_OPENCL
    // OpenCL path: runs the "ExpForward" kernel once per input/output pair.
    // `internals` is not used.
    bool applyOCL(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
    {
        std::vector<UMat> inputs;
        std::vector<UMat> outputs;

        inps.getUMatVector(inputs);
        outs.getUMatVector(outputs);
        String buildopt = oclGetTMacro(inputs[0]);

        for (size_t i = 0; i < inputs.size(); i++)
        {
            UMat& src = inputs[i];
            UMat& dst = outputs[i];

            ocl::Kernel kernel("ExpForward", ocl::dnn::activations_oclsrc, buildopt);
            kernel.set(0, (int)src.total());
            kernel.set(1, ocl::KernelArg::PtrReadOnly(src));
            kernel.set(2, ocl::KernelArg::PtrWriteOnly(dst));
            kernel.set(3, (float)normScale);
            kernel.set(4, (float)normShift);

            // one work-item per element
            size_t gSize = src.total();
            CV_Assert(kernel.run(1, &gSize, NULL, false));
        }
        return true;
    }
#endif

#ifdef HAVE_CUDA
    // CUDA path: creates an ExpOp node carrying the normalized coefficients.
    Ptr<BackendNode> initCUDA(int target, csl::Stream stream)
    {
        return make_cuda_node<cuda4dnn::ExpOp>(target, stream, normScale, normShift);
    }
#endif

#ifdef HAVE_HALIDE
    // Halide path: defines `top` as exp(normScale * input + normShift).
    void attachHalide(const Halide::Expr& input, Halide::Func& top)
    {
        Halide::Var x("x"), y("y"), c("c"), n("n");
        top(x, y, c, n) = exp(normScale * input + normShift);
    }
#endif // HAVE_HALIDE

#ifdef HAVE_DNN_IE_NN_BUILDER_2019
    // Not implemented for the Inference Engine NN Builder API.
    InferenceEngine::Builder::Layer initInfEngineBuilderAPI()
    {
        CV_Error(Error::StsNotImplemented, "");
    }
#endif // HAVE_DNN_IE_NN_BUILDER_2019

#ifdef HAVE_DNN_NGRAPH
    // nGraph path: builds Exp(Add(Multiply(scale, node), shift)) with
    // single-element f32 constants for the normalized coefficients.
    std::shared_ptr<ngraph::Node> initNgraphAPI(const std::shared_ptr<ngraph::Node>& node)
    {
        auto scale_node = std::make_shared<ngraph::op::Constant>(ngraph::element::f32,
                                                                 ngraph::Shape{1}, &normScale);
        auto shift_node = std::make_shared<ngraph::op::Constant>(ngraph::element::f32,
                                                                 ngraph::Shape{1}, &normShift);
        auto mul = std::make_shared<ngraph::op::v1::Multiply>(scale_node, node, ngraph::op::AutoBroadcastType::NUMPY);
        auto scale_shift = std::make_shared<ngraph::op::v1::Add>(mul, shift_node, ngraph::op::AutoBroadcastType::NUMPY);
        return std::make_shared<ngraph::op::v0::Exp>(scale_shift);
    }
#endif // HAVE_DNN_NGRAPH

#ifdef HAVE_VULKAN
    // Vulkan path: no implementation yet, returns an empty pointer.
    std::shared_ptr<vkcom::OpBase> initVkCom()
    {
        // TODO: add vkcom implementation
        return std::shared_ptr<vkcom::OpBase>();
    }
#endif // HAVE_VULKAN

    int64 getFLOPSPerElement() const { return 3; }
};
struct ChannelsPReLUFunctor : public BaseFunctor
{
typedef ChannelsPReLULayer Layer;
@ -1634,6 +1753,20 @@ Ptr<PowerLayer> PowerLayer::create(const LayerParams& params)
return l;
}
// Builds an ExpLayer from LayerParams. Missing entries fall back to
// base = -1, scale = 1 and shift = 0. The values are handed to the functor
// and mirrored into the layer's public fields.
Ptr<ExpLayer> ExpLayer::create(const LayerParams& params)
{
    const float expBase = params.get<float>("base", -1.0f);
    const float expScale = params.get<float>("scale", 1.0f);
    const float expShift = params.get<float>("shift", 0.0f);

    Ptr<ExpLayer> layer(new ElementWiseLayer<ExpFunctor>(ExpFunctor(expBase, expScale, expShift)));
    layer->setParamsFrom(params);

    layer->base = expBase;
    layer->scale = expScale;
    layer->shift = expShift;
    return layer;
}
Ptr<Layer> ChannelsPReLULayer::create(const LayerParams& params)
{
CV_Assert(params.blobs.size() == 1);

View File

@ -140,3 +140,14 @@ __kernel void ELUForward(const int n, __global const T* in, __global T* out)
out[index] = (src >= 0.f) ? src : exp(src) - 1;
}
}
// out[i] = exp(normShift + normScale * in[i]) for every i in [0, n).
// Work-items whose global id is n or greater do nothing.
__kernel void ExpForward(const int n, __global const T* in, __global T* out,
                         const KERNEL_ARG_DTYPE normScale,
                         const KERNEL_ARG_DTYPE normShift)
{
    const int gid = get_global_id(0);
    if (gid >= n)
        return;

    out[gid] = exp(normShift + normScale * in[gid]);
}

View File

@ -2425,7 +2425,7 @@ void TFImporter::parseNode(const tensorflow::NodeDef& layer_)
connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, num_inputs);
}
else if (type == "Abs" || type == "Tanh" || type == "Sigmoid" ||
type == "Relu" || type == "Elu" ||
type == "Relu" || type == "Elu" || type == "Exp" ||
type == "Identity" || type == "Relu6")
{
CV_CheckGT(num_inputs, 0, "");

View File

@ -632,6 +632,31 @@ INSTANTIATE_TEST_CASE_P(Layer_Test_Halide, Power, Combine(
dnnBackendsAndTargetsWithHalide()
));
// Exp layer test, parameterized by a (base, scale, shift) triple packed into a
// Vec3f and by a backend/target pair.
typedef TestWithParam<tuple<Vec3f, tuple<Backend, Target> > > Exp;
TEST_P(Exp, Accuracy)
{
    const Vec3f expParams = get<0>(GetParam());
    const tuple<Backend, Target> backendAndTarget = get<1>(GetParam());

    const float base = expParams[0];
    const float scale = expParams[1];
    const float shift = expParams[2];
    Backend backendId = get<0>(backendAndTarget);
    Target targetId = get<1>(backendAndTarget);

    LayerParams lp;
    lp.type = "Exp";
    lp.name = "testLayer";
    lp.set("base", base);
    lp.set("scale", scale);
    lp.set("shift", shift);

    testInPlaceActivation(lp, backendId, targetId);
}

// Covers bases below 1, above 1 and the -1 value, each with both signs of scale/shift.
INSTANTIATE_TEST_CASE_P(Layer_Test_Halide, Exp, Combine(
/*base, scale, shift*/ Values(Vec3f(0.9f, -1.0f, 1.1f), Vec3f(0.9f, 1.1f, -1.0f),
                              Vec3f(-1.0f, 0.9f, 1.1f), Vec3f(-1.0f, 1.1f, 0.9f),
                              Vec3f(1.1f, 0.9f, -1.0f), Vec3f(1.1f, -1.0f, 0.9f)),
    dnnBackendsAndTargetsWithHalide()
));
TEST_P(Test_Halide_layers, ChannelsPReLU)
{
LayerParams lp;

View File

@ -2152,6 +2152,12 @@ public:
randu(scales, -1.0f, 1.0f);
activationParams.blobs.push_back(scales);
}
else if (activationParams.type == "Exp")
{
activationParams.set("base", -1.0f);
activationParams.set("scale", 0.3f);
activationParams.set("shift", 0.6f);
}
}
static void makeDefaultTestEltwiseLayer(LayerParams& eltwiseParams, const std::string& op, bool withCoefficients)
@ -2223,7 +2229,7 @@ public:
static testing::internal::ParamGenerator<std::string> activationLayersList()
{
// TODO: automate list generation
return Values("ReLU", "ReLU6", "ChannelsPReLU", "TanH", "Swish", "Mish", "Sigmoid", "ELU", "AbsVal", "BNLL", "Power");
return Values("ReLU", "ReLU6", "ChannelsPReLU", "TanH", "Swish", "Mish", "Sigmoid", "ELU", "AbsVal", "BNLL", "Power", "Exp");
}
static testing::internal::ParamGenerator<tuple<Backend, Target> > dnnBackendsAndTargetsForFusionTests()

View File

@ -329,6 +329,13 @@ TEST_P(Test_ONNX_layers, Power)
testONNXModels("pow2", npy, 0, 0, false, false);
}
// Runs the "exp" ONNX model through testONNXModels.
TEST_P(Test_ONNX_layers, Exp)
{
    // The Inference Engine NN Builder backend gets the dedicated skip tag
    // (the Exp functor reports StsNotImplemented for that API).
    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
    {
        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER);
    }

    testONNXModels("exp");
}
TEST_P(Test_ONNX_layers, Concatenation)
{
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)