Mirror of https://github.com/opencv/opencv.git
Merge pull request #20682 from rogday:min

* Add Min layer to CPU, OpenCL, Halide, Inference Engine, NGraph and CUDA
* fix indentation
* add min to fusion and halide tests; fix doc

parent 2558ab3de7
commit 38b9ec7a18
@@ -610,7 +610,7 @@ CV__DNN_INLINE_NS_BEGIN
     /** @brief Element wise operation on inputs

    Extra optional parameters:
-    - "operation" as string. Values are "sum" (default), "prod", "max", "div"
+    - "operation" as string. Values are "sum" (default), "prod", "max", "div", "min"
    - "coeff" as float array. Specify weights of inputs for SUM operation
    - "output_channels_mode" as string. Values are "same" (default, all input must have the same layout), "input_0", "input_0_truncate", "max_input_channels"
    */
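Note: as a quick illustration of the documented parameters, here is a minimal, untested sketch of driving the extended Eltwise layer through the public cv::dnn API; the input and layer names ("data0", "data1", "min0") are arbitrary and not part of the patch:

    #include <opencv2/dnn.hpp>

    using namespace cv;
    using namespace cv::dnn;

    int main()
    {
        // Two equally shaped 4D blobs to combine elementwise.
        int shape[] = {1, 2, 3, 3};
        Mat a(4, shape, CV_32F), b(4, shape, CV_32F);
        randu(a, -1, 1);
        randu(b, -1, 1);

        Net net;
        net.setInputsNames({"data0", "data1"});   // two network inputs

        LayerParams lp;
        lp.type = "Eltwise";
        lp.name = "min0";
        lp.set("operation", "min");               // value added by this PR
        int id = net.addLayer(lp.name, lp.type, lp);
        net.connect(0, 0, id, 0);                 // wire both network inputs
        net.connect(0, 1, id, 1);                 // into the layer's two pins

        net.setInput(a, "data0");
        net.setInput(b, "data1");
        Mat out = net.forward();                  // out(i) == std::min(a(i), b(i))
        return 0;
    }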
@@ -74,6 +74,11 @@ void eltwise_max_2(const Stream& stream, Span<T> output, View<T> x, View<T> y) {
     eltwise_op<T, MaxFunctor<T>>(stream, output, x, y);
 }

+template <class T>
+void eltwise_min_2(const Stream& stream, Span<T> output, View<T> x, View<T> y) {
+    eltwise_op<T, MinFunctor<T>>(stream, output, x, y);
+}
+
 template <class T>
 void eltwise_sum_2(const Stream& stream, Span<T> output, View<T> x, View<T> y) {
     eltwise_op<T, SumFunctor<T>>(stream, output, x, y);
@@ -100,11 +105,13 @@ void eltwise_div_2(const Stream& stream, Span<T> output, View<T> x, View<T> y) {
 template void eltwise_sum_coeff_2(const Stream&, Span<__half>, __half, View<__half>, __half, View<__half>);
 template void eltwise_sum_2(const Stream& stream, Span<__half> output, View<__half> x, View<__half> y);
 template void eltwise_max_2(const Stream& stream, Span<__half> output, View<__half> x, View<__half> y);
+template void eltwise_min_2(const Stream& stream, Span<__half> output, View<__half> x, View<__half> y);
 #endif
 template void eltwise_div_2(const Stream& stream, Span<float> output, View<float> x, View<float> y);
 template void eltwise_prod_2(const Stream& stream, Span<float> output, View<float> x, View<float> y);
 template void eltwise_sum_coeff_2(const Stream&, Span<float>, float, View<float>, float, View<float>);
 template void eltwise_sum_2(const Stream& stream, Span<float> output, View<float> x, View<float> y);
 template void eltwise_max_2(const Stream& stream, Span<float> output, View<float> x, View<float> y);
+template void eltwise_min_2(const Stream& stream, Span<float> output, View<float> x, View<float> y);

 }}}} /* namespace cv::dnn::cuda4dnn::kernels */
@@ -262,6 +262,21 @@ struct MaxFunctor {
     }
 };

+template <class T>
+struct MinFunctor {
+    struct Params {
+        CUDA4DNN_HOST_DEVICE Params() { }
+    };
+
+    CUDA4DNN_DEVICE MinFunctor() { }
+    CUDA4DNN_DEVICE MinFunctor(const Params& params) { }
+
+    CUDA4DNN_DEVICE T operator()(T x, T y) {
+        using csl::device::min;
+        return min(x, y);
+    }
+};
+
 template <class T>
 struct SumFunctor {
     struct Params {
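Note: eltwise_op is a generic kernel parameterized by a binary functor, so a new operation only needs a functor like MinFunctor above plus the dispatch entries below. A rough host-side analogue of that pattern, illustrative only and not part of the patch:

    #include <algorithm>
    #include <cstddef>
    #include <vector>

    // Generic elementwise combine, parameterized by a binary functor.
    template <class T, class Functor>
    void eltwise_op(std::vector<T>& output, const std::vector<T>& x, const std::vector<T>& y)
    {
        Functor f;
        for (std::size_t i = 0; i < output.size(); i++)
            output[i] = f(x[i], y[i]);
    }

    template <class T>
    struct MinFunctor
    {
        T operator()(T a, T b) const { return std::min(a, b); }
    };

    // Usage: eltwise_op<float, MinFunctor<float>>(out, x, y);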
@@ -15,6 +15,9 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels {
     template <class T>
     void eltwise_max_2(const csl::Stream& stream, csl::Span<T> output, csl::View<T> x, csl::View<T> y);

+    template <class T>
+    void eltwise_min_2(const csl::Stream& stream, csl::Span<T> output, csl::View<T> x, csl::View<T> y);
+
     template <class T>
     void eltwise_sum_2(const csl::Stream& stream, csl::Span<T> output, csl::View<T> x, csl::View<T> y);

@@ -25,7 +25,8 @@ namespace cv { namespace dnn { namespace cuda4dnn {
         MAX,
         SUM,
         PRODUCT,
-        DIV
+        DIV,
+        MIN,
     };

     class EltwiseOpBase : public CUDABackendNode {
@@ -78,6 +79,7 @@ namespace cv { namespace dnn { namespace cuda4dnn {
             switch (op)
             {
             case EltwiseOpType::MAX: kernels::eltwise_max_2<T>(stream, output, input_x, input_y); break;
+            case EltwiseOpType::MIN: kernels::eltwise_min_2<T>(stream, output, input_x, input_y); break;
             case EltwiseOpType::PRODUCT: kernels::eltwise_prod_2<T>(stream, output, input_x, input_y); break;
             case EltwiseOpType::DIV: kernels::eltwise_div_2<T>(stream, output, input_x, input_y); break;
             case EltwiseOpType::SUM:
@@ -104,6 +106,7 @@ namespace cv { namespace dnn { namespace cuda4dnn {
             switch (op)
             {
             case EltwiseOpType::MAX: kernels::eltwise_max_2<T>(stream, output, output, input); break;
+            case EltwiseOpType::MIN: kernels::eltwise_min_2<T>(stream, output, output, input); break;
             case EltwiseOpType::PRODUCT: kernels::eltwise_prod_2<T>(stream, output, output, input); break;
             case EltwiseOpType::DIV: kernels::eltwise_div_2<T>(stream, output, output, input); break;
             case EltwiseOpType::SUM:
@@ -71,7 +71,8 @@ public:
         PROD = 0,
         SUM = 1,
         MAX = 2,
-        DIV = 3
+        DIV = 3,
+        MIN = 4,
     } op;
     std::vector<float> coeffs;

@@ -109,6 +110,8 @@ public:
             op = SUM;
         else if (operation == "max")
             op = MAX;
+        else if (operation == "min")
+            op = MIN;
         else if (operation == "div")
             op = DIV;
         else
@@ -470,6 +473,13 @@ public:
                         dstptr[j] = std::max(srcptr0[j], srcptrI[j]);
                     }
                 }
+                else if (op == MIN)
+                {
+                    for (int j = 0; j < blockSize; j++)
+                    {
+                        dstptr[j] = std::min(srcptr0[j], srcptrI[j]);
+                    }
+                }
                 else if (op == SUM)
                 {
                     if (!coeffsptr || (coeffsptr[0] == 1.0f && coeffsptr[1] == 1.0f))
@@ -524,6 +534,13 @@ public:
                         dstptr[j] = std::max(dstptr[j], srcptrI[j]);
                     }
                 }
+                else if (op == MIN)
+                {
+                    for (int j = 0; j < blockSize; j++)
+                    {
+                        dstptr[j] = std::min(dstptr[j], srcptrI[j]);
+                    }
+                }
                 else if (op == SUM)
                 {
                     if (!coeffsptr || coeffsptr[inputIdx] == 1.0f)
@@ -641,6 +658,11 @@ public:
                 for (int i = 2; i < inputs.size(); ++i)
                     max(inputs[i], outputs[0], outputs[0]);
                 break;
+            case MIN:
+                min(inputs[0], inputs[1], outputs[0]);
+                for (int i = 2; i < inputs.size(); ++i)
+                    min(inputs[i], outputs[0], outputs[0]);
+                break;
             default:
                 return false;
         }
@@ -745,6 +767,7 @@ public:
         auto op_ = [this] {
             switch (op) {
             case MAX: return cuda4dnn::EltwiseOpType::MAX;
+            case MIN: return cuda4dnn::EltwiseOpType::MIN;
             case SUM: return cuda4dnn::EltwiseOpType::SUM;
             case PROD: return cuda4dnn::EltwiseOpType::PRODUCT;
             case DIV: return cuda4dnn::EltwiseOpType::DIV;
@@ -799,6 +822,12 @@ public:
                 for (int i = 2; i < inputBuffers.size(); ++i)
                     topExpr = max(topExpr, inputBuffers[i](x, y, c, n));
                 break;
+            case MIN:
+                topExpr = min(inputBuffers[0](x, y, c, n),
+                              inputBuffers[1](x, y, c, n));
+                for (int i = 2; i < inputBuffers.size(); ++i)
+                    topExpr = min(topExpr, inputBuffers[i](x, y, c, n));
+                break;
             default:
                 return Ptr<BackendNode>();
         }
@@ -823,6 +852,8 @@ public:
             ieLayer.setEltwiseType(InferenceEngine::Builder::EltwiseLayer::EltwiseType::DIV);
         else if (op == MAX)
             ieLayer.setEltwiseType(InferenceEngine::Builder::EltwiseLayer::EltwiseType::MAX);
+        else if (op == MIN)
+            ieLayer.setEltwiseType(InferenceEngine::Builder::EltwiseLayer::EltwiseType::MIN);
         else
             CV_Error(Error::StsNotImplemented, "Unsupported eltwise operation");

@@ -857,6 +888,7 @@ public:
             case PROD: curr_node = std::make_shared<ngraph::op::v1::Multiply>(curr_node, next_node); break;
             case DIV: curr_node = std::make_shared<ngraph::op::v1::Divide>(curr_node, next_node); break;
             case MAX: curr_node = std::make_shared<ngraph::op::v1::Maximum>(curr_node, next_node); break;
+            case MIN: curr_node = std::make_shared<ngraph::op::v1::Minimum>(curr_node, next_node); break;
             default: CV_Error(Error::StsNotImplemented, "Unsupported eltwise operation");
             }
         }
@@ -105,7 +105,7 @@ private:
     void parseSplit (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
     void parseBias (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
     void parsePow (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
-    void parseMax (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
+    void parseMinMax (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
     void parseNeg (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
     void parseConstant (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
     void parseLSTM (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
@@ -1105,10 +1105,12 @@ void ONNXImporter::parsePow(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
     addLayer(layerParams, node_proto);
 }

-void ONNXImporter::parseMax(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
+// "Min" "Max"
+void ONNXImporter::parseMinMax(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
 {
+    const std::string& layer_type = node_proto.op_type();
     layerParams.type = "Eltwise";
-    layerParams.set("operation", "max");
+    layerParams.set("operation", layer_type == "Max" ? "max" : "min");
     addLayer(layerParams, node_proto);
 }

@@ -2421,7 +2423,7 @@ const ONNXImporter::DispatchMap ONNXImporter::buildDispatchMap()
     dispatch["Split"] = &ONNXImporter::parseSplit;
     dispatch["Add"] = dispatch["Sum"] = dispatch["Sub"] = &ONNXImporter::parseBias;
     dispatch["Pow"] = &ONNXImporter::parsePow;
-    dispatch["Max"] = &ONNXImporter::parseMax;
+    dispatch["Min"] = dispatch["Max"] = &ONNXImporter::parseMinMax;
     dispatch["Neg"] = &ONNXImporter::parseNeg;
     dispatch["Constant"] = &ONNXImporter::parseConstant;
     dispatch["LSTM"] = &ONNXImporter::parseLSTM;
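Note: with the dispatch entry above, an ONNX graph containing a Min node imports through the same path as Max. A one-line sketch (the model file name below is a placeholder, not from the patch):

    // Any ONNX graph with a Min node now loads; the node becomes an Eltwise/min layer.
    cv::dnn::Net net = cv::dnn::readNetFromONNX("model_with_min.onnx");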
@@ -893,7 +893,7 @@ TEST_P(Eltwise, Accuracy)

 INSTANTIATE_TEST_CASE_P(Layer_Test_Halide, Eltwise, Combine(
 /*input size*/ Values(Vec3i(1, 4, 5), Vec3i(2, 8, 6)),
-/*operation*/ Values("prod", "sum", "div", "max"),
+/*operation*/ Values("prod", "sum", "div", "max", "min"),
 /*num convs*/ Values(1, 2, 3),
 /*weighted(for sum only)*/ Bool(),
     dnnBackendsAndTargetsWithHalide()
@@ -2340,7 +2340,7 @@ public:
     static testing::internal::ParamGenerator<std::string> eltwiseOpList()
     {
         // TODO: automate list generation
-        return Values("sum", "max", "prod", "div");
+        return Values("sum", "max", "min", "prod", "div");
     }

     static testing::internal::ParamGenerator<std::string> activationLayersList()
@@ -301,6 +301,11 @@ TEST_P(Test_ONNX_layers, ReduceMax)
     testONNXModels("reduce_max_axis_1");
 }

+TEST_P(Test_ONNX_layers, Min)
+{
+    testONNXModels("min", npy, 0, 0, false, true, 2);
+}
+
 TEST_P(Test_ONNX_layers, Scale)
 {
     if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)