diff --git a/modules/dnn/src/cuda4dnn/primitives/layer_norm.hpp b/modules/dnn/src/cuda4dnn/primitives/layer_norm.hpp
index 7f4658a50a..baf7691c46 100644
--- a/modules/dnn/src/cuda4dnn/primitives/layer_norm.hpp
+++ b/modules/dnn/src/cuda4dnn/primitives/layer_norm.hpp
@@ -28,11 +28,20 @@ namespace cv { namespace dnn { namespace cuda4dnn {
     public:
         using wrapper_type = GetCUDABackendWrapperType<T>;

-        LayerNormOp(csl::Stream stream_, int normalized_axis, float epsilon_, size_t loops)
+        LayerNormOp(csl::Stream stream_, const Mat &scale, const Mat &bias, int normalized_axis, float epsilon_, size_t loops)
             : stream(std::move(stream_)), epsilon(epsilon_) {
             CV_CheckGE(normalized_axis, 0, "LayerNorm/CUDA: axis needs to be normalized");
             axis = static_cast<size_t>(normalized_axis);

+            if (!scale.empty()) {
+                input_scale_tensor = csl::makeTensorHeader<T>(scale);
+                csl::copyMatToTensor<T>(scale, input_scale_tensor, stream);
+            }
+            if (!bias.empty()) {
+                input_bias_tensor = csl::makeTensorHeader<T>(bias);
+                csl::copyMatToTensor<T>(bias, input_bias_tensor, stream);
+            }
+
             csl::WorkspaceBuilder builder;
             builder.require<float>(loops);
             builder.require<float>(loops);
@@ -43,10 +52,25 @@ namespace cv { namespace dnn { namespace cuda4dnn {
             const std::vector<cv::Ptr<BackendWrapper>>& outputs,
             csl::Workspace& workspace) override {
             auto input_wrapper = inputs[0].dynamicCast<wrapper_type>();
-            auto scale_wrapper = inputs[1].dynamicCast<wrapper_type>();
-
             auto input = input_wrapper->getView();
-            auto scale = scale_wrapper->getView();
+
+            csl::TensorView<T> scale;
+            if (input_scale_tensor.empty()) {
+                auto scale_wrapper = inputs[1].dynamicCast<wrapper_type>();
+                scale = scale_wrapper->getView();
+            } else {
+                scale = csl::TensorView<T>(input_scale_tensor);
+            }
+
+            csl::TensorView<T> bias;
+            if (input_bias_tensor.empty()) {
+                if (inputs.size() >= 3) {
+                    auto bias_wrapper = inputs[2].dynamicCast<wrapper_type>();
+                    bias = bias_wrapper->getView();
+                }
+            } else {
+                bias = csl::TensorView<T>(input_bias_tensor);
+            }

             auto output_wrapper = outputs[0].dynamicCast<wrapper_type>();
             auto output = output_wrapper->getSpan();
@@ -67,9 +91,7 @@ namespace cv { namespace dnn { namespace cuda4dnn {
             kernels::reduce_mean_sqr_sum(stream, mean, inv_stddev, input, norm_size);
             kernels::compute_normalization_scale(stream, inv_stddev, mean, inv_stddev, norm_size, epsilon);

-            if (inputs.size() == 3) {
-                auto bias_wrapper = inputs[2].dynamicCast<wrapper_type>();
-                auto bias = bias_wrapper->getView();
+            if (!bias.empty()) {
                 kernels::normalize_mean_variance_layernorm(stream, output, input, scale, bias, mean, inv_stddev, norm_size);
             } else {
                 kernels::normalize_mean_variance_layernorm(stream, output, input, scale, mean, inv_stddev, norm_size);
@@ -81,6 +103,8 @@ namespace cv { namespace dnn { namespace cuda4dnn {
     private:
         csl::Stream stream;

+        csl::Tensor<T> input_scale_tensor;
+        csl::Tensor<T> input_bias_tensor;
         float epsilon;
         size_t axis;
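Note: the CUDA changes above follow a "cache constants at construction, fall back to runtime inputs" pattern — constant scale/bias Mats are uploaded to device tensors once in the constructor, and forward() only reads inputs[1]/inputs[2] when no constant was supplied. The sketch below restates that pattern in plain C++ (std::vector stands in for csl::Tensor and the device upload; every name here is illustrative, not part of cuda4dnn):

#include <utility>
#include <vector>

// Minimal sketch of the constant-or-runtime-operand selection used above.
class NormOpSketch {
public:
    // An empty constant means "expect this operand as a runtime input instead".
    NormOpSketch(std::vector<float> const_scale, std::vector<float> const_bias)
        : scale_tensor(std::move(const_scale)), bias_tensor(std::move(const_bias)) {}

    // inputs[0] = x, inputs[1] = scale (optional), inputs[2] = bias (optional)
    void forward(const std::vector<std::vector<float>> &inputs) const {
        const std::vector<float> &scale =
            scale_tensor.empty() ? inputs.at(1) : scale_tensor;

        const std::vector<float> *bias = nullptr;
        if (!bias_tensor.empty())
            bias = &bias_tensor;
        else if (inputs.size() >= 3)
            bias = &inputs[2];

        // ... launch the normalization kernel with `scale` and optional `bias` ...
        (void)scale;
        (void)bias;
    }

private:
    std::vector<float> scale_tensor, bias_tensor; // filled once at construction
};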
diff --git a/modules/dnn/src/layers/attention_layer.cpp b/modules/dnn/src/layers/attention_layer.cpp
index 559480d599..2bda1f3b18 100644
--- a/modules/dnn/src/layers/attention_layer.cpp
+++ b/modules/dnn/src/layers/attention_layer.cpp
@@ -63,10 +63,11 @@ class AttentionLayerImpl CV_FINAL : public AttentionLayer {
                          const int requiredOutputs,
                          std::vector<MatShape> &outputs,
                          std::vector<MatShape> &internals) const CV_OVERRIDE {
-        CV_CheckEQ(inputs.size(), static_cast<size_t>(3), "DNN/Attention: three inputs are required");
+        int num_inputs = inputs.size() + blobs.size();
+        CV_CheckEQ(num_inputs, 3, "DNN/Attention: three inputs are required");
         const auto &input_shape = inputs[0];
-        const auto &weight_shape = inputs[1];
-        const auto &bias_shape = inputs[2];
+        const auto &weight_shape = blobs.empty() ? inputs[1] : shape(blobs.front());
+        const auto &bias_shape = blobs.empty() ? inputs[2] : shape(blobs.back());

         CV_CheckEQ(input_shape.size(), static_cast<size_t>(3), "DNN/Attention: invalid input dimension");
         CV_CheckEQ(weight_shape.size(), static_cast<size_t>(2), "DNN/Attention: invalid weight dimension");
@@ -109,10 +110,20 @@ class AttentionLayerImpl CV_FINAL : public AttentionLayer {
         seq_len = static_cast<size_t>(input_shape[1]);
         input_hidden_size = static_cast<size_t>(input_shape[2]);

-        const auto weight_shape = shape(inputs[1]);
+        const auto &weight = blobs.empty() ? inputs[1] : blobs.front();
+        const auto weight_shape = shape(weight);
         hidden_size = weight_shape[1];
         qkv_hidden_sizes[2] = hidden_size - qkv_hidden_sizes[0] - qkv_hidden_sizes[1];
         qkv_head_sizes[2] = static_cast<size_t>(qkv_hidden_sizes[2] / num_heads);
+
+        if (!blobs.empty()) {
+            const auto *weight_data = weight.ptr<const float>();
+            packWeight(num_heads, qkv_head_sizes[0], input_hidden_size, weight_data, hidden_size, packed_weight_q, opt);
+            packWeight(num_heads, qkv_head_sizes[1], input_hidden_size, weight_data + qkv_hidden_sizes[0], hidden_size, packed_weight_k, opt);
+            packWeight(num_heads, qkv_head_sizes[2], input_hidden_size, weight_data + qkv_hidden_sizes[0] + qkv_hidden_sizes[1], hidden_size, packed_weight_v, opt);
+
+            is_prepacked = true;
+        }
     }

     void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE {
@@ -132,8 +143,7 @@ class AttentionLayerImpl CV_FINAL : public AttentionLayer {

         // prepack weights
         if (!is_prepacked) {
-            // prepack
-            const auto &weight = inputs[1];
+            const auto &weight = blobs.empty() ? inputs[1] : blobs.front();
             const auto *weight_data = weight.ptr<const float>();
             packWeight(num_heads, qkv_head_sizes[0], input_hidden_size, weight_data, hidden_size, packed_weight_q, opt);
             packWeight(num_heads, qkv_head_sizes[1], input_hidden_size, weight_data + qkv_hidden_sizes[0], hidden_size, packed_weight_k, opt);
@@ -153,7 +163,7 @@ class AttentionLayerImpl CV_FINAL : public AttentionLayer {
         float *QKV[3] = {Q, K, V}; // Q, K, V: [B, N, S, H]
         {
             const auto &input = inputs[0];
-            const auto &bias = inputs[2];
+            const auto &bias = blobs.empty() ? inputs[2] : blobs.back();
             const auto *input_data = input.ptr<const float>();
             const auto *bias_data = bias.ptr<const float>();
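For reference, the qkv bookkeeping that finalize() performs above works out as in the small standalone example below. The concrete numbers (12 heads, a 768/768 Q/K split on a 2304-wide packed weight) are only an illustration, not values taken from this PR:

#include <array>
#include <cstddef>
#include <iostream>

int main() {
    // Illustrative values: qkv_hidden_sizes arrives from the importer with the
    // V entry unset, and the layer derives it from the packed weight width.
    const std::size_t num_heads = 12;
    const int weight_shape[2] = {768, 2304};            // {input_hidden_size, hidden_size}
    std::array<std::size_t, 3> qkv_hidden_sizes = {768, 768, 0};

    const std::size_t hidden_size = weight_shape[1];
    qkv_hidden_sizes[2] = hidden_size - qkv_hidden_sizes[0] - qkv_hidden_sizes[1]; // 768
    const std::size_t v_head_size = qkv_hidden_sizes[2] / num_heads;               // 64

    // Q, K and V weights start at column offsets 0, q and q+k of the packed matrix,
    // which is exactly how the packWeight calls above slice weight_data.
    std::cout << "V hidden size: " << qkv_hidden_sizes[2]
              << ", V head size: " << v_head_size << std::endl;
    return 0;
}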
diff --git a/modules/dnn/src/layers/layer_norm.cpp b/modules/dnn/src/layers/layer_norm.cpp
index 5ebebbd32d..487383efdc 100644
--- a/modules/dnn/src/layers/layer_norm.cpp
+++ b/modules/dnn/src/layers/layer_norm.cpp
@@ -31,6 +31,10 @@ namespace cv { namespace dnn {
 // https://github.com/onnx/onnx/blob/main/docs/Operators.md#LayerNormalization
 class LayerNormLayerImpl CV_FINAL : public LayerNormLayer
 {
+#ifdef HAVE_OPENCL
+    UMat weight_umat, bias_umat;
+#endif
+
 public:
     LayerNormLayerImpl(const LayerParams& params)
     {
@@ -58,22 +62,24 @@ public:
                          std::vector<MatShape> &internals) const CV_OVERRIDE
     {
         // check shapes of weight and bias if existed
-        // inputs >= 2 (X and Weight are requested, bias is optional)
-        CV_Check(inputs.size(), inputs.size() >= 2 && inputs.size() <= 3, "LayerNorm: require two (x, weight) or three (x, weight, bias) inputs");
+        // inputs >= 2 (X and Weight are required, bias is optional)
+        int num_inputs = inputs.size() + blobs.size();
+        CV_Check(num_inputs, num_inputs >= 2 && num_inputs <= 3, "LayerNorm: require two (x, weight) or three (x, weight, bias) inputs");

         auto x_shape = inputs[0];
         int x_ndims = static_cast<int>(x_shape.size());

-        auto w_shape = inputs[1];
+        // Weight and bias are either constants or variable
+        auto w_shape = blobs.empty() ? inputs[1] : shape(blobs.front());
         // if axis == last_dim, scale and b are both 1d tensor (represented as 2d mat nx1)
         int w_ndims = static_cast<int>(w_shape.size());
         w_ndims = (axis == x_ndims - 1 && w_ndims == 2) ? w_ndims - 1 : w_ndims;
         CV_CheckEQ(x_ndims - axis, w_ndims, "LayerNorm: shape of weight does not match with given axis and shape of input");
         for (int i = 0; i < w_ndims; ++i)
             CV_CheckEQ(x_shape[axis+i], w_shape[i], "LayerNorm: weight dimensions does not match with input dimensions");
-        if (inputs.size() == static_cast<size_t>(3))
+        if (num_inputs >= 3)
         {
-            auto b_shape = inputs[2];
+            auto b_shape = blobs.empty() ? inputs[2] : shape(blobs.back());
             CV_CheckEQ(w_shape.size(), b_shape.size(), "LayerNorm: shape of weight does not match with shape of bias");
             for (size_t i = 0; i < w_shape.size(); ++i)
                 CV_CheckEQ(w_shape[i], b_shape[i], "LayerNorm: bias dimensions does not match with weight dimensions");
@@ -89,6 +95,11 @@ public:
         const auto input_shape = shape(inputs[0]);
         axis = normalize_axis(axis, static_cast<int>(input_shape.size()));
+
+#ifdef HAVE_OPENCL
+        weight_umat.release();
+        bias_umat.release();
+#endif
     }

     void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
@@ -110,11 +121,11 @@
         outputs_arr.getMatVector(outputs);

         const auto &input = inputs[0];
-        const auto &scale = inputs[1];
+        const auto &scale = blobs.empty() ? inputs[1] : blobs.front();
         auto &output = outputs[0];

-        if (inputs.size() == 3) {
-            const auto &bias = inputs[2];
+        if ((inputs.size() + blobs.size()) >= 3) {
+            const auto &bias = blobs.empty() ? inputs[2] : blobs.back();
             fastNorm(input, scale, bias, output, epsilon, static_cast<size_t>(axis));
         } else {
             fastNorm(input, scale, output, epsilon, static_cast<size_t>(axis));
@@ -129,7 +140,13 @@
         inputs_.getUMatVector(inputs);
         outputs_.getUMatVector(outputs);

-        const auto &input = inputs[0], &scale = inputs[1]; // &bias = inputs[2]; // bias is optional
+        const auto &input = inputs[0];
+
+        // no fp16 support
+        if (input.depth() == CV_16F) {
+            return false;
+        }
+
         auto &output = outputs[0];

         const auto input_shape = shape(input);
@@ -137,11 +154,23 @@
         norm_size = static_cast<int>(total(input_shape, axis));
         float inv_norm_size = 1.f / norm_size;

-        const auto &bias = inputs.size() == 3 ? inputs[2] : UMat::zeros(norm_size, 1, CV_32F);
-
-        // no fp16 support
-        if (input.depth() == CV_16F) {
-            return false;
+        if (weight_umat.empty()) {
+            if (blobs.empty()) {
+                weight_umat = inputs[1];
+            } else {
+                blobs.front().copyTo(weight_umat);
+            }
+        }
+        if (bias_umat.empty()) {
+            if ((inputs.size() + blobs.size()) == 3) {
+                if (blobs.empty()) {
+                    bias_umat = inputs[2];
+                } else {
+                    blobs.back().copyTo(bias_umat);
+                }
+            } else {
+                bias_umat = UMat::zeros(norm_size, 1, CV_32F);
+            }
         }

         String base_opts = format(" -DT=float -DT4=float4 -Dconvert_T=convert_float4");
@@ -179,7 +208,7 @@
         if (!ret) {
             return false;
         }
-        // Calculate instance norm: output = scale * (x - mean) / sqrt(var + eps) + bias
+        // Calculate instance norm: output = weight * (x - mean) / sqrt(var + eps) + bias
         String mvn_kernel_name = format("mvn%d", num_vector);
         build_opt += " -DNORM_VARIANCE -DLAYER_NORM -DKERNEL_MVN";
         ocl::Kernel mvn_kernel(mvn_kernel_name.c_str(), ocl::dnn::mvn_oclsrc, build_opt);
@@ -192,8 +221,8 @@
         mvn_kernel.set(3, (float)epsilon);
         mvn_kernel.set(4, ocl::KernelArg::PtrReadOnly(mean));
         mvn_kernel.set(5, ocl::KernelArg::PtrReadOnly(mean_square));
-        mvn_kernel.set(6, ocl::KernelArg::PtrReadOnly(scale));
-        mvn_kernel.set(7, ocl::KernelArg::PtrReadOnly(bias));
+        mvn_kernel.set(6, ocl::KernelArg::PtrReadOnly(weight_umat));
+        mvn_kernel.set(7, ocl::KernelArg::PtrReadOnly(bias_umat));
         mvn_kernel.set(8, (int)1);
         mvn_kernel.set(9, (float)0.f);
         mvn_kernel.set(10, ocl::KernelArg::PtrWriteOnly(output));
@@ -218,15 +247,7 @@
         CV_CheckNE(axis, static_cast<int>(input_tensor_desc->GetShape().GetDimNum() - 1), "LayerNorm: CANN does not support axis set as last axis due to 1D mat compatibility issue");

-        auto scale_tensor_wrapper = inputs[1].dynamicCast<CannBackendWrapper>();
-        auto scale_tensor_desc = scale_tensor_wrapper->getTensorDesc();
-
-        auto bias_tensor_wrapper = inputs[2].dynamicCast<CannBackendWrapper>();
-        auto bias_tensor_desc = bias_tensor_wrapper->getTensorDesc();
-
         auto last_node = nodes[0].dynamicCast<CannBackendNode>()->getOp();
-        auto scale_node = nodes[1].dynamicCast<CannBackendNode>()->getOp();
-        auto bias_node = nodes[2].dynamicCast<CannBackendNode>()->getOp();

         auto op = std::make_shared<ge::op::LayerNorm>(name);
@@ -239,12 +260,34 @@
         // set inputs : x
         op->set_input_x_by_name(*last_node, input_tensor_wrapper->name.c_str());
         op->update_input_desc_x(*input_tensor_desc);
-        // set inputs : gamma
-        op->set_input_gamma_by_name(*scale_node, scale_tensor_wrapper->name.c_str());
-        op->update_input_desc_gamma(*scale_tensor_desc);
-        // set inputs : beta
-        op->set_input_beta_by_name(*bias_node, bias_tensor_wrapper->name.c_str());
-        op->update_input_desc_beta(*bias_tensor_desc);
+        // set inputs : gamma & beta
+        if (blobs.empty()) {
+            auto scale_tensor_wrapper = inputs[1].dynamicCast<CannBackendWrapper>();
+            auto scale_tensor_desc = scale_tensor_wrapper->getTensorDesc();
+            auto scale_node = nodes[1].dynamicCast<CannBackendNode>()->getOp();
+            op->set_input_gamma_by_name(*scale_node, scale_tensor_wrapper->name.c_str());
+            op->update_input_desc_gamma(*scale_tensor_desc);
+
+            if (inputs.size() == 3) {
+                auto bias_tensor_wrapper = inputs[2].dynamicCast<CannBackendWrapper>();
+                auto bias_tensor_desc = bias_tensor_wrapper->getTensorDesc();
+                auto bias_node = nodes[2].dynamicCast<CannBackendNode>()->getOp();
+                op->set_input_beta_by_name(*bias_node, bias_tensor_wrapper->name.c_str());
+                op->update_input_desc_beta(*bias_tensor_desc);
+            }
+        } else {
+            const auto &scale_mat = blobs.front();
+            const auto op_const_scale = std::make_shared<CannConstOp>(scale_mat.data, scale_mat.type(), shape(scale_mat), cv::format("%s_w", name.c_str()));
+            op->set_input_gamma(*(op_const_scale->getOp()));
+            op->update_input_desc_gamma(*(op_const_scale->getTensorDesc()));
+
+            if ((inputs.size() + blobs.size()) >= 3) {
+                const auto &bias_mat = blobs.back();
+                const auto op_const_bias = std::make_shared<CannConstOp>(bias_mat.data, bias_mat.type(), shape(bias_mat), cv::format("%s_b", name.c_str()));
+                op->set_input_beta(*(op_const_bias->getOp()));
+                op->update_input_desc_beta(*(op_const_bias->getTensorDesc()));
+            }
+        }

         // set outputs
         auto output_desc_y = std::make_shared<ge::TensorDesc>(ge::Shape(), ge::FORMAT_NCHW, ge::DT_FLOAT);
@@ -264,6 +307,7 @@
         auto ieInpNode = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
         const auto &input_shape = ieInpNode.get_shape();
         std::shared_ptr<ov::Node> mvn, result;
+        ov::Output<ov::Node> scale, bias;

         // mvn
         // https://docs.openvino.ai/2023.1/openvino_docs_ops_normalization_MVN_6.html
@@ -274,23 +318,33 @@
         mvn = std::make_shared<ov::op::v6::MVN>(ieInpNode, axes, normalize_variance, epsilon, ov::op::MVNEpsMode::INSIDE_SQRT);

         // layer norm = scale * mvn + bias
-        auto scale = nodes[1].dynamicCast<InfEngineNgraphNode>()->node;
-        ov::Output<ov::Node> bias;
-        if (nodes.size() == 3) {
-            bias = nodes[2].dynamicCast<InfEngineNgraphNode>()->node;
+        if (blobs.empty()) {
+            scale = nodes[1].dynamicCast<InfEngineNgraphNode>()->node;
+            if (nodes.size() == 3) {
+                bias = nodes[2].dynamicCast<InfEngineNgraphNode>()->node;
+            }
+        } else {
+            auto scale_mat = blobs.front();
+            const auto scale_shape = shape(scale_mat);
+            scale = std::make_shared<ov::op::v0::Constant>(ov::element::f32, std::vector<size_t>(scale_shape.begin(), scale_shape.end()), scale_mat.data);
+            if ((nodes.size() + blobs.size()) == 3) {
+                auto bias_mat = blobs.back();
+                const auto bias_shape = shape(bias_mat);
+                bias = std::make_shared<ov::op::v0::Constant>(ov::element::f32, std::vector<size_t>(bias_shape.begin(), bias_shape.end()), bias_mat.data);
+            }
         }

         if (axis == -1 || axis == input_shape.size() - 1) { // special case for 1D tensor (2D mat)
             std::vector<int64_t> shared_shape_v(input_shape.size(), 1);
             shared_shape_v.back() = -1;
             auto shared_shape = std::make_shared<ov::op::v0::Constant>(ov::element::i64, ov::Shape{shared_shape_v.size()}, shared_shape_v.data());
-            scale = std::make_shared<ov::op::v1::Reshape>(scale, shared_shape, true);
-            if (nodes.size() == 3) {
-                bias = std::make_shared<ov::op::v1::Reshape>(bias, shared_shape, true);
+            scale = std::make_shared<ov::op::v1::Reshape>(scale, shared_shape, true);
+            if ((nodes.size() + blobs.size()) == 3) {
+                bias = std::make_shared<ov::op::v1::Reshape>(bias, shared_shape, true);
             }
         }
         result = std::make_shared<ov::op::v1::Multiply>(mvn, scale);
-        if (nodes.size() == 3) {
+        if ((nodes.size() + blobs.size()) == 3) {
             result = std::make_shared<ov::op::v1::Add>(result, bias);
         }
@@ -308,10 +362,12 @@
         auto input_shape = input_wrapper->getShape();
         size_t loops = static_cast<size_t>(total(input_shape, 0, axis));

-        return make_cuda_node<cuda4dnn::LayerNormOp>(preferableTarget, std::move(context->stream), axis, epsilon, loops);
+        const auto scale = blobs.empty() ? Mat() : blobs.front(),
+                   bias = blobs.empty() ? Mat() : blobs.back();
+
+        return make_cuda_node<cuda4dnn::LayerNormOp>(preferableTarget, std::move(context->stream), scale, bias, axis, epsilon, loops);
     }
 #endif // HAVE_CUDA
-
 };

 Ptr<LayerNormLayer> LayerNormLayer::create(const LayerParams& params)
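As a reading aid for the OpenCL/CUDA kernels wired up above, the per-row math is "output = weight * (x - mean) / sqrt(var + eps) + bias", with the variance derived from the mean and mean-square the way the mvn kernel does it. The snippet below is only a scalar reference sketch, not the implementation any backend uses:

#include <cmath>
#include <vector>

// Reference layer normalization of one row with optional bias.
std::vector<float> layer_norm_row(const std::vector<float> &x,
                                  const std::vector<float> &weight,
                                  const std::vector<float> &bias,
                                  float epsilon) {
    const size_t n = x.size();
    float mean = 0.f, mean_square = 0.f;
    for (float v : x) { mean += v; mean_square += v * v; }
    mean /= n;
    mean_square /= n;
    const float var = mean_square - mean * mean;
    const float inv_stddev = 1.f / std::sqrt(var + epsilon);

    std::vector<float> y(n);
    for (size_t i = 0; i < n; ++i)
        y[i] = weight[i] * (x[i] - mean) * inv_stddev + (bias.empty() ? 0.f : bias[i]);
    return y;
}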
diff --git a/modules/dnn/src/onnx/onnx_importer.cpp b/modules/dnn/src/onnx/onnx_importer.cpp
index 5583fd0453..997914c4cf 100644
--- a/modules/dnn/src/onnx/onnx_importer.cpp
+++ b/modules/dnn/src/onnx/onnx_importer.cpp
@@ -3160,18 +3160,9 @@ void ONNXImporter::parseLayerNorm(LayerParams& layerParams, const opencv_onnx::N
     // constants as constant inputs
     for (size_t i = 1; i < node_proto.input_size(); i++)
     {
-        if (layer_id.find(node_proto.input(i)) == layer_id.end())
-        {
+        if (constBlobs.find(node_proto.input(i)) != constBlobs.end()) {
             Mat blob = getBlob(node_proto, i);
-
-            LayerParams constParams;
-            constParams.name = node_proto.input(i);
-            constParams.type = "Const";
-            constParams.blobs.push_back(blob);
-
-            opencv_onnx::NodeProto proto;
-            proto.add_output(constParams.name);
-            addLayer(constParams, proto);
+            layerParams.blobs.push_back(blob);
         }
     }
@@ -3935,17 +3926,9 @@ void ONNXImporter::parseAttention(LayerParams& params, const opencv_onnx::NodePr
     CV_CheckEQ(param_qkv_hidden_sizes.size(), 3, "ONNXImporter/parseAttention: qkv_hidden_sizes is must and only have three elements");

     for (size_t i = 1; i < node_proto.input_size(); i++) {
-        if (layer_id.find(node_proto.input(i)) == layer_id.end()) {
-            Mat tensor = getBlob(node_proto, i);
-
-            LayerParams const_params;
-            const_params.name = node_proto.input(i);
-            const_params.type = "Const";
-            const_params.blobs.push_back(tensor);
-
-            opencv_onnx::NodeProto proto;
-            proto.add_output(const_params.name);
-            addLayer(const_params, proto);
+        if (constBlobs.find(node_proto.input(i)) != constBlobs.end()) {
+            Mat blob = getBlob(node_proto, i);
+            params.blobs.push_back(blob);
         }
     }
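End-to-end, the importer changes mean that an ONNX model whose LayerNormalization/Attention weights, scale and bias are stored as initializers now gets those constants placed directly into the layer's blobs instead of being wrapped in extra Const layers. A minimal usage sketch with the public dnn API follows; the model path is a placeholder, not a file from this PR:

#include <opencv2/core.hpp>
#include <opencv2/dnn.hpp>
#include <iostream>
#include <vector>

int main() {
    // Hypothetical model containing LayerNormalization with constant scale/bias.
    cv::dnn::Net net = cv::dnn::readNetFromONNX("model_with_layernorm.onnx");

    // e.g. a [batch, seq_len, hidden] float input
    cv::Mat input(std::vector<int>{1, 8, 768}, CV_32F);
    cv::randu(input, -1.0, 1.0);

    net.setInput(input);
    cv::Mat output = net.forward();
    std::cout << "output dims: " << output.dims << std::endl;
    return 0;
}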