mirror of
https://github.com/opencv/opencv.git
synced 2025-06-07 17:44:04 +08:00
Merge pull request #25271 from fengyuentau:matmul_bias
Merge with https://github.com/opencv/opencv_extra/pull/1158 Todo: - [x] Fix Attention pattern recognition. - [x] Handle other backends. Benchmark: "VIT_B_32 OCV/CPU", M1, results in milliseconds. | Model | 4.x | This PR | | - | - | - | | VIT_B_32 OCV/CPU | 87.66 | **83.83** | ### Pull Request Readiness Checklist See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [x] I agree to contribute to the project under Apache 2 License. - [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [x] The PR is proposed to the proper branch - [x] There is a reference to the original bug report and related work - [x] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. - [x] The feature is well documented and sample code can be built with the project CMake
This commit is contained in:
parent
9716bf95ae
commit
b758897c29
@ -12,6 +12,8 @@
|
||||
#include "../csl/tensor.hpp"
|
||||
#include "../csl/tensor_ops.hpp"
|
||||
|
||||
#include "../kernels/eltwise_ops.hpp" // for adding bias
|
||||
|
||||
#include <opencv2/core.hpp>
|
||||
|
||||
#include <utility>
|
||||
@ -23,7 +25,7 @@ namespace cv { namespace dnn { namespace cuda4dnn {
|
||||
public:
|
||||
using wrapper_type = GetCUDABackendWrapperType<T>;
|
||||
|
||||
MatMulBroadcastOp(csl::Stream stream_, csl::cublas::Handle handle, const Mat &B, bool _transA, bool _transB,
|
||||
MatMulBroadcastOp(csl::Stream stream_, csl::cublas::Handle handle, const Mat &B, const Mat &bias, bool _transA, bool _transB,
|
||||
const std::vector<size_t> &A_offsets_, const std::vector<size_t> &B_offsets_, std::vector<size_t> &C_offsets_,
|
||||
size_t batch_)
|
||||
: stream(std::move(stream_)), cublasHandle(std::move(handle)), A_offsets(A_offsets_), B_offsets(B_offsets_), C_offsets(C_offsets_), batch(batch_)
|
||||
@ -33,6 +35,11 @@ namespace cv { namespace dnn { namespace cuda4dnn {
|
||||
csl::copyMatToTensor<T>(B, input_B_tensor, stream);
|
||||
}
|
||||
|
||||
if (!bias.empty()) {
|
||||
bias_tensor = csl::makeTensorHeader<T>(bias);
|
||||
csl::copyMatToTensor<T>(bias, bias_tensor, stream);
|
||||
}
|
||||
|
||||
transA = _transA;
|
||||
transB = _transB;
|
||||
}
|
||||
@ -42,9 +49,6 @@ namespace cv { namespace dnn { namespace cuda4dnn {
|
||||
const std::vector<cv::Ptr<BackendWrapper>>& outputs,
|
||||
csl::Workspace& workspace) override
|
||||
{
|
||||
CV_Assert(((inputs.size() == 2 && input_B_tensor.empty()) ||
|
||||
(inputs.size() == 1 && !input_B_tensor.empty())) && outputs.size() == 1);
|
||||
|
||||
auto input_A_wrapper = inputs[0].dynamicCast<wrapper_type>();
|
||||
auto input_A = input_A_wrapper->getView();
|
||||
|
||||
@ -60,12 +64,26 @@ namespace cv { namespace dnn { namespace cuda4dnn {
|
||||
auto output = output_wrapper->getSpan();
|
||||
|
||||
csl::tensor_ops::gemmBatched<T>(cublasHandle, batch, 0.f, output, C_offsets, 1.f, transA, input_A, A_offsets, transB, input_B, B_offsets);
|
||||
|
||||
// add bias if exists
|
||||
if (!bias_tensor.empty() || inputs.size() >= 3) {
|
||||
csl::TensorView<T> bias;
|
||||
if (bias_tensor.empty()) {
|
||||
auto bias_wrapper = inputs[2].dynamicCast<wrapper_type>();
|
||||
bias = bias_wrapper->getView();
|
||||
} else {
|
||||
bias = csl::TensorView<T>(bias_tensor);
|
||||
}
|
||||
|
||||
kernels::eltwise_sum_2<T>(stream, output, output, bias);
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
csl::Stream stream;
|
||||
csl::cublas::Handle cublasHandle;
|
||||
csl::Tensor<T> input_B_tensor;
|
||||
csl::Tensor<T> bias_tensor;
|
||||
bool transA, transB;
|
||||
|
||||
std::vector<size_t> A_offsets;
|
||||
|
@ -26,6 +26,10 @@ using namespace cv::dnn::cuda4dnn;
|
||||
namespace cv { namespace dnn {
|
||||
|
||||
class MatMulLayerImpl CV_FINAL : public MatMulLayer {
|
||||
#ifdef HAVE_OPENCL
|
||||
UMat weight_umat, bias_umat;
|
||||
#endif
|
||||
|
||||
public:
|
||||
MatMulLayerImpl(const LayerParams& params) {
|
||||
setParamsFrom(params);
|
||||
@ -34,6 +38,8 @@ class MatMulLayerImpl CV_FINAL : public MatMulLayer {
|
||||
trans_b = params.get<bool>("transB", false);
|
||||
alpha = params.get<float>("alpha", 1.f);
|
||||
beta = params.get<float>("beta", 1.f);
|
||||
|
||||
real_ndims_C = params.get<int>("real_ndims_C", -1);
|
||||
}
|
||||
|
||||
virtual bool supportBackend(int backendId) CV_OVERRIDE {
|
||||
@ -48,8 +54,9 @@ class MatMulLayerImpl CV_FINAL : public MatMulLayer {
|
||||
const int requiredOutputs,
|
||||
std::vector<MatShape> &outputs,
|
||||
std::vector<MatShape> &internals) const CV_OVERRIDE {
|
||||
CV_CheckGE(inputs.size(), static_cast<size_t>(1), "DNN/MatMul: one varible input at least");
|
||||
CV_CheckLE(inputs.size(), static_cast<size_t>(2), "DNN/MatMul: two variable inputs at most");
|
||||
int num_inputs = inputs.size() + blobs.size();
|
||||
CV_CheckGE(num_inputs, 2, "DNN/MatMul: two inputs at least");
|
||||
CV_CheckLE(num_inputs, 3, "DNN/MatMul: three inputs at most");
|
||||
|
||||
const auto shape_A = inputs[0], shape_B = blobs.empty() ? inputs[1] : shape(blobs[0]);
|
||||
CV_CheckGE(shape_A.size(), static_cast<size_t>(2), "DNN/MatMul: invalid shape of input A");
|
||||
@ -64,7 +71,7 @@ class MatMulLayerImpl CV_FINAL : public MatMulLayer {
|
||||
int K_B = trans_b ? nB : mB;
|
||||
CV_CheckEQ(K_A, K_B, "DNN/MatMul: invalid dimension K");
|
||||
|
||||
// Check legal broadcast. It is legal for sure if A and B are 2d, or one of them is 2d.
|
||||
// Check if inputs are broadcastable.
|
||||
MatShape common_shape;
|
||||
if (shape_A.size() != 2 || shape_B.size() != 2) {
|
||||
const auto &shape_more_dims = shape_A.size() > shape_B.size() ? shape_A : shape_B;
|
||||
@ -89,6 +96,24 @@ class MatMulLayerImpl CV_FINAL : public MatMulLayer {
|
||||
common_shape[1] = N;
|
||||
}
|
||||
|
||||
// Check if bias is broadcastable
|
||||
if (num_inputs == 3) {
|
||||
const auto shape_C = blobs.empty() ? inputs.back() : shape(blobs.back());
|
||||
if (real_ndims_C == 1) { // (1) or (N)
|
||||
CV_Check(shape_C[0], shape_C[0] == 1 || shape_C[0] == N, "DNN/MatMul: invalid dimension of C");
|
||||
} else if (real_ndims_C >= 2) {
|
||||
const auto &shape_large = common_shape.size() > shape_C.size() ? common_shape : shape_C;
|
||||
const auto &shape_small = common_shape.size() > shape_C.size() ? shape_C : common_shape;
|
||||
size_t diff_dims = shape_large.size() - shape_small.size();
|
||||
for (size_t i = 0; i < shape_small.size(); i++) {
|
||||
const auto dl = shape_small[i], dm = shape_large[i + diff_dims];
|
||||
if (dl != 1 && dm != 1 && dl != dm) {
|
||||
CV_Error(Error::StsBadSize, "DNN/MatMul: invalid shape of C");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
outputs.assign(1, common_shape);
|
||||
return false;
|
||||
}
|
||||
@ -109,6 +134,44 @@ class MatMulLayerImpl CV_FINAL : public MatMulLayer {
|
||||
fastGemmPackB(blobs[0], packed_input_B, trans_b, opt);
|
||||
helper.updatePackedBOffsets(packed_input_B.size());
|
||||
}
|
||||
|
||||
// broadcast bias if needed
|
||||
if ((inputs.size() + blobs.size()) >= 3 && blobs.size() >= 2) {
|
||||
const auto bias_mat = blobs.back();
|
||||
const auto bias_shape = shape(bias_mat);
|
||||
bool is_broadcast_needed = real_ndims_C == 0 || real_ndims_C == 1 || (total(bias_shape) != total(C_shape) || bias_shape.size() != C_shape.size());
|
||||
|
||||
if (is_broadcast_needed) {
|
||||
broadcast_bias = Mat(C_shape, CV_32F);
|
||||
auto *broadcast_bias_ptr = broadcast_bias.ptr<float>();
|
||||
|
||||
const auto *bias = bias_mat.ptr<const float>();
|
||||
if (bias_mat.total() == 1) { // [], [1], [1, ...]
|
||||
float b = (*bias) * beta;
|
||||
for (size_t i = 0; i < broadcast_bias.total(); i++) {
|
||||
broadcast_bias_ptr[i] = b;
|
||||
}
|
||||
} else if (real_ndims_C == 1) { // [n]
|
||||
size_t inner_size = C_shape.back(),
|
||||
loops = total(C_shape) / inner_size;
|
||||
for (size_t i = 0; i < loops; i++) {
|
||||
size_t step = i * inner_size;
|
||||
for (size_t j = 0; j < inner_size; j++) {
|
||||
broadcast_bias_ptr[step + j] = beta * bias[j];
|
||||
}
|
||||
}
|
||||
} else {
|
||||
broadcast(bias_mat, C_shape, broadcast_bias);
|
||||
}
|
||||
} else {
|
||||
broadcast_bias = blobs.back();
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
weight_umat.release();
|
||||
bias_umat.release();
|
||||
#endif
|
||||
}
|
||||
|
||||
// works like Y = numpy.matmul(A, B)
|
||||
@ -134,7 +197,38 @@ class MatMulLayerImpl CV_FINAL : public MatMulLayer {
|
||||
|
||||
const auto *a = A.ptr<const float>();
|
||||
auto *y = Y.ptr<float>();
|
||||
std::memset(y, 0, Y.total() * sizeof(float));
|
||||
// add bias if existed
|
||||
if ((inputs.size() + blobs.size()) >= 3) {
|
||||
const auto &shape_Y = shape(Y);
|
||||
if (blobs.empty()) { // bias from input
|
||||
const auto &bias_mat = inputs.back();
|
||||
const auto *bias = bias_mat.ptr<const float>();
|
||||
if (bias_mat.total() == 1) { // [], [1], [1, ...]
|
||||
float b = (*bias) * beta;
|
||||
for (size_t i = 0; i < Y.total(); i++) {
|
||||
y[i] = b;
|
||||
}
|
||||
} else if (real_ndims_C == 1) { // [n]
|
||||
const size_t inner_size = shape_Y.back(),
|
||||
batches = total(Y) / inner_size;
|
||||
parallel_for_(Range(0, batches), [&] (const Range &r) {
|
||||
for (int i = r.start; i < r.end; i++) {
|
||||
const size_t output_offset = i * inner_size;
|
||||
for (size_t j = 0; j < inner_size; j++) {
|
||||
y[output_offset + j] = beta * bias[j];
|
||||
}
|
||||
}
|
||||
}, double(batches * inner_size * (1 / 1024.0)));
|
||||
} else {
|
||||
broadcast(bias_mat, shape_Y, Y);
|
||||
}
|
||||
} else { // bias from constant
|
||||
const auto *bias = broadcast_bias.ptr<const float>();
|
||||
std::memcpy(y, bias, total(shape_Y) * sizeof(float));
|
||||
}
|
||||
} else {
|
||||
std::memset(y, 0, Y.total() * sizeof(float));
|
||||
}
|
||||
|
||||
if (blobs.empty()) {
|
||||
const auto &B = inputs[1];
|
||||
@ -158,14 +252,36 @@ class MatMulLayerImpl CV_FINAL : public MatMulLayer {
|
||||
inputs_arr.getUMatVector(inputs);
|
||||
outputs_arr.getUMatVector(outputs);
|
||||
|
||||
const auto &input_A = inputs[0];
|
||||
UMat input_B;
|
||||
if (blobs.empty()) {
|
||||
input_B = inputs[1];
|
||||
} else {
|
||||
blobs[0].copyTo(input_B);
|
||||
// does not support bias as input
|
||||
if (inputs.size() >= 3) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const auto &input_A = inputs[0];
|
||||
auto &output = outputs[0];
|
||||
const auto output_shape = shape(output);
|
||||
|
||||
if (blobs.empty()) {
|
||||
weight_umat = inputs[1];
|
||||
if ((inputs.size() + blobs.size() >= 3)) {
|
||||
bias_umat = UMat::zeros(output_shape.size(), output_shape.data(), CV_32F);
|
||||
}
|
||||
} else {
|
||||
if (weight_umat.empty()) {
|
||||
blobs.front().copyTo(weight_umat);
|
||||
}
|
||||
if ((inputs.size() + blobs.size() >= 3)) {
|
||||
if (bias_umat.empty()) {
|
||||
broadcast_bias.copyTo(bias_umat);
|
||||
}
|
||||
} else {
|
||||
if (bias_umat.empty()) {
|
||||
bias_umat = UMat::zeros(output_shape.size(), output_shape.data(), CV_32F);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
auto &input_B = weight_umat;
|
||||
|
||||
int M = static_cast<int>(helper.M),
|
||||
N = static_cast<int>(helper.N),
|
||||
@ -181,7 +297,7 @@ class MatMulLayerImpl CV_FINAL : public MatMulLayer {
|
||||
UMat A, B, C, A_fp32, B_fp32, C_fp32;
|
||||
for (int i = 0; i < batch; i++) {
|
||||
A = input_A_2d.row(helper.A_rows[i]).reshape(1, trans_a ? K : M);
|
||||
B = input_B_2d.row(helper.B_rows[i]).reshape(1, trans_b ? K : N);
|
||||
B = input_B_2d.row(helper.B_rows[i]).reshape(1, trans_b ? N : K);
|
||||
C = output_2d.row(helper.C_rows[i]).reshape(1, M);
|
||||
|
||||
if (trans_a) {
|
||||
@ -200,7 +316,6 @@ class MatMulLayerImpl CV_FINAL : public MatMulLayer {
|
||||
B_fp32 = B;
|
||||
C_fp32 = C;
|
||||
}
|
||||
|
||||
cv::gemm(A_fp32, B_fp32, 1.f, noArray(), 0.f, C_fp32);
|
||||
if (use_half) {
|
||||
A_fp32.convertTo(A, CV_16F);
|
||||
@ -208,6 +323,12 @@ class MatMulLayerImpl CV_FINAL : public MatMulLayer {
|
||||
C_fp32.convertTo(C, CV_16F);
|
||||
}
|
||||
}
|
||||
|
||||
// add bias
|
||||
if (!bias_umat.empty()) {
|
||||
cv::add(output, bias_umat, output);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
#endif // HAVE_OPENCL
|
||||
@ -216,18 +337,28 @@ class MatMulLayerImpl CV_FINAL : public MatMulLayer {
|
||||
virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs,
|
||||
const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE {
|
||||
auto& input_A_node = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
|
||||
std::shared_ptr<ov::Node> matmul;
|
||||
std::shared_ptr<ov::Node> result;
|
||||
ov::Output<ov::Node> bias;
|
||||
|
||||
if (nodes.size() == 2) {
|
||||
if (blobs.empty()) {
|
||||
auto &input_B_node = nodes[1].dynamicCast<InfEngineNgraphNode>()->node;
|
||||
matmul = std::make_shared<ov::op::v0::MatMul>(input_A_node, input_B_node, trans_a, trans_b);
|
||||
result = std::make_shared<ov::op::v0::MatMul>(input_A_node, input_B_node, trans_a, trans_b);
|
||||
if (nodes.size() >= 3) {
|
||||
bias = nodes[2].dynamicCast<InfEngineNgraphNode>()->node;
|
||||
result = std::make_shared<ov::op::v1::Add>(result, bias);
|
||||
}
|
||||
} else {
|
||||
auto input_B_shape = getShape<size_t>(blobs[0]);
|
||||
auto input_B_node = std::make_shared<ov::op::v0::Constant>(ov::element::f32, input_B_shape, blobs[0].data);
|
||||
matmul = std::make_shared<ov::op::v0::MatMul>(input_A_node, input_B_node, trans_a, trans_b);
|
||||
result = std::make_shared<ov::op::v0::MatMul>(input_A_node, input_B_node, trans_a, trans_b);
|
||||
if ((nodes.size() + blobs.size()) >= 3) {
|
||||
const auto bias_shape = shape(broadcast_bias);
|
||||
bias = std::make_shared<ov::op::v0::Constant>(ov::element::f32, std::vector<size_t>(bias_shape.begin(), bias_shape.end()), broadcast_bias.data);
|
||||
result = std::make_shared<ov::op::v1::Add>(result, bias);
|
||||
}
|
||||
}
|
||||
|
||||
return Ptr<BackendNode>(new InfEngineNgraphNode(matmul));
|
||||
return Ptr<BackendNode>(new InfEngineNgraphNode(result));
|
||||
}
|
||||
#endif // HAVE_DNN_NGRAPH
|
||||
|
||||
@ -239,7 +370,7 @@ class MatMulLayerImpl CV_FINAL : public MatMulLayer {
|
||||
|
||||
const auto input_A_shape = shape(*input_A_wrapper->getMat());
|
||||
const auto output_shape = shape(*output_wrapper->getMat());
|
||||
if (output_shape.size() != 2) {
|
||||
if ((inputs.size() + blobs.size()) >= 3 || output_shape.size() != 2) {
|
||||
return Ptr<BackendNode>();
|
||||
}
|
||||
|
||||
@ -259,11 +390,17 @@ class MatMulLayerImpl CV_FINAL : public MatMulLayer {
|
||||
const std::vector<Ptr<BackendWrapper>>& inputs,
|
||||
const std::vector<Ptr<BackendWrapper>>& outputs) override {
|
||||
auto context = reinterpret_cast<csl::CSLContext*>(context_);
|
||||
auto input_B = blobs.empty() ? Mat() : blobs[0];
|
||||
auto input_B = Mat(), bias = Mat();
|
||||
if (!blobs.empty()) {
|
||||
input_B = blobs.front();
|
||||
if (blobs.size() >= 2) {
|
||||
bias = broadcast_bias;
|
||||
}
|
||||
}
|
||||
|
||||
CV_CheckFalse(helper.empty(), "DNN/MatMul/CUDA: MatMulHelper is not initialized");
|
||||
|
||||
return make_cuda_node<cuda4dnn::MatMulBroadcastOp>(preferableTarget, std::move(context->stream), std::move(context->cublas_handle), input_B, trans_a, trans_b, helper.A_offsets, helper.B_offsets, helper.C_offsets, helper.batch);
|
||||
return make_cuda_node<cuda4dnn::MatMulBroadcastOp>(preferableTarget, std::move(context->stream), std::move(context->cublas_handle), input_B, bias, trans_a, trans_b, helper.A_offsets, helper.B_offsets, helper.C_offsets, helper.batch);
|
||||
}
|
||||
#endif // HAVE_CUDA
|
||||
|
||||
@ -275,7 +412,7 @@ class MatMulLayerImpl CV_FINAL : public MatMulLayer {
|
||||
auto input_A_desc = input_A_wrapper->getTensorDesc();
|
||||
auto input_A_node = nodes[0].dynamicCast<CannBackendNode>()->getOp();
|
||||
|
||||
auto op = std::make_shared<ge::op::BatchMatMul>(name);
|
||||
auto op = std::make_shared<ge::op::BatchMatMulV2>(name);
|
||||
|
||||
// set attributes
|
||||
op->set_attr_adj_x1(trans_a);
|
||||
@ -292,11 +429,31 @@ class MatMulLayerImpl CV_FINAL : public MatMulLayer {
|
||||
auto input_B_node = nodes[1].dynamicCast<CannBackendNode>()->getOp();
|
||||
op->set_input_x2_by_name(*input_B_node, "y");
|
||||
op->update_input_desc_x2(*input_B_desc);
|
||||
if (inputs.size() >= 3) {
|
||||
auto input_bias_wrapper = inputs[2].dynamicCast<CannBackendWrapper>();
|
||||
auto input_bias_desc = input_bias_wrapper->getTensorDesc();
|
||||
auto input_bias_node = nodes[2].dynamicCast<CannBackendNode>()->getOp();
|
||||
op->set_input_bias_by_name(*input_bias_node, "y");
|
||||
op->update_input_desc_bias(*input_bias_desc);
|
||||
}
|
||||
} else { // constant input B
|
||||
auto B = blobs[0];
|
||||
auto const_B_node = std::make_shared<CannConstOp>(B.data, B.type(), shape(B), cv::format("%s_B", name.c_str()));
|
||||
op->set_input_x2_by_name(*(const_B_node->getOp()), "y");
|
||||
op->update_input_desc_x2(*(const_B_node->getTensorDesc()));
|
||||
if ((inputs.size() + blobs.size()) >= 3) { // does not support broadcast bias
|
||||
auto bias_mat = blobs.back();
|
||||
auto bias_shape = shape(bias_mat);
|
||||
|
||||
// reshape if 1d
|
||||
if (real_ndims_C == 1 && bias_shape.front() != 1) {
|
||||
bias_shape = std::vector<int>{bias_shape.front()};
|
||||
}
|
||||
|
||||
auto const_bias_node = std::make_shared<CannConstOp>(bias_mat.data, bias_mat.type(), bias_shape, cv::format("%s_bias", name.c_str()));
|
||||
op->set_input_bias_by_name(*(const_bias_node->getOp()), "y");
|
||||
op->update_input_desc_bias(*(const_bias_node->getTensorDesc()));
|
||||
}
|
||||
}
|
||||
|
||||
// set outputs
|
||||
@ -312,7 +469,10 @@ class MatMulLayerImpl CV_FINAL : public MatMulLayer {
|
||||
float alpha;
|
||||
float beta;
|
||||
|
||||
int real_ndims_C;
|
||||
|
||||
std::vector<float> packed_input_B;
|
||||
Mat broadcast_bias;
|
||||
|
||||
FastGemmOpt opt;
|
||||
MatMulHelper helper;
|
||||
|
@ -242,6 +242,115 @@ class AdjustSliceAllOptionalInputsSubgraph : public Subgraph {
|
||||
size_t num_inputs_;
|
||||
};
|
||||
|
||||
/* Fusion for biased MatMul.
|
||||
|
||||
Graph before fusion: [Input] -> MatMul -> Add -> [Output]
|
||||
|
||||
Graph after fusion: [Input] -> MatMul -> [Output]
|
||||
\
|
||||
bias
|
||||
*/
|
||||
|
||||
class BiasedMatmulSubgraph : public Subgraph {
|
||||
public:
|
||||
BiasedMatmulSubgraph() {
|
||||
int input = addNodeToMatch("");
|
||||
matmul_id = addNodeToMatch("MatMul", input, addNodeToMatch(""));
|
||||
add_id = addNodeToMatch("Add", addNodeToMatch(""), matmul_id);
|
||||
|
||||
setFusedNode("MatMul", input);
|
||||
}
|
||||
|
||||
virtual bool match(const Ptr<ImportGraphWrapper>& net, int nodeId,
|
||||
std::vector<int>& matchedNodesIds) CV_OVERRIDE {
|
||||
if (Subgraph::match(net, nodeId, matchedNodesIds)) {
|
||||
auto onnx_net = net.dynamicCast<ONNXGraphWrapper>();
|
||||
|
||||
// get input weight from MatMul
|
||||
{
|
||||
// make sure that input A is not Constant
|
||||
if (onnx_net->getInputInitializerId(matchedNodesIds[matmul_id], 0) >= 0) {
|
||||
return false;
|
||||
} else {
|
||||
const Ptr<ImportNodeWrapper> node = net->getNode(matchedNodesIds[matmul_id]);
|
||||
|
||||
int constant_id = Subgraph::getInputNodeId(net, node, 0);
|
||||
auto constant_node = net->getNode(constant_id);
|
||||
if (constant_node->getType() == "Constant") {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
bool is_weight_const = false;
|
||||
int initializer_id = onnx_net->getInputInitializerId(matchedNodesIds[matmul_id], 1);
|
||||
if (initializer_id != -1) { // Initializer
|
||||
weight_name = onnx_net->getNameOfInitializer(initializer_id);
|
||||
is_weight_const = true;
|
||||
} else { // Constant layer
|
||||
const Ptr<ImportNodeWrapper> node = net->getNode(matchedNodesIds[matmul_id]);
|
||||
|
||||
int constant_id = Subgraph::getInputNodeId(net, node, 1);
|
||||
auto constant_node = net->getNode(constant_id);
|
||||
if (constant_node->getType() == "Constant") {
|
||||
weight_name = node->getInputName(1);
|
||||
is_weight_const = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (!is_weight_const) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// get input bias from Add
|
||||
{
|
||||
bool is_bias_const = false;
|
||||
int initializer_id = std::max(onnx_net->getInputInitializerId(matchedNodesIds[add_id], 0),
|
||||
onnx_net->getInputInitializerId(matchedNodesIds[add_id], 1));
|
||||
if (initializer_id != -1) {
|
||||
bias_name = onnx_net->getNameOfInitializer(initializer_id);
|
||||
is_bias_const = true;
|
||||
} else { // Constant layer
|
||||
const Ptr<ImportNodeWrapper> node = net->getNode(matchedNodesIds[add_id]);
|
||||
|
||||
int constant_id = Subgraph::getInputNodeId(net, node, 0);
|
||||
auto constant_node = net->getNode(constant_id);
|
||||
if (constant_node->getType() == "Constant") {
|
||||
bias_name = node->getInputName(0);
|
||||
is_bias_const = true;
|
||||
} else {
|
||||
constant_id = Subgraph::getInputNodeId(net, node, 1);
|
||||
constant_node = net->getNode(constant_id);
|
||||
if (constant_node->getType() == "Constant") {
|
||||
bias_name = node->getInputName(1);
|
||||
is_bias_const = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!is_bias_const) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
virtual void finalize(const Ptr<ImportGraphWrapper>& net,
|
||||
const Ptr<ImportNodeWrapper>& fusedNode,
|
||||
std::vector<Ptr<ImportNodeWrapper> >&) CV_OVERRIDE {
|
||||
opencv_onnx::NodeProto* node = fusedNode.dynamicCast<ONNXNodeWrapper>()->node;
|
||||
// add inputs
|
||||
node->add_input(weight_name);
|
||||
node->add_input(bias_name);
|
||||
}
|
||||
|
||||
private:
|
||||
int matmul_id, add_id;
|
||||
std::string weight_name, bias_name;
|
||||
};
|
||||
|
||||
/* The fusion for the multi-head attention from vision transformer.
|
||||
|
||||
Abbreviations:
|
||||
@ -322,22 +431,21 @@ class AttentionSubGraph : public Subgraph {
|
||||
AttentionSubGraph() {
|
||||
int input = addNodeToMatch("");
|
||||
int transpose = addNodeToMatch("Transpose", input); // tranpose does not make any differences to the accuracy here in this subgraph
|
||||
att_matmul = addNodeToMatch("MatMul", transpose, addNodeToMatch(""));
|
||||
att_add = addNodeToMatch("Add", addNodeToMatch(""), att_matmul);
|
||||
att_matmul = addNodeToMatch("MatMul", transpose, addNodeToMatch(""), addNodeToMatch("")); // add is fused into matmul via BiasedMatMulSubgraph
|
||||
|
||||
// v_path
|
||||
slice_v = addNodeToMatch("Slice", std::vector<int>{att_add, addNodeToMatch(""), addNodeToMatch(""), addNodeToMatch(""), addNodeToMatch("")});
|
||||
slice_v = addNodeToMatch("Slice", std::vector<int>{att_matmul, addNodeToMatch(""), addNodeToMatch(""), addNodeToMatch(""), addNodeToMatch("")});
|
||||
int reshape_v = addNodeToMatch("Reshape", slice_v, addNodeToMatch(""));
|
||||
int transpose_v = addNodeToMatch("Transpose", reshape_v);
|
||||
|
||||
// q_path
|
||||
slice_q = addNodeToMatch("Slice", std::vector<int>{att_add, addNodeToMatch(""), addNodeToMatch(""), addNodeToMatch(""), addNodeToMatch("")});
|
||||
slice_q = addNodeToMatch("Slice", std::vector<int>{att_matmul, addNodeToMatch(""), addNodeToMatch(""), addNodeToMatch(""), addNodeToMatch("")});
|
||||
reshape_q = addNodeToMatch("Reshape", slice_q, addNodeToMatch(""));
|
||||
int transpose_q = addNodeToMatch("Transpose", reshape_q);
|
||||
div_q = addNodeToMatch("Div", transpose_q, addNodeToMatch(""));
|
||||
|
||||
// k_path
|
||||
slice_k = addNodeToMatch("Slice", std::vector<int>{att_add, addNodeToMatch(""), addNodeToMatch(""), addNodeToMatch(""), addNodeToMatch("")});
|
||||
slice_k = addNodeToMatch("Slice", std::vector<int>{att_matmul, addNodeToMatch(""), addNodeToMatch(""), addNodeToMatch(""), addNodeToMatch("")});
|
||||
int reshape_k = addNodeToMatch("Reshape", slice_k, addNodeToMatch(""));
|
||||
int transpose_k = addNodeToMatch("Transpose", reshape_k);
|
||||
|
||||
@ -380,7 +488,7 @@ class AttentionSubGraph : public Subgraph {
|
||||
|
||||
// get names
|
||||
weight_name = getInputName(net, matchedNodesIds[att_matmul], 1);
|
||||
bias_name = getInputName(net, matchedNodesIds[att_add], 0);
|
||||
bias_name = getInputName(net, matchedNodesIds[att_matmul], 2);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
@ -414,7 +522,7 @@ class AttentionSubGraph : public Subgraph {
|
||||
}
|
||||
|
||||
private:
|
||||
int att_matmul, att_add;
|
||||
int att_matmul;
|
||||
int slice_q, slice_k, slice_v;
|
||||
int reshape_q, div_q, last_reshape;
|
||||
|
||||
@ -436,20 +544,19 @@ class AttentionSingleHeadSubGraph : public Subgraph {
|
||||
AttentionSingleHeadSubGraph() {
|
||||
int input = addNodeToMatch("");
|
||||
int transpose = addNodeToMatch("Transpose", input); // tranpose does not make any differences to the accuracy here in this subgraph
|
||||
att_matmul = addNodeToMatch("MatMul", transpose, addNodeToMatch(""));
|
||||
att_add = addNodeToMatch("Add", addNodeToMatch(""), att_matmul);
|
||||
att_matmul = addNodeToMatch("MatMul", transpose, addNodeToMatch(""), addNodeToMatch("")); // add is fused into matmul via BiasedMatMulSubgraph
|
||||
|
||||
// v_path
|
||||
slice_v = addNodeToMatch("Slice", std::vector<int>{att_add, addNodeToMatch(""), addNodeToMatch(""), addNodeToMatch(""), addNodeToMatch("")});
|
||||
slice_v = addNodeToMatch("Slice", std::vector<int>{att_matmul, addNodeToMatch(""), addNodeToMatch(""), addNodeToMatch(""), addNodeToMatch("")});
|
||||
int transpose_v = addNodeToMatch("Transpose", slice_v);
|
||||
|
||||
// q_path
|
||||
slice_q = addNodeToMatch("Slice", std::vector<int>{att_add, addNodeToMatch(""), addNodeToMatch(""), addNodeToMatch(""), addNodeToMatch("")});
|
||||
slice_q = addNodeToMatch("Slice", std::vector<int>{att_matmul, addNodeToMatch(""), addNodeToMatch(""), addNodeToMatch(""), addNodeToMatch("")});
|
||||
int transpose_q = addNodeToMatch("Transpose", slice_q);
|
||||
div_q = addNodeToMatch("Div", transpose_q, addNodeToMatch(""));
|
||||
|
||||
// k_path
|
||||
slice_k = addNodeToMatch("Slice", std::vector<int>{att_add, addNodeToMatch(""), addNodeToMatch(""), addNodeToMatch(""), addNodeToMatch("")});
|
||||
slice_k = addNodeToMatch("Slice", std::vector<int>{att_matmul, addNodeToMatch(""), addNodeToMatch(""), addNodeToMatch(""), addNodeToMatch("")});
|
||||
int transpose_k = addNodeToMatch("Transpose", slice_k);
|
||||
|
||||
// qk
|
||||
@ -491,7 +598,7 @@ class AttentionSingleHeadSubGraph : public Subgraph {
|
||||
|
||||
// get names
|
||||
weight_name = getInputName(net, matchedNodesIds[att_matmul], 1);
|
||||
bias_name = getInputName(net, matchedNodesIds[att_add], 0);
|
||||
bias_name = getInputName(net, matchedNodesIds[att_matmul], 2);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
@ -525,7 +632,7 @@ class AttentionSingleHeadSubGraph : public Subgraph {
|
||||
}
|
||||
|
||||
protected:
|
||||
int att_matmul, att_add;
|
||||
int att_matmul;
|
||||
int slice_q, slice_k, slice_v;
|
||||
int div_q, last_reshape;
|
||||
|
||||
@ -1558,6 +1665,7 @@ public:
|
||||
void simplifySubgraphs(opencv_onnx::GraphProto& net)
|
||||
{
|
||||
std::vector<Ptr<Subgraph> > subgraphs;
|
||||
subgraphs.push_back(makePtr<BiasedMatmulSubgraph>());
|
||||
subgraphs.push_back(makePtr<AdjustSliceAllOptionalInputsSubgraph>(3));
|
||||
subgraphs.push_back(makePtr<AdjustSliceAllOptionalInputsSubgraph>(4));
|
||||
subgraphs.push_back(makePtr<GeluSubGraph>());
|
||||
|
@ -1961,7 +1961,8 @@ void ONNXImporter::parseGemm(LayerParams& layerParams, const opencv_onnx::NodePr
|
||||
|
||||
void ONNXImporter::parseMatMul(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_) {
|
||||
auto node_proto = node_proto_;
|
||||
CV_CheckEQ(node_proto.input_size(), 2, "ONNXImporter/MatMul: two inputs required");
|
||||
CV_CheckGE(node_proto.input_size(), 2, "ONNXImporter/MatMul: two inputs required at least");
|
||||
CV_CheckLE(node_proto.input_size(), 3, "ONNXImporter/MatMul: three inputs required at most");
|
||||
|
||||
for (int i = 0; i < node_proto.input_size(); i++) {
|
||||
if (constBlobs.find(node_proto.input(i)) == constBlobs.end()) {
|
||||
@ -1970,9 +1971,7 @@ void ONNXImporter::parseMatMul(LayerParams& layerParams, const opencv_onnx::Node
|
||||
|
||||
Mat blob = getBlob(node_proto, i);
|
||||
|
||||
if (i == 1) {
|
||||
layerParams.blobs.push_back(blob);
|
||||
} else {
|
||||
if (i == 0) {
|
||||
LayerParams const_params;
|
||||
const_params.name = node_proto.input(i);
|
||||
const_params.type = "Const";
|
||||
@ -1983,6 +1982,12 @@ void ONNXImporter::parseMatMul(LayerParams& layerParams, const opencv_onnx::Node
|
||||
addLayer(const_params, const_node_proto);
|
||||
|
||||
node_proto.set_input(i, const_params.name);
|
||||
} else {
|
||||
layerParams.blobs.push_back(blob);
|
||||
}
|
||||
|
||||
if (i == 2 && constBlobsExtraInfo.find(node_proto.input(2)) != constBlobsExtraInfo.end()) {
|
||||
layerParams.set("real_ndims_C", getBlobExtraInfo(node_proto, 2).real_ndims);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -143,4 +143,11 @@ TEST_F(Test_Graph_Simplifier, AttentionSubgraph) {
|
||||
test("attention_single_head", "Attention");
|
||||
}
|
||||
|
||||
// Graph-simplifier check for the "biased_matmul" model; "MatMul" is the
// second argument handed to the fixture's test() helper (presumably the
// expected layer type after fusion -- confirm against the helper).
TEST_F(Test_Graph_Simplifier, BiasedMatMulSubgraph) {
|
||||
/* Test for 1 subgraph
|
||||
- BiasedMatmulSubgraph
|
||||
*/
|
||||
test("biased_matmul", "MatMul");
|
||||
}
|
||||
|
||||
}}
|
||||
|
@ -3090,6 +3090,12 @@ TEST_P(Test_ONNX_layers, LayerNormNoFusion) {
|
||||
testONNXModels("layer_norm_no_fusion");
|
||||
}
|
||||
|
||||
// Passes the "biased_matmul" model to testONNXModels. The Inference Engine
// nGraph backend on the OpenCL target gets explicit thresholds
// (l1 = 0.0018, lInf = 0.011); every other backend/target pair uses the
// fixture defaults.
TEST_P(Test_ONNX_layers, MatMulAddFusion) {
|
||||
double l1 = (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL) ? 0.0018 : default_l1;
|
||||
double lInf = (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL) ? 0.011 : default_lInf;
|
||||
testONNXModels("biased_matmul", npy, l1, lInf);
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(/**/, Test_ONNX_nets, dnnBackendsAndTargets());
|
||||
|
||||
}} // namespace
|
||||
|
Loading…
Reference in New Issue
Block a user