Merge pull request #25271 from fengyuentau:matmul_bias
Merge with https://github.com/opencv/opencv_extra/pull/1158.

Todo:

- [x] Fix Attention pattern recognition.
- [x] Handle other backends.

Benchmark: "VIT_B_32 OCV/CPU", M1, results in milliseconds.

| Model | 4.x | This PR |
| - | - | - |
| VIT_B_32 OCV/CPU | 87.66 | **83.83** |

### Pull Request Readiness Checklist

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [x] I agree to contribute to the project under Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on code under GPL or another license that is incompatible with OpenCV.
- [x] The PR is proposed to the proper branch.
- [x] There is a reference to the original bug report and related work.
- [x] There is an accuracy test, performance test and test data in the opencv_extra repository, if applicable. The patch to opencv_extra has the same branch name.
- [x] The feature is well documented and sample code can be built with the project CMake.
This commit is contained in:
parent 9716bf95ae
commit b758897c29
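For readers skimming the diff below: the fused layer computes numpy-style `Y = matmul(A, B) + beta * C`, with the bias `C` broadcast against the matmul output. A minimal reference sketch of that semantics, simplified to a 2-D output with a 1-D bias (illustrative code only, not the layer's actual implementation):

```cpp
#include <cstddef>
#include <vector>

// Reference semantics only: Y[m][n] = sum_k A[m][k] * B[k][n] + beta * C[n].
// The 1-D bias C of length N is broadcast across the M output rows, which is
// what the fused MatMul layer does after its GEMM step.
std::vector<float> biasedMatMul(const std::vector<float>& A, // M x K, row-major
                                const std::vector<float>& B, // K x N, row-major
                                const std::vector<float>& C, // N
                                size_t M, size_t K, size_t N, float beta) {
    std::vector<float> Y(M * N, 0.f);
    for (size_t m = 0; m < M; m++)
        for (size_t k = 0; k < K; k++)
            for (size_t n = 0; n < N; n++)
                Y[m * N + n] += A[m * K + k] * B[k * N + n];
    for (size_t m = 0; m < M; m++)
        for (size_t n = 0; n < N; n++)
            Y[m * N + n] += beta * C[n];
    return Y;
}
```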
```diff
@@ -12,6 +12,8 @@
 #include "../csl/tensor.hpp"
 #include "../csl/tensor_ops.hpp"
 
+#include "../kernels/eltwise_ops.hpp" // for adding bias
+
 #include <opencv2/core.hpp>
 
 #include <utility>
@@ -23,7 +25,7 @@ namespace cv { namespace dnn { namespace cuda4dnn {
     public:
         using wrapper_type = GetCUDABackendWrapperType<T>;
 
-        MatMulBroadcastOp(csl::Stream stream_, csl::cublas::Handle handle, const Mat &B, bool _transA, bool _transB,
+        MatMulBroadcastOp(csl::Stream stream_, csl::cublas::Handle handle, const Mat &B, const Mat &bias, bool _transA, bool _transB,
                           const std::vector<size_t> &A_offsets_, const std::vector<size_t> &B_offsets_, std::vector<size_t> &C_offsets_,
                           size_t batch_)
             : stream(std::move(stream_)), cublasHandle(std::move(handle)), A_offsets(A_offsets_), B_offsets(B_offsets_), C_offsets(C_offsets_), batch(batch_)
@@ -33,6 +35,11 @@ namespace cv { namespace dnn { namespace cuda4dnn {
                 csl::copyMatToTensor<T>(B, input_B_tensor, stream);
             }
 
+            if (!bias.empty()) {
+                bias_tensor = csl::makeTensorHeader<T>(bias);
+                csl::copyMatToTensor<T>(bias, bias_tensor, stream);
+            }
+
             transA = _transA;
             transB = _transB;
         }
@@ -42,9 +49,6 @@ namespace cv { namespace dnn { namespace cuda4dnn {
                      const std::vector<cv::Ptr<BackendWrapper>>& outputs,
                      csl::Workspace& workspace) override
         {
-            CV_Assert(((inputs.size() == 2 && input_B_tensor.empty()) ||
-                       (inputs.size() == 1 && !input_B_tensor.empty())) && outputs.size() == 1);
-
             auto input_A_wrapper = inputs[0].dynamicCast<wrapper_type>();
             auto input_A = input_A_wrapper->getView();
 
@@ -60,12 +64,26 @@ namespace cv { namespace dnn { namespace cuda4dnn {
             auto output = output_wrapper->getSpan();
 
             csl::tensor_ops::gemmBatched<T>(cublasHandle, batch, 0.f, output, C_offsets, 1.f, transA, input_A, A_offsets, transB, input_B, B_offsets);
+
+            // add bias if exists
+            if (!bias_tensor.empty() || inputs.size() >= 3) {
+                csl::TensorView<T> bias;
+                if (bias_tensor.empty()) {
+                    auto bias_wrapper = inputs[2].dynamicCast<wrapper_type>();
+                    bias = bias_wrapper->getView();
+                } else {
+                    bias = csl::TensorView<T>(bias_tensor);
+                }
+
+                kernels::eltwise_sum_2<T>(stream, output, output, bias);
+            }
         }
 
     private:
         csl::Stream stream;
         csl::cublas::Handle cublasHandle;
         csl::Tensor<T> input_B_tensor;
+        csl::Tensor<T> bias_tensor;
        bool transA, transB;
 
        std::vector<size_t> A_offsets;
```
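The CUDA path above leaves the batched GEMM untouched and applies the bias as a separate broadcast elementwise pass (`kernels::eltwise_sum_2`). A rough CPU analogue of that final step, assuming the bias broadcasts over the leading axes of the flattened output (hypothetical helper, not the CUDA kernel itself):

```cpp
#include <cstddef>
#include <vector>

// CPU analogue of the post-GEMM step output = output + bias:
// bias repeats every bias.size() elements, i.e. it broadcasts
// over the leading (batch/row) axes of the flattened output.
void broadcastAddInPlace(std::vector<float>& output, const std::vector<float>& bias) {
    const size_t n = bias.size();
    for (size_t i = 0; i < output.size(); i++)
        output[i] += bias[i % n];
}
```

Keeping the bias out of the GEMM call and treating it as a follow-up elementwise op mirrors the structure of the diff above: the cuBLAS path stays unchanged and the new behavior is purely additive.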
```diff
@@ -26,6 +26,10 @@ using namespace cv::dnn::cuda4dnn;
 namespace cv { namespace dnn {
 
 class MatMulLayerImpl CV_FINAL : public MatMulLayer {
+#ifdef HAVE_OPENCL
+    UMat weight_umat, bias_umat;
+#endif
+
  public:
     MatMulLayerImpl(const LayerParams& params) {
         setParamsFrom(params);
@@ -34,6 +38,8 @@ class MatMulLayerImpl CV_FINAL : public MatMulLayer {
         trans_b = params.get<bool>("transB", false);
         alpha = params.get<float>("alpha", 1.f);
         beta = params.get<float>("beta", 1.f);
+
+        real_ndims_C = params.get<int>("real_ndims_C", -1);
     }
 
     virtual bool supportBackend(int backendId) CV_OVERRIDE {
@@ -48,8 +54,9 @@ class MatMulLayerImpl CV_FINAL : public MatMulLayer {
                                  const int requiredOutputs,
                                  std::vector<MatShape> &outputs,
                                  std::vector<MatShape> &internals) const CV_OVERRIDE {
-        CV_CheckGE(inputs.size(), static_cast<size_t>(1), "DNN/MatMul: one varible input at least");
-        CV_CheckLE(inputs.size(), static_cast<size_t>(2), "DNN/MatMul: two variable inputs at most");
+        int num_inputs = inputs.size() + blobs.size();
+        CV_CheckGE(num_inputs, 2, "DNN/MatMul: two inputs at least");
+        CV_CheckLE(num_inputs, 3, "DNN/MatMul: three inputs at most");
 
         const auto shape_A = inputs[0], shape_B = blobs.empty() ? inputs[1] : shape(blobs[0]);
         CV_CheckGE(shape_A.size(), static_cast<size_t>(2), "DNN/MatMul: invalid shape of input A");
@@ -64,7 +71,7 @@ class MatMulLayerImpl CV_FINAL : public MatMulLayer {
         int K_B = trans_b ? nB : mB;
         CV_CheckEQ(K_A, K_B, "DNN/MatMul: invalid dimension K");
 
-        // Check legal broadcast. It is legal for sure if A and B are 2d, or one of them is 2d.
+        // Check if inputs are broadcastable.
         MatShape common_shape;
         if (shape_A.size() != 2 || shape_B.size() != 2) {
             const auto &shape_more_dims = shape_A.size() > shape_B.size() ? shape_A : shape_B;
@@ -89,6 +96,24 @@ class MatMulLayerImpl CV_FINAL : public MatMulLayer {
             common_shape[1] = N;
         }
 
+        // Check if bias is broadcastable
+        if (num_inputs == 3) {
+            const auto shape_C = blobs.empty() ? inputs.back() : shape(blobs.back());
+            if (real_ndims_C == 1) { // (1) or (N)
+                CV_Check(shape_C[0], shape_C[0] == 1 || shape_C[0] == N, "DNN/MatMul: invalid dimension of C");
+            } else if (real_ndims_C >= 2) {
+                const auto &shape_large = common_shape.size() > shape_C.size() ? common_shape : shape_C;
+                const auto &shape_small = common_shape.size() > shape_C.size() ? shape_C : common_shape;
+                size_t diff_dims = shape_large.size() - shape_small.size();
+                for (size_t i = 0; i < shape_small.size(); i++) {
+                    const auto dl = shape_small[i], dm = shape_large[i + diff_dims];
+                    if (dl != 1 && dm != 1 && dl != dm) {
+                        CV_Error(Error::StsBadSize, "DNN/MatMul: invalid shape of C");
+                    }
+                }
+            }
+        }
+
         outputs.assign(1, common_shape);
         return false;
     }
@@ -109,6 +134,44 @@ class MatMulLayerImpl CV_FINAL : public MatMulLayer {
             fastGemmPackB(blobs[0], packed_input_B, trans_b, opt);
             helper.updatePackedBOffsets(packed_input_B.size());
         }
+
+        // broadcast bias if needed
+        if ((inputs.size() + blobs.size()) >= 3 && blobs.size() >= 2) {
+            const auto bias_mat = blobs.back();
+            const auto bias_shape = shape(bias_mat);
+            bool is_broadcast_needed = real_ndims_C == 0 || real_ndims_C == 1 || (total(bias_shape) != total(C_shape) || bias_shape.size() != C_shape.size());
+
+            if (is_broadcast_needed) {
+                broadcast_bias = Mat(C_shape, CV_32F);
+                auto *broadcast_bias_ptr = broadcast_bias.ptr<float>();
+
+                const auto *bias = bias_mat.ptr<const float>();
+                if (bias_mat.total() == 1) { // [], [1], [1, ...]
+                    float b = (*bias) * beta;
+                    for (size_t i = 0; i < broadcast_bias.total(); i++) {
+                        broadcast_bias_ptr[i] = b;
+                    }
+                } else if (real_ndims_C == 1) { // [n]
+                    size_t inner_size = C_shape.back(),
+                           loops = total(C_shape) / inner_size;
+                    for (size_t i = 0; i < loops; i++) {
+                        size_t step = i * inner_size;
+                        for (size_t j = 0; j < inner_size; j++) {
+                            broadcast_bias_ptr[step + j] = beta * bias[j];
+                        }
+                    }
+                } else {
+                    broadcast(bias_mat, C_shape, broadcast_bias);
+                }
+            } else {
+                broadcast_bias = blobs.back();
+            }
+        }
+
+#ifdef HAVE_OPENCL
+        weight_umat.release();
+        bias_umat.release();
+#endif
     }
 
     // works like Y = numpy.matmul(A, B)
@@ -134,7 +197,38 @@ class MatMulLayerImpl CV_FINAL : public MatMulLayer {
 
         const auto *a = A.ptr<const float>();
         auto *y = Y.ptr<float>();
-        std::memset(y, 0, Y.total() * sizeof(float));
+        // add bias if existed
+        if ((inputs.size() + blobs.size()) >= 3) {
+            const auto &shape_Y = shape(Y);
+            if (blobs.empty()) { // bias from input
+                const auto &bias_mat = inputs.back();
+                const auto *bias = bias_mat.ptr<const float>();
+                if (bias_mat.total() == 1) { // [], [1], [1, ...]
+                    float b = (*bias) * beta;
+                    for (size_t i = 0; i < Y.total(); i++) {
+                        y[i] = b;
+                    }
+                } else if (real_ndims_C == 1) { // [n]
+                    const size_t inner_size = shape_Y.back(),
+                                 batches = total(Y) / inner_size;
+                    parallel_for_(Range(0, batches), [&] (const Range &r) {
+                        for (int i = r.start; i < r.end; i++) {
+                            const size_t output_offset = i * inner_size;
+                            for (size_t j = 0; j < inner_size; j++) {
+                                y[output_offset + j] = beta * bias[j];
+                            }
+                        }
+                    }, double(batches * inner_size * (1 / 1024.0)));
+                } else {
+                    broadcast(bias_mat, shape_Y, Y);
+                }
+            } else { // bias from constant
+                const auto *bias = broadcast_bias.ptr<const float>();
+                std::memcpy(y, bias, total(shape_Y) * sizeof(float));
+            }
+        } else {
+            std::memset(y, 0, Y.total() * sizeof(float));
+        }
 
         if (blobs.empty()) {
             const auto &B = inputs[1];
@@ -158,14 +252,36 @@ class MatMulLayerImpl CV_FINAL : public MatMulLayer {
         inputs_arr.getUMatVector(inputs);
         outputs_arr.getUMatVector(outputs);
 
-        const auto &input_A = inputs[0];
-        UMat input_B;
-        if (blobs.empty()) {
-            input_B = inputs[1];
-        } else {
-            blobs[0].copyTo(input_B);
+        // does not support bias as input
+        if (inputs.size() >= 3) {
+            return false;
         }
 
+        const auto &input_A = inputs[0];
         auto &output = outputs[0];
+        const auto output_shape = shape(output);
+
+        if (blobs.empty()) {
+            weight_umat = inputs[1];
+            if ((inputs.size() + blobs.size() >= 3)) {
+                bias_umat = UMat::zeros(output_shape.size(), output_shape.data(), CV_32F);
+            }
+        } else {
+            if (weight_umat.empty()) {
+                blobs.front().copyTo(weight_umat);
+            }
+            if ((inputs.size() + blobs.size() >= 3)) {
+                if (bias_umat.empty()) {
+                    broadcast_bias.copyTo(bias_umat);
+                }
+            } else {
+                if (bias_umat.empty()) {
+                    bias_umat = UMat::zeros(output_shape.size(), output_shape.data(), CV_32F);
+                }
+            }
+        }
+
+        auto &input_B = weight_umat;
+
         int M = static_cast<int>(helper.M),
             N = static_cast<int>(helper.N),
@@ -181,7 +297,7 @@ class MatMulLayerImpl CV_FINAL : public MatMulLayer {
         UMat A, B, C, A_fp32, B_fp32, C_fp32;
         for (int i = 0; i < batch; i++) {
             A = input_A_2d.row(helper.A_rows[i]).reshape(1, trans_a ? K : M);
-            B = input_B_2d.row(helper.B_rows[i]).reshape(1, trans_b ? K : N);
+            B = input_B_2d.row(helper.B_rows[i]).reshape(1, trans_b ? N : K);
             C = output_2d.row(helper.C_rows[i]).reshape(1, M);
 
             if (trans_a) {
@@ -200,7 +316,6 @@ class MatMulLayerImpl CV_FINAL : public MatMulLayer {
                 B_fp32 = B;
                 C_fp32 = C;
             }
-
             cv::gemm(A_fp32, B_fp32, 1.f, noArray(), 0.f, C_fp32);
             if (use_half) {
                 A_fp32.convertTo(A, CV_16F);
@@ -208,6 +323,12 @@ class MatMulLayerImpl CV_FINAL : public MatMulLayer {
                 C_fp32.convertTo(C, CV_16F);
             }
         }
+
+        // add bias
+        if (!bias_umat.empty()) {
+            cv::add(output, bias_umat, output);
+        }
+
         return true;
     }
 #endif // HAVE_OPENCL
@@ -216,18 +337,28 @@ class MatMulLayerImpl CV_FINAL : public MatMulLayer {
     virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs,
                                         const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE {
         auto& input_A_node = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
-        std::shared_ptr<ov::Node> matmul;
+        std::shared_ptr<ov::Node> result;
+        ov::Output<ov::Node> bias;
 
-        if (nodes.size() == 2) {
+        if (blobs.empty()) {
             auto &input_B_node = nodes[1].dynamicCast<InfEngineNgraphNode>()->node;
-            matmul = std::make_shared<ov::op::v0::MatMul>(input_A_node, input_B_node, trans_a, trans_b);
+            result = std::make_shared<ov::op::v0::MatMul>(input_A_node, input_B_node, trans_a, trans_b);
+            if (nodes.size() >= 3) {
+                bias = nodes[2].dynamicCast<InfEngineNgraphNode>()->node;
+                result = std::make_shared<ov::op::v1::Add>(result, bias);
+            }
         } else {
             auto input_B_shape = getShape<size_t>(blobs[0]);
             auto input_B_node = std::make_shared<ov::op::v0::Constant>(ov::element::f32, input_B_shape, blobs[0].data);
-            matmul = std::make_shared<ov::op::v0::MatMul>(input_A_node, input_B_node, trans_a, trans_b);
+            result = std::make_shared<ov::op::v0::MatMul>(input_A_node, input_B_node, trans_a, trans_b);
+            if ((nodes.size() + blobs.size()) >= 3) {
+                const auto bias_shape = shape(broadcast_bias);
+                bias = std::make_shared<ov::op::v0::Constant>(ov::element::f32, std::vector<size_t>(bias_shape.begin(), bias_shape.end()), broadcast_bias.data);
+                result = std::make_shared<ov::op::v1::Add>(result, bias);
+            }
         }
 
-        return Ptr<BackendNode>(new InfEngineNgraphNode(matmul));
+        return Ptr<BackendNode>(new InfEngineNgraphNode(result));
     }
 #endif // HAVE_DNN_NGRAPH
 
@@ -239,7 +370,7 @@ class MatMulLayerImpl CV_FINAL : public MatMulLayer {
 
         const auto input_A_shape = shape(*input_A_wrapper->getMat());
         const auto output_shape = shape(*output_wrapper->getMat());
-        if (output_shape.size() != 2) {
+        if ((inputs.size() + blobs.size()) >= 3 || output_shape.size() != 2) {
             return Ptr<BackendNode>();
         }
 
@@ -259,11 +390,17 @@ class MatMulLayerImpl CV_FINAL : public MatMulLayer {
                              const std::vector<Ptr<BackendWrapper>>& inputs,
                              const std::vector<Ptr<BackendWrapper>>& outputs) override {
         auto context = reinterpret_cast<csl::CSLContext*>(context_);
-        auto input_B = blobs.empty() ? Mat() : blobs[0];
+        auto input_B = Mat(), bias = Mat();
+        if (!blobs.empty()) {
+            input_B = blobs.front();
+            if (blobs.size() >= 2) {
+                bias = broadcast_bias;
+            }
+        }
 
         CV_CheckFalse(helper.empty(), "DNN/MatMul/CUDA: MatMulHelper is not initialized");
 
-        return make_cuda_node<cuda4dnn::MatMulBroadcastOp>(preferableTarget, std::move(context->stream), std::move(context->cublas_handle), input_B, trans_a, trans_b, helper.A_offsets, helper.B_offsets, helper.C_offsets, helper.batch);
+        return make_cuda_node<cuda4dnn::MatMulBroadcastOp>(preferableTarget, std::move(context->stream), std::move(context->cublas_handle), input_B, bias, trans_a, trans_b, helper.A_offsets, helper.B_offsets, helper.C_offsets, helper.batch);
     }
 #endif // HAVE_CUDA
 
@@ -275,7 +412,7 @@ class MatMulLayerImpl CV_FINAL : public MatMulLayer {
         auto input_A_desc = input_A_wrapper->getTensorDesc();
         auto input_A_node = nodes[0].dynamicCast<CannBackendNode>()->getOp();
 
-        auto op = std::make_shared<ge::op::BatchMatMul>(name);
+        auto op = std::make_shared<ge::op::BatchMatMulV2>(name);
 
         // set attributes
         op->set_attr_adj_x1(trans_a);
@@ -292,11 +429,31 @@ class MatMulLayerImpl CV_FINAL : public MatMulLayer {
             auto input_B_node = nodes[1].dynamicCast<CannBackendNode>()->getOp();
             op->set_input_x2_by_name(*input_B_node, "y");
             op->update_input_desc_x2(*input_B_desc);
+            if (inputs.size() >= 3) {
+                auto input_bias_wrapper = inputs[2].dynamicCast<CannBackendWrapper>();
+                auto input_bias_desc = input_bias_wrapper->getTensorDesc();
+                auto input_bias_node = nodes[2].dynamicCast<CannBackendNode>()->getOp();
+                op->set_input_bias_by_name(*input_bias_node, "y");
+                op->update_input_desc_bias(*input_bias_desc);
+            }
         } else { // constant input B
             auto B = blobs[0];
             auto const_B_node = std::make_shared<CannConstOp>(B.data, B.type(), shape(B), cv::format("%s_B", name.c_str()));
             op->set_input_x2_by_name(*(const_B_node->getOp()), "y");
             op->update_input_desc_x2(*(const_B_node->getTensorDesc()));
+            if ((inputs.size() + blobs.size()) >= 3) { // does not support broadcast bias
+                auto bias_mat = blobs.back();
+                auto bias_shape = shape(bias_mat);
+
+                // reshape if 1d
+                if (real_ndims_C == 1 && bias_shape.front() != 1) {
+                    bias_shape = std::vector<int>{bias_shape.front()};
+                }
+
+                auto const_bias_node = std::make_shared<CannConstOp>(bias_mat.data, bias_mat.type(), bias_shape, cv::format("%s_bias", name.c_str()));
+                op->set_input_bias_by_name(*(const_bias_node->getOp()), "y");
+                op->update_input_desc_bias(*(const_bias_node->getTensorDesc()));
+            }
         }
 
         // set outputs
@@ -312,7 +469,10 @@ class MatMulLayerImpl CV_FINAL : public MatMulLayer {
     float alpha;
     float beta;
 
+    int real_ndims_C;
+
     std::vector<float> packed_input_B;
+    Mat broadcast_bias;
 
     FastGemmOpt opt;
     MatMulHelper helper;
```
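One detail worth calling out in the layer changes above: when the bias is a constant blob, `finalize` expands it once into `broadcast_bias` with `beta` already folded in, so the per-inference CPU path can seed the output with a single `memcpy` instead of a `memset` followed by a broadcast add. A condensed sketch of that expansion for the common 1-D case (a hypothetical free function, CV_32F only; the real logic lives in the member function above):

```cpp
#include <opencv2/core.hpp>
#include <vector>

// Expand a 1-D bias of length n into an output-shaped CV_32F Mat with beta
// folded in, so the forward pass can start from a plain memcpy.
// Condensed from the finalize() logic in the diff; illustrative only.
cv::Mat expandBias1D(const cv::Mat& bias, const std::vector<int>& out_shape, float beta) {
    cv::Mat expanded(out_shape, CV_32F);
    float* dst = expanded.ptr<float>();
    const float* src = bias.ptr<float>();
    const size_t inner = out_shape.back();          // bias length n
    const size_t loops = expanded.total() / inner;  // product of leading dims
    for (size_t i = 0; i < loops; i++)
        for (size_t j = 0; j < inner; j++)
            dst[i * inner + j] = beta * src[j];
    return expanded;
}
```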
```diff
@@ -242,6 +242,115 @@ class AdjustSliceAllOptionalInputsSubgraph : public Subgraph {
     size_t num_inputs_;
 };
 
+/* Fusion for biased MatMul.
+
+   Graph before fusion: [Input] -> MatMul -> Add -> [Output]
+
+   Graph after fusion:  [Input] -> MatMul -> [Output]
+                                      \
+                                      bias
+*/
+class BiasedMatmulSubgraph : public Subgraph {
+ public:
+    BiasedMatmulSubgraph() {
+        int input = addNodeToMatch("");
+        matmul_id = addNodeToMatch("MatMul", input, addNodeToMatch(""));
+        add_id = addNodeToMatch("Add", addNodeToMatch(""), matmul_id);
+
+        setFusedNode("MatMul", input);
+    }
+
+    virtual bool match(const Ptr<ImportGraphWrapper>& net, int nodeId,
+                       std::vector<int>& matchedNodesIds) CV_OVERRIDE {
+        if (Subgraph::match(net, nodeId, matchedNodesIds)) {
+            auto onnx_net = net.dynamicCast<ONNXGraphWrapper>();
+
+            // get input weight from MatMul
+            {
+                // make sure that input A is not Constant
+                if (onnx_net->getInputInitializerId(matchedNodesIds[matmul_id], 0) >= 0) {
+                    return false;
+                } else {
+                    const Ptr<ImportNodeWrapper> node = net->getNode(matchedNodesIds[matmul_id]);
+
+                    int constant_id = Subgraph::getInputNodeId(net, node, 0);
+                    auto constant_node = net->getNode(constant_id);
+                    if (constant_node->getType() == "Constant") {
+                        return false;
+                    }
+                }
+
+                bool is_weight_const = false;
+                int initializer_id = onnx_net->getInputInitializerId(matchedNodesIds[matmul_id], 1);
+                if (initializer_id != -1) { // Initializer
+                    weight_name = onnx_net->getNameOfInitializer(initializer_id);
+                    is_weight_const = true;
+                } else { // Constant layer
+                    const Ptr<ImportNodeWrapper> node = net->getNode(matchedNodesIds[matmul_id]);
+
+                    int constant_id = Subgraph::getInputNodeId(net, node, 1);
+                    auto constant_node = net->getNode(constant_id);
+                    if (constant_node->getType() == "Constant") {
+                        weight_name = node->getInputName(1);
+                        is_weight_const = true;
+                    }
+                }
+
+                if (!is_weight_const) {
+                    return false;
+                }
+            }
+
+            // get input bias from Add
+            {
+                bool is_bias_const = false;
+                int initializer_id = std::max(onnx_net->getInputInitializerId(matchedNodesIds[add_id], 0),
+                                              onnx_net->getInputInitializerId(matchedNodesIds[add_id], 1));
+                if (initializer_id != -1) {
+                    bias_name = onnx_net->getNameOfInitializer(initializer_id);
+                    is_bias_const = true;
+                } else { // Constant layer
+                    const Ptr<ImportNodeWrapper> node = net->getNode(matchedNodesIds[add_id]);
+
+                    int constant_id = Subgraph::getInputNodeId(net, node, 0);
+                    auto constant_node = net->getNode(constant_id);
+                    if (constant_node->getType() == "Constant") {
+                        bias_name = node->getInputName(0);
+                        is_bias_const = true;
+                    } else {
+                        constant_id = Subgraph::getInputNodeId(net, node, 1);
+                        constant_node = net->getNode(constant_id);
+                        if (constant_node->getType() == "Constant") {
+                            bias_name = node->getInputName(1);
+                            is_bias_const = true;
+                        }
+                    }
+                }
+                if (!is_bias_const) {
+                    return false;
+                }
+            }
+
+            return true;
+        }
+        return false;
+    }
+
+    virtual void finalize(const Ptr<ImportGraphWrapper>& net,
+                          const Ptr<ImportNodeWrapper>& fusedNode,
+                          std::vector<Ptr<ImportNodeWrapper> >&) CV_OVERRIDE {
+        opencv_onnx::NodeProto* node = fusedNode.dynamicCast<ONNXNodeWrapper>()->node;
+        // add inputs
+        node->add_input(weight_name);
+        node->add_input(bias_name);
+    }
+
+ private:
+    int matmul_id, add_id;
+    std::string weight_name, bias_name;
+};
+
 /* The fusion for the multi-head attention from vision transformer.
 
 Abbreviations:
@@ -322,22 +431,21 @@ class AttentionSubGraph : public Subgraph {
     AttentionSubGraph() {
         int input = addNodeToMatch("");
         int transpose = addNodeToMatch("Transpose", input); // tranpose does not make any differences to the accuracy here in this subgraph
-        att_matmul = addNodeToMatch("MatMul", transpose, addNodeToMatch(""));
-        att_add = addNodeToMatch("Add", addNodeToMatch(""), att_matmul);
+        att_matmul = addNodeToMatch("MatMul", transpose, addNodeToMatch(""), addNodeToMatch("")); // add is fused into matmul via BiasedMatMulSubgraph
 
         // v_path
-        slice_v = addNodeToMatch("Slice", std::vector<int>{att_add, addNodeToMatch(""), addNodeToMatch(""), addNodeToMatch(""), addNodeToMatch("")});
+        slice_v = addNodeToMatch("Slice", std::vector<int>{att_matmul, addNodeToMatch(""), addNodeToMatch(""), addNodeToMatch(""), addNodeToMatch("")});
         int reshape_v = addNodeToMatch("Reshape", slice_v, addNodeToMatch(""));
         int transpose_v = addNodeToMatch("Transpose", reshape_v);
 
         // q_path
-        slice_q = addNodeToMatch("Slice", std::vector<int>{att_add, addNodeToMatch(""), addNodeToMatch(""), addNodeToMatch(""), addNodeToMatch("")});
+        slice_q = addNodeToMatch("Slice", std::vector<int>{att_matmul, addNodeToMatch(""), addNodeToMatch(""), addNodeToMatch(""), addNodeToMatch("")});
         reshape_q = addNodeToMatch("Reshape", slice_q, addNodeToMatch(""));
         int transpose_q = addNodeToMatch("Transpose", reshape_q);
         div_q = addNodeToMatch("Div", transpose_q, addNodeToMatch(""));
 
         // k_path
-        slice_k = addNodeToMatch("Slice", std::vector<int>{att_add, addNodeToMatch(""), addNodeToMatch(""), addNodeToMatch(""), addNodeToMatch("")});
+        slice_k = addNodeToMatch("Slice", std::vector<int>{att_matmul, addNodeToMatch(""), addNodeToMatch(""), addNodeToMatch(""), addNodeToMatch("")});
         int reshape_k = addNodeToMatch("Reshape", slice_k, addNodeToMatch(""));
         int transpose_k = addNodeToMatch("Transpose", reshape_k);
 
@@ -380,7 +488,7 @@ class AttentionSubGraph : public Subgraph {
 
             // get names
             weight_name = getInputName(net, matchedNodesIds[att_matmul], 1);
-            bias_name = getInputName(net, matchedNodesIds[att_add], 0);
+            bias_name = getInputName(net, matchedNodesIds[att_matmul], 2);
             return true;
         }
         return false;
@@ -414,7 +522,7 @@ class AttentionSubGraph : public Subgraph {
     }
 
 private:
-    int att_matmul, att_add;
+    int att_matmul;
     int slice_q, slice_k, slice_v;
     int reshape_q, div_q, last_reshape;
 
@@ -436,20 +544,19 @@ class AttentionSingleHeadSubGraph : public Subgraph {
     AttentionSingleHeadSubGraph() {
         int input = addNodeToMatch("");
         int transpose = addNodeToMatch("Transpose", input); // tranpose does not make any differences to the accuracy here in this subgraph
-        att_matmul = addNodeToMatch("MatMul", transpose, addNodeToMatch(""));
-        att_add = addNodeToMatch("Add", addNodeToMatch(""), att_matmul);
+        att_matmul = addNodeToMatch("MatMul", transpose, addNodeToMatch(""), addNodeToMatch("")); // add is fused into matmul via BiasedMatMulSubgraph
 
         // v_path
-        slice_v = addNodeToMatch("Slice", std::vector<int>{att_add, addNodeToMatch(""), addNodeToMatch(""), addNodeToMatch(""), addNodeToMatch("")});
+        slice_v = addNodeToMatch("Slice", std::vector<int>{att_matmul, addNodeToMatch(""), addNodeToMatch(""), addNodeToMatch(""), addNodeToMatch("")});
         int transpose_v = addNodeToMatch("Transpose", slice_v);
 
         // q_path
-        slice_q = addNodeToMatch("Slice", std::vector<int>{att_add, addNodeToMatch(""), addNodeToMatch(""), addNodeToMatch(""), addNodeToMatch("")});
+        slice_q = addNodeToMatch("Slice", std::vector<int>{att_matmul, addNodeToMatch(""), addNodeToMatch(""), addNodeToMatch(""), addNodeToMatch("")});
         int transpose_q = addNodeToMatch("Transpose", slice_q);
         div_q = addNodeToMatch("Div", transpose_q, addNodeToMatch(""));
 
         // k_path
-        slice_k = addNodeToMatch("Slice", std::vector<int>{att_add, addNodeToMatch(""), addNodeToMatch(""), addNodeToMatch(""), addNodeToMatch("")});
+        slice_k = addNodeToMatch("Slice", std::vector<int>{att_matmul, addNodeToMatch(""), addNodeToMatch(""), addNodeToMatch(""), addNodeToMatch("")});
         int transpose_k = addNodeToMatch("Transpose", slice_k);
 
         // qk
@@ -491,7 +598,7 @@ class AttentionSingleHeadSubGraph : public Subgraph {
 
             // get names
             weight_name = getInputName(net, matchedNodesIds[att_matmul], 1);
-            bias_name = getInputName(net, matchedNodesIds[att_add], 0);
+            bias_name = getInputName(net, matchedNodesIds[att_matmul], 2);
             return true;
         }
         return false;
@@ -525,7 +632,7 @@ class AttentionSingleHeadSubGraph : public Subgraph {
     }
 
 protected:
-    int att_matmul, att_add;
+    int att_matmul;
     int slice_q, slice_k, slice_v;
     int div_q, last_reshape;
 
@@ -1558,6 +1665,7 @@ public:
 void simplifySubgraphs(opencv_onnx::GraphProto& net)
 {
     std::vector<Ptr<Subgraph> > subgraphs;
+    subgraphs.push_back(makePtr<BiasedMatmulSubgraph>());
     subgraphs.push_back(makePtr<AdjustSliceAllOptionalInputsSubgraph>(3));
     subgraphs.push_back(makePtr<AdjustSliceAllOptionalInputsSubgraph>(4));
     subgraphs.push_back(makePtr<GeluSubGraph>());
```
```diff
@@ -1961,7 +1961,8 @@ void ONNXImporter::parseGemm(LayerParams& layerParams, const opencv_onnx::NodePr
 
 void ONNXImporter::parseMatMul(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_) {
     auto node_proto = node_proto_;
-    CV_CheckEQ(node_proto.input_size(), 2, "ONNXImporter/MatMul: two inputs required");
+    CV_CheckGE(node_proto.input_size(), 2, "ONNXImporter/MatMul: two inputs required at least");
+    CV_CheckLE(node_proto.input_size(), 3, "ONNXImporter/MatMul: three inputs required at most");
 
     for (int i = 0; i < node_proto.input_size(); i++) {
         if (constBlobs.find(node_proto.input(i)) == constBlobs.end()) {
@@ -1970,9 +1971,7 @@ void ONNXImporter::parseMatMul(LayerParams& layerParams, const opencv_onnx::Node
 
         Mat blob = getBlob(node_proto, i);
 
-        if (i == 1) {
-            layerParams.blobs.push_back(blob);
-        } else {
+        if (i == 0) {
             LayerParams const_params;
             const_params.name = node_proto.input(i);
             const_params.type = "Const";
@@ -1983,6 +1982,12 @@ void ONNXImporter::parseMatMul(LayerParams& layerParams, const opencv_onnx::Node
             addLayer(const_params, const_node_proto);
 
             node_proto.set_input(i, const_params.name);
+        } else {
+            layerParams.blobs.push_back(blob);
+        }
+
+        if (i == 2 && constBlobsExtraInfo.find(node_proto.input(2)) != constBlobsExtraInfo.end()) {
+            layerParams.set("real_ndims_C", getBlobExtraInfo(node_proto, 2).real_ndims);
         }
     }
 
```
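To connect the importer changes to the layer: when both the weight B and the bias C are constant, they end up as blobs on the MatMul layer, and `real_ndims_C` preserves the bias's original rank before blob packing. A hedged sketch of the resulting layer configuration (the helper function and layer name are hypothetical):

```cpp
#include <opencv2/dnn.hpp>

// Illustrative only: the kind of LayerParams the importer builds for an
// ONNX MatMul with constant weight B and constant 1-D bias C.
cv::dnn::LayerParams makeBiasedMatMulParams(const cv::Mat& B, const cv::Mat& C) {
    cv::dnn::LayerParams lp;
    lp.type = "MatMul";
    lp.name = "matmul_with_bias";  // hypothetical layer name
    lp.set("transA", false);
    lp.set("transB", false);
    lp.blobs.push_back(B);         // constant weight -> blobs[0]
    lp.blobs.push_back(C);         // constant bias   -> blobs[1]
    lp.set("real_ndims_C", 1);     // bias was 1-D, e.g. shape [N]
    return lp;
}
```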
```diff
@@ -143,4 +143,11 @@ TEST_F(Test_Graph_Simplifier, AttentionSubgraph) {
     test("attention_single_head", "Attention");
 }
 
+TEST_F(Test_Graph_Simplifier, BiasedMatMulSubgraph) {
+    /* Test for 1 subgraphs
+        - BiasedMatMulSubgraph
+    */
+    test("biased_matmul", "MatMul");
+}
+
 }}
```
```diff
@@ -3090,6 +3090,12 @@ TEST_P(Test_ONNX_layers, LayerNormNoFusion) {
     testONNXModels("layer_norm_no_fusion");
 }
 
+TEST_P(Test_ONNX_layers, MatMulAddFusion) {
+    double l1 = (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL) ? 0.0018 : default_l1;
+    double lInf = (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL) ? 0.011 : default_lInf;
+    testONNXModels("biased_matmul", npy, l1, lInf);
+}
+
 INSTANTIATE_TEST_CASE_P(/**/, Test_ONNX_nets, dnnBackendsAndTargets());
 
 }} // namespace
```
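From the user's side nothing changes: an ONNX model whose MatMul carries a bias, or whose MatMul + Add pair is collapsed by `BiasedMatmulSubgraph` on import, loads and runs through the usual DNN API. A hedged usage sketch (the model file name is hypothetical):

```cpp
#include <opencv2/core.hpp>
#include <opencv2/dnn.hpp>

int main() {
    // Hypothetical model containing a MatMul + Add pair that gets fused on import.
    cv::dnn::Net net = cv::dnn::readNetFromONNX("biased_matmul.onnx");

    cv::Mat input(16, 32, CV_32F);
    cv::randu(input, cv::Scalar(-1), cv::Scalar(1));

    net.setInput(input);
    cv::Mat out = net.forward(); // bias is added inside the fused MatMul layer
    return 0;
}
```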