Merge pull request #23614 from Abdurrahheem:lstm_layout_attribute
LSTM ONNX Layout Attribute Support #23614

### Explanation

This PR contains the changes necessary to support the `layout` attribute. The attribute is present in [ONNX](https://github.com/onnx/onnx/blob/main/docs/Operators.md#lstm) and in [Torch](https://pytorch.org/docs/stable/generated/torch.nn.LSTM.html#lstm) (where it is named `batch_first=True`). When `layout = 1`, the input to the LSTM layer is expected to have the batch dimension first, `[batch_size, sequence_length, features]`; with the default `layout = 0` it is `[sequence_length, batch_size, features]` (see the usage sketch below).

### Test Data

Test data and the data generator for this PR are located in [#1063](https://github.com/opencv/opencv_extra/pull/1063).

### Pull Request Readiness Checklist

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [x] I agree to contribute to the project under Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on code under GPL or another license that is incompatible with OpenCV
- [x] The PR is proposed to the proper branch
- [ ] There is a reference to the original bug report and related work
- [x] There is accuracy test, performance test and test data in opencv_extra repository, if applicable. The patch to opencv_extra has the same branch name.
- [x] The feature is well documented and sample code can be built with the project CMake
This commit is contained in: parent `d2618bfe11`, commit `d2143bcd44`
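As an illustration of the new behaviour, here is a minimal sketch of feeding a batch-first input to a network whose LSTM carries `layout = 1`. The model file name follows the test data naming (`lstm_layout_1`), but the exact path is an assumption:

```cpp
// Minimal sketch (assumed model path): batch-first I/O with an LSTM
// exported with layout = 1.
#include <opencv2/dnn.hpp>

int main() {
    cv::dnn::Net net = cv::dnn::readNetFromONNX("lstm_layout_1.onnx"); // assumed path

    // layout = 1: input is [batch_size, sequence_length, features]
    int sz[3] = {2, 5, 10};
    cv::Mat x(3, sz, CV_32F, cv::Scalar(0.f));

    net.setInput(x);
    cv::Mat y = net.forward();  // per this PR, Y is transposed back to batch-first as well
    return 0;
}
```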
```diff
@@ -113,12 +113,19 @@ class LSTMLayerImpl CV_FINAL : public LSTMLayer
     MatShape outTailShape;  //shape of single output sample
     MatShape outTsShape;    //shape of N output samples

+    enum layout_t : int {
+        SEQ_BATCH_HID = 0,
+        BATCH_SEQ_HID = 1
+    };
+
     bool useTimestampDim;
     bool produceCellOutput;
     float forgetBias, cellClip;
     bool useCellClip, usePeephole;
     bool reverse;        // If true, go in negative direction along the time axis
     bool bidirectional;  // If true, produces both forward and reversed directions along time axis
+    layout_t layout;     // If layout == BATCH_SEQ_HID, uses batch_size x seq_length x num_hidden for input and output
+                         // else uses seq_length x batch_size x num_hidden

     ActivationFunction f_activation;
     ActivationFunction g_activation;
```
```diff
@@ -198,6 +205,7 @@ public:
                 }
             }
         }
+        layout = (layout_t) params.get<int>("layout", SEQ_BATCH_HID);
         useTimestampDim = params.get<bool>("use_timestamp_dim", true);
         produceCellOutput = params.get<bool>("produce_cell_output", false);
         forgetBias = params.get<float>("forget_bias", 0.0f);
```
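Because the attribute is read with a default of `SEQ_BATCH_HID`, models that never set `layout` keep the previous seq-first behaviour. A hypothetical sketch of how the parameter reaches the layer through `LayerParams`:

```cpp
// Hypothetical sketch: forwarding the `layout` attribute via LayerParams.
// In the PR itself this is done by the ONNX importer (see parseLSTM below).
#include <opencv2/dnn.hpp>

int main() {
    cv::dnn::LayerParams lp;
    lp.type = "LSTM";
    lp.set("hidden_size", 16);
    lp.set("layout", 1);  // request batch-first ([batch, seq, features]) I/O
    // lp would then be passed to Net::addLayer(...) together with the
    // weight blobs the LSTM layer requires (omitted here).
    return 0;
}
```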
```diff
@@ -291,8 +299,13 @@ public:
         if (useTimestampDim)
         {
             CV_Assert(inp0.size() >= 2 && total(inp0, 2) == _numInp);
-            _numSamples = inp0[1];
-            outResShape.push_back(inp0[0]);
+            if (layout == SEQ_BATCH_HID) {
+                _numSamples = inp0[1];
+                outResShape.push_back(inp0[0]);
+            } else {
+                _numSamples = inp0[0];
+                outResShape.push_back(inp0[1]);
+            }
         }
         else
         {
```
```diff
@@ -349,8 +362,13 @@ public:
         if (useTimestampDim)
         {
             CV_Assert(inp0.dims >= 2 && (int)inp0.total(2) == numInp);
-            numTimeStamps = inp0.size[0];
-            numSamples = inp0.size[1];
+            if (layout == SEQ_BATCH_HID){
+                numTimeStamps = inp0.size[0];
+                numSamples = inp0.size[1];
+            }else{
+                numTimeStamps = inp0.size[1];
+                numSamples = inp0.size[0];
+            }
         }
         else
         {
```
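The same seq/batch selection is applied in both places where the layer reads its input geometry, so shape inference and the forward computation stay consistent for either layout.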
```diff
@@ -383,6 +401,21 @@ public:
         outputs_arr.getMatVector(output);
         internals_arr.getMatVector(internals);

+        if (layout == BATCH_SEQ_HID){
+            //swap axis 0 and 1 of input x
+            cv::Mat tmp;
+            // Since the python input is 4-dimensional and the C++ input 3-dimensional,
+            // we need to process each differently
+            if (input[0].dims == 4){
+                CV_Assert(input[0].size[3] == 1);
+                cv::transposeND(input[0], {1, 0, 2, 3}, tmp); //back to seq_len, batch_size, hidden_size format
+            }else{
+                cv::transposeND(input[0], {1, 0, 2}, tmp); //back to seq_len, batch_size, hidden_size format
+            }
+            input[0] = tmp;
+        }
+
         Mat cOut = produceCellOutput ? output[0].clone() : Mat();
         const bool needYcTransform = !originalBlobs.empty(); // if the producer is onnx
         const int numDirs = 1 + static_cast<int>(bidirectional);
```
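The axis swap relies on `cv::transposeND`, which permutes the axes of an n-dimensional `Mat` according to the given order. A standalone illustration with assumed sizes:

```cpp
// Standalone illustration of the axis swap performed above (sizes assumed).
#include <opencv2/core.hpp>

int main() {
    int sz[3] = {2, 5, 10};  // batch-first: [batch, seq, features]
    cv::Mat x(3, sz, CV_32F, cv::Scalar(0.f));

    cv::Mat xSeqFirst;
    cv::transposeND(x, {1, 0, 2}, xSeqFirst);  // -> [seq, batch, features]

    CV_Assert(xSeqFirst.size[0] == 5 && xSeqFirst.size[1] == 2);
    return 0;
}
```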
```diff
@@ -599,7 +632,12 @@ public:
                 cInternal.copyTo(cOutTs.rowRange(curRowRange));
             }
         }
-
+        // transpose to match batch first output
+        if (layout == BATCH_SEQ_HID){
+            cv::Mat tmp;
+            cv::transposeND(output[0], {1, 0, 2}, tmp);
+            output[0] = tmp;
+        }
         if (needYcTransform && produceCellOutput)
         {
             fixCellState(cOut, numDirs);
```
```diff
@@ -618,7 +656,13 @@ public:

         // permute to {0, 2, 1, 3};
         cv::Mat newCellState;
-        cv::transposeND(cOut, {0, 2, 1, 3}, newCellState);
+        // transpose to match batch first output
+        if (layout == BATCH_SEQ_HID){
+            cv::transposeND(cOut, {2, 0, 1, 3}, newCellState);
+        }
+        else{
+            cv::transposeND(cOut, {0, 2, 1, 3}, newCellState);
+        }
         cOut = newCellState;

         if (numDirs == 1)
```
```diff
@@ -1637,8 +1637,16 @@ void ONNXImporter::parseLSTM(LayerParams& layerParams, const opencv_onnx::NodePr
     CV_Assert(shapeIt != outShapes.end());
     const MatShape x_shape = shapeIt->second;

-    const int seq_length = x_shape[0];
-    const int batch_size = x_shape[1];
+    //if layout is 1, change batch and sequence dims
+    const int layout = layerParams.get<int>("layout", 0);
+    int batch_size, seq_length;
+    if (layout == 1){
+        batch_size = x_shape[0];
+        seq_length = x_shape[1];
+    }else{
+        seq_length = x_shape[0];
+        batch_size = x_shape[1];
+    }
     const int input_size = x_shape[2];
     const int hidden_size = layerParams.get<int>("hidden_size");
     const int num_directions = constBlobs[lstm_proto.input(1)].size[0];
```
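For example, given `x_shape = [2, 5, 10]`: with `layout == 1` the importer reads `batch_size = 2` and `seq_length = 5`, while the default layout gives `seq_length = 2` and `batch_size = 5`; `input_size` comes from `x_shape[2]` either way.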
```diff
@@ -1393,6 +1393,20 @@ TEST_P(Test_ONNX_layers, LSTM_init_h0_c0)
         applyTestTag(CV_TEST_TAG_DNN_SKIP_CUDA);
     testONNXModels("lstm_init_h0_c0", npy, 0, 0, false, false, 3);
 }
+// epsilon is larger because onnx does not match with torch/opencv exactly
+TEST_P(Test_ONNX_layers, LSTM_layout_seq)
+{
+    if(backend == DNN_BACKEND_CUDA)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_CUDA);
+    testONNXModels("lstm_layout_0", npy, 0.005, 0.005, false, false, 3);
+}
+// epsilon is larger because onnx does not match with torch/opencv exactly
+TEST_P(Test_ONNX_layers, LSTM_layout_batch)
+{
+    if(backend == DNN_BACKEND_CUDA)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_CUDA);
+    testONNXModels("lstm_layout_1", npy, 0.005, 0.005, false, false, 3);
+}

 TEST_P(Test_ONNX_layers, Pad2d_Unfused)
 {
```
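The new cases can be run in isolation with the usual GoogleTest filter, e.g. `opencv_test_dnn --gtest_filter='*LSTM_layout*'`; they expect the `lstm_layout_0` / `lstm_layout_1` test data from the opencv_extra branch referenced above.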