Mirror of https://github.com/opencv/opencv.git
dnn: Allow LSTM layer to operate in reverse direction
This is useful for bidirectional LSTMs.
parent 3289a0aff9
commit b88435fdc2
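The motivation given above is bidirectional LSTMs. Below is a rough sketch (not part of this commit) of how the new "reverse" parameter could be used for that: build a forward and a reversed LSTM layer and feed them the same sequence. The helper makeLstm and the dummy weight Mats are hypothetical; the blob order and the "use_timestamp_dim" parameter follow the test added at the end of this diff.

#include <opencv2/dnn.hpp>

using namespace cv;
using namespace cv::dnn;

// Hypothetical helper: build an LSTM layer from pre-trained weights,
// optionally running backwards along the time axis.
static Ptr<LSTMLayer> makeLstm(const Mat& Wh, const Mat& Wx, const Mat& bias, bool reverse)
{
    LayerParams lp;
    lp.set("reverse", reverse);           // parameter introduced by this commit
    lp.set("use_timestamp_dim", true);    // first input dimension is the time axis
    lp.blobs.push_back(Wh);
    lp.blobs.push_back(Wx);
    lp.blobs.push_back(bias);
    return LSTMLayer::create(lp);
}

int main()
{
    // Dummy weights with the layout used in the test below:
    // rows = 4 * numOutputs (input, forget, output gate and cell update),
    // here numOutputs == numInputs == 1.
    Mat Wx = Mat::zeros(4, 1, CV_32FC1);
    Mat Wh = Mat::zeros(4, 1, CV_32FC1);
    Mat bias = Mat::zeros(4, 1, CV_32FC1);

    Ptr<LSTMLayer> fwd = makeLstm(Wh, Wx, bias, /*reverse=*/false);
    Ptr<LSTMLayer> bwd = makeLstm(Wh, Wx, bias, /*reverse=*/true);

    // Feeding the same input sequence to both layers and concatenating their
    // per-timestep outputs (e.g. with cv::hconcat) would give the usual
    // bidirectional LSTM features.
    return fwd.empty() || bwd.empty();
}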
@@ -92,6 +92,7 @@ class LSTMLayerImpl CV_FINAL : public LSTMLayer
     bool produceCellOutput;
     float forgetBias, cellClip;
     bool useCellClip, usePeephole;
+    bool reverse;   // If true, go in negative direction along the time axis

 public:

@@ -133,6 +134,7 @@ public:
         cellClip = params.get<float>("cell_clip", 0.0f);
         useCellClip = params.get<bool>("use_cell_clip", false);
         usePeephole = params.get<bool>("use_peephole", false);
+        reverse = params.get<bool>("reverse", false);

         allocated = false;
         outTailShape.clear();
@@ -288,7 +290,18 @@ public:
         Mat hOutTs = output[0].reshape(1, numSamplesTotal);
         Mat cOutTs = produceCellOutput ? output[1].reshape(1, numSamplesTotal) : Mat();

-        for (int ts = 0; ts < numTimeStamps; ts++)
+        int tsStart, tsEnd, tsInc;
+        if (reverse) {
+            tsStart = numTimeStamps - 1;
+            tsEnd = -1;
+            tsInc = -1;
+        }
+        else {
+            tsStart = 0;
+            tsEnd = numTimeStamps;
+            tsInc = 1;
+        }
+        for (int ts = tsStart; ts != tsEnd; ts += tsInc)
         {
             Range curRowRange(ts*numSamples, (ts + 1)*numSamples);
             Mat xCurr = xTs.rowRange(curRowRange);
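A standalone illustration (not code from the patch) of the new loop bounds: with reverse enabled the loop visits exactly the same timesteps, and hence the same row ranges, just in the opposite order.

#include <cstdio>

int main()
{
    const int numTimeStamps = 3;
    for (int pass = 0; pass < 2; ++pass)
    {
        const bool reverse = (pass == 1);
        const int tsStart = reverse ? numTimeStamps - 1 : 0;
        const int tsEnd   = reverse ? -1 : numTimeStamps;
        const int tsInc   = reverse ? -1 : 1;
        std::printf(reverse ? "reverse:" : "forward:");
        for (int ts = tsStart; ts != tsEnd; ts += tsInc)
            std::printf(" %d", ts);        // forward: 0 1 2   reverse: 2 1 0
        std::printf("\n");
    }
    return 0;
}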
@@ -489,6 +489,55 @@ TEST(Layer_RNN_Test_Accuracy_with_, CaffeRecurrent)
     normAssert(h_ref, output[0]);
 }

+TEST(Layer_LSTM_Test_Accuracy_, Reverse)
+{
+    // This handcrafted setup calculates (approximately) the prefix sum of the
+    // input, assuming the inputs are suitably small.
+    cv::Mat input(2, 1, CV_32FC1);
+    input.at<float>(0, 0) = 1e-5f;
+    input.at<float>(1, 0) = 2e-5f;
+
+    cv::Mat Wx(4, 1, CV_32FC1);
+    Wx.at<float>(0, 0) = 0.f;  // Input gate
+    Wx.at<float>(1, 0) = 0.f;  // Forget gate
+    Wx.at<float>(2, 0) = 0.f;  // Output gate
+    Wx.at<float>(3, 0) = 1.f;  // Update signal
+
+    cv::Mat Wh(4, 1, CV_32FC1);
+    Wh.at<float>(0, 0) = 0.f;  // Input gate
+    Wh.at<float>(1, 0) = 0.f;  // Forget gate
+    Wh.at<float>(2, 0) = 0.f;  // Output gate
+    Wh.at<float>(3, 0) = 0.f;  // Update signal
+
+    cv::Mat bias(4, 1, CV_32FC1);
+    bias.at<float>(0, 0) = 1e10f;  // Input gate - always allows input to c
+    bias.at<float>(1, 0) = 1e10f;  // Forget gate - never forget anything on c
+    bias.at<float>(2, 0) = 1e10f;  // Output gate - always output everything
+    bias.at<float>(3, 0) = 0.f;    // Update signal
+
+    LayerParams lp;
+    lp.set("reverse", true);
+    lp.set("use_timestamp_dim", true);
+    lp.blobs.clear();
+    lp.blobs.push_back(Wh);
+    lp.blobs.push_back(Wx);
+    lp.blobs.push_back(bias);
+
+    cv::Ptr<cv::dnn::LSTMLayer> layer = LSTMLayer::create(lp);
+    std::vector<cv::Mat> outputs;
+    std::vector<cv::Mat> inputs;
+    inputs.push_back(input);
+    runLayer(layer, inputs, outputs);
+
+    ASSERT_EQ(1, outputs.size());
+    cv::Mat out = outputs[0];
+    ASSERT_EQ(3, out.dims);
+    ASSERT_EQ(shape(2, 1, 1), shape(out));
+    float* data = reinterpret_cast<float*>(out.data);
+    EXPECT_NEAR(std::tanh(1e-5f) + std::tanh(2e-5f), data[0], 1e-10);
+    EXPECT_NEAR(std::tanh(2e-5f), data[1], 1e-10);
+}
+
 class Layer_RNN_Test : public ::testing::Test
 {
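For reference, the expected values in the test above follow from the gate setup: the 1e10 biases saturate the input, forget and output gates at 1, the cell update is tanh(x_t), and with reverse enabled timestep 1 is processed before timestep 0, so the cell state accumulates tanh of the inputs from the last timestep backwards. A small standalone check of that arithmetic (my reading of the setup, not code from the patch):

#include <cmath>
#include <cstdio>

int main()
{
    const float x0 = 1e-5f, x1 = 2e-5f;

    // Reverse direction: timestep 1 first, then timestep 0.
    const float c1 = std::tanh(x1);        // c = 0 + 1 * tanh(x1)
    const float h1 = std::tanh(c1);        // ~= tanh(x1), since c1 is tiny
    const float c0 = c1 + std::tanh(x0);   // forget gate ~1 carries c1 over
    const float h0 = std::tanh(c0);        // ~= tanh(x0) + tanh(x1)

    std::printf("h0 = %.9g, expected ~ %.9g\n", h0, std::tanh(x0) + std::tanh(x1));
    std::printf("h1 = %.9g, expected ~ %.9g\n", h1, std::tanh(x1));
    return 0;
}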