dnn: Allow LSTM layer to operate in reverse direction

This is useful for bidirectional LSTMs.
2025-07-31 09:57:28 +08:00 · 2019-09-25 14:12:43 +01:00 · 2019-09-25 14:12:43 +01:00 · b88435fdc2
commit b88435fdc2
parent 3289a0aff9
2 changed files with 63 additions and 1 deletions
--- a/modules/dnn/src/layers/recurrent_layers.cpp
+++ b/modules/dnn/src/layers/recurrent_layers.cpp
@@ -92,6 +92,7 @@ class LSTMLayerImpl CV_FINAL : public LSTMLayer
    bool produceCellOutput;
    float forgetBias, cellClip;
    bool useCellClip, usePeephole;
+    bool reverse;   // If true, go in negative direction along the time axis

 public:

@@ -133,6 +134,7 @@ public:
        cellClip = params.get<float>("cell_clip", 0.0f);
        useCellClip = params.get<bool>("use_cell_clip", false);
        usePeephole = params.get<bool>("use_peephole", false);
+        reverse = params.get<bool>("reverse", false);

        allocated = false;
        outTailShape.clear();
@@ -288,7 +290,18 @@ public:
        Mat hOutTs = output[0].reshape(1, numSamplesTotal);
        Mat cOutTs = produceCellOutput ? output[1].reshape(1, numSamplesTotal) : Mat();

-        for (int ts = 0; ts < numTimeStamps; ts++)
+        int tsStart, tsEnd, tsInc;
+        if (reverse) {
+            tsStart = numTimeStamps - 1;
+            tsEnd = -1;
+            tsInc = -1;
+        }
+        else {
+            tsStart = 0;
+            tsEnd = numTimeStamps;
+            tsInc = 1;
+        }
+        for (int ts = tsStart; ts != tsEnd; ts += tsInc)
        {
            Range curRowRange(ts*numSamples, (ts + 1)*numSamples);
            Mat xCurr = xTs.rowRange(curRowRange);
--- a/modules/dnn/test/test_layers.cpp
+++ b/modules/dnn/test/test_layers.cpp
@@ -489,6 +489,55 @@ TEST(Layer_RNN_Test_Accuracy_with_, CaffeRecurrent)
    normAssert(h_ref, output[0]);
 }

+TEST(Layer_LSTM_Test_Accuracy_, Reverse)
+{
+    // With reverse=true this handcrafted setup yields (approximately) the suffix
+    // sum of the input, out[t] ~= in[t] + ... + in[last], for suitably small inputs.
+    cv::Mat input(2, 1, CV_32FC1);
+    input.at<float>(0, 0) = 1e-5f;
+    input.at<float>(1, 0) = 2e-5f;
+
+    cv::Mat Wx(4, 1, CV_32FC1);
+    Wx.at<float>(0, 0) = 0.f;  // Input gate
+    Wx.at<float>(1, 0) = 0.f;  // Forget gate
+    Wx.at<float>(2, 0) = 0.f;  // Output gate
+    Wx.at<float>(3, 0) = 1.f;  // Update signal
+
+    cv::Mat Wh(4, 1, CV_32FC1);
+    Wh.at<float>(0, 0) = 0.f;  // Input gate
+    Wh.at<float>(1, 0) = 0.f;  // Forget gate
+    Wh.at<float>(2, 0) = 0.f;  // Output gate
+    Wh.at<float>(3, 0) = 0.f;  // Update signal
+
+    cv::Mat bias(4, 1, CV_32FC1);
+    bias.at<float>(0, 0) = 1e10f;  // Input gate - always allows input to c
+    bias.at<float>(1, 0) = 1e10f;  // Forget gate - never forget anything on c
+    bias.at<float>(2, 0) = 1e10f;  // Output gate - always output everything
+    bias.at<float>(3, 0) = 0.f;  // Update signal
+
+    LayerParams lp;
+    lp.set("reverse", true);  // walk the time axis from the last step to the first
+    lp.set("use_timestamp_dim", true);
+    lp.blobs.clear();
+    lp.blobs.push_back(Wh);  // blob 0: recurrent (hidden-state) weights
+    lp.blobs.push_back(Wx);  // blob 1: input weights
+    lp.blobs.push_back(bias);  // blob 2: gate biases
+
+    cv::Ptr<cv::dnn::LSTMLayer> layer = LSTMLayer::create(lp);
+    std::vector<cv::Mat> outputs;
+    std::vector<cv::Mat> inputs;
+    inputs.push_back(input);
+    runLayer(layer, inputs, outputs);
+
+    ASSERT_EQ(1, outputs.size());
+    cv::Mat out = outputs[0];
+    ASSERT_EQ(3, out.dims);
+    ASSERT_EQ(shape(2, 1, 1), shape(out));
+    float* data = reinterpret_cast<float*>(out.data);
+    EXPECT_NEAR(std::tanh(1e-5f) + std::tanh(2e-5f), data[0], 1e-10);  // t=0 runs last: both inputs accumulated
+    EXPECT_NEAR(std::tanh(2e-5f), data[1], 1e-10);  // t=1 runs first: only its own input
+}
+

 class Layer_RNN_Test : public ::testing::Test
 {