From 33c9d57c6f04ae8d07876039d7da787e5fb048d0 Mon Sep 17 00:00:00 2001
From: Wu Zhiwen <zhiwen.wu@intel.com>
Date: Tue, 6 Nov 2018 20:24:00 +0800
Subject: [PATCH] dnn/Vulkan: skip heavy convolution task

This is a workaround for a GPU hang on heavy convolution workloads (> 10 GFLOPS),
e.g. ResNet101_DUC_HDC.

For such a long-running task, vkWaitForFences() returns without error, but the next call to
vkQueueSubmit() returns -4, i.e. "VK_ERROR_DEVICE_LOST", and the driver reports a GPU hang.

The root cause of the GPU hang needs more investigation, and the convolution shader needs to be
optimized to reduce processing time.
---
 modules/dnn/src/dnn.cpp | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)
diff --git a/modules/dnn/src/dnn.cpp b/modules/dnn/src/dnn.cpp
index cda864980f..d0dc9dfb28 100644
--- a/modules/dnn/src/dnn.cpp
+++ b/modules/dnn/src/dnn.cpp
@@ -1411,6 +1411,32 @@ struct Net::Impl
                 continue;
             }
 
+            if (ld.type == "Convolution")
+            {
+                std::vector<MatShape> in_shapes;
+                std::vector<MatShape> out_shapes;
+                CV_Assert(ld.inputBlobs.size() == ld.outputBlobs.size());
+
+                for (int i = 0; i < ld.inputBlobs.size(); i++)
+                {
+                    in_shapes.push_back(shape(*ld.inputBlobs[i]));
+                    out_shapes.push_back(shape(ld.outputBlobs[i]));
+                }
+                int64 flops = layer->getFLOPS(in_shapes, out_shapes);
+                // FIXME
+                //
+                // This is a workaround for GPU hang on heavy convolution workload ( > 10 GFLOPS).
+                // For the long time task, vkWaitForFences() return without error but next call on
+                // vkQueueSubmit() return -4, i.e. "VK_ERROR_DEVICE_LOST" and driver reports GPU hang.
+                //
+                // Need more investigation on root cause of GPU hang and need to optimize convolution shader
+                // to reduce process time.
+                if (flops > CV_BIG_INT(10) * 1000 * 1000 * 1000)
+                {
+                    continue;
+                }
+            }
+
             ld.skip = false;
 
             try