Merge pull request #10097 from wzw-intel:tuning_time

2025-08-06 14:36:36 +08:00 · 2017-11-16 09:34:42 +00:00 · 2017-11-16 09:34:42 +00:00 · 55dabd2858
commit 55dabd2858
parent 1fbdca83f5 88e6daa315
2 changed files with 11 additions and 13 deletions
--- a/modules/dnn/src/ocl4dnn/include/ocl4dnn.hpp
+++ b/modules/dnn/src/ocl4dnn/include/ocl4dnn.hpp
@@ -217,8 +217,7 @@ class OCL4DNNConvSpatial
        bool convolve(const UMat &bottom, UMat &top,
                      const UMat &weight, const UMat &bias,
                      int32_t numImages,
-                      kernelConfig* config,
-                      const cv::ocl::Queue& queue);
+                      kernelConfig* config);
        float timedConvolve(const UMat &bottom, UMat &top,
                            const UMat &weight, const UMat &bias,
                            int32_t numImages, kernelConfig* config);
--- a/modules/dnn/src/ocl4dnn/src/ocl4dnn_conv_spatial.cpp
+++ b/modules/dnn/src/ocl4dnn/src/ocl4dnn_conv_spatial.cpp
@@ -381,7 +381,7 @@ bool OCL4DNNConvSpatial<Dtype>::Forward(const UMat& bottom,
    prepareKernel(bottom, top, weight, bias, numImages);
    if (bestKernelConfig.empty())
        return false;
-    return convolve(bottom, top, weight, bias, numImages, bestKernelConfig, cv::ocl::Queue::getDefault());
+    return convolve(bottom, top, weight, bias, numImages, bestKernelConfig);
 }

 template<typename Dtype>
@@ -392,7 +392,7 @@ void OCL4DNNConvSpatial<Dtype>::calculateBenchmark(const UMat &bottom, UMat &ver
    options_.str(""); options_.clear(); // clear contents and state flags
    createBasicKernel(1, 1, 1);
    kernel_index_ = kernelQueue.size() - 1;
-    convolve(bottom, verifyTop, weight, bias, numImages, kernelQueue[kernel_index_], cv::ocl::Queue::getDefault());
+    convolve(bottom, verifyTop, weight, bias, numImages, kernelQueue[kernel_index_]);
    CV_Assert(phash.find(kernelQueue[kernel_index_]->kernelName) != phash.end());
    //unloadProgram(kernelQueue[kernel_index_]->kernelName);
    kernelQueue.pop_back();
@@ -649,8 +649,7 @@ void OCL4DNNConvSpatial<float>::CreateSubBuffer(const UMat& buffer, UMat& sub_bu
 template<>
 bool OCL4DNNConvSpatial<float>::convolve(const UMat &bottom, UMat &top,
                                         const UMat &weight, const UMat &bias,
-                                         int32_t numImages, kernelConfig* config,
-                                         const cv::ocl::Queue& queue)
+                                         int32_t numImages, kernelConfig* config)
 {
    ocl::Program program;
    phash_t::iterator it = phash.find(config->kernelName);
@@ -926,17 +925,17 @@ float OCL4DNNConvSpatial<float>::timedConvolve(const UMat &bottom, UMat &top,
                                               const UMat &weight, const UMat &bias,
                                               int32_t numImages, kernelConfig* config)
 {
-    cv::ocl::Queue profilingQueue;
+    cv::ocl::Queue queue;
    try
    {
-        profilingQueue = cv::ocl::Queue::getDefault().getProfilingQueue();
+        queue = cv::ocl::Queue::getDefault();
    }
    catch (const cv::Exception&)
    {
        static int warn_ = 0;
        if (!warn_)
        {
-            std::cout << "OpenCV(ocl4dnn): Can't create OpenCL profiling queue for auto-tuning." << std::endl;
+            std::cout << "OpenCV(ocl4dnn): Can't get OpenCL default queue for auto-tuning." << std::endl;
            warn_ = true;
        }
        return 1e6;
@@ -945,16 +944,16 @@ float OCL4DNNConvSpatial<float>::timedConvolve(const UMat &bottom, UMat &top,
    // warm up.
    bool saved_tuned = tuned_;
    tuned_ = false;
-    convolve(bottom, top, weight, bias, numImages, config, profilingQueue);
+    convolve(bottom, top, weight, bias, numImages, config);

-    cv::ocl::Timer timer(profilingQueue);
+    cv::ocl::Timer timer(queue);
    timer.start();
    bool res = true;;
    dbgPrint(std::cout << "Benchmarking kernel: " << config->kernelName << std::endl);
    tuned_ = true;
    int loop_cnt = 4;
    for (int i = 0; i < loop_cnt; i++) {
-        res = convolve(bottom, top, weight, bias, numImages, config, profilingQueue);
+        res = convolve(bottom, top, weight, bias, numImages, config);
        if (!res)
            break;
    }
@@ -1009,7 +1008,7 @@ bool OCL4DNNConvSpatial<float>::verifyResult(const UMat &bottom,
    top.zeros(4, sz, CV_32FC1);
    bool saved_tuned = tuned_;
    tuned_ = false;
-    convolve(bottom, top, weight, bias, numImages, config, cv::ocl::Queue::getDefault());
+    convolve(bottom, top, weight, bias, numImages, config);
    tuned_ = saved_tuned;

    float *data = (float *)top.getMat(ACCESS_READ).ptr<float>();