From 2ef427db91b6c4aec170f691c5d2e6c47d6520d7 Mon Sep 17 00:00:00 2001
From: "Woo, Insoo" <insoo.woo@intel.com>
Date: Tue, 31 Jan 2017 11:26:26 -0800
Subject: [PATCH] Fix API compatibility error

This patch fixes an OCV API compatibility error. The error was reported
due to the interface change of Kernel::run. To resolve the issue, an
overloaded version of Kernel::run is added. It takes a flag indicating
whether there is more work to be done with the kernel object without
releasing the resources related to it.

Signed-off-by: Woo, Insoo <insoo.woo@intel.com>
---
 modules/core/include/opencv2/core/ocl.hpp | 17 ++++++++++++++++-
 modules/core/src/intel_gpu_gemm.cpp       | 10 ++++++----
 modules/core/src/ocl.cpp                  |  8 +++++++-
 3 files changed, 29 insertions(+), 6 deletions(-)

diff --git a/modules/core/include/opencv2/core/ocl.hpp b/modules/core/include/opencv2/core/ocl.hpp
index 610288eaf9..630208481f 100644
--- a/modules/core/include/opencv2/core/ocl.hpp
+++ b/modules/core/include/opencv2/core/ocl.hpp
@@ -580,6 +580,20 @@ public:
     @param localsize work-group size for each dimension.
     @param sync specify whether to wait for OpenCL computation to finish before return.
     @param q command queue
+    */
+    bool run(int dims, size_t globalsize[],
+             size_t localsize[], bool sync, const Queue& q=Queue());
+    /**
+    Run the OpenCL kernel.
+    @param dims the work problem dimensions. It is the length of globalsize and localsize. It can be either 1, 2 or 3.
+    @param globalsize work items for each dimension.
+    It is not the final globalsize passed to OpenCL.
+    Each dimension will be adjusted to the nearest integer divisible by the corresponding value in localsize.
+    If localsize is NULL, it will still be adjusted depending on dims.
+    The adjusted values are greater than or equal to the original values.
+    @param localsize work-group size for each dimension.
+    @param sync specify whether to wait for OpenCL computation to finish before return.
+    @param q command queue
     @param moreWorkDone specify whether there will the remaining work to be computed (more Kernel::run calls).
     When a computation requires multiple kernel execution by changing input and output buffer offset to get
     the final computation results.
@@ -590,7 +604,8 @@ public:
     kernel.run(..., q, false);
     */
     bool run(int dims, size_t globalsize[],
-             size_t localsize[], bool sync, const Queue& q=Queue(), bool moreWorkDone = false);
+             size_t localsize[], bool sync, bool moreWorkDone, const Queue& q);
+
     bool runTask(bool sync, const Queue& q=Queue());
 
     size_t workGroupSize() const;
diff --git a/modules/core/src/intel_gpu_gemm.cpp b/modules/core/src/intel_gpu_gemm.cpp
index 34ad7b944b..5cc000944a 100644
--- a/modules/core/src/intel_gpu_gemm.cpp
+++ b/modules/core/src/intel_gpu_gemm.cpp
@@ -33,6 +33,8 @@
 namespace cv
 {
 
+bool intel_gpu_gemm( UMat A, Size sizeA, UMat B, Size sizeB, UMat D, Size sizeD, double alpha, double beta, bool atrans, bool btrans);
+
 bool intel_gpu_gemm(
     UMat A, Size sizeA,
     UMat B, Size sizeB,
@@ -40,7 +42,7 @@ bool intel_gpu_gemm(
     double alpha, double beta, 
     bool atrans, bool btrans)
 {
-    sizeA; sizeB;
+    CV_UNUSED(sizeA); CV_UNUSED(sizeB);
 
     int M = sizeD.height, N = sizeD.width, K = ((atrans)? sizeA.height : sizeA.width);
 
@@ -105,7 +107,7 @@ bool intel_gpu_gemm(
     ocl::Queue q;
     if(!atrans && btrans)
     {
-        ret = k.run(2, global, local, false, q, false);
+        ret = k.run(2, global, local, false, false, q);
     }
     else
     {
@@ -114,12 +116,12 @@ bool intel_gpu_gemm(
        	    k.set(14, &start_index, sizeof(start_index));
             if ((start_index + stride) < K)
     	    {
-    	        ret = k.run(2, global, local, false, q, true);
+    	        ret = k.run(2, global, local, false, true, q);
                 if (!ret) return ret;
     	    }
     	    else
             {
-                ret = k.run(2, global, local, false, q, false);
+                ret = k.run(2, global, local, false, false, q);
     	    }
         }
     }
diff --git a/modules/core/src/ocl.cpp b/modules/core/src/ocl.cpp
index ca35c9c162..0ec16db3c5 100644
--- a/modules/core/src/ocl.cpp
+++ b/modules/core/src/ocl.cpp
@@ -3465,7 +3465,7 @@ int Kernel::set(int i, const KernelArg& arg)
 }
 
 bool Kernel::run(int dims, size_t _globalsize[], size_t _localsize[],
-                 bool sync, const Queue& q, bool moreWorkDone)
+                 bool sync, bool moreWorkDone, const Queue& q)
 {
     CV_INSTRUMENT_REGION_OPENCL_RUN(p->name.c_str());
 
@@ -3511,6 +3511,12 @@ bool Kernel::run(int dims, size_t _globalsize[], size_t _localsize[],
     return retval == CL_SUCCESS;
 }
 
+bool Kernel::run(int dims, size_t _globalsize[], size_t _localsize[],
+                 bool sync, const Queue& q)
+{
+    return run(dims, _globalsize, _localsize, sync, false, q);
+}
+
 bool Kernel::runTask(bool sync, const Queue& q)
 {
     if(!p || !p->handle || p->e != 0)