Merge pull request #1383 from pengx17:2.4_clblas

This commit is contained in:
Roman Donchenko 2013-09-03 12:05:00 +04:00 committed by OpenCV Buildbot
commit f50a829943
2 changed files with 51 additions and 2 deletions

View File

@ -46,16 +46,62 @@
#include <iomanip> #include <iomanip>
#include "precomp.hpp" #include "precomp.hpp"
// Forward declarations of the clAmdBlas lifetime hooks, so the library does
// not have to be set up and torn down redundantly.
namespace cv
{
namespace ocl
{
void clBlasSetup();
void clBlasTeardown();
} // namespace ocl
} // namespace cv
#if !defined HAVE_CLAMDBLAS #if !defined HAVE_CLAMDBLAS
void cv::ocl::gemm(const oclMat&, const oclMat&, double, void cv::ocl::gemm(const oclMat&, const oclMat&, double,
const oclMat&, double, oclMat&, int) const oclMat&, double, oclMat&, int)
{ {
CV_Error(CV_StsNotImplemented, "OpenCL BLAS is not implemented"); CV_Error(CV_StsNotImplemented, "OpenCL BLAS is not implemented");
} }
// Stub compiled when HAVE_CLAMDBLAS is not defined: there is no BLAS backend
// to initialize, so the call is reported as CV_StsNotImplemented via CV_Error.
void cv::ocl::clBlasSetup()
{
CV_Error(CV_StsNotImplemented, "OpenCL BLAS is not implemented");
}
// Stub compiled when HAVE_CLAMDBLAS is not defined. The body is empty, so
// calling it has no effect.
void cv::ocl::clBlasTeardown()
{
// Intentionally does nothing.
}
#else #else
#include "clAmdBlas.h" #include "clAmdBlas.h"
using namespace cv; using namespace cv;
// Set to true by clBlasSetup() after clAmdBlasSetup() has been called, and
// back to false by clBlasTeardown() after clAmdBlasTeardown().
static bool clBlasInitialized = false;
// Locked by clBlasSetup() and clBlasTeardown() while they update
// clBlasInitialized.
static Mutex cs;
// Calls clAmdBlasSetup() the first time it is invoked; subsequent calls do
// nothing until clBlasTeardown() clears clBlasInitialized again.
//
// The flag is read only while `cs` is held: an unlocked pre-check of the
// plain bool would race with the writes made under the lock here and in
// clBlasTeardown().
void cv::ocl::clBlasSetup()
{
    AutoLock al(cs);
    if(!clBlasInitialized)
    {
        // The flag is set only after the setup call has returned.
        openCLSafeCall(clAmdBlasSetup());
        clBlasInitialized = true;
    }
}
// Calls clAmdBlasTeardown() if clBlasSetup() has marked the library as
// initialized, then clears the flag. Does nothing otherwise.
void cv::ocl::clBlasTeardown()
{
    AutoLock guard(cs);

    // Nothing to tear down unless setup has run since the last teardown.
    if(!clBlasInitialized)
        return;

    clAmdBlasTeardown();
    clBlasInitialized = false;
}
void cv::ocl::gemm(const oclMat &src1, const oclMat &src2, double alpha, void cv::ocl::gemm(const oclMat &src1, const oclMat &src2, double alpha,
const oclMat &src3, double beta, oclMat &dst, int flags) const oclMat &src3, double beta, oclMat &dst, int flags)
{ {
@ -71,7 +117,8 @@ void cv::ocl::gemm(const oclMat &src1, const oclMat &src2, double alpha,
dst.create(src1.rows, src2.cols, src1.type()); dst.create(src1.rows, src2.cols, src1.type());
dst.setTo(Scalar::all(0)); dst.setTo(Scalar::all(0));
} }
openCLSafeCall( clAmdBlasSetup() );
clBlasSetup();
const clAmdBlasTranspose transA = (cv::GEMM_1_T & flags) ? clAmdBlasTrans : clAmdBlasNoTrans; const clAmdBlasTranspose transA = (cv::GEMM_1_T & flags) ? clAmdBlasTrans : clAmdBlasNoTrans;
const clAmdBlasTranspose transB = (cv::GEMM_2_T & flags) ? clAmdBlasTrans : clAmdBlasNoTrans; const clAmdBlasTranspose transB = (cv::GEMM_2_T & flags) ? clAmdBlasTrans : clAmdBlasNoTrans;
@ -156,6 +203,5 @@ void cv::ocl::gemm(const oclMat &src1, const oclMat &src2, double alpha,
} }
break; break;
} }
clAmdBlasTeardown();
} }
#endif #endif

View File

@ -68,6 +68,7 @@ namespace cv
namespace ocl namespace ocl
{ {
extern void fft_teardown(); extern void fft_teardown();
extern void clBlasTeardown();
/* /*
* The binary caching system to eliminate redundant program source compilation. * The binary caching system to eliminate redundant program source compilation.
* Strictly, this is not a cache because we do not implement evictions right now. * Strictly, this is not a cache because we do not implement evictions right now.
@ -1050,6 +1051,7 @@ namespace cv
void Info::release() void Info::release()
{ {
fft_teardown(); fft_teardown();
clBlasTeardown();
impl->release(); impl->release();
impl = new Impl; impl = new Impl;
DeviceName.clear(); DeviceName.clear();
@ -1058,6 +1060,7 @@ namespace cv
Info::~Info() Info::~Info()
{ {
fft_teardown(); fft_teardown();
clBlasTeardown();
impl->release(); impl->release();
} }