core/ocl: OpenCLBufferPool

This commit is contained in:
Alexander Alekhin 2014-01-16 18:30:39 +04:00
parent 22146e4b18
commit 485635310c
10 changed files with 479 additions and 14 deletions

View File

@ -0,0 +1,26 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
//
// Copyright (C) 2014, Advanced Micro Devices, Inc., all rights reserved.
#ifndef __OPENCV_CORE_BUFFER_POOL_HPP__
#define __OPENCV_CORE_BUFFER_POOL_HPP__
namespace cv
{
/**
 * Control interface of a buffer pool.
 *
 * Exposes the amount of memory a pool holds in reserve, the upper limit of that reserve,
 * and a way to drop everything that is currently reserved. The meaning of the returned
 * sizes is defined by the concrete implementation.
 */
class BufferPoolController
{
public:
    /** Returns the size currently held in reserve by the pool. */
    virtual size_t getReservedSize() const = 0;

    /** Returns the configured upper limit of the reserve. */
    virtual size_t getMaxReservedSize() const = 0;

    /** Sets a new upper limit of the reserve. */
    virtual void setMaxReservedSize(size_t size) = 0;

    /** Drops all buffers that are currently held in reserve. */
    virtual void freeAllReservedBuffers() = 0;

protected:
    // Protected and non-virtual on purpose: a controller cannot be destroyed
    // through a pointer to this interface.
    ~BufferPoolController() { }
};
}
#endif // __OPENCV_CORE_BUFFER_POOL_HPP__

View File

@ -51,6 +51,7 @@
#include "opencv2/core/matx.hpp"
#include "opencv2/core/types.hpp"
#include "opencv2/core/bufferpool.hpp"
namespace cv
{
@ -299,6 +300,9 @@ public:
virtual void copy(UMatData* srcdata, UMatData* dstdata, int dims, const size_t sz[],
const size_t srcofs[], const size_t srcstep[],
const size_t dstofs[], const size_t dststep[], bool sync) const;
// Returns the controller of the buffer pool used by this allocator.
// The default implementation returns a DummyBufferPoolController;
// allocators that own a real pool override this method.
virtual BufferPoolController* getBufferPoolController() const;
};
@ -363,7 +367,7 @@ struct CV_EXPORTS UMatData
int refcount;
uchar* data;
uchar* origdata;
size_t size;
size_t size, capacity;
int flags;
void* handle;

View File

@ -596,6 +596,9 @@ protected:
Impl* p;
};
CV_EXPORTS MatAllocator* getOpenCLAllocator();
}}
#endif

View File

@ -0,0 +1,132 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
//
// Copyright (C) 2014, Advanced Micro Devices, Inc., all rights reserved.
#include "perf_precomp.hpp"
#include "opencv2/ts/ocl_perf.hpp"
#ifdef HAVE_OPENCL
namespace cvtest {
namespace ocl {
// Scoped helper for the buffer pool performance tests.
//
// On construction it remembers the controller's current reserve limit and then either
// drops all reserved buffers (enable == true) or sets the limit to 0 (enable == false).
// The remembered limit is written back on destruction.
// The test is skipped (PerfSkipTestException) when OpenCL is not in use or when the
// controller reports (size_t)-1 as its limit.
struct BufferPoolState
{
    BufferPoolController* controller_;
    size_t oldMaxReservedSize_;

    BufferPoolState(BufferPoolController* c, bool enable)
        : controller_(c)
    {
        if (!cv::ocl::useOpenCL())
            throw ::perf::TestBase::PerfSkipTestException();

        oldMaxReservedSize_ = controller_->getMaxReservedSize();

        // (size_t)-1 is what the dummy controller reports: no usable pool behind it
        const size_t noPoolMarker = (size_t)-1;
        if (noPoolMarker == oldMaxReservedSize_)
            throw ::perf::TestBase::PerfSkipTestException();

        if (enable)
            controller_->freeAllReservedBuffers();
        else
            controller_->setMaxReservedSize(0);
    }

    ~BufferPoolState()
    {
        // restore the limit that was active before the test
        controller_->setMaxReservedSize(oldMaxReservedSize_);
    }
};
typedef TestBaseWithParam<bool> BufferPoolFixture;
// Times the creation and destruction of 100 UMat objects (1920x1080, CV_8UC1) per cycle.
// The bool test parameter is forwarded to BufferPoolState as its "enable" flag.
OCL_PERF_TEST_P(BufferPoolFixture, BufferPool_UMatCreation100, Bool())
{
    BufferPoolController* const pool = cv::ocl::getOpenCLAllocator()->getBufferPoolController();
    BufferPoolState s(pool, GetParam());

    const Size frameSize(1920, 1080);

    OCL_TEST_CYCLE()
    {
        for (int remaining = 100; remaining > 0; --remaining)
        {
            UMat u(frameSize, CV_8UC1);
        }
    }

    SANITY_CHECK_NOTHING()
}
// Times 100 rounds of "create a UMat (1920x1080, CV_8UC1) and run countNonZero on it" per cycle.
// The bool test parameter is forwarded to BufferPoolState as its "enable" flag.
OCL_PERF_TEST_P(BufferPoolFixture, BufferPool_UMatCountNonZero100, Bool())
{
    BufferPoolController* const pool = cv::ocl::getOpenCLAllocator()->getBufferPoolController();
    BufferPoolState s(pool, GetParam());

    const Size frameSize(1920, 1080);

    OCL_TEST_CYCLE()
    {
        for (int remaining = 100; remaining > 0; --remaining)
        {
            UMat u(frameSize, CV_8UC1);
            countNonZero(u);
        }
    }

    SANITY_CHECK_NOTHING()
}
// Times 10 rounds of Canny on a freshly created UMat (1920x1080, CV_8UC1) per cycle.
// The bool test parameter is forwarded to BufferPoolState as its "enable" flag.
OCL_PERF_TEST_P(BufferPoolFixture, BufferPool_UMatCanny10, Bool())
{
    BufferPoolController* const pool = cv::ocl::getOpenCLAllocator()->getBufferPoolController();
    BufferPoolState s(pool, GetParam());

    const Size frameSize(1920, 1080);

    // Canny parameters
    const double lowThreshold = 100;
    const double highThreshold = 120;
    const int apertureSize = 3;
    const bool l2Gradient = false;

    OCL_TEST_CYCLE()
    {
        for (int remaining = 10; remaining > 0; --remaining)
        {
            UMat src(frameSize, CV_8UC1);
            UMat dst;
            Canny(src, dst, lowThreshold, highThreshold, apertureSize, l2Gradient);
            dst.getMat(ACCESS_READ); // complete async operations
        }
    }

    SANITY_CHECK_NOTHING()
}
// Times 10 rounds of integral() on a freshly created UMat (1920x1080, CV_32FC1) per cycle.
// The bool test parameter is forwarded to BufferPoolState as its "enable" flag.
OCL_PERF_TEST_P(BufferPoolFixture, BufferPool_UMatIntegral10, Bool())
{
    BufferPoolController* const pool = cv::ocl::getOpenCLAllocator()->getBufferPoolController();
    BufferPoolState s(pool, GetParam());

    const Size frameSize(1920, 1080);

    OCL_TEST_CYCLE()
    {
        for (int remaining = 10; remaining > 0; --remaining)
        {
            UMat src(frameSize, CV_32FC1);
            UMat dst;
            integral(src, dst);
            dst.getMat(ACCESS_READ); // complete async operations
        }
    }

    SANITY_CHECK_NOTHING()
}
} } // namespace cvtest::ocl
#endif // HAVE_OPENCL

View File

@ -0,0 +1,28 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
//
// Copyright (C) 2014, Advanced Micro Devices, Inc., all rights reserved.
#ifndef __OPENCV_CORE_BUFFER_POOL_IMPL_HPP__
#define __OPENCV_CORE_BUFFER_POOL_IMPL_HPP__
#include "opencv2/core/bufferpool.hpp"
namespace cv {
// Placeholder controller with no pool behind it.
// Both size queries report (size_t)-1, and the two mutating calls do nothing.
class DummyBufferPoolController : public BufferPoolController
{
public:
    DummyBufferPoolController() { }
    virtual ~DummyBufferPoolController() { }

    // size queries: always the all-ones value
    virtual size_t getReservedSize() const
    {
        return static_cast<size_t>(-1);
    }
    virtual size_t getMaxReservedSize() const
    {
        return static_cast<size_t>(-1);
    }

    // mutators: intentionally empty
    virtual void setMaxReservedSize(size_t /*size*/) { }
    virtual void freeAllReservedBuffers() { }
};
} // namespace
#endif // __OPENCV_CORE_BUFFER_POOL_IMPL_HPP__

View File

@ -43,6 +43,8 @@
#include "precomp.hpp"
#include "opencl_kernels.hpp"
#include "bufferpool.impl.hpp"
/****************************************************************************************\
* [scaled] Identity matrix initialization *
\****************************************************************************************/
@ -157,6 +159,12 @@ void MatAllocator::copy(UMatData* usrc, UMatData* udst, int dims, const size_t s
memcpy(ptrs[1], ptrs[0], planesz);
}
// Default controller for allocators that do not provide their own:
// a single function-local DummyBufferPoolController instance is handed out to every caller.
BufferPoolController* MatAllocator::getBufferPoolController() const
{
    static DummyBufferPoolController fallbackController;
    BufferPoolController* const controller = &fallbackController;
    return controller;
}
class StdMatAllocator : public MatAllocator
{
public:

View File

@ -40,11 +40,48 @@
//M*/
#include "precomp.hpp"
#include <list>
#include <map>
#include <string>
#include <sstream>
#include <iostream> // std::cerr
#include "opencv2/core/bufferpool.hpp"
// Tracing hook used by the OpenCL buffer pool code.
// Unless LOG_BUFFER_POOL is already defined, it expands to nothing;
// change "# if 0" to "# if 1" to forward the calls to printf instead.
#ifndef LOG_BUFFER_POOL
# if 0
# define LOG_BUFFER_POOL printf
# else
# define LOG_BUFFER_POOL(...)
# endif
#endif
// TODO Move to some common place
/**
 * Reads a size value from the environment variable @p name.
 *
 * Accepted format: a run of decimal digits, optionally followed by one of the suffixes
 * "MB"/"Mb"/"mb" (value is multiplied by 1024*1024) or "KB"/"Kb"/"kb" (multiplied by 1024).
 * A value without leading digits is treated as 0, as before.
 *
 * @param name          name of the environment variable
 * @param defaultValue  returned when the variable is not set
 * @return the parsed size
 *
 * Raises cv::Error::StsBadArg when the suffix is not recognized or when the resulting
 * value does not fit into size_t.
 */
static size_t getConfigurationParameterForSize(const char* name, size_t defaultValue)
{
    const char* envValue = getenv(name);
    if (envValue == NULL)
    {
        return defaultValue;
    }
    cv::String value = envValue;

    const size_t maxValue = (size_t)-1;

    // Accumulate the leading digits directly into size_t. The previous int-based parsing
    // (atoi + int multiplication) overflowed for limits of 2 GB and above.
    // Digits are compared as characters, so a negative char is never passed to isdigit().
    size_t pos = 0;
    size_t v = 0;
    bool overflow = false;
    for (; pos < value.size(); pos++)
    {
        const char c = value[pos];
        if (c < '0' || c > '9')
            break;
        const size_t digit = (size_t)(c - '0');
        if (v > (maxValue - digit) / 10)
            overflow = true; // keep scanning so that the suffix position stays correct
        else
            v = v * 10 + digit;
    }

    cv::String suffixStr = value.substr(pos, value.length() - pos);

    // 0 means "unknown suffix"
    size_t multiplier = 0;
    if (suffixStr.length() == 0)
        multiplier = 1;
    else if (suffixStr == "MB" || suffixStr == "Mb" || suffixStr == "mb")
        multiplier = (size_t)1024 * 1024;
    else if (suffixStr == "KB" || suffixStr == "Kb" || suffixStr == "kb")
        multiplier = 1024;

    if (!overflow && multiplier != 0 && v <= maxValue / multiplier)
        return v * multiplier;

    CV_ErrorNoReturn(cv::Error::StsBadArg, cv::format("Invalid value for %s parameter: %s", name, value.c_str()));
}
#include "opencv2/core/opencl/runtime/opencl_clamdblas.hpp"
#include "opencv2/core/opencl/runtime/opencl_clamdfft.hpp"
@ -3234,8 +3271,208 @@ ProgramSource2::hash_t ProgramSource2::hash() const
//////////////////////////////////////////// OpenCLAllocator //////////////////////////////////////////////////
// Allocation interface of the OpenCL buffer pool.
class OpenCLBufferPool
{
public:
    // Returns a buffer able to hold at least `size` bytes;
    // `capacity` receives the size the pool recorded for the returned buffer.
    virtual cl_mem allocate(size_t size, CV_OUT size_t& capacity) = 0;

    // Hands a buffer back to the pool together with the capacity reported by allocate().
    virtual void release(cl_mem handle, size_t capacity) = 0;

protected:
    // Protected and non-virtual: a pool cannot be destroyed through this interface.
    ~OpenCLBufferPool() { }
};
/**
 * OpenCL buffer pool.
 *
 * Released cl_mem buffers are kept in a reserve list so that later allocations of a
 * similar size can reuse them instead of creating a new buffer with clCreateBuffer().
 *
 * The reserve is bounded by maxReservedSize (sum of the capacities, in bytes, of the
 * reserved buffers); a limit of 0 disables pooling. The initial limit is read from the
 * OPENCV_OPENCL_BUFFERPOOL_LIMIT environment variable and defaults to 0.
 *
 * Methods commented as "synchronized" expect the caller to hold mutex_.
 */
class OpenCLBufferPoolImpl : public BufferPoolController, public OpenCLBufferPool
{
public:
    /** A pooled OpenCL buffer together with its real (aligned) size in bytes. */
    struct BufferEntry
    {
        cl_mem clBuffer_;
        size_t capacity_;
    };
protected:
    Mutex mutex_;

    size_t currentReservedSize; // sum of capacity_ over reservedEntries_
    size_t maxReservedSize;     // upper bound for currentReservedSize, 0 = pool disabled

    std::list<BufferEntry> reservedEntries_; // LRU order

    // synchronized
    // Looks for the best fitting reserved buffer for a request of `size` bytes:
    // capacity must be >= size and the unused part must be less than size / 8.
    // Among the candidates the one with the smallest unused part wins.
    // On success the entry is removed from the reserve, copied into `entry`,
    // and true is returned.
    bool _findAndRemoveEntryFromReservedList(CV_OUT BufferEntry& entry, const size_t size)
    {
        if (reservedEntries_.empty())
            return false;
        std::list<BufferEntry>::iterator i = reservedEntries_.begin();
        std::list<BufferEntry>::iterator result_pos = reservedEntries_.end();
        BufferEntry result = {NULL, 0};
        size_t minDiff = (size_t)(-1);
        for (; i != reservedEntries_.end(); ++i)
        {
            BufferEntry& e = *i;
            if (e.capacity_ >= size)
            {
                size_t diff = e.capacity_ - size;
                if (diff < size / 8 && (result_pos == reservedEntries_.end() || diff < minDiff))
                {
                    minDiff = diff;
                    result_pos = i;
                    result = e;
                    if (diff == 0)
                        break; // exact fit, no better candidate possible
                }
            }
        }
        if (result_pos != reservedEntries_.end())
        {
            //CV_DbgAssert(result == *result_pos);
            reservedEntries_.erase(result_pos);
            entry = result;
            currentReservedSize -= entry.capacity_;
            return true;
        }
        return false;
    }

    // synchronized
    // Releases entries from the back of the list (least recently reserved)
    // until the reserve fits into maxReservedSize again.
    void _checkSizeOfReservedEntries()
    {
        while (currentReservedSize > maxReservedSize)
        {
            CV_DbgAssert(!reservedEntries_.empty());
            const BufferEntry& entry = reservedEntries_.back();
            CV_DbgAssert(currentReservedSize >= entry.capacity_);
            currentReservedSize -= entry.capacity_;
            _releaseBufferEntry(entry);
            reservedEntries_.pop_back();
        }
    }

    // Alignment applied to a requested size; a coarser granularity for larger
    // requests makes it more likely that a reserved buffer can be reused.
    inline size_t _allocationGranularity(size_t size)
    {
        // heuristic values
        if (size < 1024)
            return 16;
        else if (size < 64*1024)
            return 64;
        else if (size < 1024*1024)
            return 4096;
        else if (size < 16*1024*1024)
            return 64*1024;
        else
            return 1024*1024;
    }

    // Creates a new OpenCL buffer for `size` bytes (rounded up to the allocation
    // granularity) in the default context and stores it in `entry`.
    // Fails with CV_Assert if clCreateBuffer() does not succeed.
    void _allocateBufferEntry(BufferEntry& entry, size_t size)
    {
        CV_DbgAssert(entry.clBuffer_ == NULL);
        entry.capacity_ = alignSize(size, (int)_allocationGranularity(size));
        Context2& ctx = Context2::getDefault();
        cl_int retval = CL_SUCCESS;
        entry.clBuffer_ = clCreateBuffer((cl_context)ctx.ptr(), CL_MEM_READ_WRITE, entry.capacity_, 0, &retval);
        CV_Assert(retval == CL_SUCCESS);
        CV_Assert(entry.clBuffer_ != NULL);
        LOG_BUFFER_POOL("OpenCL allocate %lld (0x%llx) bytes: %p\n",
                (long long)entry.capacity_, (long long)entry.capacity_, entry.clBuffer_);
    }

    // Returns the OpenCL buffer of `entry` to the OpenCL runtime.
    // Does not touch the reserve list or its size counter.
    void _releaseBufferEntry(const BufferEntry& entry)
    {
        CV_Assert(entry.capacity_ != 0);
        CV_Assert(entry.clBuffer_ != NULL);
        LOG_BUFFER_POOL("OpenCL release buffer: %p, %lld (0x%llx) bytes\n",
                entry.clBuffer_, (long long)entry.capacity_, (long long)entry.capacity_);
        clReleaseMemObject(entry.clBuffer_);
    }
public:
    OpenCLBufferPoolImpl()
        : currentReservedSize(0), maxReservedSize(0)
    {
        // Note: Buffer pool is disabled by default,
        // because we didn't receive significant performance improvement
        maxReservedSize = getConfigurationParameterForSize("OPENCV_OPENCL_BUFFERPOOL_LIMIT", 0);
    }
    virtual ~OpenCLBufferPoolImpl()
    {
        freeAllReservedBuffers();
        CV_Assert(reservedEntries_.empty());
    }
public:
    // Returns a buffer of at least `size` bytes: a fitting reserved buffer if pooling
    // is enabled and one is available, otherwise a newly created one.
    // `capacity` receives the real size of the returned buffer and has to be passed
    // back to release().
    virtual cl_mem allocate(size_t size, CV_OUT size_t& capacity)
    {
        BufferEntry entry = {NULL, 0};
        if (maxReservedSize > 0)
        {
            AutoLock locker(mutex_);
            if (_findAndRemoveEntryFromReservedList(entry, size))
            {
                CV_DbgAssert(size <= entry.capacity_);
                LOG_BUFFER_POOL("Reuse reserved buffer: %p\n", entry.clBuffer_);
                capacity = entry.capacity_;
                return entry.clBuffer_;
            }
        }
        _allocateBufferEntry(entry, size);
        capacity = entry.capacity_;
        return entry.clBuffer_;
    }

    // Takes a buffer back. It is released immediately when pooling is disabled or the
    // buffer is larger than 1/8 of the reserve limit; otherwise it is put at the front
    // of the reserve and older entries are evicted if the limit is exceeded.
    virtual void release(cl_mem handle, size_t capacity)
    {
        BufferEntry entry = {handle, capacity};
        if (maxReservedSize == 0 || entry.capacity_ > maxReservedSize / 8)
        {
            _releaseBufferEntry(entry);
        }
        else
        {
            AutoLock locker(mutex_);
            reservedEntries_.push_front(entry);
            currentReservedSize += entry.capacity_;
            _checkSizeOfReservedEntries();
        }
    }

    virtual size_t getReservedSize() const { return currentReservedSize; }
    virtual size_t getMaxReservedSize() const { return maxReservedSize; }

    // Changes the reserve limit. When the limit shrinks, entries that are now too large
    // to be pooled (more than 1/8 of the new limit) are released first, then the reserve
    // is trimmed to the new limit.
    virtual void setMaxReservedSize(size_t size)
    {
        AutoLock locker(mutex_);
        size_t oldMaxReservedSize = maxReservedSize;
        maxReservedSize = size;
        if (maxReservedSize < oldMaxReservedSize)
        {
            std::list<BufferEntry>::iterator i = reservedEntries_.begin();
            for (; i != reservedEntries_.end();)
            {
                const BufferEntry& entry = *i;
                if (entry.capacity_ > maxReservedSize / 8)
                {
                    CV_DbgAssert(currentReservedSize >= entry.capacity_);
                    currentReservedSize -= entry.capacity_;
                    _releaseBufferEntry(entry);
                    i = reservedEntries_.erase(i);
                    continue;
                }
                ++i;
            }
            _checkSizeOfReservedEntries();
        }
    }

    // Releases every reserved buffer and leaves the reserve empty.
    virtual void freeAllReservedBuffers()
    {
        AutoLock locker(mutex_);
        std::list<BufferEntry>::const_iterator i = reservedEntries_.begin();
        for (; i != reservedEntries_.end(); ++i)
        {
            const BufferEntry& entry = *i;
            _releaseBufferEntry(entry);
        }
        reservedEntries_.clear();
        // Keep the counter in sync with the (now empty) list. Without this reset the stale
        // value makes later release() calls evict too early and lets
        // _checkSizeOfReservedEntries() access back() of an empty list.
        currentReservedSize = 0;
    }
};
class OpenCLAllocator : public MatAllocator
{
mutable OpenCLBufferPoolImpl bufferPool;
public:
OpenCLAllocator() { matStdAllocator = Mat::getStdAllocator(); }
@ -3274,17 +3511,18 @@ public:
int createFlags = 0, flags0 = 0;
getBestFlags(ctx, flags, createFlags, flags0);
cl_int retval = 0;
void* handle = clCreateBuffer((cl_context)ctx.ptr(),
createFlags, total, 0, &retval);
if( !handle || retval != CL_SUCCESS )
CV_Assert(createFlags == CL_MEM_READ_WRITE);
size_t capacity = 0;
void* handle = bufferPool.allocate(total, capacity);
if (!handle)
return defaultAllocate(dims, sizes, type, data, step, flags);
UMatData* u = new UMatData(this);
u->data = 0;
u->size = total;
u->capacity = capacity;
u->handle = handle;
u->flags = flags0;
CV_DbgAssert(!u->tempUMat()); // for bufferPool.release() consistency
return u;
}
@ -3405,8 +3643,9 @@ public:
fastFree(u->data);
u->data = 0;
}
clReleaseMemObject((cl_mem)u->handle);
bufferPool.release((cl_mem)u->handle, u->capacity);
u->handle = 0;
u->capacity = 0;
delete u;
}
}
@ -3713,6 +3952,8 @@ public:
}
}
BufferPoolController* getBufferPoolController() const { return &bufferPool; }
MatAllocator* matStdAllocator;
};

View File

@ -260,11 +260,6 @@ extern TLSData<CoreTLSData> coreTlsData;
#define CL_RUNTIME_EXPORT
#endif
namespace ocl
{
MatAllocator* getOpenCLAllocator();
}
extern bool __termination; // skip some cleanups, because process is terminating
// (for example, if ExitProcess() was already called)

View File

@ -56,7 +56,7 @@ UMatData::UMatData(const MatAllocator* allocator)
prevAllocator = currAllocator = allocator;
urefcount = refcount = 0;
data = origdata = 0;
size = 0;
size = 0; capacity = 0;
flags = 0;
handle = 0;
userdata = 0;
@ -67,7 +67,7 @@ UMatData::~UMatData()
prevAllocator = currAllocator = 0;
urefcount = refcount = 0;
data = origdata = 0;
size = 0;
size = 0; capacity = 0;
flags = 0;
handle = 0;
userdata = 0;

View File

@ -291,3 +291,31 @@ TEST(UMat, setOpenCL)
// reset state to the previous one
ocl::setUseOpenCL(useOCL);
}
// Creates pairs of UMat objects whose sizes grow on every iteration while the buffer pool
// limit is set to 10 times the base frame area. The previous limit is restored afterwards
// and the reserved buffers are dropped.
TEST(UMat, BufferPoolGrowing)
{
#ifdef _DEBUG
    const int ITERATIONS = 100;
#else
    const int ITERATIONS = 200;
#endif
    const Size sz(1920, 1080);

    BufferPoolController* c = ocl::getOpenCLAllocator()->getBufferPoolController();
    if (!c)
    {
        std::cout << "Skipped, no OpenCL" << std::endl;
        return;
    }

    const size_t savedLimit = c->getMaxReservedSize();
    c->freeAllReservedBuffers();
    c->setMaxReservedSize(sz.area() * 10);

    for (int step = 0; step < ITERATIONS; ++step)
    {
        const Size firstSize(sz.width + step, sz.height + step);
        const Size secondSize(sz.width + 2 * step, sz.height + 2 * step);
        UMat um(firstSize, CV_8UC1);
        UMat um2(secondSize, CV_8UC1);
    }

    // restore the state found at the start of the test
    c->setMaxReservedSize(savedLimit);
    c->freeAllReservedBuffers();
}