core(alloc): drop unused code, use memalign() functions instead of hacks

Valgrind provides better detection without memory-buffer hacks
This commit is contained in:
Alexander Alekhin 2017-07-27 18:01:34 +03:00
parent 34f9c039c5
commit b46e741c95
5 changed files with 42 additions and 627 deletions

View File

@ -551,6 +551,7 @@ if(UNIX)
endif()
include(CheckFunctionExists)
include(CheckIncludeFile)
# Needed below for the posix_memalign()/memalign() symbol tests.
include(CheckSymbolExists)
if(NOT APPLE)
CHECK_INCLUDE_FILE(pthread.h HAVE_LIBPTHREAD)
@ -566,6 +567,12 @@ if(UNIX)
else()
set(HAVE_LIBPTHREAD YES)
endif()
# Detect aligned-allocation support consumed by modules/core/src/alloc.cpp:
# prefer posix_memalign() from stdlib.h; fall back to the legacy memalign()
# only when malloc.h is present.
CHECK_SYMBOL_EXISTS(posix_memalign stdlib.h HAVE_POSIX_MEMALIGN)
CHECK_INCLUDE_FILE(malloc.h HAVE_MALLOC_H)
if(HAVE_MALLOC_H)
CHECK_SYMBOL_EXISTS(memalign malloc.h HAVE_MEMALIGN)
endif()
endif()
include(cmake/OpenCVPCHSupport.cmake)

View File

@ -45,6 +45,16 @@ endif()
if(ITT_INCLUDE_DIRS)
  ocv_module_include_directories(${ITT_INCLUDE_DIRS})
endif()

# Forward the allocator feature-test results (set in the top-level
# CMakeLists) to alloc.cpp only — file-local compile definitions rather
# than module-wide ones.
# NOTE(review): fixed typo "sourge" -> "source"; the OpenCV build helper is
# ocv_append_source_file_compile_definitions() (cmake/OpenCVUtils.cmake).
if(HAVE_POSIX_MEMALIGN)
  ocv_append_source_file_compile_definitions(${CMAKE_CURRENT_SOURCE_DIR}/src/alloc.cpp "HAVE_POSIX_MEMALIGN=1")
endif()
if(HAVE_MALLOC_H)
  ocv_append_source_file_compile_definitions(${CMAKE_CURRENT_SOURCE_DIR}/src/alloc.cpp "HAVE_MALLOC_H=1")
endif()
if(HAVE_MEMALIGN)
  ocv_append_source_file_compile_definitions(${CMAKE_CURRENT_SOURCE_DIR}/src/alloc.cpp "HAVE_MEMALIGN=1")
endif()
ocv_create_module(${extra_libs})
ocv_target_link_libraries(${the_module}

View File

@ -42,35 +42,50 @@
// OpenCV core aligned allocator (fastMalloc/fastFree), system-malloc build.
#include "precomp.hpp"
// Forces the system-malloc implementation; the pooled allocator after the
// matching #else is effectively dead code in this configuration.
#define CV_USE_SYSTEM_MALLOC 1
#ifdef HAVE_POSIX_MEMALIGN
#include <stdlib.h>
#elif defined HAVE_MALLOC_H
#include <malloc.h>
#endif
// NOTE(review): this is a rendered diff — the next three lines show both the
// old ("namespace cv" + "{") and new ("namespace cv {") form of the same
// namespace opening.
namespace cv
{
namespace cv {
// Reports an allocation failure of `size` bytes through CV_Error_ (which
// raises an OpenCV error); the `return 0` only satisfies the compiler.
// NOTE(review): diff shows old (%lu) and new (%llu) versions of the line.
static void* OutOfMemoryError(size_t size)
{
CV_Error_(CV_StsNoMem, ("Failed to allocate %lu bytes", (unsigned long)size));
CV_Error_(CV_StsNoMem, ("Failed to allocate %llu bytes", (unsigned long long)size));
return 0;
}
#if CV_USE_SYSTEM_MALLOC
#if defined _WIN32
// No per-thread allocator state exists in the system-malloc build; the stub
// keeps the DllMain cleanup hook (see system.cpp) linking.
void deleteThreadAllocData() {}
#endif
// Allocates `size` bytes aligned to CV_MALLOC_ALIGN; raises CV_StsNoMem via
// OutOfMemoryError() on failure. Must be paired with fastFree().
void* fastMalloc( size_t size )
{
#ifdef HAVE_POSIX_MEMALIGN
// Preferred path: posix_memalign() returns an error code and leaves the
// pointer untouched on failure, so it is explicitly reset to NULL.
void* ptr = NULL;
if(posix_memalign(&ptr, CV_MALLOC_ALIGN, size))
ptr = NULL;
if(!ptr)
return OutOfMemoryError(size);
return ptr;
#elif defined HAVE_MEMALIGN
// Legacy path: memalign() from malloc.h returns NULL on failure.
void* ptr = memalign(CV_MALLOC_ALIGN, size);
if(!ptr)
return OutOfMemoryError(size);
return ptr;
#else
// Portable fallback: over-allocate with plain malloc() and store the raw
// pointer in the slot just before the aligned user pointer so that
// fastFree() can recover it.
uchar* udata = (uchar*)malloc(size + sizeof(void*) + CV_MALLOC_ALIGN);
if(!udata)
return OutOfMemoryError(size);
uchar** adata = alignPtr((uchar**)udata + 1, CV_MALLOC_ALIGN);
adata[-1] = udata;
return adata;
#endif
}
// Releases memory obtained from fastMalloc(); NULL is tolerated.
void fastFree(void* ptr)
{
#if defined HAVE_POSIX_MEMALIGN || defined HAVE_MEMALIGN
// posix_memalign()/memalign() results are released with plain free().
free(ptr);
#else
// Portable path: the raw malloc() pointer was stashed one slot before the
// aligned user pointer by fastMalloc().
if(ptr)
{
uchar* udata = ((uchar**)ptr)[-1];
@ -78,621 +93,10 @@ void fastFree(void* ptr)
// NOTE(review): the diff hunk header above swallowed the opening of a
// CV_DbgAssert(...); the next line is its tail — it checks the distance
// between user and raw pointer stays within sizeof(void*)+CV_MALLOC_ALIGN.
((uchar*)ptr - udata) <= (ptrdiff_t)(sizeof(void*)+CV_MALLOC_ALIGN));
free(udata);
}
}
#else //CV_USE_SYSTEM_MALLOC
// ---- Legacy pooled allocator: compiled only when the system-malloc path
// ---- above is disabled (it is not, see CV_USE_SYSTEM_MALLOC = 1). --------
#if 0
// Heavy consistency check of a Block header (alignment, bin index range,
// signature); disabled by default.
#define SANITY_CHECK(block) \
CV_Assert(((size_t)(block) & (MEM_BLOCK_SIZE-1)) == 0 && \
(unsigned)(block)->binIdx <= (unsigned)MAX_BIN && \
(block)->signature == MEM_BLOCK_SIGNATURE)
#else
#define SANITY_CHECK(block)
#endif
// Allocation statistics hooks are compiled out; STAT(...) expands to nothing.
#define STAT(stmt)
#ifdef _WIN32
#if (_WIN32_WINNT >= 0x0602)
// Windows 8+ SDKs declare the critical-section API in synchapi.h.
#include <synchapi.h>
#endif
// Minimal mutex wrapper over a Win32 CRITICAL_SECTION (non-RAII: callers
// lock/unlock explicitly, or use AutoLock below).
struct CriticalSection
{
CriticalSection()
{
#if (_WIN32_WINNT >= 0x0600)
// Vista and later: extended initializer with a spin count of 1000.
InitializeCriticalSectionEx(&cs, 1000, 0);
#else
InitializeCriticalSection(&cs);
#endif
}
~CriticalSection() { DeleteCriticalSection(&cs); }
void lock() { EnterCriticalSection(&cs); }
void unlock() { LeaveCriticalSection(&cs); }
bool trylock() { return TryEnterCriticalSection(&cs) != 0; }
CRITICAL_SECTION cs;
};
// Raw heap allocation for the pool; reports failure via OutOfMemoryError().
void* SystemAlloc(size_t size)
{
    if( void* mem = malloc(size) )
        return mem;
    return OutOfMemoryError(size);
}
// Releases memory obtained from SystemAlloc(); the size argument is ignored
// on this malloc()-based path (kept for signature parity with the mmap path).
void SystemFree(void* ptr, size_t)
{
    free(ptr);
}
#else //_WIN32
#include <sys/mman.h>
// Minimal mutex wrapper over a pthread mutex (non-RAII: callers lock/unlock
// explicitly, or use AutoLock below).
struct CriticalSection
{
    CriticalSection()  { pthread_mutex_init(&mutex, NULL); }
    ~CriticalSection() { pthread_mutex_destroy(&mutex); }
    void lock()        { pthread_mutex_lock(&mutex); }
    void unlock()      { pthread_mutex_unlock(&mutex); }
    // True when the mutex was acquired without blocking.
    bool trylock()     { return 0 == pthread_mutex_trylock(&mutex); }
    pthread_mutex_t mutex;
};
// Raw page allocation for the pool: an anonymous private mapping. mmap()
// signals failure with MAP_FAILED (not NULL), hence the explicit compare.
void* SystemAlloc(size_t size)
{
#ifndef MAP_ANONYMOUS
#define MAP_ANONYMOUS MAP_ANON
#endif
    void* mem = mmap(0, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    return mem == MAP_FAILED ? OutOfMemoryError(size) : mem;
}
// Unmaps memory obtained from SystemAlloc(); `size` must equal the original
// mapping length.
void SystemFree(void* ptr, size_t size)
{
    (void)munmap(ptr, size);
}
#endif //_WIN32
// Scoped lock: holds the given CriticalSection for the object's lifetime.
struct AutoLock
{
AutoLock(CriticalSection& _cs) : cs(&_cs) { cs->lock(); }
~AutoLock() { cs->unlock(); }
CriticalSection* cs;
};
const size_t MEM_BLOCK_SIGNATURE = 0x01234567;  // magic stamped into every Block header
const int MEM_BLOCK_SHIFT = 14;
const size_t MEM_BLOCK_SIZE = 1 << MEM_BLOCK_SHIFT;  // 16KB; block size doubles as block alignment
const size_t HDR_SIZE = 128;  // bytes reserved at block start for the Block header
const size_t MAX_BLOCK_SIZE = MEM_BLOCK_SIZE - HDR_SIZE;  // largest request served from the pool
const int MAX_BIN = 28;  // binSizeTab has MAX_BIN+1 entries
// Object sizes (bytes) served by each bin; requests round up to the next entry.
static const int binSizeTab[MAX_BIN+1] =
{ 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 128, 160, 192, 256, 320, 384, 480, 544, 672, 768,
896, 1056, 1328, 1600, 2688, 4048, 5408, 8128, 16256 };
// Maps a request size (in bytes) to the index of the smallest bin whose
// object size can hold it.
struct MallocTables
{
    // Fills binIdx so binIdx[ceil(s/8)] is the smallest bin holding s bytes.
    void initBinTab()
    {
        int j = 0;
        for( int i = 0; i <= MAX_BIN; i++ )
        {
            const int last = binSizeTab[i] >> 3;
            while( j <= last )
                binIdx[j++] = (uchar)i;
        }
    }
    // Bin index for `size` bytes; size must not exceed MAX_BLOCK_SIZE.
    int bin(size_t size)
    {
        assert( size <= MAX_BLOCK_SIZE );
        return binIdx[(size + 7) >> 3];
    }
    MallocTables() { initBinTab(); }
    uchar binIdx[MAX_BLOCK_SIZE/8+1];
};
MallocTables mallocTables;
// Free-list link: a freed object's first pointer-size bytes are reused as
// this node, so free lists cost no extra memory.
struct Node
{
Node* next;
};
struct ThreadData;
// Header of one MEM_BLOCK_SIZE memory block serving equally-sized objects.
// Blocks of the same bin form a circular doubly-linked list owned by one
// thread (threadData). publicFreeList accumulates frees arriving from other
// threads and is the only field guarded by `cs`; everything else is touched
// by the owning thread only.
struct Block
{
Block(Block* _next)
{
signature = MEM_BLOCK_SIGNATURE;
prev = 0;
next = _next;
privateFreeList = publicFreeList = 0;
bumpPtr = endPtr = 0;
objSize = 0;
threadData = 0;
// Payload starts right after the fixed-size header area.
data = (uchar*)this + HDR_SIZE;
}
~Block() {}
// (Re)links the block between _prev/_next and resets it to serve objects
// of _objSize bytes for thread _threadData.
void init(Block* _prev, Block* _next, int _objSize, ThreadData* _threadData)
{
prev = _prev;
if(prev)
prev->next = this;
next = _next;
if(next)
next->prev = this;
objSize = _objSize;
binIdx = mallocTables.bin(objSize);
threadData = _threadData;
privateFreeList = publicFreeList = 0;
// Fresh objects are carved off [bumpPtr, endPtr) before free lists are
// consulted.
bumpPtr = data;
int nobjects = MAX_BLOCK_SIZE/objSize;
endPtr = bumpPtr + nobjects*objSize;
// Half-full boundary used by isFilled()/fastFree() bookkeeping.
almostEmptyThreshold = (nobjects + 1)/2;
allocated = 0;
}
bool isFilled() const { return allocated > almostEmptyThreshold; }
size_t signature;
Block* prev;
Block* next;
Node* privateFreeList;
Node* publicFreeList;
uchar* bumpPtr;
uchar* endPtr;
uchar* data;
ThreadData* threadData;
int objSize;
int binIdx;
int allocated;
int almostEmptyThreshold;
CriticalSection cs;
};
// One large system allocation (bigBlockSize bytes) carved into
// MEM_BLOCK_SIZE-aligned Blocks; BigBlocks form a singly-linked pool list.
struct BigBlock
{
BigBlock(int bigBlockSize, BigBlock* _next)
{
// Align the first Block inside the raw allocation; this alignment is
// what lets fastFree() recover a Block header by masking a user pointer.
first = alignPtr((Block*)(this+1), MEM_BLOCK_SIZE);
next = _next;
nblocks = (int)(((char*)this + bigBlockSize - (char*)first)/MEM_BLOCK_SIZE);
// Placement-construct back to front so each Block links to the next one.
Block* p = 0;
for( int i = nblocks-1; i >= 0; i-- )
p = ::new((uchar*)first + i*MEM_BLOCK_SIZE) Block(p);
}
~BigBlock()
{
for( int i = nblocks-1; i >= 0; i-- )
((Block*)((uchar*)first+i*MEM_BLOCK_SIZE))->~Block();
}
BigBlock* next;
Block* first;
int nblocks;
};
struct BlockPool
{
BlockPool(int _bigBlockSize=1<<20) : pool(0), bigBlockSize(_bigBlockSize)
{
}
~BlockPool()
{
AutoLock lock(cs);
while( pool )
{
BigBlock* nextBlock = pool->next;
pool->~BigBlock();
SystemFree(pool, bigBlockSize);
pool = nextBlock;
}
}
Block* alloc()
{
AutoLock lock(cs);
Block* block;
if( !freeBlocks )
{
BigBlock* bblock = ::new(SystemAlloc(bigBlockSize)) BigBlock(bigBlockSize, pool);
assert( bblock != 0 );
freeBlocks = bblock->first;
pool = bblock;
}
block = freeBlocks;
freeBlocks = freeBlocks->next;
if( freeBlocks )
freeBlocks->prev = 0;
STAT(stat.bruttoBytes += MEM_BLOCK_SIZE);
return block;
}
void free(Block* block)
{
AutoLock lock(cs);
block->prev = 0;
block->next = freeBlocks;
freeBlocks = block;
STAT(stat.bruttoBytes -= MEM_BLOCK_SIZE);
}
CriticalSection cs;
Block* freeBlocks;
BigBlock* pool;
int bigBlockSize;
int blocksPerBigBlock;
};
BlockPool mallocPool;
// Per-bin cursor roles within ThreadData::bins: START anchors the circular
// block list, FREE is the block tried first on allocation, GC is the scan
// cursor used to reclaim cross-thread frees.
enum { START=0, FREE=1, GC=2 };
// Thread-local allocator state: one circular Block list (plus the three
// cursors above) per size bin.
struct ThreadData
{
ThreadData() { for(int i = 0; i <= MAX_BIN; i++) bins[i][START] = bins[i][FREE] = bins[i][GC] = 0; }
~ThreadData()
{
// mark all the thread blocks as abandoned or even release them
for( int i = 0; i <= MAX_BIN; i++ )
{
Block *bin = bins[i][START], *block = bin;
bins[i][START] = bins[i][FREE] = bins[i][GC] = 0;
if( block )
{
do
{
Block* next = block->next;
int allocated = block->allocated;
{
// Detach and disown under the block lock; pending cross-thread
// frees on publicFreeList count as already released.
AutoLock lock(block->cs);
block->next = block->prev = 0;
block->threadData = 0;
Node *node = block->publicFreeList;
for( ; node != 0; node = node->next )
allocated--;
}
// Fully-free blocks go back to the global pool; the rest remain
// abandoned (threadData == 0) until another thread adopts them
// in fastFree().
if( allocated == 0 )
mallocPool.free(block);
block = next;
}
while( block != bin );
}
}
}
// Splices `block` in right after the bin's FREE cursor and makes it the
// new FREE block (fixing up the GC cursor if it pointed at `block`).
void moveBlockToFreeList( Block* block )
{
int i = block->binIdx;
Block*& freePtr = bins[i][FREE];
CV_DbgAssert( block->next->prev == block && block->prev->next == block );
if( block != freePtr )
{
Block*& gcPtr = bins[i][GC];
if( gcPtr == block )
gcPtr = block->next;
if( block->next != block )
{
block->prev->next = block->next;
block->next->prev = block->prev;
}
block->next = freePtr->next;
block->prev = freePtr;
freePtr = block->next->prev = block->prev->next = block;
}
}
Block* bins[MAX_BIN+1][3];
#ifdef _WIN32
#ifdef WINCE
# define TLS_OUT_OF_INDEXES ((DWORD)0xFFFFFFFF)
#endif //WINCE
static DWORD tlsKey;
// Lazily allocates the TLS slot and the calling thread's ThreadData.
// NOTE(review): the lazy TlsAlloc() is not protected against two threads
// racing on first use — confirm first use is serialized elsewhere.
static ThreadData* get()
{
ThreadData* data;
if( tlsKey == TLS_OUT_OF_INDEXES )
tlsKey = TlsAlloc();
data = (ThreadData*)TlsGetValue(tlsKey);
if( !data )
{
data = new ThreadData;
TlsSetValue(tlsKey, data);
}
return data;
}
#else //_WIN32
// pthread TLS destructor: frees the thread's allocator state at thread exit.
static void deleteData(void* data)
{
delete (ThreadData*)data;
}
static pthread_key_t tlsKey;
// NOTE(review): treats tlsKey == 0 as "not yet created"; pthread_key_t is
// opaque and 0 may be a valid key on some platforms — confirm. The lazy
// pthread_key_create() has the same first-use race as the Win32 path.
static ThreadData* get()
{
ThreadData* data;
if( !tlsKey )
pthread_key_create(&tlsKey, deleteData);
data = (ThreadData*)pthread_getspecific(tlsKey);
if( !data )
{
data = new ThreadData;
pthread_setspecific(tlsKey, data);
}
return data;
}
#endif //_WIN32
};
#ifdef _WIN32
DWORD ThreadData::tlsKey = TLS_OUT_OF_INDEXES;
// Called from DllMain on thread detach: Win32 TLS has no destructor
// callback, so the per-thread data must be deleted explicitly.
void deleteThreadAllocData()
{
if( ThreadData::tlsKey != TLS_OUT_OF_INDEXES )
delete (ThreadData*)TlsGetValue( ThreadData::tlsKey );
}
#else //_WIN32
pthread_key_t ThreadData::tlsKey = 0;
#endif //_WIN32
#if 0
// Debug helper: walks a bin's circular list and verifies that the FREE and
// GC cursors actually point into it; compiled out by default (the checkList
// macro below expands to nothing).
static void checkList(ThreadData* tls, int idx)
{
Block* block = tls->bins[idx][START];
if( !block )
{
CV_DbgAssert( tls->bins[idx][FREE] == 0 && tls->bins[idx][GC] == 0 );
}
else
{
bool gcInside = false;
bool freeInside = false;
do
{
if( tls->bins[idx][FREE] == block )
freeInside = true;
if( tls->bins[idx][GC] == block )
gcInside = true;
block = block->next;
}
while( block != tls->bins[idx][START] );
CV_DbgAssert( gcInside && freeInside );
}
}
#else
#define checkList(tls, idx)
#endif
// Pooled fastMalloc (dead code in this build — see CV_USE_SYSTEM_MALLOC).
// Oversized requests go straight to the system; small ones are served from
// the calling thread's per-bin block lists.
void* fastMalloc( size_t size )
{
if( size > MAX_BLOCK_SIZE )
{
// Large allocation: over-allocate so the user pointer can be aligned to
// MEM_BLOCK_SIZE, and stash the raw pointer and total size in the two
// slots before it. fastFree() keys off this alignment to tell large
// allocations apart from pooled objects.
size_t size1 = size + sizeof(uchar*)*2 + MEM_BLOCK_SIZE;
uchar* udata = (uchar*)SystemAlloc(size1);
uchar** adata = alignPtr((uchar**)udata + 2, MEM_BLOCK_SIZE);
adata[-1] = udata;
adata[-2] = (uchar*)size1;
return adata;
}
{
ThreadData* tls = ThreadData::get();
int idx = mallocTables.bin(size);
Block*& startPtr = tls->bins[idx][START];
Block*& gcPtr = tls->bins[idx][GC];
Block*& freePtr = tls->bins[idx][FREE], *block = freePtr;
checkList(tls, idx);
// Round the request up to the bin's object size.
size = binSizeTab[idx];
STAT(
stat.nettoBytes += size;
stat.mallocCalls++;
);
uchar* data = 0;
for(;;)
{
if( block )
{
// try to find non-full block
for(;;)
{
CV_DbgAssert( block->next->prev == block && block->prev->next == block );
// First choice: carve a fresh object off the bump-pointer region.
if( block->bumpPtr )
{
data = block->bumpPtr;
if( (block->bumpPtr += size) >= block->endPtr )
block->bumpPtr = 0;
break;
}
// Second choice: reuse an object from this thread's own free list.
if( block->privateFreeList )
{
data = (uchar*)block->privateFreeList;
block->privateFreeList = block->privateFreeList->next;
break;
}
if( block == startPtr )
break;
block = block->next;
}
#if 0
avg_k += _k;
avg_nk++;
if( avg_nk == 1000 )
{
printf("avg search iters per 1e3 allocs = %g\n", (double)avg_k/avg_nk );
avg_k = avg_nk = 0;
}
#endif
freePtr = block;
if( !data )
{
// Reclaim pass: inspect at most two blocks starting at the GC
// cursor for objects freed by other threads (publicFreeList).
block = gcPtr;
for( int k = 0; k < 2; k++ )
{
SANITY_CHECK(block);
CV_DbgAssert( block->next->prev == block && block->prev->next == block );
if( block->publicFreeList )
{
{
// Steal the whole public list under the block lock...
AutoLock lock(block->cs);
block->privateFreeList = block->publicFreeList;
block->publicFreeList = 0;
}
// ...then account for the reclaimed objects without the lock.
Node* node = block->privateFreeList;
for(;node != 0; node = node->next)
--block->allocated;
data = (uchar*)block->privateFreeList;
block->privateFreeList = block->privateFreeList->next;
gcPtr = block->next;
if( block->allocated+1 <= block->almostEmptyThreshold )
tls->moveBlockToFreeList(block);
break;
}
block = block->next;
}
if( !data )
gcPtr = block;
}
}
if( data )
break;
// Nothing reusable: take a fresh block from the global pool and link
// it into this bin's circular list.
block = mallocPool.alloc();
block->init(startPtr ? startPtr->prev : block, startPtr ? startPtr : block, (int)size, tls);
if( !startPtr )
startPtr = gcPtr = freePtr = block;
checkList(tls, block->binIdx);
SANITY_CHECK(block);
}
++block->allocated;
return data;
}
}
// Pooled fastFree counterpart (dead code in this build).
void fastFree( void* ptr )
{
// MEM_BLOCK_SIZE-aligned pointers are the oversized SystemAlloc()
// allocations made by fastMalloc(); NULL also lands here and is ignored.
if( ((size_t)ptr & (MEM_BLOCK_SIZE-1)) == 0 )
{
if( ptr != 0 )
{
void* origPtr = ((void**)ptr)[-1];
size_t sz = (size_t)((void**)ptr)[-2];
SystemFree( origPtr, sz );
}
return;
}
{
ThreadData* tls = ThreadData::get();
Node* node = (Node*)ptr;
// Recover the owning Block header by masking the pointer down to the
// block boundary.
Block* block = (Block*)((size_t)ptr & -(int)MEM_BLOCK_SIZE);
assert( block->signature == MEM_BLOCK_SIGNATURE );
if( block->threadData == tls )
{
// Same-thread free: no locking needed on the private free list.
STAT(
stat.nettoBytes -= block->objSize;
stat.freeCalls++;
float ratio = (float)stat.nettoBytes/stat.bruttoBytes;
if( stat.minUsageRatio > ratio )
stat.minUsageRatio = ratio;
);
SANITY_CHECK(block);
bool prevFilled = block->isFilled();
--block->allocated;
// React only when the block just dropped below the half-full threshold
// or became completely empty.
if( !block->isFilled() && (block->allocated == 0 || prevFilled) )
{
if( block->allocated == 0 )
{
// Unlink the now-empty block from its bin list and return it to
// the global pool.
int idx = block->binIdx;
Block*& startPtr = tls->bins[idx][START];
Block*& freePtr = tls->bins[idx][FREE];
Block*& gcPtr = tls->bins[idx][GC];
if( block == block->next )
{
CV_DbgAssert( startPtr == block && freePtr == block && gcPtr == block );
startPtr = freePtr = gcPtr = 0;
}
else
{
if( freePtr == block )
freePtr = block->next;
if( gcPtr == block )
gcPtr = block->next;
if( startPtr == block )
startPtr = block->next;
block->prev->next = block->next;
block->next->prev = block->prev;
}
mallocPool.free(block);
checkList(tls, idx);
return;
}
tls->moveBlockToFreeList(block);
}
node->next = block->privateFreeList;
block->privateFreeList = node;
}
else
{
// Cross-thread free: push onto the block's locked public list.
AutoLock lock(block->cs);
SANITY_CHECK(block);
node->next = block->publicFreeList;
block->publicFreeList = node;
if( block->threadData == 0 )
{
// take ownership of the abandoned block.
// note that it can happen at the same time as
// ThreadData::deleteData() marks the blocks as abandoned,
// so this part of the algorithm needs to be checked for data races
int idx = block->binIdx;
block->threadData = tls;
Block*& startPtr = tls->bins[idx][START];
if( startPtr )
{
block->next = startPtr;
block->prev = startPtr->prev;
block->next->prev = block->prev->next = block;
}
else
startPtr = tls->bins[idx][FREE] = tls->bins[idx][GC] = block;
}
}
}
}
#endif //CV_USE_SYSTEM_MALLOC
}
} // namespace
CV_IMPL void* cvAlloc( size_t size )
{
@ -704,5 +108,4 @@ CV_IMPL void cvFree_( void* ptr )
cv::fastFree( ptr );
}
/* End of file. */

View File

@ -148,10 +148,6 @@ BinaryFunc getCopyMaskFunc(size_t esz);
/* maximal average node_count/hash_size ratio beyond which hash table is resized */
#define CV_SPARSE_HASH_RATIO 3
#if defined _WIN32
// Frees the calling thread's allocator TLS data (defined in alloc.cpp);
// invoked from DllMain on thread detach.
void deleteThreadAllocData();
#endif
inline Size getContinuousSize_( int flags, int cols, int rows, int widthScale )
{
int64 sz = (int64)cols * rows * widthScale;

View File

@ -1532,7 +1532,6 @@ BOOL WINAPI DllMain(HINSTANCE, DWORD fdwReason, LPVOID lpReserved)
{
// Not allowed to free resources if lpReserved is non-null
// http://msdn.microsoft.com/en-us/library/windows/desktop/ms682583.aspx
cv::deleteThreadAllocData();
cv::getTlsStorage().releaseThread();
}
}