mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2024-11-23 18:49:08 +08:00
commit
5b14121449
@ -17,8 +17,8 @@
|
||||
#define _CRT_SECURE_NO_WARNINGS
|
||||
#endif
|
||||
|
||||
#include <cstdlib>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
|
||||
#ifdef __APPLE__
|
||||
|
@ -15,8 +15,8 @@
|
||||
#endif
|
||||
#include <float.h>
|
||||
|
||||
#include "openclwrapper.h"
|
||||
#include "oclkernels.h"
|
||||
#include "openclwrapper.h"
|
||||
|
||||
// for micro-benchmark
|
||||
#include "otsuthr.h"
|
||||
@ -627,8 +627,7 @@ static void populateGPUEnvFromDevice(GPUEnv *gpuInfo, cl_device_id device) {
|
||||
CHECK_OPENCL(clStatus, "populateGPUEnv::createCommandQueue");
|
||||
}
|
||||
|
||||
int OpenclDevice::LoadOpencl()
|
||||
{
|
||||
int OpenclDevice::LoadOpencl() {
|
||||
#ifdef WIN32
|
||||
HINSTANCE HOpenclDll = nullptr;
|
||||
void* OpenclDll = nullptr;
|
||||
@ -644,8 +643,7 @@ int OpenclDevice::LoadOpencl()
|
||||
#endif
|
||||
return 1;
|
||||
}
|
||||
int OpenclDevice::SetKernelEnv( KernelEnv *envInfo )
|
||||
{
|
||||
int OpenclDevice::SetKernelEnv(KernelEnv* envInfo) {
|
||||
envInfo->mpkContext = gpuEnv.mpContext;
|
||||
envInfo->mpkCmdQueue = gpuEnv.mpCmdQueue;
|
||||
envInfo->mpkProgram = gpuEnv.mpArryPrograms[0];
|
||||
@ -653,9 +651,9 @@ int OpenclDevice::SetKernelEnv( KernelEnv *envInfo )
|
||||
return 1;
|
||||
}
|
||||
|
||||
static cl_mem allocateZeroCopyBuffer(const KernelEnv &rEnv, l_uint32 *hostbuffer,
|
||||
size_t nElements, cl_mem_flags flags,
|
||||
cl_int *pStatus) {
|
||||
static cl_mem allocateZeroCopyBuffer(const KernelEnv& rEnv,
|
||||
l_uint32* hostbuffer, size_t nElements,
|
||||
cl_mem_flags flags, cl_int* pStatus) {
|
||||
cl_mem membuffer =
|
||||
clCreateBuffer(rEnv.mpkContext, (cl_mem_flags)(flags),
|
||||
nElements * sizeof(l_uint32), hostbuffer, pStatus);
|
||||
@ -697,8 +695,7 @@ static Pix *mapOutputCLBuffer(const KernelEnv &rEnv, cl_mem clbuffer, Pix *pixd,
|
||||
return pixd;
|
||||
}
|
||||
|
||||
void OpenclDevice::releaseMorphCLBuffers()
|
||||
{
|
||||
void OpenclDevice::releaseMorphCLBuffers() {
|
||||
if (pixdCLIntermediate != nullptr) clReleaseMemObject(pixdCLIntermediate);
|
||||
if (pixsCLBuffer != nullptr) clReleaseMemObject(pixsCLBuffer);
|
||||
if (pixdCLBuffer != nullptr) clReleaseMemObject(pixdCLBuffer);
|
||||
@ -736,15 +733,12 @@ int OpenclDevice::initMorphCLAllocations(l_int32 wpl, l_int32 h, Pix *pixs) {
|
||||
return (int)clStatus;
|
||||
}
|
||||
|
||||
int OpenclDevice::InitEnv()
|
||||
{
|
||||
int OpenclDevice::InitEnv() {
|
||||
// PERF_COUNT_START("OD::InitEnv")
|
||||
// printf("[OD] OpenclDevice::InitEnv()\n");
|
||||
#ifdef SAL_WIN32
|
||||
while( 1 )
|
||||
{
|
||||
if( 1 == LoadOpencl() )
|
||||
break;
|
||||
while (1) {
|
||||
if (1 == LoadOpencl()) break;
|
||||
}
|
||||
PERF_COUNT_SUB("LoadOpencl")
|
||||
#endif
|
||||
@ -756,8 +750,7 @@ PERF_COUNT_SUB("LoadOpencl")
|
||||
return 1;
|
||||
}
|
||||
|
||||
int OpenclDevice::ReleaseOpenclRunEnv()
|
||||
{
|
||||
int OpenclDevice::ReleaseOpenclRunEnv() {
|
||||
ReleaseOpenclEnv(&gpuEnv);
|
||||
#ifdef SAL_WIN32
|
||||
FreeOpenclDll();
|
||||
@ -765,8 +758,7 @@ int OpenclDevice::ReleaseOpenclRunEnv()
|
||||
return 1;
|
||||
}
|
||||
|
||||
inline int OpenclDevice::AddKernelConfig( int kCount, const char *kName )
|
||||
{
|
||||
inline int OpenclDevice::AddKernelConfig(int kCount, const char* kName) {
|
||||
ASSERT_HOST(kCount > 0);
|
||||
ASSERT_HOST(strlen(kName) < sizeof(gpuEnv.mArrykernelNames[kCount - 1]));
|
||||
strcpy(gpuEnv.mArrykernelNames[kCount - 1], kName);
|
||||
@ -774,10 +766,8 @@ inline int OpenclDevice::AddKernelConfig( int kCount, const char *kName )
|
||||
return 0;
|
||||
}
|
||||
|
||||
int OpenclDevice::RegistOpenclKernel()
|
||||
{
|
||||
if ( !gpuEnv.mnIsUserCreated )
|
||||
memset( &gpuEnv, 0, sizeof(gpuEnv) );
|
||||
int OpenclDevice::RegistOpenclKernel() {
|
||||
if (!gpuEnv.mnIsUserCreated) memset(&gpuEnv, 0, sizeof(gpuEnv));
|
||||
|
||||
gpuEnv.mnFileCount = 0; // argc;
|
||||
gpuEnv.mnKernelCount = 0UL;
|
||||
@ -795,7 +785,8 @@ int OpenclDevice::InitOpenclRunEnv_DeviceSelection( int argc ) {
|
||||
cl_device_id bestDevice = bestDevice_DS.oclDeviceID;
|
||||
// overwrite global static GPUEnv with new device
|
||||
if (selectedDeviceIsOpenCL()) {
|
||||
//printf("[DS] InitOpenclRunEnv_DS::Calling populateGPUEnvFromDevice() for selected device\n");
|
||||
// printf("[DS] InitOpenclRunEnv_DS::Calling populateGPUEnvFromDevice()
|
||||
// for selected device\n");
|
||||
populateGPUEnvFromDevice(&gpuEnv, bestDevice);
|
||||
gpuEnv.mnFileCount = 0; // argc;
|
||||
gpuEnv.mnKernelCount = 0UL;
|
||||
@ -803,7 +794,8 @@ int OpenclDevice::InitOpenclRunEnv_DeviceSelection( int argc ) {
|
||||
CompileKernelFile(&gpuEnv, "");
|
||||
// PERF_COUNT_SUB("CompileKernelFile")
|
||||
} else {
|
||||
//printf("[DS] InitOpenclRunEnv_DS::Skipping populateGPUEnvFromDevice() b/c native cpu selected\n");
|
||||
// printf("[DS] InitOpenclRunEnv_DS::Skipping populateGPUEnvFromDevice()
|
||||
// b/c native cpu selected\n");
|
||||
}
|
||||
isInited = 1;
|
||||
}
|
||||
@ -811,43 +803,34 @@ int OpenclDevice::InitOpenclRunEnv_DeviceSelection( int argc ) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
OpenclDevice::OpenclDevice()
|
||||
{
|
||||
OpenclDevice::OpenclDevice() {
|
||||
// InitEnv();
|
||||
}
|
||||
|
||||
OpenclDevice::~OpenclDevice()
|
||||
{
|
||||
OpenclDevice::~OpenclDevice() {
|
||||
// ReleaseOpenclRunEnv();
|
||||
}
|
||||
|
||||
int OpenclDevice::ReleaseOpenclEnv( GPUEnv *gpuInfo )
|
||||
{
|
||||
int OpenclDevice::ReleaseOpenclEnv(GPUEnv* gpuInfo) {
|
||||
int i = 0;
|
||||
int clStatus = 0;
|
||||
|
||||
if ( !isInited )
|
||||
{
|
||||
if (!isInited) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
for ( i = 0; i < gpuEnv.mnFileCount; i++ )
|
||||
{
|
||||
if ( gpuEnv.mpArryPrograms[i] )
|
||||
{
|
||||
for (i = 0; i < gpuEnv.mnFileCount; i++) {
|
||||
if (gpuEnv.mpArryPrograms[i]) {
|
||||
clStatus = clReleaseProgram(gpuEnv.mpArryPrograms[i]);
|
||||
CHECK_OPENCL(clStatus, "clReleaseProgram");
|
||||
gpuEnv.mpArryPrograms[i] = nullptr;
|
||||
}
|
||||
}
|
||||
if ( gpuEnv.mpCmdQueue )
|
||||
{
|
||||
if (gpuEnv.mpCmdQueue) {
|
||||
clReleaseCommandQueue(gpuEnv.mpCmdQueue);
|
||||
gpuEnv.mpCmdQueue = nullptr;
|
||||
}
|
||||
if ( gpuEnv.mpContext )
|
||||
{
|
||||
if (gpuEnv.mpContext) {
|
||||
clReleaseContext(gpuEnv.mpContext);
|
||||
gpuEnv.mpContext = nullptr;
|
||||
}
|
||||
@ -856,8 +839,7 @@ int OpenclDevice::ReleaseOpenclEnv( GPUEnv *gpuInfo )
|
||||
delete[] gpuInfo->mpArryDevsID;
|
||||
return 1;
|
||||
}
|
||||
int OpenclDevice::BinaryGenerated( const char * clFileName, FILE ** fhandle )
|
||||
{
|
||||
int OpenclDevice::BinaryGenerated(const char* clFileName, FILE** fhandle) {
|
||||
unsigned int i = 0;
|
||||
cl_int clStatus;
|
||||
int status = 0;
|
||||
@ -879,15 +861,12 @@ int OpenclDevice::BinaryGenerated( const char * clFileName, FILE ** fhandle )
|
||||
*fhandle = fd;
|
||||
}
|
||||
return status;
|
||||
|
||||
}
|
||||
int OpenclDevice::CachedOfKernerPrg( const GPUEnv *gpuEnvCached, const char * clFileName )
|
||||
{
|
||||
int OpenclDevice::CachedOfKernerPrg(const GPUEnv* gpuEnvCached,
|
||||
const char* clFileName) {
|
||||
int i;
|
||||
for ( i = 0; i < gpuEnvCached->mnFileCount; i++ )
|
||||
{
|
||||
if ( strcasecmp( gpuEnvCached->mArryKnelSrcFile[i], clFileName ) == 0 )
|
||||
{
|
||||
for (i = 0; i < gpuEnvCached->mnFileCount; i++) {
|
||||
if (strcasecmp(gpuEnvCached->mArryKnelSrcFile[i], clFileName) == 0) {
|
||||
if (gpuEnvCached->mpArryPrograms[i] != nullptr) {
|
||||
return 1;
|
||||
}
|
||||
@ -896,8 +875,8 @@ int OpenclDevice::CachedOfKernerPrg( const GPUEnv *gpuEnvCached, const char * cl
|
||||
|
||||
return 0;
|
||||
}
|
||||
int OpenclDevice::WriteBinaryToFile( const char* fileName, const char* birary, size_t numBytes )
|
||||
{
|
||||
int OpenclDevice::WriteBinaryToFile(const char* fileName, const char* birary,
|
||||
size_t numBytes) {
|
||||
FILE* output = nullptr;
|
||||
output = fopen(fileName, "wb");
|
||||
if (output == nullptr) {
|
||||
@ -908,10 +887,9 @@ int OpenclDevice::WriteBinaryToFile( const char* fileName, const char* birary, s
|
||||
fclose(output);
|
||||
|
||||
return 1;
|
||||
|
||||
}
|
||||
int OpenclDevice::GeneratBinFromKernelSource( cl_program program, const char * clFileName )
|
||||
{
|
||||
int OpenclDevice::GeneratBinFromKernelSource(cl_program program,
|
||||
const char* clFileName) {
|
||||
unsigned int i = 0;
|
||||
cl_int clStatus;
|
||||
size_t* binarySizes;
|
||||
@ -944,33 +922,27 @@ int OpenclDevice::GeneratBinFromKernelSource( cl_program program, const char * c
|
||||
/* copy over all of the generated binaries. */
|
||||
std::vector<char*> binaries(numDevices);
|
||||
|
||||
for ( i = 0; i < numDevices; i++ )
|
||||
{
|
||||
if ( binarySizes[i] != 0 )
|
||||
{
|
||||
for (i = 0; i < numDevices; i++) {
|
||||
if (binarySizes[i] != 0) {
|
||||
binaries[i] = (char*)malloc(sizeof(char) * binarySizes[i]);
|
||||
if (binaries[i] == nullptr) {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
} else {
|
||||
binaries[i] = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
clStatus = clGetProgramInfo(program, CL_PROGRAM_BINARIES,
|
||||
sizeof(char *) * numDevices,
|
||||
clStatus =
|
||||
clGetProgramInfo(program, CL_PROGRAM_BINARIES, sizeof(char*) * numDevices,
|
||||
&binaries[0], nullptr);
|
||||
CHECK_OPENCL(clStatus, "clGetProgramInfo");
|
||||
|
||||
/* dump out each binary into its own separate file. */
|
||||
for ( i = 0; i < numDevices; i++ )
|
||||
{
|
||||
for (i = 0; i < numDevices; i++) {
|
||||
char fileName[256] = {0}, cl_name[128] = {0};
|
||||
|
||||
if ( binarySizes[i] != 0 )
|
||||
{
|
||||
if (binarySizes[i] != 0) {
|
||||
char deviceName[1024];
|
||||
clStatus = clGetDeviceInfo(mpArryDevsID[i], CL_DEVICE_NAME,
|
||||
sizeof(deviceName), deviceName, nullptr);
|
||||
@ -981,8 +953,7 @@ int OpenclDevice::GeneratBinFromKernelSource( cl_program program, const char * c
|
||||
cl_name[str - clFileName] = '\0';
|
||||
sprintf(fileName, "%s-%s.bin", cl_name, deviceName);
|
||||
legalizeFileName(fileName);
|
||||
if ( !WriteBinaryToFile( fileName, binaries[i], binarySizes[i] ) )
|
||||
{
|
||||
if (!WriteBinaryToFile(fileName, binaries[i], binarySizes[i])) {
|
||||
printf("[OD] write binary[%s] failed\n", fileName);
|
||||
return 0;
|
||||
} // else
|
||||
@ -991,8 +962,7 @@ int OpenclDevice::GeneratBinFromKernelSource( cl_program program, const char * c
|
||||
}
|
||||
|
||||
// Release all resources and memory
|
||||
for ( i = 0; i < numDevices; i++ )
|
||||
{
|
||||
for (i = 0; i < numDevices; i++) {
|
||||
free(binaries[i]);
|
||||
}
|
||||
|
||||
@ -1005,8 +975,7 @@ int OpenclDevice::GeneratBinFromKernelSource( cl_program program, const char * c
|
||||
return 1;
|
||||
}
|
||||
|
||||
int OpenclDevice::CompileKernelFile( GPUEnv *gpuInfo, const char *buildOption )
|
||||
{
|
||||
int OpenclDevice::CompileKernelFile(GPUEnv* gpuInfo, const char* buildOption) {
|
||||
// PERF_COUNT_START("CompileKernelFile")
|
||||
cl_int clStatus = 0;
|
||||
size_t length;
|
||||
@ -1018,8 +987,7 @@ int OpenclDevice::CompileKernelFile( GPUEnv *gpuInfo, const char *buildOption )
|
||||
FILE *fd, *fd1;
|
||||
const char* filename = "kernel.cl";
|
||||
// fprintf(stderr, "[OD] CompileKernelFile ... \n");
|
||||
if ( CachedOfKernerPrg(gpuInfo, filename) == 1 )
|
||||
{
|
||||
if (CachedOfKernerPrg(gpuInfo, filename) == 1) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
@ -1029,10 +997,10 @@ int OpenclDevice::CompileKernelFile( GPUEnv *gpuInfo, const char *buildOption )
|
||||
|
||||
source_size[0] = strlen(source);
|
||||
binaryExisted = 0;
|
||||
binaryExisted = BinaryGenerated( filename, &fd ); // don't check for binary during microbenchmark
|
||||
binaryExisted = BinaryGenerated(
|
||||
filename, &fd); // don't check for binary during microbenchmark
|
||||
// PERF_COUNT_SUB("BinaryGenerated")
|
||||
if ( binaryExisted == 1 )
|
||||
{
|
||||
if (binaryExisted == 1) {
|
||||
clStatus = clGetContextInfo(gpuInfo->mpContext, CL_CONTEXT_NUM_DEVICES,
|
||||
sizeof(numDevices), &numDevices, nullptr);
|
||||
CHECK_OPENCL(clStatus, "clGetContextInfo");
|
||||
@ -1044,8 +1012,7 @@ int OpenclDevice::CompileKernelFile( GPUEnv *gpuInfo, const char *buildOption )
|
||||
b_error |= fseek(fd, 0, SEEK_END) < 0;
|
||||
b_error |= (length = ftell(fd)) <= 0;
|
||||
b_error |= fseek(fd, 0, SEEK_SET) < 0;
|
||||
if ( b_error )
|
||||
{
|
||||
if (b_error) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -1065,19 +1032,17 @@ int OpenclDevice::CompileKernelFile( GPUEnv *gpuInfo, const char *buildOption )
|
||||
// PERF_COUNT_SUB("get devices")
|
||||
// fprintf(stderr, "[OD] Create kernel from binary\n");
|
||||
const uint8_t* c_binary = &binary[0];
|
||||
gpuInfo->mpArryPrograms[idx] = clCreateProgramWithBinary( gpuInfo->mpContext,numDevices,
|
||||
&mpArryDevsID[0], &length, &c_binary,
|
||||
gpuInfo->mpArryPrograms[idx] = clCreateProgramWithBinary(
|
||||
gpuInfo->mpContext, numDevices, &mpArryDevsID[0], &length, &c_binary,
|
||||
&binary_status, &clStatus);
|
||||
CHECK_OPENCL(clStatus, "clCreateProgramWithBinary");
|
||||
// PERF_COUNT_SUB("clCreateProgramWithBinary")
|
||||
// PERF_COUNT_SUB("binaryExisted")
|
||||
}
|
||||
else
|
||||
{
|
||||
} else {
|
||||
// create a CL program using the kernel source
|
||||
// fprintf(stderr, "[OD] Create kernel from source\n");
|
||||
gpuInfo->mpArryPrograms[idx] = clCreateProgramWithSource( gpuInfo->mpContext, 1, &source,
|
||||
source_size, &clStatus);
|
||||
gpuInfo->mpArryPrograms[idx] = clCreateProgramWithSource(
|
||||
gpuInfo->mpContext, 1, &source, source_size, &clStatus);
|
||||
CHECK_OPENCL(clStatus, "clCreateProgramWithSource");
|
||||
// PERF_COUNT_SUB("!binaryExisted")
|
||||
}
|
||||
@ -1090,38 +1055,30 @@ int OpenclDevice::CompileKernelFile( GPUEnv *gpuInfo, const char *buildOption )
|
||||
// create a cl program executable for all the devices specified
|
||||
// printf("[OD] BuildProgram.\n");
|
||||
PERF_COUNT_START("OD::CompileKernel::clBuildProgram")
|
||||
if (!gpuInfo->mnIsUserCreated)
|
||||
{
|
||||
if (!gpuInfo->mnIsUserCreated) {
|
||||
clStatus =
|
||||
clBuildProgram(gpuInfo->mpArryPrograms[idx], 1, gpuInfo->mpArryDevsID,
|
||||
buildOption, nullptr, nullptr);
|
||||
// PERF_COUNT_SUB("clBuildProgram notUserCreated")
|
||||
}
|
||||
else
|
||||
{
|
||||
} else {
|
||||
clStatus =
|
||||
clBuildProgram(gpuInfo->mpArryPrograms[idx], 1, &(gpuInfo->mpDevID),
|
||||
buildOption, nullptr, nullptr);
|
||||
// PERF_COUNT_SUB("clBuildProgram isUserCreated")
|
||||
}
|
||||
PERF_COUNT_END
|
||||
if ( clStatus != CL_SUCCESS )
|
||||
{
|
||||
if (clStatus != CL_SUCCESS) {
|
||||
printf("BuildProgram error!\n");
|
||||
if ( !gpuInfo->mnIsUserCreated )
|
||||
{
|
||||
if (!gpuInfo->mnIsUserCreated) {
|
||||
clStatus = clGetProgramBuildInfo(
|
||||
gpuInfo->mpArryPrograms[idx], gpuInfo->mpArryDevsID[0],
|
||||
CL_PROGRAM_BUILD_LOG, 0, nullptr, &length);
|
||||
}
|
||||
else
|
||||
{
|
||||
clStatus = clGetProgramBuildInfo(
|
||||
gpuInfo->mpArryPrograms[idx], gpuInfo->mpDevID,
|
||||
} else {
|
||||
clStatus =
|
||||
clGetProgramBuildInfo(gpuInfo->mpArryPrograms[idx], gpuInfo->mpDevID,
|
||||
CL_PROGRAM_BUILD_LOG, 0, nullptr, &length);
|
||||
}
|
||||
if ( clStatus != CL_SUCCESS )
|
||||
{
|
||||
if (clStatus != CL_SUCCESS) {
|
||||
printf("opencl create build log fail\n");
|
||||
return 0;
|
||||
}
|
||||
@ -1129,18 +1086,16 @@ PERF_COUNT_END
|
||||
if (buildLog == (char*)nullptr) {
|
||||
return 0;
|
||||
}
|
||||
if ( !gpuInfo->mnIsUserCreated )
|
||||
{
|
||||
clStatus = clGetProgramBuildInfo( gpuInfo->mpArryPrograms[idx], gpuInfo->mpArryDevsID[0],
|
||||
if (!gpuInfo->mnIsUserCreated) {
|
||||
clStatus = clGetProgramBuildInfo(
|
||||
gpuInfo->mpArryPrograms[idx], gpuInfo->mpArryDevsID[0],
|
||||
CL_PROGRAM_BUILD_LOG, length, buildLog, &length);
|
||||
} else {
|
||||
clStatus = clGetProgramBuildInfo(gpuInfo->mpArryPrograms[idx],
|
||||
gpuInfo->mpDevID, CL_PROGRAM_BUILD_LOG,
|
||||
length, buildLog, &length);
|
||||
}
|
||||
else
|
||||
{
|
||||
clStatus = clGetProgramBuildInfo( gpuInfo->mpArryPrograms[idx], gpuInfo->mpDevID,
|
||||
CL_PROGRAM_BUILD_LOG, length, buildLog, &length );
|
||||
}
|
||||
if ( clStatus != CL_SUCCESS )
|
||||
{
|
||||
if (clStatus != CL_SUCCESS) {
|
||||
printf("opencl program build info fail\n");
|
||||
return 0;
|
||||
}
|
||||
@ -1168,8 +1123,9 @@ PERF_COUNT_END
|
||||
return 1;
|
||||
}
|
||||
|
||||
l_uint32* OpenclDevice::pixReadFromTiffKernel(l_uint32 *tiffdata,l_int32 w,l_int32 h,l_int32 wpl,l_uint32 *line)
|
||||
{
|
||||
l_uint32* OpenclDevice::pixReadFromTiffKernel(l_uint32* tiffdata, l_int32 w,
|
||||
l_int32 h, l_int32 wpl,
|
||||
l_uint32* line) {
|
||||
PERF_COUNT_START("pixReadFromTiffKernel")
|
||||
cl_int clStatus;
|
||||
KernelEnv rEnv;
|
||||
@ -1190,12 +1146,15 @@ PERF_COUNT_START("pixReadFromTiffKernel")
|
||||
SetKernelEnv(&rEnv);
|
||||
|
||||
l_uint32* pResult = (l_uint32*)malloc(w * h * sizeof(l_uint32));
|
||||
rEnv.mpkKernel = clCreateKernel( rEnv.mpkProgram, "composeRGBPixel", &clStatus );
|
||||
rEnv.mpkKernel =
|
||||
clCreateKernel(rEnv.mpkProgram, "composeRGBPixel", &clStatus);
|
||||
CHECK_OPENCL(clStatus, "clCreateKernel composeRGBPixel");
|
||||
|
||||
// Allocate input and output OCL buffers
|
||||
valuesCl = allocateZeroCopyBuffer(rEnv, tiffdata, w*h, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, &clStatus);
|
||||
outputCl = allocateZeroCopyBuffer(rEnv, pResult, w*h, CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR, &clStatus);
|
||||
valuesCl = allocateZeroCopyBuffer(
|
||||
rEnv, tiffdata, w * h, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, &clStatus);
|
||||
outputCl = allocateZeroCopyBuffer(
|
||||
rEnv, pResult, w * h, CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR, &clStatus);
|
||||
|
||||
// Kernel arguments
|
||||
clStatus = clSetKernelArg(rEnv.mpkKernel, 0, sizeof(cl_mem), &valuesCl);
|
||||
@ -1217,9 +1176,9 @@ clStatus =
|
||||
CHECK_OPENCL(clStatus, "clEnqueueNDRangeKernel");
|
||||
|
||||
/* map results back from gpu */
|
||||
void *ptr = clEnqueueMapBuffer(rEnv.mpkCmdQueue, outputCl, CL_TRUE, CL_MAP_READ,
|
||||
0, w * h * sizeof(l_uint32), 0, nullptr, nullptr,
|
||||
&clStatus);
|
||||
void* ptr = clEnqueueMapBuffer(rEnv.mpkCmdQueue, outputCl, CL_TRUE,
|
||||
CL_MAP_READ, 0, w * h * sizeof(l_uint32), 0,
|
||||
nullptr, nullptr, &clStatus);
|
||||
CHECK_OPENCL(clStatus, "clEnqueueMapBuffer outputCl");
|
||||
clEnqueueUnmapMemObject(rEnv.mpkCmdQueue, outputCl, ptr, 0, nullptr, nullptr);
|
||||
|
||||
@ -1230,7 +1189,8 @@ PERF_COUNT_END
|
||||
return pResult;
|
||||
}
|
||||
|
||||
//Morphology Dilate operation for 5x5 structuring element. Invokes the relevant OpenCL kernels
|
||||
// Morphology Dilate operation for 5x5 structuring element. Invokes the relevant
|
||||
// OpenCL kernels
|
||||
static cl_int pixDilateCL_55(l_int32 wpl, l_int32 h) {
|
||||
size_t globalThreads[2];
|
||||
cl_mem pixtemp;
|
||||
@ -1286,7 +1246,8 @@ static cl_int pixDilateCL_55(l_int32 wpl, l_int32 h) {
|
||||
return status;
|
||||
}
|
||||
|
||||
//Morphology Erode operation for 5x5 structuring element. Invokes the relevant OpenCL kernels
|
||||
// Morphology Erode operation for 5x5 structuring element. Invokes the relevant
|
||||
// OpenCL kernels
|
||||
static cl_int pixErodeCL_55(l_int32 wpl, l_int32 h) {
|
||||
size_t globalThreads[2];
|
||||
cl_mem pixtemp;
|
||||
@ -1624,7 +1585,8 @@ static cl_int pixSubtractCL_work(l_uint32 wpl, l_uint32 h, cl_mem buffer1,
|
||||
}
|
||||
|
||||
// OpenCL implementation of Get Lines from pix function
|
||||
//Note: Assumes the source and dest opencl buffer are initialized. No check done
|
||||
// Note: Assumes the source and dest opencl buffer are initialized. No check
|
||||
// done
|
||||
void OpenclDevice::pixGetLinesCL(Pix* pixd, Pix* pixs, Pix** pix_vline,
|
||||
Pix** pix_hline, Pix** pixClosed,
|
||||
bool getpixClosed, l_int32 close_hsize,
|
||||
@ -1781,19 +1743,23 @@ int OpenclDevice::HistogramRectOCL(unsigned char *imageData,
|
||||
CHECK_OPENCL(clStatus,
|
||||
"clCreateKernel kernel_HistogramRectOneChannelReduction");
|
||||
} else {
|
||||
histKern.mpkKernel = clCreateKernel( histKern.mpkProgram, "kernel_HistogramRectAllChannels", &clStatus );
|
||||
histKern.mpkKernel = clCreateKernel(
|
||||
histKern.mpkProgram, "kernel_HistogramRectAllChannels", &clStatus);
|
||||
CHECK_OPENCL(clStatus, "clCreateKernel kernel_HistogramRectAllChannels");
|
||||
|
||||
histRedKern.mpkKernel = clCreateKernel( histRedKern.mpkProgram, "kernel_HistogramRectAllChannelsReduction", &clStatus );
|
||||
CHECK_OPENCL( clStatus, "clCreateKernel kernel_HistogramRectAllChannelsReduction");
|
||||
histRedKern.mpkKernel =
|
||||
clCreateKernel(histRedKern.mpkProgram,
|
||||
"kernel_HistogramRectAllChannelsReduction", &clStatus);
|
||||
CHECK_OPENCL(clStatus,
|
||||
"clCreateKernel kernel_HistogramRectAllChannelsReduction");
|
||||
}
|
||||
|
||||
void* ptr;
|
||||
|
||||
// Initialize tmpHistogramBuffer buffer
|
||||
ptr = clEnqueueMapBuffer(
|
||||
histKern.mpkCmdQueue, tmpHistogramBuffer, CL_TRUE, CL_MAP_WRITE, 0,
|
||||
tmpHistogramBins * sizeof(cl_uint), 0, nullptr, nullptr, &clStatus);
|
||||
ptr = clEnqueueMapBuffer(histKern.mpkCmdQueue, tmpHistogramBuffer, CL_TRUE,
|
||||
CL_MAP_WRITE, 0, tmpHistogramBins * sizeof(cl_uint),
|
||||
0, nullptr, nullptr, &clStatus);
|
||||
CHECK_OPENCL(clStatus, "clEnqueueMapBuffer tmpHistogramBuffer");
|
||||
|
||||
memset(ptr, 0, tmpHistogramBins * sizeof(cl_uint));
|
||||
@ -1805,8 +1771,7 @@ int OpenclDevice::HistogramRectOCL(unsigned char *imageData,
|
||||
clSetKernelArg(histKern.mpkKernel, 0, sizeof(cl_mem), &imageBuffer);
|
||||
CHECK_OPENCL(clStatus, "clSetKernelArg imageBuffer");
|
||||
cl_uint numPixels = width * height;
|
||||
clStatus =
|
||||
clSetKernelArg(histKern.mpkKernel, 1, sizeof(cl_uint), &numPixels);
|
||||
clStatus = clSetKernelArg(histKern.mpkKernel, 1, sizeof(cl_uint), &numPixels);
|
||||
CHECK_OPENCL(clStatus, "clSetKernelArg numPixels");
|
||||
clStatus = clSetKernelArg(histKern.mpkKernel, 2, sizeof(cl_mem),
|
||||
&tmpHistogramBuffer);
|
||||
@ -1826,8 +1791,8 @@ int OpenclDevice::HistogramRectOCL(unsigned char *imageData,
|
||||
/* launch histogram */
|
||||
PERF_COUNT_SUB("before")
|
||||
clStatus = clEnqueueNDRangeKernel(histKern.mpkCmdQueue, histKern.mpkKernel, 1,
|
||||
nullptr, global_work_size, local_work_size, 0,
|
||||
nullptr, nullptr);
|
||||
nullptr, global_work_size, local_work_size,
|
||||
0, nullptr, nullptr);
|
||||
CHECK_OPENCL(clStatus,
|
||||
"clEnqueueNDRangeKernel kernel_HistogramRectAllChannels");
|
||||
clFinish(histKern.mpkCmdQueue);
|
||||
@ -1838,7 +1803,9 @@ if (clStatus != 0) {
|
||||
clStatus = clEnqueueNDRangeKernel(
|
||||
histRedKern.mpkCmdQueue, histRedKern.mpkKernel, 1, nullptr,
|
||||
red_global_work_size, local_work_size, 0, nullptr, nullptr);
|
||||
CHECK_OPENCL( clStatus, "clEnqueueNDRangeKernel kernel_HistogramRectAllChannelsReduction" );
|
||||
CHECK_OPENCL(
|
||||
clStatus,
|
||||
"clEnqueueNDRangeKernel kernel_HistogramRectAllChannelsReduction");
|
||||
clFinish(histRedKern.mpkCmdQueue);
|
||||
if (clStatus != 0) {
|
||||
retVal = -1;
|
||||
@ -1982,8 +1949,6 @@ int OpenclDevice::ThresholdRectToPixOCL(unsigned char *imageData,
|
||||
return retVal;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/******************************************************************************
|
||||
* Data Types for Device Selection
|
||||
*****************************************************************************/
|
||||
@ -2108,7 +2073,9 @@ static double composeRGBPixelMicroBench(GPUEnv *env,
|
||||
timespec time_funct_start, time_funct_end;
|
||||
#endif
|
||||
// input data
|
||||
l_uint32 *tiffdata = (l_uint32 *)input.imageData;// same size and random data; data doesn't change workload
|
||||
l_uint32* tiffdata =
|
||||
(l_uint32*)input.imageData; // same size and random data; data doesn't
|
||||
// change workload
|
||||
|
||||
// function call
|
||||
if (type == DS_DEVICE_OPENCL_DEVICE) {
|
||||
@ -2126,13 +2093,15 @@ static double composeRGBPixelMicroBench(GPUEnv *env,
|
||||
wpl, nullptr);
|
||||
#if ON_WINDOWS
|
||||
QueryPerformanceCounter(&time_funct_end);
|
||||
time = (time_funct_end.QuadPart-time_funct_start.QuadPart)/(double)(freq.QuadPart);
|
||||
time = (time_funct_end.QuadPart - time_funct_start.QuadPart) /
|
||||
(double)(freq.QuadPart);
|
||||
#elif ON_APPLE
|
||||
stop = mach_absolute_time();
|
||||
time = ((stop - start) * (double)info.numer / info.denom) / 1.0E9;
|
||||
#else
|
||||
clock_gettime(CLOCK_MONOTONIC, &time_funct_end);
|
||||
time = (time_funct_end.tv_sec - time_funct_start.tv_sec)*1.0 + (time_funct_end.tv_nsec - time_funct_start.tv_nsec)/1000000000.0;
|
||||
time = (time_funct_end.tv_sec - time_funct_start.tv_sec) * 1.0 +
|
||||
(time_funct_end.tv_nsec - time_funct_start.tv_nsec) / 1000000000.0;
|
||||
#endif
|
||||
|
||||
} else {
|
||||
@ -2160,18 +2129,19 @@ static double composeRGBPixelMicroBench(GPUEnv *env,
|
||||
}
|
||||
#if ON_WINDOWS
|
||||
QueryPerformanceCounter(&time_funct_end);
|
||||
time = (time_funct_end.QuadPart-time_funct_start.QuadPart)/(double)(freq.QuadPart);
|
||||
time = (time_funct_end.QuadPart - time_funct_start.QuadPart) /
|
||||
(double)(freq.QuadPart);
|
||||
#elif ON_APPLE
|
||||
stop = mach_absolute_time();
|
||||
time = ((stop - start) * (double)info.numer / info.denom) / 1.0E9;
|
||||
#else
|
||||
clock_gettime(CLOCK_MONOTONIC, &time_funct_end);
|
||||
time = (time_funct_end.tv_sec - time_funct_start.tv_sec)*1.0 + (time_funct_end.tv_nsec - time_funct_start.tv_nsec)/1000000000.0;
|
||||
time = (time_funct_end.tv_sec - time_funct_start.tv_sec) * 1.0 +
|
||||
(time_funct_end.tv_nsec - time_funct_start.tv_nsec) / 1000000000.0;
|
||||
#endif
|
||||
pixDestroy(&pix);
|
||||
}
|
||||
|
||||
|
||||
// cleanup
|
||||
|
||||
return time;
|
||||
@ -2214,7 +2184,8 @@ static double histogramRectMicroBench(GPUEnv *env,
|
||||
|
||||
#if ON_WINDOWS
|
||||
QueryPerformanceCounter(&time_funct_end);
|
||||
time = (time_funct_end.QuadPart-time_funct_start.QuadPart)/(double)(freq.QuadPart);
|
||||
time = (time_funct_end.QuadPart - time_funct_start.QuadPart) /
|
||||
(double)(freq.QuadPart);
|
||||
#elif ON_APPLE
|
||||
stop = mach_absolute_time();
|
||||
if (retVal == 0) {
|
||||
@ -2224,7 +2195,8 @@ static double histogramRectMicroBench(GPUEnv *env,
|
||||
}
|
||||
#else
|
||||
clock_gettime(CLOCK_MONOTONIC, &time_funct_end);
|
||||
time = (time_funct_end.tv_sec - time_funct_start.tv_sec)*1.0 + (time_funct_end.tv_nsec - time_funct_start.tv_nsec)/1000000000.0;
|
||||
time = (time_funct_end.tv_sec - time_funct_start.tv_sec) * 1.0 +
|
||||
(time_funct_end.tv_nsec - time_funct_start.tv_nsec) / 1000000000.0;
|
||||
#endif
|
||||
} else {
|
||||
int* histogram = new int[kHistogramSize];
|
||||
@ -2241,13 +2213,15 @@ static double histogramRectMicroBench(GPUEnv *env,
|
||||
}
|
||||
#if ON_WINDOWS
|
||||
QueryPerformanceCounter(&time_funct_end);
|
||||
time = (time_funct_end.QuadPart-time_funct_start.QuadPart)/(double)(freq.QuadPart);
|
||||
time = (time_funct_end.QuadPart - time_funct_start.QuadPart) /
|
||||
(double)(freq.QuadPart);
|
||||
#elif ON_APPLE
|
||||
stop = mach_absolute_time();
|
||||
time = ((stop - start) * (double)info.numer / info.denom) / 1.0E9;
|
||||
#else
|
||||
clock_gettime(CLOCK_MONOTONIC, &time_funct_end);
|
||||
time = (time_funct_end.tv_sec - time_funct_start.tv_sec)*1.0 + (time_funct_end.tv_nsec - time_funct_start.tv_nsec)/1000000000.0;
|
||||
time = (time_funct_end.tv_sec - time_funct_start.tv_sec) * 1.0 +
|
||||
(time_funct_end.tv_nsec - time_funct_start.tv_nsec) / 1000000000.0;
|
||||
#endif
|
||||
delete[] histogram;
|
||||
}
|
||||
@ -2270,8 +2244,8 @@ static void ThresholdRectToPix_Native(const unsigned char *imagedata,
|
||||
*pix = pixCreate(width, height, 1);
|
||||
uint32_t* pixdata = pixGetData(*pix);
|
||||
int wpl = pixGetWpl(*pix);
|
||||
const unsigned char* srcdata = imagedata + top * bytes_per_line +
|
||||
left * bytes_per_pixel;
|
||||
const unsigned char* srcdata =
|
||||
imagedata + top * bytes_per_line + left * bytes_per_pixel;
|
||||
for (int y = 0; y < height; ++y) {
|
||||
const uint8_t* linedata = srcdata;
|
||||
uint32_t* pixline = pixdata + y * wpl;
|
||||
@ -2310,12 +2284,7 @@ static double thresholdRectToPixMicroBench(GPUEnv *env,
|
||||
|
||||
// input data
|
||||
unsigned char pixelHi = (unsigned char)255;
|
||||
int thresholds[4] = {
|
||||
pixelHi,
|
||||
pixelHi,
|
||||
pixelHi,
|
||||
pixelHi
|
||||
};
|
||||
int thresholds[4] = {pixelHi, pixelHi, pixelHi, pixelHi};
|
||||
|
||||
// Pix* pix = pixCreate(width, height, 1);
|
||||
int top = 0;
|
||||
@ -2340,7 +2309,8 @@ static double thresholdRectToPixMicroBench(GPUEnv *env,
|
||||
|
||||
#if ON_WINDOWS
|
||||
QueryPerformanceCounter(&time_funct_end);
|
||||
time = (time_funct_end.QuadPart-time_funct_start.QuadPart)/(double)(freq.QuadPart);
|
||||
time = (time_funct_end.QuadPart - time_funct_start.QuadPart) /
|
||||
(double)(freq.QuadPart);
|
||||
#elif ON_APPLE
|
||||
stop = mach_absolute_time();
|
||||
if (retVal == 0) {
|
||||
@ -2352,11 +2322,10 @@ static double thresholdRectToPixMicroBench(GPUEnv *env,
|
||||
|
||||
#else
|
||||
clock_gettime(CLOCK_MONOTONIC, &time_funct_end);
|
||||
time = (time_funct_end.tv_sec - time_funct_start.tv_sec)*1.0 + (time_funct_end.tv_nsec - time_funct_start.tv_nsec)/1000000000.0;
|
||||
time = (time_funct_end.tv_sec - time_funct_start.tv_sec) * 1.0 +
|
||||
(time_funct_end.tv_nsec - time_funct_start.tv_nsec) / 1000000000.0;
|
||||
#endif
|
||||
} else {
|
||||
|
||||
|
||||
tesseract::ImageThresholder thresholder;
|
||||
thresholder.SetImage(input.pix);
|
||||
#if ON_WINDOWS
|
||||
@ -2367,18 +2336,21 @@ static double thresholdRectToPixMicroBench(GPUEnv *env,
|
||||
clock_gettime(CLOCK_MONOTONIC, &time_funct_start);
|
||||
#endif
|
||||
int hi_values[4] = {};
|
||||
ThresholdRectToPix_Native( input.imageData, input.numChannels, bytes_per_line,
|
||||
thresholds, hi_values, &input.pix );
|
||||
ThresholdRectToPix_Native(input.imageData, input.numChannels,
|
||||
bytes_per_line, thresholds, hi_values,
|
||||
&input.pix);
|
||||
|
||||
#if ON_WINDOWS
|
||||
QueryPerformanceCounter(&time_funct_end);
|
||||
time = (time_funct_end.QuadPart-time_funct_start.QuadPart)/(double)(freq.QuadPart);
|
||||
time = (time_funct_end.QuadPart - time_funct_start.QuadPart) /
|
||||
(double)(freq.QuadPart);
|
||||
#elif ON_APPLE
|
||||
stop = mach_absolute_time();
|
||||
time = ((stop - start) * (double)info.numer / info.denom) / 1.0E9;
|
||||
#else
|
||||
clock_gettime(CLOCK_MONOTONIC, &time_funct_end);
|
||||
time = (time_funct_end.tv_sec - time_funct_start.tv_sec)*1.0 + (time_funct_end.tv_nsec - time_funct_start.tv_nsec)/1000000000.0;
|
||||
time = (time_funct_end.tv_sec - time_funct_start.tv_sec) * 1.0 +
|
||||
(time_funct_end.tv_nsec - time_funct_start.tv_nsec) / 1000000000.0;
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -2421,22 +2393,24 @@ static double getLineMasksMorphMicroBench(GPUEnv *env,
|
||||
OpenclDevice::gpuEnv = *env;
|
||||
OpenclDevice::initMorphCLAllocations(wpl, input.height, input.pix);
|
||||
Pix *pix_vline = nullptr, *pix_hline = nullptr, *pix_closed = nullptr;
|
||||
OpenclDevice::pixGetLinesCL(
|
||||
nullptr, input.pix, &pix_vline, &pix_hline, &pix_closed, true,
|
||||
closing_brick, closing_brick, max_line_width, max_line_width,
|
||||
min_line_length, min_line_length);
|
||||
OpenclDevice::pixGetLinesCL(nullptr, input.pix, &pix_vline, &pix_hline,
|
||||
&pix_closed, true, closing_brick, closing_brick,
|
||||
max_line_width, max_line_width, min_line_length,
|
||||
min_line_length);
|
||||
|
||||
OpenclDevice::releaseMorphCLBuffers();
|
||||
|
||||
#if ON_WINDOWS
|
||||
QueryPerformanceCounter(&time_funct_end);
|
||||
time = (time_funct_end.QuadPart-time_funct_start.QuadPart)/(double)(freq.QuadPart);
|
||||
time = (time_funct_end.QuadPart - time_funct_start.QuadPart) /
|
||||
(double)(freq.QuadPart);
|
||||
#elif ON_APPLE
|
||||
stop = mach_absolute_time();
|
||||
time = ((stop - start) * (double)info.numer / info.denom) / 1.0E9;
|
||||
#else
|
||||
clock_gettime(CLOCK_MONOTONIC, &time_funct_end);
|
||||
time = (time_funct_end.tv_sec - time_funct_start.tv_sec)*1.0 + (time_funct_end.tv_nsec - time_funct_start.tv_nsec)/1000000000.0;
|
||||
time = (time_funct_end.tv_sec - time_funct_start.tv_sec) * 1.0 +
|
||||
(time_funct_end.tv_nsec - time_funct_start.tv_nsec) / 1000000000.0;
|
||||
#endif
|
||||
} else {
|
||||
#if ON_WINDOWS
|
||||
@ -2461,21 +2435,21 @@ static double getLineMasksMorphMicroBench(GPUEnv *env,
|
||||
|
||||
#if ON_WINDOWS
|
||||
QueryPerformanceCounter(&time_funct_end);
|
||||
time = (time_funct_end.QuadPart-time_funct_start.QuadPart)/(double)(freq.QuadPart);
|
||||
time = (time_funct_end.QuadPart - time_funct_start.QuadPart) /
|
||||
(double)(freq.QuadPart);
|
||||
#elif ON_APPLE
|
||||
stop = mach_absolute_time();
|
||||
time = ((stop - start) * (double)info.numer / info.denom) / 1.0E9;
|
||||
#else
|
||||
clock_gettime(CLOCK_MONOTONIC, &time_funct_end);
|
||||
time = (time_funct_end.tv_sec - time_funct_start.tv_sec)*1.0 + (time_funct_end.tv_nsec - time_funct_start.tv_nsec)/1000000000.0;
|
||||
time = (time_funct_end.tv_sec - time_funct_start.tv_sec) * 1.0 +
|
||||
(time_funct_end.tv_nsec - time_funct_start.tv_nsec) / 1000000000.0;
|
||||
#endif
|
||||
}
|
||||
|
||||
return time;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/******************************************************************************
|
||||
* Device Selection
|
||||
*****************************************************************************/
|
||||
@ -2693,7 +2667,6 @@ ds_device OpenclDevice::getDeviceSelection( ) {
|
||||
return selectedDevice;
|
||||
}
|
||||
|
||||
|
||||
bool OpenclDevice::selectedDeviceIsOpenCL() {
|
||||
ds_device device = getDeviceSelection();
|
||||
return (device.type == DS_DEVICE_OPENCL_DEVICE);
|
||||
|
@ -72,19 +72,20 @@
|
||||
* 0 - no reporting
|
||||
* 1 - no reporting
|
||||
* 2 - report total function call time for functions we're tracking
|
||||
* 3 - optionally report breakdown of function calls (kernel launch, kernel time, data copies)
|
||||
* 3 - optionally report breakdown of function calls (kernel launch, kernel
|
||||
*time, data copies)
|
||||
************************************************************************************/
|
||||
#define PERF_COUNT_VERBOSE 1
|
||||
#define PERF_COUNT_REPORT_STR "[%36s], %24s, %11.6f\n"
|
||||
|
||||
|
||||
#if ON_WINDOWS
|
||||
|
||||
#if PERF_COUNT_VERBOSE >= 2
|
||||
#define PERF_COUNT_START(FUNCT_NAME) \
|
||||
char* funct_name = FUNCT_NAME; \
|
||||
double elapsed_time_sec; \
|
||||
LARGE_INTEGER freq, time_funct_start, time_funct_end, time_sub_start, time_sub_end; \
|
||||
LARGE_INTEGER freq, time_funct_start, time_funct_end, time_sub_start, \
|
||||
time_sub_end; \
|
||||
QueryPerformanceFrequency(&freq); \
|
||||
QueryPerformanceCounter(&time_funct_start); \
|
||||
time_sub_start = time_funct_start; \
|
||||
@ -111,7 +112,6 @@
|
||||
#define PERF_COUNT_SUB(SUB)
|
||||
#endif
|
||||
|
||||
|
||||
// not on windows
|
||||
#else
|
||||
|
||||
@ -169,8 +169,7 @@
|
||||
#define GROUPSIZE_HMORX 256
|
||||
#define GROUPSIZE_HMORY 1
|
||||
|
||||
typedef struct _KernelEnv
|
||||
{
|
||||
typedef struct _KernelEnv {
|
||||
cl_context mpkContext;
|
||||
cl_command_queue mpkCmdQueue;
|
||||
cl_program mpkProgram;
|
||||
@ -178,8 +177,7 @@ typedef struct _KernelEnv
|
||||
char mckKernelName[150];
|
||||
} KernelEnv;
|
||||
|
||||
typedef struct _OpenCLEnv
|
||||
{
|
||||
typedef struct _OpenCLEnv {
|
||||
cl_platform_id mpOclPlatformID;
|
||||
cl_context mpOclContext;
|
||||
cl_device_id mpOclDevsID;
|
||||
@ -188,14 +186,11 @@ typedef struct _OpenCLEnv
|
||||
typedef int (*cl_kernel_function)(void** userdata, KernelEnv* kenv);
|
||||
|
||||
#define CHECK_OPENCL(status, name) \
|
||||
if( status != CL_SUCCESS ) \
|
||||
{ \
|
||||
if (status != CL_SUCCESS) { \
|
||||
printf("OpenCL error code is %d at when %s .\n", status, name); \
|
||||
}
|
||||
|
||||
|
||||
typedef struct _GPUEnv
|
||||
{
|
||||
typedef struct _GPUEnv {
|
||||
// share vb in all modules in hb library
|
||||
cl_platform_id mpPlatformID;
|
||||
cl_device_type mDevType;
|
||||
@ -204,39 +199,47 @@ typedef struct _GPUEnv
|
||||
cl_device_id mpDevID;
|
||||
cl_command_queue mpCmdQueue;
|
||||
cl_kernel mpArryKernels[MAX_CLFILE_NUM];
|
||||
cl_program mpArryPrograms[MAX_CLFILE_NUM]; //one program object maps one kernel source file
|
||||
char mArryKnelSrcFile[MAX_CLFILE_NUM][256], //the max len of kernel file name is 256
|
||||
cl_program mpArryPrograms[MAX_CLFILE_NUM]; // one program object maps one
|
||||
// kernel source file
|
||||
char mArryKnelSrcFile[MAX_CLFILE_NUM]
|
||||
[256], // the max len of kernel file name is 256
|
||||
mArrykernelNames[MAX_CLKERNEL_NUM][MAX_KERNEL_STRING_LEN + 1];
|
||||
cl_kernel_function mpArryKnelFuncs[MAX_CLKERNEL_NUM];
|
||||
int mnKernelCount, mnFileCount, // only one kernel file
|
||||
mnIsUserCreated; // 1: created , 0:no create and needed to create by opencl wrapper
|
||||
mnIsUserCreated; // 1: created , 0:no create and needed to create by
|
||||
// opencl wrapper
|
||||
int mnKhrFp64Flag;
|
||||
int mnAmdFp64Flag;
|
||||
|
||||
} GPUEnv;
|
||||
|
||||
|
||||
class OpenclDevice
|
||||
{
|
||||
|
||||
class OpenclDevice {
|
||||
public:
|
||||
static GPUEnv gpuEnv;
|
||||
static int isInited;
|
||||
OpenclDevice();
|
||||
~OpenclDevice();
|
||||
static int InitEnv(); // load dll, call InitOpenclRunEnv(0)
|
||||
static int InitOpenclRunEnv( int argc ); // RegistOpenclKernel, double flags, compile kernels
|
||||
static int InitOpenclRunEnv_DeviceSelection( int argc ); // RegistOpenclKernel, double flags, compile kernels
|
||||
static int InitOpenclRunEnv(
|
||||
int argc); // RegistOpenclKernel, double flags, compile kernels
|
||||
static int InitOpenclRunEnv_DeviceSelection(
|
||||
int argc); // RegistOpenclKernel, double flags, compile kernels
|
||||
static int RegistOpenclKernel();
|
||||
static int ReleaseOpenclRunEnv();
|
||||
static int ReleaseOpenclEnv(GPUEnv* gpuInfo);
|
||||
static int CompileKernelFile(GPUEnv* gpuInfo, const char* buildOption);
|
||||
static int CachedOfKernerPrg( const GPUEnv *gpuEnvCached, const char * clFileName );
|
||||
static int GeneratBinFromKernelSource( cl_program program, const char * clFileName );
|
||||
static int WriteBinaryToFile( const char* fileName, const char* birary, size_t numBytes );
|
||||
static int CachedOfKernerPrg(const GPUEnv* gpuEnvCached,
|
||||
const char* clFileName);
|
||||
static int GeneratBinFromKernelSource(cl_program program,
|
||||
const char* clFileName);
|
||||
static int WriteBinaryToFile(const char* fileName, const char* birary,
|
||||
size_t numBytes);
|
||||
static int BinaryGenerated(const char* clFileName, FILE** fhandle);
|
||||
//static int CompileKernelFile( const char *filename, GPUEnv *gpuInfo, const char *buildOption );
|
||||
static l_uint32* pixReadFromTiffKernel(l_uint32 *tiffdata,l_int32 w,l_int32 h,l_int32 wpl, l_uint32 *line);
|
||||
// static int CompileKernelFile( const char *filename, GPUEnv *gpuInfo, const
|
||||
// char *buildOption );
|
||||
static l_uint32* pixReadFromTiffKernel(l_uint32* tiffdata, l_int32 w,
|
||||
l_int32 h, l_int32 wpl,
|
||||
l_uint32* line);
|
||||
static int composeRGBPixelCl(int* tiffdata, int* line, int h, int w);
|
||||
|
||||
/* OpenCL implementations of Morphological operations*/
|
||||
@ -246,11 +249,10 @@ public:
|
||||
static void releaseMorphCLBuffers();
|
||||
|
||||
static void pixGetLinesCL(Pix* pixd, Pix* pixs, Pix** pix_vline,
|
||||
Pix **pix_hline, Pix **pixClosed,
|
||||
bool getpixClosed, l_int32 close_hsize,
|
||||
l_int32 close_vsize, l_int32 open_hsize,
|
||||
l_int32 open_vsize, l_int32 line_hsize,
|
||||
l_int32 line_vsize);
|
||||
Pix** pix_hline, Pix** pixClosed, bool getpixClosed,
|
||||
l_int32 close_hsize, l_int32 close_vsize,
|
||||
l_int32 open_hsize, l_int32 open_vsize,
|
||||
l_int32 line_hsize, l_int32 line_vsize);
|
||||
|
||||
// int InitOpenclAttr( OpenCLEnv * env );
|
||||
// int ReleaseKernel( KernelEnv * env );
|
||||
@ -259,9 +261,10 @@ public:
|
||||
// int RunKernel( const char *kernelName, void **userdata );
|
||||
// int ConvertToString( const char *filename, char **source );
|
||||
// int CheckKernelName( KernelEnv *envInfo, const char *kernelName );
|
||||
//int RegisterKernelWrapper( const char *kernelName, cl_kernel_function function );
|
||||
//int RunKernelWrapper( cl_kernel_function function, const char * kernelName, void **usrdata );
|
||||
//int GetKernelEnvAndFunc( const char *kernelName, KernelEnv *env, cl_kernel_function *function );
|
||||
// int RegisterKernelWrapper( const char *kernelName, cl_kernel_function
|
||||
// function ); int RunKernelWrapper( cl_kernel_function function, const char *
|
||||
// kernelName, void **usrdata ); int GetKernelEnvAndFunc( const char
|
||||
// *kernelName, KernelEnv *env, cl_kernel_function *function );
|
||||
|
||||
static int LoadOpencl();
|
||||
#ifdef WIN32
|
||||
@ -273,8 +276,8 @@ public:
|
||||
|
||||
/* for binarization */
|
||||
static int HistogramRectOCL(unsigned char* imagedata, int bytes_per_pixel,
|
||||
int bytes_per_line, int left, int top,
|
||||
int width, int height, int kHistogramSize,
|
||||
int bytes_per_line, int left, int top, int width,
|
||||
int height, int kHistogramSize,
|
||||
int* histogramAllChannels);
|
||||
|
||||
static int ThresholdRectToPixOCL(unsigned char* imagedata,
|
||||
|
Loading…
Reference in New Issue
Block a user