Merge pull request #1718 from stweil/opencl

Format OpenCL code
This commit is contained in:
zdenop 2018-06-30 20:14:04 +02:00 committed by GitHub
commit 5b14121449
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 998 additions and 1022 deletions

View File

@ -12,7 +12,7 @@
#define TESSERACT_OPENCL_OCLKERNELS_H_ #define TESSERACT_OPENCL_OCLKERNELS_H_
#ifndef USE_EXTERNAL_KERNEL #ifndef USE_EXTERNAL_KERNEL
#define KERNEL( ... )# __VA_ARGS__ "\n" #define KERNEL(...) #__VA_ARGS__ "\n"
// Double precision is a default of spreadsheets // Double precision is a default of spreadsheets
// cl_khr_fp64: Khronos extension // cl_khr_fp64: Khronos extension
// cl_amd_fp64: AMD extension // cl_amd_fp64: AMD extension

View File

@ -17,8 +17,8 @@
#define _CRT_SECURE_NO_WARNINGS #define _CRT_SECURE_NO_WARNINGS
#endif #endif
#include <cstdlib>
#include <cstdio> #include <cstdio>
#include <cstdlib>
#include <cstring> #include <cstring>
#ifdef __APPLE__ #ifdef __APPLE__
@ -34,12 +34,12 @@ typedef enum {
} ds_device_type; } ds_device_type;
typedef struct { typedef struct {
ds_device_type type; ds_device_type type;
cl_device_id oclDeviceID; cl_device_id oclDeviceID;
char* oclDeviceName; char* oclDeviceName;
char* oclDriverVersion; char* oclDriverVersion;
// a pointer to the score data, the content/format is application defined. // a pointer to the score data, the content/format is application defined.
void* score; void* score;
} ds_device; } ds_device;
#endif // USE_OPENCL #endif // USE_OPENCL

File diff suppressed because it is too large Load Diff

View File

@ -22,29 +22,29 @@
#if defined(WIN32) || defined(__WIN32__) || defined(_WIN32) || \ #if defined(WIN32) || defined(__WIN32__) || defined(_WIN32) || \
defined(__CYGWIN__) || defined(__MINGW32__) defined(__CYGWIN__) || defined(__MINGW32__)
#define ON_WINDOWS 1 #define ON_WINDOWS 1
#define ON_LINUX 0 #define ON_LINUX 0
#define ON_APPLE 0 #define ON_APPLE 0
#define ON_OTHER 0 #define ON_OTHER 0
#define IF_WINDOWS(X) X #define IF_WINDOWS(X) X
#define IF_LINUX(X) #define IF_LINUX(X)
#define IF_APPLE(X) #define IF_APPLE(X)
#define IF_OTHER(X) #define IF_OTHER(X)
#define NOT_WINDOWS(X) #define NOT_WINDOWS(X)
#elif defined( __linux__ ) #elif defined(__linux__)
#define ON_WINDOWS 0 #define ON_WINDOWS 0
#define ON_LINUX 1 #define ON_LINUX 1
#define ON_APPLE 0 #define ON_APPLE 0
#define ON_OTHER 0 #define ON_OTHER 0
#define IF_WINDOWS(X) #define IF_WINDOWS(X)
#define IF_LINUX(X) X #define IF_LINUX(X) X
#define IF_APPLE(X) #define IF_APPLE(X)
#define IF_OTHER(X) #define IF_OTHER(X)
#define NOT_WINDOWS(X) X #define NOT_WINDOWS(X) X
#elif defined( __APPLE__ ) #elif defined(__APPLE__)
#define ON_WINDOWS 0 #define ON_WINDOWS 0
#define ON_LINUX 0 #define ON_LINUX 0
#define ON_APPLE 1 #define ON_APPLE 1
#define ON_OTHER 0 #define ON_OTHER 0
#define IF_WINDOWS(X) #define IF_WINDOWS(X)
#define IF_LINUX(X) #define IF_LINUX(X)
#define IF_APPLE(X) X #define IF_APPLE(X) X
@ -52,9 +52,9 @@
#define NOT_WINDOWS(X) X #define NOT_WINDOWS(X) X
#else #else
#define ON_WINDOWS 0 #define ON_WINDOWS 0
#define ON_LINUX 0 #define ON_LINUX 0
#define ON_APPLE 0 #define ON_APPLE 0
#define ON_OTHER 1 #define ON_OTHER 1
#define IF_WINDOWS(X) #define IF_WINDOWS(X)
#define IF_LINUX(X) #define IF_LINUX(X)
#define IF_APPLE(X) #define IF_APPLE(X)
@ -72,23 +72,24 @@
* 0 - no reporting * 0 - no reporting
* 1 - no reporting * 1 - no reporting
* 2 - report total function call time for functions we're tracking * 2 - report total function call time for functions we're tracking
* 3 - optionally report breakdown of function calls (kernel launch, kernel time, data copies) * 3 - optionally report breakdown of function calls (kernel launch, kernel
*time, data copies)
************************************************************************************/ ************************************************************************************/
#define PERF_COUNT_VERBOSE 1 #define PERF_COUNT_VERBOSE 1
#define PERF_COUNT_REPORT_STR "[%36s], %24s, %11.6f\n" #define PERF_COUNT_REPORT_STR "[%36s], %24s, %11.6f\n"
#if ON_WINDOWS #if ON_WINDOWS
#if PERF_COUNT_VERBOSE >= 2 #if PERF_COUNT_VERBOSE >= 2
#define PERF_COUNT_START(FUNCT_NAME) \ #define PERF_COUNT_START(FUNCT_NAME) \
char *funct_name = FUNCT_NAME; \ char* funct_name = FUNCT_NAME; \
double elapsed_time_sec; \ double elapsed_time_sec; \
LARGE_INTEGER freq, time_funct_start, time_funct_end, time_sub_start, time_sub_end; \ LARGE_INTEGER freq, time_funct_start, time_funct_end, time_sub_start, \
QueryPerformanceFrequency(&freq); \ time_sub_end; \
QueryPerformanceCounter(&time_funct_start); \ QueryPerformanceFrequency(&freq); \
time_sub_start = time_funct_start; \ QueryPerformanceCounter(&time_funct_start); \
time_sub_end = time_funct_start; time_sub_start = time_funct_start; \
time_sub_end = time_funct_start;
#define PERF_COUNT_END \ #define PERF_COUNT_END \
QueryPerformanceCounter(&time_funct_end); \ QueryPerformanceCounter(&time_funct_end); \
@ -111,18 +112,17 @@
#define PERF_COUNT_SUB(SUB) #define PERF_COUNT_SUB(SUB)
#endif #endif
// not on windows // not on windows
#else #else
#if PERF_COUNT_VERBOSE >= 2 #if PERF_COUNT_VERBOSE >= 2
#define PERF_COUNT_START(FUNCT_NAME) \ #define PERF_COUNT_START(FUNCT_NAME) \
char *funct_name = FUNCT_NAME; \ char* funct_name = FUNCT_NAME; \
double elapsed_time_sec; \ double elapsed_time_sec; \
timespec time_funct_start, time_funct_end, time_sub_start, time_sub_end; \ timespec time_funct_start, time_funct_end, time_sub_start, time_sub_end; \
clock_gettime( CLOCK_MONOTONIC, &time_funct_start ); \ clock_gettime(CLOCK_MONOTONIC, &time_funct_start); \
time_sub_start = time_funct_start; \ time_sub_start = time_funct_start; \
time_sub_end = time_funct_start; time_sub_end = time_funct_start;
#define PERF_COUNT_END \ #define PERF_COUNT_END \
clock_gettime(CLOCK_MONOTONIC, &time_funct_end); \ clock_gettime(CLOCK_MONOTONIC, &time_funct_end); \
@ -169,124 +169,127 @@
#define GROUPSIZE_HMORX 256 #define GROUPSIZE_HMORX 256
#define GROUPSIZE_HMORY 1 #define GROUPSIZE_HMORY 1
typedef struct _KernelEnv typedef struct _KernelEnv {
{ cl_context mpkContext;
cl_context mpkContext; cl_command_queue mpkCmdQueue;
cl_command_queue mpkCmdQueue; cl_program mpkProgram;
cl_program mpkProgram; cl_kernel mpkKernel;
cl_kernel mpkKernel; char mckKernelName[150];
char mckKernelName[150];
} KernelEnv; } KernelEnv;
typedef struct _OpenCLEnv typedef struct _OpenCLEnv {
{ cl_platform_id mpOclPlatformID;
cl_platform_id mpOclPlatformID; cl_context mpOclContext;
cl_context mpOclContext; cl_device_id mpOclDevsID;
cl_device_id mpOclDevsID; cl_command_queue mpOclCmdQueue;
cl_command_queue mpOclCmdQueue;
} OpenCLEnv; } OpenCLEnv;
typedef int ( *cl_kernel_function )( void **userdata, KernelEnv *kenv ); typedef int (*cl_kernel_function)(void** userdata, KernelEnv* kenv);
#define CHECK_OPENCL(status,name) \ #define CHECK_OPENCL(status, name) \
if( status != CL_SUCCESS ) \ if (status != CL_SUCCESS) { \
{ \ printf("OpenCL error code is %d at when %s .\n", status, name); \
printf ("OpenCL error code is %d at when %s .\n", status, name); \ }
}
typedef struct _GPUEnv {
typedef struct _GPUEnv // share vb in all modules in hb library
{ cl_platform_id mpPlatformID;
//share vb in all modules in hb library cl_device_type mDevType;
cl_platform_id mpPlatformID; cl_context mpContext;
cl_device_type mDevType; cl_device_id* mpArryDevsID;
cl_context mpContext; cl_device_id mpDevID;
cl_device_id *mpArryDevsID; cl_command_queue mpCmdQueue;
cl_device_id mpDevID; cl_kernel mpArryKernels[MAX_CLFILE_NUM];
cl_command_queue mpCmdQueue; cl_program mpArryPrograms[MAX_CLFILE_NUM]; // one program object maps one
cl_kernel mpArryKernels[MAX_CLFILE_NUM]; // kernel source file
cl_program mpArryPrograms[MAX_CLFILE_NUM]; //one program object maps one kernel source file char mArryKnelSrcFile[MAX_CLFILE_NUM]
char mArryKnelSrcFile[MAX_CLFILE_NUM][256], //the max len of kernel file name is 256 [256], // the max len of kernel file name is 256
mArrykernelNames[MAX_CLKERNEL_NUM][MAX_KERNEL_STRING_LEN + 1]; mArrykernelNames[MAX_CLKERNEL_NUM][MAX_KERNEL_STRING_LEN + 1];
cl_kernel_function mpArryKnelFuncs[MAX_CLKERNEL_NUM]; cl_kernel_function mpArryKnelFuncs[MAX_CLKERNEL_NUM];
int mnKernelCount, mnFileCount, // only one kernel file int mnKernelCount, mnFileCount, // only one kernel file
mnIsUserCreated; // 1: created , 0:no create and needed to create by opencl wrapper mnIsUserCreated; // 1: created , 0:no create and needed to create by
int mnKhrFp64Flag; // opencl wrapper
int mnAmdFp64Flag; int mnKhrFp64Flag;
int mnAmdFp64Flag;
} GPUEnv; } GPUEnv;
class OpenclDevice {
public:
static GPUEnv gpuEnv;
static int isInited;
OpenclDevice();
~OpenclDevice();
static int InitEnv(); // load dll, call InitOpenclRunEnv(0)
static int InitOpenclRunEnv(
int argc); // RegistOpenclKernel, double flags, compile kernels
static int InitOpenclRunEnv_DeviceSelection(
int argc); // RegistOpenclKernel, double flags, compile kernels
static int RegistOpenclKernel();
static int ReleaseOpenclRunEnv();
static int ReleaseOpenclEnv(GPUEnv* gpuInfo);
static int CompileKernelFile(GPUEnv* gpuInfo, const char* buildOption);
static int CachedOfKernerPrg(const GPUEnv* gpuEnvCached,
const char* clFileName);
static int GeneratBinFromKernelSource(cl_program program,
const char* clFileName);
static int WriteBinaryToFile(const char* fileName, const char* birary,
size_t numBytes);
static int BinaryGenerated(const char* clFileName, FILE** fhandle);
// static int CompileKernelFile( const char *filename, GPUEnv *gpuInfo, const
// char *buildOption );
static l_uint32* pixReadFromTiffKernel(l_uint32* tiffdata, l_int32 w,
l_int32 h, l_int32 wpl,
l_uint32* line);
static int composeRGBPixelCl(int* tiffdata, int* line, int h, int w);
class OpenclDevice /* OpenCL implementations of Morphological operations*/
{
public: // Initialization of OCL buffers used in Morph operations
static GPUEnv gpuEnv; static int initMorphCLAllocations(l_int32 wpl, l_int32 h, Pix* pixs);
static int isInited; static void releaseMorphCLBuffers();
OpenclDevice();
~OpenclDevice();
static int InitEnv(); // load dll, call InitOpenclRunEnv(0)
static int InitOpenclRunEnv( int argc ); // RegistOpenclKernel, double flags, compile kernels
static int InitOpenclRunEnv_DeviceSelection( int argc ); // RegistOpenclKernel, double flags, compile kernels
static int RegistOpenclKernel();
static int ReleaseOpenclRunEnv();
static int ReleaseOpenclEnv( GPUEnv *gpuInfo );
static int CompileKernelFile( GPUEnv *gpuInfo, const char *buildOption );
static int CachedOfKernerPrg( const GPUEnv *gpuEnvCached, const char * clFileName );
static int GeneratBinFromKernelSource( cl_program program, const char * clFileName );
static int WriteBinaryToFile( const char* fileName, const char* birary, size_t numBytes );
static int BinaryGenerated( const char * clFileName, FILE ** fhandle );
//static int CompileKernelFile( const char *filename, GPUEnv *gpuInfo, const char *buildOption );
static l_uint32* pixReadFromTiffKernel(l_uint32 *tiffdata,l_int32 w,l_int32 h,l_int32 wpl, l_uint32 *line);
static int composeRGBPixelCl(int *tiffdata,int *line,int h,int w);
/* OpenCL implementations of Morphological operations*/ static void pixGetLinesCL(Pix* pixd, Pix* pixs, Pix** pix_vline,
Pix** pix_hline, Pix** pixClosed, bool getpixClosed,
l_int32 close_hsize, l_int32 close_vsize,
l_int32 open_hsize, l_int32 open_vsize,
l_int32 line_hsize, l_int32 line_vsize);
//Initialization of OCL buffers used in Morph operations // int InitOpenclAttr( OpenCLEnv * env );
static int initMorphCLAllocations(l_int32 wpl, l_int32 h, Pix *pixs); // int ReleaseKernel( KernelEnv * env );
static void releaseMorphCLBuffers(); static int SetKernelEnv(KernelEnv* envInfo);
// int CreateKernel( char * kernelname, KernelEnv * env );
// int RunKernel( const char *kernelName, void **userdata );
// int ConvertToString( const char *filename, char **source );
// int CheckKernelName( KernelEnv *envInfo, const char *kernelName );
// int RegisterKernelWrapper( const char *kernelName, cl_kernel_function
// function ); int RunKernelWrapper( cl_kernel_function function, const char *
// kernelName, void **usrdata ); int GetKernelEnvAndFunc( const char
// *kernelName, KernelEnv *env, cl_kernel_function *function );
static void pixGetLinesCL(Pix *pixd, Pix *pixs, Pix **pix_vline, static int LoadOpencl();
Pix **pix_hline, Pix **pixClosed,
bool getpixClosed, l_int32 close_hsize,
l_int32 close_vsize, l_int32 open_hsize,
l_int32 open_vsize, l_int32 line_hsize,
l_int32 line_vsize);
//int InitOpenclAttr( OpenCLEnv * env );
//int ReleaseKernel( KernelEnv * env );
static int SetKernelEnv( KernelEnv *envInfo );
//int CreateKernel( char * kernelname, KernelEnv * env );
//int RunKernel( const char *kernelName, void **userdata );
//int ConvertToString( const char *filename, char **source );
//int CheckKernelName( KernelEnv *envInfo, const char *kernelName );
//int RegisterKernelWrapper( const char *kernelName, cl_kernel_function function );
//int RunKernelWrapper( cl_kernel_function function, const char * kernelName, void **usrdata );
//int GetKernelEnvAndFunc( const char *kernelName, KernelEnv *env, cl_kernel_function *function );
static int LoadOpencl();
#ifdef WIN32 #ifdef WIN32
//static int OpenclInite(); // static int OpenclInite();
static void FreeOpenclDll(); static void FreeOpenclDll();
#endif #endif
inline static int AddKernelConfig( int kCount, const char *kName ); inline static int AddKernelConfig(int kCount, const char* kName);
/* for binarization */ /* for binarization */
static int HistogramRectOCL(unsigned char *imagedata, int bytes_per_pixel, static int HistogramRectOCL(unsigned char* imagedata, int bytes_per_pixel,
int bytes_per_line, int left, int top, int bytes_per_line, int left, int top, int width,
int width, int height, int kHistogramSize, int height, int kHistogramSize,
int *histogramAllChannels); int* histogramAllChannels);
static int ThresholdRectToPixOCL(unsigned char *imagedata, static int ThresholdRectToPixOCL(unsigned char* imagedata,
int bytes_per_pixel, int bytes_per_line, int bytes_per_pixel, int bytes_per_line,
int *thresholds, int *hi_values, Pix **pix, int* thresholds, int* hi_values, Pix** pix,
int rect_height, int rect_width, int rect_height, int rect_width,
int rect_top, int rect_left); int rect_top, int rect_left);
static ds_device getDeviceSelection(); static ds_device getDeviceSelection();
static ds_device selectedDevice; static ds_device selectedDevice;
static bool deviceIsSelected; static bool deviceIsSelected;
static bool selectedDeviceIsOpenCL(); static bool selectedDeviceIsOpenCL();
}; };
#endif // USE_OPENCL #endif // USE_OPENCL