#include #include "allheaders.h" #include "pix.h" #ifdef USE_OPENCL #include "tiff.h" #include "tiffio.h" #endif #include "tprintf.h" // including CL/cl.h doesn't occur until USE_OPENCL defined below // platform preprocessor commands #if defined( WIN32 ) || defined( __WIN32__ ) || defined( _WIN32 ) || defined( __CYGWIN__ ) || defined( __MINGW32__ ) #define ON_WINDOWS 1 #define ON_LINUX 0 #define ON_APPLE 0 #define ON_OTHER 0 #define IF_WINDOWS(X) X #define IF_LINUX(X) #define IF_APPLE(X) #define IF_OTHER(X) #define NOT_WINDOWS(X) #elif defined( __linux__ ) #define ON_WINDOWS 0 #define ON_LINUX 1 #define ON_APPLE 0 #define ON_OTHER 0 #define IF_WINDOWS(X) #define IF_LINUX(X) X #define IF_APPLE(X) #define IF_OTHER(X) #define NOT_WINDOWS(X) X #elif defined( __APPLE__ ) #define ON_WINDOWS 0 #define ON_LINUX 0 #define ON_APPLE 1 #define ON_OTHER 0 #define IF_WINDOWS(X) #define IF_LINUX(X) #define IF_APPLE(X) X #define IF_OTHER(X) #define NOT_WINDOWS(X) X #else #define ON_WINDOWS 0 #define ON_LINUX 0 #define ON_APPLE 0 #define ON_OTHER 1 #define IF_WINDOWS(X) #define IF_LINUX(X) #define IF_APPLE(X) #define IF_OTHER(X) X #define NOT_WINDOWS(X) X #endif #if ON_LINUX #include #endif /************************************************************************************ * enable/disable reporting of performance * PERF_REPORT_LEVEL * 0 - no reporting * 1 - no reporting * 2 - report total function call time for functions we're tracking * 3 - optionally report breakdown of function calls (kernel launch, kernel time, data copies) ************************************************************************************/ #define PERF_COUNT_VERBOSE 1 #define PERF_COUNT_REPORT_STR "[%36s], %24s, %11.6f\n" #if ON_WINDOWS #if PERF_COUNT_VERBOSE >= 2 #define PERF_COUNT_START(FUNCT_NAME) \ char *funct_name = FUNCT_NAME; \ double elapsed_time_sec; \ LARGE_INTEGER freq, time_funct_start, time_funct_end, time_sub_start, time_sub_end; \ QueryPerformanceFrequency(&freq); \ QueryPerformanceCounter(&time_funct_start); \ time_sub_start = time_funct_start; \ time_sub_end = time_funct_start; #define PERF_COUNT_END \ QueryPerformanceCounter(&time_funct_end); \ elapsed_time_sec = (time_funct_end.QuadPart-time_funct_start.QuadPart)/(double)(freq.QuadPart); \ printf(PERF_COUNT_REPORT_STR, funct_name, "total", elapsed_time_sec); #else #define PERF_COUNT_START(FUNCT_NAME) #define PERF_COUNT_END #endif #if PERF_COUNT_VERBOSE >= 3 #define PERF_COUNT_SUB(SUB) \ QueryPerformanceCounter(&time_sub_end); \ elapsed_time_sec = (time_sub_end.QuadPart-time_sub_start.QuadPart)/(double)(freq.QuadPart); \ printf(PERF_COUNT_REPORT_STR, funct_name, SUB, elapsed_time_sec); \ time_sub_start = time_sub_end; #else #define PERF_COUNT_SUB(SUB) #endif // not on windows #else #if PERF_COUNT_VERBOSE >= 2 #define PERF_COUNT_START(FUNCT_NAME) \ char *funct_name = FUNCT_NAME; \ double elapsed_time_sec; \ timespec time_funct_start, time_funct_end, time_sub_start, time_sub_end; \ clock_gettime( CLOCK_MONOTONIC, &time_funct_start ); \ time_sub_start = time_funct_start; \ time_sub_end = time_funct_start; #define PERF_COUNT_END \ clock_gettime( CLOCK_MONOTONIC, &time_funct_end ); \ elapsed_time_sec = (time_funct_end.tv_sec - time_funct_start.tv_sec)*1.0 + (time_funct_end.tv_nsec - time_funct_start.tv_nsec)/1000000000.0; \ printf(PERF_COUNT_REPORT_STR, funct_name, "total", elapsed_time_sec); #else #define PERF_COUNT_START(FUNCT_NAME) #define PERF_COUNT_END #endif #if PERF_COUNT_VERBOSE >= 3 #define PERF_COUNT_SUB(SUB) \ clock_gettime( CLOCK_MONOTONIC, &time_sub_end ); \ elapsed_time_sec = (time_sub_end.tv_sec - time_sub_start.tv_sec)*1.0 + (time_sub_end.tv_nsec - time_sub_start.tv_nsec)/1000000000.0; \ printf(PERF_COUNT_REPORT_STR, funct_name, SUB, elapsed_time_sec); \ time_sub_start = time_sub_end; #else #define PERF_COUNT_SUB(SUB) #endif #endif /************************************************************************** * enable/disable use of OpenCL **************************************************************************/ #ifdef USE_OPENCL #include "opencl_device_selection.h" #ifndef strcasecmp #define strcasecmp strcmp #endif #define MAX_KERNEL_STRING_LEN 64 #define MAX_CLFILE_NUM 50 #define MAX_CLKERNEL_NUM 200 #define MAX_KERNEL_NAME_LEN 64 #define CL_QUEUE_THREAD_HANDLE_AMD 0x403E #define GROUPSIZE_X 16 #define GROUPSIZE_Y 16 #define GROUPSIZE_HMORX 256 #define GROUPSIZE_HMORY 1 typedef struct _KernelEnv { cl_context mpkContext; cl_command_queue mpkCmdQueue; cl_program mpkProgram; cl_kernel mpkKernel; char mckKernelName[150]; } KernelEnv; typedef struct _OpenCLEnv { cl_platform_id mpOclPlatformID; cl_context mpOclContext; cl_device_id mpOclDevsID; cl_command_queue mpOclCmdQueue; } OpenCLEnv; typedef int ( *cl_kernel_function )( void **userdata, KernelEnv *kenv ); static l_int32 MORPH_BC = ASYMMETRIC_MORPH_BC; #define CHECK_OPENCL(status,name) \ if( status != CL_SUCCESS ) \ { \ printf ("OpenCL error code is %d at when %s .\n", status, name); \ } typedef struct _GPUEnv { //share vb in all modules in hb library cl_platform_id mpPlatformID; cl_device_type mDevType; cl_context mpContext; cl_device_id *mpArryDevsID; cl_device_id mpDevID; cl_command_queue mpCmdQueue; cl_kernel mpArryKernels[MAX_CLFILE_NUM]; cl_program mpArryPrograms[MAX_CLFILE_NUM]; //one program object maps one kernel source file char mArryKnelSrcFile[MAX_CLFILE_NUM][256], //the max len of kernel file name is 256 mArrykernelNames[MAX_CLKERNEL_NUM][MAX_KERNEL_STRING_LEN + 1]; cl_kernel_function mpArryKnelFuncs[MAX_CLKERNEL_NUM]; int mnKernelCount, mnFileCount, // only one kernel file mnIsUserCreated; // 1: created , 0:no create and needed to create by opencl wrapper int mnKhrFp64Flag; int mnAmdFp64Flag; } GPUEnv; class OpenclDevice { public: static GPUEnv gpuEnv; static int isInited; OpenclDevice(); ~OpenclDevice(); static int InitEnv(); // load dll, call InitOpenclRunEnv(0) static int InitOpenclRunEnv( int argc ); // RegistOpenclKernel, double flags, compile kernels static int InitOpenclRunEnv_DeviceSelection( int argc ); // RegistOpenclKernel, double flags, compile kernels static int RegistOpenclKernel(); static int ReleaseOpenclRunEnv(); static int ReleaseOpenclEnv( GPUEnv *gpuInfo ); static int CompileKernelFile( GPUEnv *gpuInfo, const char *buildOption ); static int CachedOfKernerPrg( const GPUEnv *gpuEnvCached, const char * clFileName ); static int GeneratBinFromKernelSource( cl_program program, const char * clFileName ); static int WriteBinaryToFile( const char* fileName, const char* birary, size_t numBytes ); static int BinaryGenerated( const char * clFileName, FILE ** fhandle ); //static int CompileKernelFile( const char *filename, GPUEnv *gpuInfo, const char *buildOption ); static l_uint32* pixReadFromTiffKernel(l_uint32 *tiffdata,l_int32 w,l_int32 h,l_int32 wpl, l_uint32 *line); static Pix* pixReadTiffCl( const char *filename, l_int32 n ); static PIX * pixReadStreamTiffCl ( FILE *fp, l_int32 n ); static PIX * pixReadMemTiffCl(const l_uint8 *data, size_t size, l_int32 n); static PIX* pixReadFromTiffStreamCl(TIFF *tif); static int composeRGBPixelCl(int *tiffdata,int *line,int h,int w); static l_int32 getTiffStreamResolutionCl(TIFF *tif,l_int32 *pxres,l_int32 *pyres); static TIFF* fopenTiffCl(FILE *fp,const char *modestring); /* OpenCL implementations of Morphological operations*/ //Initialiation of OCL buffers used in Morph operations static int initMorphCLAllocations(l_int32 wpl, l_int32 h, PIX* pixs); static void releaseMorphCLBuffers(); // OpenCL implementation of Morphology Dilate static PIX* pixDilateBrickCL(PIX *pixd, PIX *pixs, l_int32 hsize, l_int32 vsize, bool reqDataCopy); // OpenCL implementation of Morphology Erode static PIX* pixErodeBrickCL(PIX *pixd, PIX *pixs, l_int32 hsize, l_int32 vsize, bool reqDataCopy); // OpenCL implementation of Morphology Close static PIX* pixCloseBrickCL(PIX *pixd, PIX *pixs, l_int32 hsize, l_int32 vsize, bool reqDataCopy); // OpenCL implementation of Morphology Open static PIX* pixOpenBrickCL(PIX *pixd, PIX *pixs, l_int32 hsize, l_int32 vsize, bool reqDataCopy); // OpenCL implementation of Morphology Open static PIX* pixSubtractCL(PIX *pixd, PIX *pixs1, PIX *pixs2, bool reqDataCopy); // OpenCL implementation of Morphology (Hollow = Closed - Open) static PIX* pixHollowCL(PIX *pixd, PIX *pixs, l_int32 close_hsize, l_int32 close_vsize, l_int32 open_hsize, l_int32 open_vsize, bool reqDataCopy); static void pixGetLinesCL(PIX *pixd, PIX *pixs, PIX** pix_vline, PIX** pix_hline, PIX** pixClosed, bool getpixClosed, l_int32 close_hsize, l_int32 close_vsize, l_int32 open_hsize, l_int32 open_vsize, l_int32 line_hsize, l_int32 line_vsize); //int InitOpenclAttr( OpenCLEnv * env ); //int ReleaseKernel( KernelEnv * env ); static int SetKernelEnv( KernelEnv *envInfo ); //int CreateKernel( char * kernelname, KernelEnv * env ); //int RunKernel( const char *kernelName, void **userdata ); //int ConvertToString( const char *filename, char **source ); //int CheckKernelName( KernelEnv *envInfo, const char *kernelName ); //int RegisterKernelWrapper( const char *kernelName, cl_kernel_function function ); //int RunKernelWrapper( cl_kernel_function function, const char * kernelName, void **usrdata ); //int GetKernelEnvAndFunc( const char *kernelName, KernelEnv *env, cl_kernel_function *function ); // static cl_device_id performDeviceSelection( ); //static bool thresholdRectToPixMicroBench( TessScoreEvaluationInputData input, ds_device_type type); static int LoadOpencl(); #ifdef WIN32 //static int OpenclInite(); static void FreeOpenclDll(); #endif inline static int AddKernelConfig( int kCount, const char *kName ); /* for binarization */ static int HistogramRectOCL( const unsigned char *imagedata, int bytes_per_pixel, int bytes_per_line, int left, int top, int width, int height, int kHistogramSize, int *histogramAllChannels); static int ThresholdRectToPixOCL( const unsigned char* imagedata, int bytes_per_pixel, int bytes_per_line, const int* thresholds, const int* hi_values, Pix** pix, int rect_height, int rect_width, int rect_top, int rect_left); static Pix * pixConvertRGBToGrayOCL( Pix *pix, float weightRed = 0.3, float weightGreen = 0.5, float weightBlue = 0.2 ); static ds_device getDeviceSelection(); static ds_device selectedDevice; static bool deviceIsSelected; static bool selectedDeviceIsOpenCL(); static bool selectedDeviceIsNativeCPU(); }; #endif