Remove unsupported OpenCL code and related API functions (#4220)

Signed-off-by: Stefan Weil <sw@weilnetz.de>
This commit is contained in:
Stefan Weil 2024-04-11 18:15:39 +02:00 committed by GitHub
parent 912deb3978
commit d5e000bc58
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
19 changed files with 63 additions and 3903 deletions

4
.gitignore vendored
View File

@ -83,10 +83,6 @@ __pycache__
*.traineddata
tessdata_*
# OpenCL
tesseract_opencl_profile_devices.dat
kernel*.bin
# build dirs
/build*
/*.dll

View File

@ -88,7 +88,6 @@ option(GRAPHICS_DISABLED "Disable disable graphics (ScrollView)" OFF)
option(DISABLED_LEGACY_ENGINE "Disable the legacy OCR engine" OFF)
option(ENABLE_LTO "Enable link-time optimization" OFF)
option(FAST_FLOAT "Enable float for LSTM" ON)
option(ENABLE_OPENCL "Enable unsupported experimental OpenCL support" OFF)
option(ENABLE_NATIVE
"Enable optimization for host CPU (could break HW compatibility)" OFF)
# see
@ -460,18 +459,6 @@ else()
endif(DISABLE_CURL)
endif()
if(ENABLE_OPENCL)
find_package(OpenCL)
if(OpenCL_FOUND)
include_directories(${OpenCL_INCLUDE_DIRS})
message(STATUS "OpenCL_INCLUDE_DIRS: ${OpenCL_INCLUDE_DIRS}")
message(STATUS "OpenCL_LIBRARY: ${OpenCL_LIBRARY}")
set(USE_OPENCL ON)
else()
set(USE_OPENCL OFF)
endif(OpenCL_FOUND)
endif(ENABLE_OPENCL)
# ##############################################################################
#
# configure
@ -565,11 +552,6 @@ message(STATUS "Disable the legacy OCR engine [DISABLED_LEGACY_ENGINE]: "
message(STATUS "Build training tools [BUILD_TRAINING_TOOLS]: "
"${BUILD_TRAINING_TOOLS}")
message(STATUS "Build tests [BUILD_TESTS]: ${BUILD_TESTS}")
if(ENABLE_OPENCL)
message(
STATUS
"Enable unsupported experimental OpenCL [ENABLE_OPENCL]: ${USE_OPENCL}")
endif(ENABLE_OPENCL)
message(STATUS "Use system ICU Library [USE_SYSTEM_ICU]: ${USE_SYSTEM_ICU}")
message(
STATUS "Install tesseract configs [INSTALL_CONFIGS]: ${INSTALL_CONFIGS}")
@ -608,7 +590,6 @@ file(
src/cutil/*.cpp
src/dict/*.cpp
src/lstm/*.cpp
src/opencl/*.cpp
src/textord/*.cpp
src/viewer/*.cpp
src/wordrec/*.cpp)
@ -749,7 +730,6 @@ file(
src/cutil/*.h
src/dict/*.h
src/lstm/*.h
src/opencl/*.h
src/textord/*.h
src/viewer/*.h
src/wordrec/*.h)
@ -816,7 +796,6 @@ target_include_directories(
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/src/cutil>
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/src/dict>
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/src/lstm>
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/src/opencl>
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/src/textord>
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/src/viewer>
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/src/wordrec>
@ -832,9 +811,6 @@ target_link_libraries(libtesseract PRIVATE ${LIB_Ws2_32} ${LIB_pthread})
if(OpenMP_CXX_FOUND)
target_link_libraries(libtesseract PUBLIC OpenMP::OpenMP_CXX)
endif()
if(OpenCL_FOUND)
target_link_libraries(libtesseract PUBLIC OpenCL::OpenCL)
endif()
if(LibArchive_FOUND)
target_link_libraries(libtesseract PUBLIC ${LibArchive_LIBRARIES})
endif(LibArchive_FOUND)

View File

@ -78,7 +78,6 @@ if VISIBILITY
AM_CPPFLAGS += -DTESS_EXPORTS
AM_CPPFLAGS += -fvisibility=hidden -fvisibility-inlines-hidden -fPIC
endif
AM_CPPFLAGS += $(OPENCL_CPPFLAGS)
AM_CXXFLAGS = $(OPENMP_CXXFLAGS)
@ -94,9 +93,6 @@ libtesseract_la_CPPFLAGS += -I$(top_srcdir)/src/classify
libtesseract_la_CPPFLAGS += -I$(top_srcdir)/src/cutil
libtesseract_la_CPPFLAGS += -I$(top_srcdir)/src/dict
libtesseract_la_CPPFLAGS += -I$(top_srcdir)/src/lstm
if OPENCL
libtesseract_la_CPPFLAGS += -I$(top_srcdir)/src/opencl
endif
libtesseract_la_CPPFLAGS += -I$(top_srcdir)/src/textord
libtesseract_la_CPPFLAGS += -I$(top_srcdir)/src/training/common
libtesseract_la_CPPFLAGS += -I$(top_srcdir)/src/viewer
@ -104,7 +100,7 @@ libtesseract_la_CPPFLAGS += -I$(top_srcdir)/src/wordrec
libtesseract_la_CPPFLAGS += $(libcurl_CFLAGS)
lib_LTLIBRARIES = libtesseract.la
libtesseract_la_LDFLAGS = $(LEPTONICA_LIBS) $(OPENCL_LDFLAGS)
libtesseract_la_LDFLAGS = $(LEPTONICA_LIBS)
libtesseract_la_LDFLAGS += $(libarchive_LIBS)
libtesseract_la_LDFLAGS += $(libcurl_LIBS)
libtesseract_la_LDFLAGS += $(TENSORFLOW_LIBS)
@ -127,9 +123,6 @@ libtesseract_la_SOURCES += src/api/wordstrboxrenderer.cpp
libtesseract_la_LIBADD = libtesseract_ccutil.la
libtesseract_la_LIBADD += libtesseract_lstm.la
libtesseract_la_LIBADD += libtesseract_native.la
if OPENCL
libtesseract_la_LIBADD += libtesseract_opencl.la
endif
# Rules for src/arch.
@ -561,23 +554,6 @@ if TENSORFLOW
libtesseract_lstm_la_SOURCES += src/lstm/tfnetwork.pb.cc
endif
# Rules for src/opencl.
if OPENCL
libtesseract_opencl_la_CPPFLAGS = $(AM_CPPFLAGS)
libtesseract_opencl_la_CPPFLAGS += $(OPENCL_CFLAGS)
libtesseract_opencl_la_CPPFLAGS += -I$(top_srcdir)/src/ccutil
libtesseract_opencl_la_CPPFLAGS += -I$(top_srcdir)/src/ccstruct
libtesseract_opencl_la_CPPFLAGS += -I$(top_srcdir)/src/ccmain
noinst_HEADERS += src/opencl/openclwrapper.h
noinst_HEADERS += src/opencl/oclkernels.h
noinst_LTLIBRARIES += libtesseract_opencl.la
libtesseract_opencl_la_SOURCES = src/opencl/openclwrapper.cpp
endif
# Rules for src/textord.
noinst_HEADERS += src/textord/alignedblob.h
@ -743,16 +719,12 @@ tesseract_CPPFLAGS += -I$(top_srcdir)/src/dict
tesseract_CPPFLAGS += -I$(top_srcdir)/src/textord
tesseract_CPPFLAGS += -I$(top_srcdir)/src/viewer
tesseract_CPPFLAGS += -I$(top_srcdir)/src/wordrec
if OPENCL
tesseract_CPPFLAGS += -I$(top_srcdir)/src/opencl
endif
tesseract_CPPFLAGS += $(AM_CPPFLAGS)
if VISIBILITY
tesseract_CPPFLAGS += -DTESS_IMPORTS
endif
tesseract_LDFLAGS = $(OPENCL_LDFLAGS)
tesseract_LDFLAGS += $(OPENMP_CXXFLAGS)
tesseract_LDFLAGS = $(OPENMP_CXXFLAGS)
tesseract_LDADD = libtesseract.la
tesseract_LDADD += $(LEPTONICA_LIBS)
@ -927,7 +899,6 @@ EXTRA_PROGRAMS += $(trainingtools)
extralib = libtesseract.la
extralib += $(libarchive_LIBS)
extralib += $(LEPTONICA_LIBS)
extralib += $(OPENCL_LDFLAGS)
extralib += $(TENSORFLOW_LIBS)
if T_WIN
extralib += -lws2_32
@ -1308,7 +1279,7 @@ TESTS = $(check_PROGRAMS)
apiexample_test_SOURCES = unittest/apiexample_test.cc
apiexample_test_CPPFLAGS = $(unittest_CPPFLAGS)
apiexample_test_LDFLAGS = $(OPENCL_LDFLAGS) $(LEPTONICA_LIBS)
apiexample_test_LDFLAGS = $(LEPTONICA_LIBS)
apiexample_test_LDADD = $(TESS_LIBS) $(LEPTONICA_LIBS)
if !DISABLED_LEGACY_ENGINE
@ -1494,7 +1465,7 @@ endif # !DISABLED_LEGACY_ENGINE
progress_test_SOURCES = unittest/progress_test.cc
progress_test_CPPFLAGS = $(unittest_CPPFLAGS)
progress_test_LDFLAGS = $(OPENCL_LDFLAGS) $(LEPTONICA_LIBS)
progress_test_LDFLAGS = $(LEPTONICA_LIBS)
progress_test_LDADD = $(GTEST_LIBS) $(GMOCK_LIBS) $(TESS_LIBS) $(LEPTONICA_LIBS)
qrsequence_test_SOURCES = unittest/qrsequence_test.cc

View File

@ -91,8 +91,6 @@ set(include_files_list
unistd.h
cairo/cairo-version.h
CL/cl.h
OpenCL/cl.h
pango-1.0/pango/pango-features.h
unicode/uchar.h
)
@ -121,7 +119,6 @@ file(APPEND ${AUTOCONFIG_SRC} "
#cmakedefine HAVE_NEON ${HAVE_NEON}
#cmakedefine HAVE_LIBARCHIVE ${HAVE_LIBARCHIVE}
#cmakedefine HAVE_LIBCURL ${HAVE_LIBCURL}
#cmakedefine USE_OPENCL ${USE_OPENCL}
")
if(TESSDATA_PREFIX)

View File

@ -31,7 +31,6 @@ source_group("classify" "${SSRC}/classify/${H_CPP}")
source_group("cutil" "${SSRC}/cutil/${H_CPP}")
source_group("dict" "${SSRC}/dict/${H_CPP}")
source_group("lstm" "${SSRC}/lstm/${H_CPP}")
source_group("opencl" "${SSRC}/opencl/${H_CPP}")
source_group("textord" "${SSRC}/textord/${H_CPP}")
source_group("viewer" "${SSRC}/viewer/${H_CPP}")
source_group("wordrec" "${SSRC}/wordrec/${H_CPP}")

View File

@ -79,8 +79,6 @@ AC_SUBST([AM_CPPFLAGS])
# Can be overridden with `configure --disable-silent-rules` or with `make V=1`.
AM_SILENT_RULES([yes])
OPENCL_INC="/opt/AMDAPP/include"
OPENCL_LIBS="-lOpenCL"
#############################
#
# Platform specific setup
@ -102,15 +100,12 @@ case "${host_os}" in
AM_CONDITIONAL([ADD_RT], true)
;;
*darwin*)
OPENCL_LIBS=""
OPENCL_INC=""
AM_CONDITIONAL([ADD_RT], false)
;;
*android*|openbsd*)
AM_CONDITIONAL([ADD_RT], false)
;;
powerpc-*-darwin*)
OPENCL_LIBS=""
;;
*)
# default
@ -263,19 +258,6 @@ have_tiff=false
# Note that the first usage of AC_CHECK_HEADERS must be unconditional.
AC_CHECK_HEADERS([tiffio.h], [have_tiff=true], [have_tiff=false])
# check whether to build opencl version
AC_MSG_CHECKING([--enable-opencl argument])
AC_ARG_ENABLE([opencl],
AS_HELP_STRING([--enable-opencl], [enable opencl build [default=no]]))
AC_MSG_RESULT([$enable_opencl])
# check for opencl header
have_opencl=false
if test "$enable_opencl" = "yes"; then
AC_CHECK_HEADERS([CL/cl.h], [have_opencl=true], [
AC_CHECK_HEADERS(OpenCL/cl.h, [have_opencl=true], [have_opencl=false])
])
fi
# Configure arguments which allow disabling some optional libraries.
AC_ARG_WITH([archive],
AS_HELP_STRING([--with-archive],
@ -323,9 +305,6 @@ m4_define([MY_CHECK_FRAMEWORK],
fi]
)
have_opencl_lib=false
OPENCL_CPPFLAGS=''
OPENCL_LDFLAGS=''
case "${host_os}" in
*darwin* | *-macos10*)
MY_CHECK_FRAMEWORK([Accelerate])
@ -333,36 +312,11 @@ case "${host_os}" in
AM_CPPFLAGS="-DHAVE_FRAMEWORK_ACCELERATE $AM_CPPFLAGS"
AM_LDFLAGS="$AM_LDFLAGS -framework Accelerate"
fi
MY_CHECK_FRAMEWORK([OpenCL])
if test "$enable_opencl" = "yes"; then
if test $my_cv_framework_OpenCL = no; then
AC_MSG_ERROR([Required OpenCL library not found!])
fi
AM_CPPFLAGS="-DUSE_OPENCL $AM_CPPFLAGS"
OPENCL_CPPFLAGS=""
OPENCL_LDFLAGS="-framework OpenCL"
fi
;;
*)
# default
if test "$enable_opencl" = "yes"; then
AC_CHECK_LIB([OpenCL], [clGetPlatformIDs],
[have_opencl_lib=true], [have_opencl_lib=false])
if !($have_opencl); then
AC_MSG_ERROR([Required OpenCL headers not found!])
fi
if !($have_opencl_lib); then
AC_MSG_ERROR([Required OpenCL library not found!])
fi
AM_CPPFLAGS="-DUSE_OPENCL $AM_CPPFLAGS"
OPENCL_CPPFLAGS="-I${OPENCL_INC}"
OPENCL_LDFLAGS="${OPENCL_LIBS}"
fi
;;
esac
AM_CONDITIONAL([OPENCL], [test "$enable_opencl" = "yes"])
AC_SUBST([OPENCL_CPPFLAGS])
AC_SUBST([OPENCL_LDFLAGS])
# check whether to build tesseract with -fvisibility=hidden -fvisibility-inlines-hidden
# http://gcc.gnu.org/wiki/Visibility

View File

@ -86,15 +86,6 @@ public:
*/
static const char *Version();
/**
* If compiled with OpenCL AND an available OpenCL
* device is deemed faster than serial code, then
* "device" is populated with the cl_device_id
* and returns sizeof(cl_device_id)
* otherwise *device=nullptr and returns 0.
*/
static size_t getOpenCLDevice(void **device);
/**
* Set the name of the input file. Needed for training and
* reading a UNLV zone file, and for searchable PDF output.

View File

@ -186,8 +186,6 @@ TESS_API int TessResultRendererImageNum(TessResultRenderer *renderer);
TESS_API TessBaseAPI *TessBaseAPICreate();
TESS_API void TessBaseAPIDelete(TessBaseAPI *handle);
TESS_API size_t TessBaseAPIGetOpenCLDevice(TessBaseAPI *handle, void **device);
TESS_API void TessBaseAPISetInputName(TessBaseAPI *handle, const char *name);
TESS_API const char *TessBaseAPIGetInputName(TessBaseAPI *handle);

View File

@ -41,9 +41,6 @@
#endif
#include "mutableiterator.h" // for MutableIterator
#include "normalis.h" // for kBlnBaselineOffset, kBlnXHeight
#if defined(USE_OPENCL)
# include "openclwrapper.h" // for OpenclDevice
#endif
#include "pageres.h" // for PAGE_RES_IT, WERD_RES, PAGE_RES, CR_DE...
#include "paragraphs.h" // for DetectParagraphs
#include "params.h" // for BoolParam, IntParam, DoubleParam, Stri...
@ -243,27 +240,6 @@ const char *TessBaseAPI::Version() {
return TESSERACT_VERSION_STR;
}
/**
* If compiled with OpenCL AND an available OpenCL
* device is deemed faster than serial code, then
* "device" is populated with the cl_device_id
* and returns sizeof(cl_device_id)
* otherwise *device=nullptr and returns 0.
*/
size_t TessBaseAPI::getOpenCLDevice(void **data) {
#ifdef USE_OPENCL
ds_device device = OpenclDevice::getDeviceSelection();
if (device.type == DS_DEVICE_OPENCL_DEVICE) {
*data = new cl_device_id;
memcpy(*data, &device.oclDeviceID, sizeof(cl_device_id));
return sizeof(cl_device_id);
}
#endif
*data = nullptr;
return 0;
}
/**
* Set the name of the input file. Needed only for training and
* loading a UNLV zone file.
@ -398,10 +374,6 @@ int TessBaseAPI::Init(const char *data, int data_size, const char *language, Ocr
delete tesseract_;
tesseract_ = nullptr;
}
#ifdef USE_OPENCL
OpenclDevice od;
od.InitEnv();
#endif
bool reset_classifier = true;
if (tesseract_ == nullptr) {
reset_classifier = false;

View File

@ -137,10 +137,6 @@ void TessBaseAPIDelete(TessBaseAPI *handle) {
delete handle;
}
size_t TessBaseAPIGetOpenCLDevice(TessBaseAPI * /*handle*/, void **device) {
return TessBaseAPI::getOpenCLDevice(device);
}
void TessBaseAPISetInputName(TessBaseAPI *handle, const char *name) {
handle->SetInputName(name);
}

View File

@ -25,10 +25,6 @@
#include "thresholder.h"
#include "tprintf.h" // for tprintf
#if defined(USE_OPENCL)
# include "openclwrapper.h" // for OpenclDevice
#endif
#include <allheaders.h>
#include <tesseract/baseapi.h> // for api->GetIntVariable()
@ -388,19 +384,7 @@ void ImageThresholder::OtsuThresholdRectToPix(Image src_pix, Image *out_pix) con
int num_channels = OtsuThreshold(src_pix, rect_left_, rect_top_, rect_width_, rect_height_,
thresholds, hi_values);
// only use opencl if compiled w/ OpenCL and selected device is opencl
#ifdef USE_OPENCL
OpenclDevice od;
if (num_channels == 4 && od.selectedDeviceIsOpenCL() && rect_top_ == 0 && rect_left_ == 0) {
od.ThresholdRectToPixOCL((unsigned char *)pixGetData(src_pix), num_channels,
pixGetWpl(src_pix) * 4, &thresholds[0], &hi_values[0], out_pix /*pix_OCL*/,
rect_height_, rect_width_, rect_top_, rect_left_);
} else {
#endif
ThresholdRectToPix(src_pix, num_channels, thresholds, hi_values, out_pix);
#ifdef USE_OPENCL
}
#endif
}
/// Threshold the rectangle, taking everything except the src_pix

View File

@ -21,9 +21,6 @@
#include <allheaders.h>
#include <cstring>
#include "helpers.h"
#if defined(USE_OPENCL)
# include "openclwrapper.h" // for OpenclDevice
#endif
namespace tesseract {
@ -47,53 +44,6 @@ int OtsuThreshold(Image src_pix, int left, int top, int width, int height, std::
thresholds.resize(num_channels);
hi_values.resize(num_channels);
// only use opencl if compiled w/ OpenCL and selected device is opencl
#ifdef USE_OPENCL
// all of channel 0 then all of channel 1...
std::vector<int> histogramAllChannels(kHistogramSize * num_channels);
// Calculate Histogram on GPU
OpenclDevice od;
if (od.selectedDeviceIsOpenCL() && (num_channels == 1 || num_channels == 4) && top == 0 &&
left == 0) {
od.HistogramRectOCL(pixGetData(src_pix), num_channels, pixGetWpl(src_pix) * 4, left, top, width,
height, kHistogramSize, &histogramAllChannels[0]);
// Calculate Threshold from Histogram on cpu
for (int ch = 0; ch < num_channels; ++ch) {
thresholds[ch] = -1;
hi_values[ch] = -1;
int *histogram = &histogramAllChannels[kHistogramSize * ch];
int H;
int best_omega_0;
int best_t = OtsuStats(histogram, &H, &best_omega_0);
if (best_omega_0 == 0 || best_omega_0 == H) {
// This channel is empty.
continue;
}
// To be a convincing foreground we must have a small fraction of H
// or to be a convincing background we must have a large fraction of H.
// In between we assume this channel contains no thresholding information.
int hi_value = best_omega_0 < H * 0.5;
thresholds[ch] = best_t;
if (best_omega_0 > H * 0.75) {
any_good_hivalue = true;
hi_values[ch] = 0;
} else if (best_omega_0 < H * 0.25) {
any_good_hivalue = true;
hi_values[ch] = 1;
} else {
// In case all channels are like this, keep the best of the bad lot.
double hi_dist = hi_value ? (H - best_omega_0) : best_omega_0;
if (hi_dist > best_hi_dist) {
best_hi_dist = hi_dist;
best_hi_value = hi_value;
best_hi_index = ch;
}
}
}
} else {
#endif
for (int ch = 0; ch < num_channels; ++ch) {
thresholds[ch] = -1;
hi_values[ch] = -1;
@ -128,9 +78,6 @@ int OtsuThreshold(Image src_pix, int left, int top, int width, int height, std::
}
}
}
#ifdef USE_OPENCL
}
#endif // USE_OPENCL
if (!any_good_hivalue) {
// Use the best of the ones that were not good enough.

View File

@ -1,926 +0,0 @@
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef TESSERACT_OPENCL_OCLKERNELS_H_
# define TESSERACT_OPENCL_OCLKERNELS_H_
# ifndef USE_EXTERNAL_KERNEL
# define KERNEL(...) # __VA_ARGS__ "\n"
// Double precision is a default of spreadsheets
// cl_khr_fp64: Khronos extension
// cl_amd_fp64: AMD extension
// use build option outside to define fp_t
/////////////////////////////////////////////
static const char *kernel_src = KERNEL(
\n #ifdef KHR_DP_EXTENSION\n
\n #pragma OPENCL EXTENSION cl_khr_fp64
: enable\n
\n #elif AMD_DP_EXTENSION\n
\n #pragma OPENCL EXTENSION cl_amd_fp64
: enable\n
\n #else \n
\n #endif \n
__kernel void composeRGBPixel(__global uint *tiffdata, int w, int h, int wpl, __global uint *output) {
int i = get_global_id(1);
int j = get_global_id(0);
int tiffword, rval, gval, bval;
//Ignore the excess
if ((i >= h) || (j >= w))
return;
tiffword = tiffdata[i * w + j];
rval = ((tiffword)&0xff);
gval = (((tiffword) >> 8) & 0xff);
bval = (((tiffword) >> 16) & 0xff);
output[i * wpl + j] = (rval << (8 * (sizeof(uint) - 1 - 0))) | (gval << (8 * (sizeof(uint) - 1 - 1))) | (bval << (8 * (sizeof(uint) - 1 - 2)));
})
KERNEL(
\n__kernel void pixSubtract_inplace(__global int *dword, __global int *sword, const int wpl, const int h) {
const unsigned int row = get_global_id(1);
const unsigned int col = get_global_id(0);
const unsigned int pos = row * wpl + col;
//Ignore the execss
if (row >= h || col >= wpl)
return;
*(dword + pos) &= ~(*(sword + pos));
}\n)
KERNEL(
\n__kernel void morphoDilateHor_5x5(__global int *sword, __global int *dword, const int wpl, const int h) {
const unsigned int pos = get_global_id(0);
unsigned int prevword, nextword, currword, tempword;
unsigned int destword;
const int col = pos % wpl;
//Ignore the execss
if (pos >= (wpl * h))
return;
currword = *(sword + pos);
destword = currword;
//Handle boundary conditions
if (col == 0)
prevword = 0;
else
prevword = *(sword + pos - 1);
if (col == (wpl - 1))
nextword = 0;
else
nextword = *(sword + pos + 1);
//Loop unrolled
//1 bit to left and 1 bit to right
//Get the max value on LHS of every pixel
tempword = (prevword << (31)) | ((currword >> 1));
destword |= tempword;
//Get max value on RHS of every pixel
tempword = (currword << 1) | (nextword >> (31));
destword |= tempword;
//2 bit to left and 2 bit to right
//Get the max value on LHS of every pixel
tempword = (prevword << (30)) | ((currword >> 2));
destword |= tempword;
//Get max value on RHS of every pixel
tempword = (currword << 2) | (nextword >> (30));
destword |= tempword;
*(dword + pos) = destword;
}\n)
KERNEL(
\n__kernel void morphoDilateVer_5x5(__global int *sword, __global int *dword, const int wpl, const int h) {
const int col = get_global_id(0);
const int row = get_global_id(1);
const unsigned int pos = row * wpl + col;
unsigned int tempword;
unsigned int destword;
int i;
//Ignore the execss
if (row >= h || col >= wpl)
return;
destword = *(sword + pos);
//2 words above
i = (row - 2) < 0 ? row : (row - 2);
tempword = *(sword + i * wpl + col);
destword |= tempword;
//1 word above
i = (row - 1) < 0 ? row : (row - 1);
tempword = *(sword + i * wpl + col);
destword |= tempword;
//1 word below
i = (row >= (h - 1)) ? row : (row + 1);
tempword = *(sword + i * wpl + col);
destword |= tempword;
//2 words below
i = (row >= (h - 2)) ? row : (row + 2);
tempword = *(sword + i * wpl + col);
destword |= tempword;
*(dword + pos) = destword;
}\n)
KERNEL(
\n__kernel void morphoDilateHor(__global int *sword, __global int *dword, const int xp, const int xn, const int wpl, const int h) {
const int col = get_global_id(0);
const int row = get_global_id(1);
const unsigned int pos = row * wpl + col;
unsigned int parbitsxp, parbitsxn, nwords;
unsigned int destword, tempword, lastword, currword;
unsigned int lnextword, lprevword, rnextword, rprevword, firstword, secondword;
int i, j, siter, eiter;
//Ignore the execss
if (pos >= (wpl * h) || (xn < 1 && xp < 1))
return;
currword = *(sword + pos);
destword = currword;
parbitsxp = xp & 31;
parbitsxn = xn & 31;
nwords = xp >> 5;
if (parbitsxp > 0)
nwords += 1;
else
parbitsxp = 31;
siter = (col - nwords);
eiter = (col + nwords);
//Get prev word
if (col == 0)
firstword = 0x0;
else
firstword = *(sword + pos - 1);
//Get next word
if (col == (wpl - 1))
secondword = 0x0;
else
secondword = *(sword + pos + 1);
//Last partial bits on either side
for (i = 1; i <= parbitsxp; i++) {
//Get the max value on LHS of every pixel
tempword = ((i == parbitsxp) && (parbitsxp != parbitsxn)) ? 0x0 : (firstword << (32 - i)) | ((currword >> i));
destword |= tempword;
//Get max value on RHS of every pixel
tempword = (currword << i) | (secondword >> (32 - i));
destword |= tempword;
}
//Return if halfwidth <= 1 word
if (nwords == 1) {
if (xn == 32) {
destword |= firstword;
}
if (xp == 32) {
destword |= secondword;
}
*(dword + pos) = destword;
return;
}
if (siter < 0)
firstword = 0x0;
else
firstword = *(sword + row * wpl + siter);
if (eiter >= wpl)
lastword = 0x0;
else
lastword = *(sword + row * wpl + eiter);
for (i = 1; i < nwords; i++) {
//Gets LHS words
if ((siter + i) < 0)
secondword = 0x0;
else
secondword = *(sword + row * wpl + siter + i);
lprevword = firstword << (32 - parbitsxn) | secondword >> parbitsxn;
firstword = secondword;
if ((siter + i + 1) < 0)
secondword = 0x0;
else
secondword = *(sword + row * wpl + siter + i + 1);
lnextword = firstword << (32 - parbitsxn) | secondword >> parbitsxn;
//Gets RHS words
if ((eiter - i) >= wpl)
firstword = 0x0;
else
firstword = *(sword + row * wpl + eiter - i);
rnextword = firstword << parbitsxp | lastword >> (32 - parbitsxp);
lastword = firstword;
if ((eiter - i - 1) >= wpl)
firstword = 0x0;
else
firstword = *(sword + row * wpl + eiter - i - 1);
rprevword = firstword << parbitsxp | lastword >> (32 - parbitsxp);
for (j = 1; j < 32; j++) {
//OR LHS full words
tempword = (lprevword << j) | (lnextword >> (32 - j));
destword |= tempword;
//OR RHS full words
tempword = (rprevword << j) | (rnextword >> (32 - j));
destword |= tempword;
}
destword |= lprevword;
destword |= lnextword;
destword |= rprevword;
destword |= rnextword;
lastword = firstword;
firstword = secondword;
}
*(dword + pos) = destword;
}\n)
KERNEL(
\n__kernel void morphoDilateHor_32word(__global int *sword, __global int *dword, const int halfwidth, const int wpl, const int h, const char isEven) {
const int col = get_global_id(0);
const int row = get_global_id(1);
const unsigned int pos = row * wpl + col;
unsigned int prevword, nextword, currword, tempword;
unsigned int destword;
int i;
//Ignore the execss
if (pos >= (wpl * h))
return;
currword = *(sword + pos);
destword = currword;
//Handle boundary conditions
if (col == 0)
prevword = 0;
else
prevword = *(sword + pos - 1);
if (col == (wpl - 1))
nextword = 0;
else
nextword = *(sword + pos + 1);
for (i = 1; i <= halfwidth; i++) {
//Get the max value on LHS of every pixel
if (i == halfwidth && isEven) {
tempword = 0x0;
} else {
tempword = (prevword << (32 - i)) | ((currword >> i));
}
destword |= tempword;
//Get max value on RHS of every pixel
tempword = (currword << i) | (nextword >> (32 - i));
destword |= tempword;
}
*(dword + pos) = destword;
}\n)
KERNEL(
\n__kernel void morphoDilateVer(__global int *sword, __global int *dword, const int yp, const int wpl, const int h, const int yn) {
const int col = get_global_id(0);
const int row = get_global_id(1);
const unsigned int pos = row * wpl + col;
unsigned int tempword;
unsigned int destword;
int i, siter, eiter;
//Ignore the execss
if (row >= h || col >= wpl)
return;
destword = *(sword + pos);
//Set start position and end position considering the boundary conditions
siter = (row - yn) < 0 ? 0 : (row - yn);
eiter = (row >= (h - yp)) ? (h - 1) : (row + yp);
for (i = siter; i <= eiter; i++) {
tempword = *(sword + i * wpl + col);
destword |= tempword;
}
*(dword + pos) = destword;
}\n)
KERNEL(
\n__kernel void morphoErodeHor_5x5(__global int *sword, __global int *dword, const int wpl, const int h) {
const unsigned int pos = get_global_id(0);
unsigned int prevword, nextword, currword, tempword;
unsigned int destword;
const int col = pos % wpl;
//Ignore the execss
if (pos >= (wpl * h))
return;
currword = *(sword + pos);
destword = currword;
//Handle boundary conditions
if (col == 0)
prevword = 0xffffffff;
else
prevword = *(sword + pos - 1);
if (col == (wpl - 1))
nextword = 0xffffffff;
else
nextword = *(sword + pos + 1);
//Loop unrolled
//1 bit to left and 1 bit to right
//Get the min value on LHS of every pixel
tempword = (prevword << (31)) | ((currword >> 1));
destword &= tempword;
//Get min value on RHS of every pixel
tempword = (currword << 1) | (nextword >> (31));
destword &= tempword;
//2 bit to left and 2 bit to right
//Get the min value on LHS of every pixel
tempword = (prevword << (30)) | ((currword >> 2));
destword &= tempword;
//Get min value on RHS of every pixel
tempword = (currword << 2) | (nextword >> (30));
destword &= tempword;
*(dword + pos) = destword;
}\n)
KERNEL(
\n__kernel void morphoErodeVer_5x5(__global int *sword, __global int *dword, const int wpl, const int h, const int fwmask, const int lwmask) {
const int col = get_global_id(0);
const int row = get_global_id(1);
const unsigned int pos = row * wpl + col;
unsigned int tempword;
unsigned int destword;
int i;
//Ignore the execss
if (row >= h || col >= wpl)
return;
destword = *(sword + pos);
if (row < 2 || row >= (h - 2)) {
destword = 0x0;
} else {
//2 words above
//i = (row - 2) < 0 ? row : (row - 2);
i = (row - 2);
tempword = *(sword + i * wpl + col);
destword &= tempword;
//1 word above
//i = (row - 1) < 0 ? row : (row - 1);
i = (row - 1);
tempword = *(sword + i * wpl + col);
destword &= tempword;
//1 word below
//i = (row >= (h - 1)) ? row : (row + 1);
i = (row + 1);
tempword = *(sword + i * wpl + col);
destword &= tempword;
//2 words below
//i = (row >= (h - 2)) ? row : (row + 2);
i = (row + 2);
tempword = *(sword + i * wpl + col);
destword &= tempword;
if (col == 0) {
destword &= fwmask;
}
if (col == (wpl - 1)) {
destword &= lwmask;
}
}
*(dword + pos) = destword;
}\n)
KERNEL(
\n__kernel void morphoErodeHor(__global int *sword, __global int *dword, const int xp, const int xn, const int wpl, const int h, const char isAsymmetric, const int rwmask, const int lwmask) {
const int col = get_global_id(0);
const int row = get_global_id(1);
const unsigned int pos = row * wpl + col;
unsigned int parbitsxp, parbitsxn, nwords;
unsigned int destword, tempword, lastword, currword;
unsigned int lnextword, lprevword, rnextword, rprevword, firstword, secondword;
int i, j, siter, eiter;
//Ignore the execss
if (pos >= (wpl * h) || (xn < 1 && xp < 1))
return;
currword = *(sword + pos);
destword = currword;
parbitsxp = xp & 31;
parbitsxn = xn & 31;
nwords = xp >> 5;
if (parbitsxp > 0)
nwords += 1;
else
parbitsxp = 31;
siter = (col - nwords);
eiter = (col + nwords);
//Get prev word
if (col == 0)
firstword = 0xffffffff;
else
firstword = *(sword + pos - 1);
//Get next word
if (col == (wpl - 1))
secondword = 0xffffffff;
else
secondword = *(sword + pos + 1);
//Last partial bits on either side
for (i = 1; i <= parbitsxp; i++) {
//Get the max value on LHS of every pixel
tempword = (firstword << (32 - i)) | ((currword >> i));
destword &= tempword;
//Get max value on RHS of every pixel
tempword = ((i == parbitsxp) && (parbitsxp != parbitsxn)) ? 0xffffffff : (currword << i) | (secondword >> (32 - i));
//tempword = (currword << i) | (secondword >> (32 - i));
destword &= tempword;
}
//Return if halfwidth <= 1 word
if (nwords == 1) {
if (xp == 32) {
destword &= firstword;
}
if (xn == 32) {
destword &= secondword;
}
//Clear boundary pixels
if (isAsymmetric) {
if (col == 0)
destword &= rwmask;
if (col == (wpl - 1))
destword &= lwmask;
}
*(dword + pos) = destword;
return;
}
if (siter < 0)
firstword = 0xffffffff;
else
firstword = *(sword + row * wpl + siter);
if (eiter >= wpl)
lastword = 0xffffffff;
else
lastword = *(sword + row * wpl + eiter);
for (i = 1; i < nwords; i++) {
//Gets LHS words
if ((siter + i) < 0)
secondword = 0xffffffff;
else
secondword = *(sword + row * wpl + siter + i);
lprevword = firstword << (32 - parbitsxp) | secondword >> (parbitsxp);
firstword = secondword;
if ((siter + i + 1) < 0)
secondword = 0xffffffff;
else
secondword = *(sword + row * wpl + siter + i + 1);
lnextword = firstword << (32 - parbitsxp) | secondword >> (parbitsxp);
//Gets RHS words
if ((eiter - i) >= wpl)
firstword = 0xffffffff;
else
firstword = *(sword + row * wpl + eiter - i);
rnextword = firstword << parbitsxn | lastword >> (32 - parbitsxn);
lastword = firstword;
if ((eiter - i - 1) >= wpl)
firstword = 0xffffffff;
else
firstword = *(sword + row * wpl + eiter - i - 1);
rprevword = firstword << parbitsxn | lastword >> (32 - parbitsxn);
for (j = 0; j < 32; j++) {
//OR LHS full words
tempword = (lprevword << j) | (lnextword >> (32 - j));
destword &= tempword;
//OR RHS full words
tempword = (rprevword << j) | (rnextword >> (32 - j));
destword &= tempword;
}
destword &= lprevword;
destword &= lnextword;
destword &= rprevword;
destword &= rnextword;
lastword = firstword;
firstword = secondword;
}
if (isAsymmetric) {
//Clear boundary pixels
if (col < (nwords - 1))
destword = 0x0;
else if (col == (nwords - 1))
destword &= rwmask;
else if (col > (wpl - nwords))
destword = 0x0;
else if (col == (wpl - nwords))
destword &= lwmask;
}
*(dword + pos) = destword;
}\n)
KERNEL(
\n__kernel void morphoErodeHor_32word(__global int *sword, __global int *dword, const int halfwidth, const int wpl, const int h, const char clearBoundPixH, const int rwmask, const int lwmask, const char isEven) {
const int col = get_global_id(0);
const int row = get_global_id(1);
const unsigned int pos = row * wpl + col;
unsigned int prevword, nextword, currword, tempword, destword;
int i;
//Ignore the execss
if (pos >= (wpl * h))
return;
currword = *(sword + pos);
destword = currword;
//Handle boundary conditions
if (col == 0)
prevword = 0xffffffff;
else
prevword = *(sword + pos - 1);
if (col == (wpl - 1))
nextword = 0xffffffff;
else
nextword = *(sword + pos + 1);
for (i = 1; i <= halfwidth; i++) {
//Get the min value on LHS of every pixel
tempword = (prevword << (32 - i)) | ((currword >> i));
destword &= tempword;
//Get min value on RHS of every pixel
if (i == halfwidth && isEven) {
tempword = 0xffffffff;
} else {
tempword = (currword << i) | (nextword >> (32 - i));
}
destword &= tempword;
}
if (clearBoundPixH) {
if (col == 0) {
destword &= rwmask;
} else if (col == (wpl - 1)) {
destword &= lwmask;
}
}
*(dword + pos) = destword;
}\n)
KERNEL(
\n__kernel void morphoErodeVer(__global int *sword, __global int *dword, const int yp, const int wpl, const int h, const char clearBoundPixV, const int yn) {
const int col = get_global_id(0);
const int row = get_global_id(1);
const unsigned int pos = row * wpl + col;
unsigned int tempword, destword;
int i, siter, eiter;
//Ignore the execss
if (row >= h || col >= wpl)
return;
destword = *(sword + pos);
//Set start position and end position considering the boundary conditions
siter = (row - yp) < 0 ? 0 : (row - yp);
eiter = (row >= (h - yn)) ? (h - 1) : (row + yn);
for (i = siter; i <= eiter; i++) {
tempword = *(sword + i * wpl + col);
destword &= tempword;
}
//Clear boundary pixels
if (clearBoundPixV && ((row < yp) || ((h - row) <= yn))) {
destword = 0x0;
}
*(dword + pos) = destword;
}\n)
// HistogramRect Kernel: Accumulate
// assumes 4 channels, i.e., bytes_per_pixel = 4
// assumes number of pixels is multiple of 8
// data is laid out as
// ch0 ch1 ...
// bin0 bin1 bin2... bin0...
// rpt0,1,2...256 rpt0,1,2...
KERNEL(
\n #define HIST_REDUNDANCY 256\n
\n #define GROUP_SIZE 256\n
\n #define HIST_SIZE 256\n
\n #define NUM_CHANNELS 4\n
\n #define HR_UNROLL_SIZE 8 \n
\n #define HR_UNROLL_TYPE uchar8 \n
__attribute__((reqd_work_group_size(256, 1, 1))) __kernel void kernel_HistogramRectAllChannels(__global const uchar8 *data, uint numPixels, __global uint *histBuffer) {
// declare variables
uchar8 pixels;
int threadOffset = get_global_id(0) % HIST_REDUNDANCY;
// for each pixel/channel, accumulate in global memory
for (uint pc = get_global_id(0); pc < numPixels * NUM_CHANNELS / HR_UNROLL_SIZE; pc += get_global_size(0)) {
pixels = data[pc];
// channel bin thread
atomic_inc(&histBuffer[0 * HIST_SIZE * HIST_REDUNDANCY + pixels.s0 * HIST_REDUNDANCY + threadOffset]); // ch0
atomic_inc(&histBuffer[0 * HIST_SIZE * HIST_REDUNDANCY + pixels.s4 * HIST_REDUNDANCY + threadOffset]); // ch0
atomic_inc(&histBuffer[1 * HIST_SIZE * HIST_REDUNDANCY + pixels.s1 * HIST_REDUNDANCY + threadOffset]); // ch1
atomic_inc(&histBuffer[1 * HIST_SIZE * HIST_REDUNDANCY + pixels.s5 * HIST_REDUNDANCY + threadOffset]); // ch1
atomic_inc(&histBuffer[2 * HIST_SIZE * HIST_REDUNDANCY + pixels.s2 * HIST_REDUNDANCY + threadOffset]); // ch2
atomic_inc(&histBuffer[2 * HIST_SIZE * HIST_REDUNDANCY + pixels.s6 * HIST_REDUNDANCY + threadOffset]); // ch2
atomic_inc(&histBuffer[3 * HIST_SIZE * HIST_REDUNDANCY + pixels.s3 * HIST_REDUNDANCY + threadOffset]); // ch3
atomic_inc(&histBuffer[3 * HIST_SIZE * HIST_REDUNDANCY + pixels.s7 * HIST_REDUNDANCY + threadOffset]); // ch3
}
})
KERNEL(
// NUM_CHANNELS = 1
__attribute__((reqd_work_group_size(256, 1, 1))) __kernel void kernel_HistogramRectOneChannel(__global const uchar8 *data, uint numPixels, __global uint *histBuffer) {
// declare variables
uchar8 pixels;
int threadOffset = get_global_id(0) % HIST_REDUNDANCY;
// for each pixel/channel, accumulate in global memory
for (uint pc = get_global_id(0); pc < numPixels / HR_UNROLL_SIZE; pc += get_global_size(0)) {
pixels = data[pc];
// bin thread
atomic_inc(&histBuffer[pixels.s0 * HIST_REDUNDANCY + threadOffset]);
atomic_inc(&histBuffer[pixels.s1 * HIST_REDUNDANCY + threadOffset]);
atomic_inc(&histBuffer[pixels.s2 * HIST_REDUNDANCY + threadOffset]);
atomic_inc(&histBuffer[pixels.s3 * HIST_REDUNDANCY + threadOffset]);
atomic_inc(&histBuffer[pixels.s4 * HIST_REDUNDANCY + threadOffset]);
atomic_inc(&histBuffer[pixels.s5 * HIST_REDUNDANCY + threadOffset]);
atomic_inc(&histBuffer[pixels.s6 * HIST_REDUNDANCY + threadOffset]);
atomic_inc(&histBuffer[pixels.s7 * HIST_REDUNDANCY + threadOffset]);
}
})
// HistogramRect Kernel: Reduction
// only supports 4 channels
// each work group handles a single channel of a single histogram bin
KERNEL(__attribute__((reqd_work_group_size(256, 1, 1))) __kernel void kernel_HistogramRectAllChannelsReduction(int n, // unused pixel redundancy
__global uint *histBuffer, __global int *histResult) {
// declare variables
int channel = get_group_id(0) / HIST_SIZE;
int bin = get_group_id(0) % HIST_SIZE;
int value = 0;
// accumulate in register
for (uint i = get_local_id(0); i < HIST_REDUNDANCY; i += GROUP_SIZE) {
value += histBuffer[channel * HIST_SIZE * HIST_REDUNDANCY + bin * HIST_REDUNDANCY + i];
}
// reduction in local memory
__local int localHist[GROUP_SIZE];
localHist[get_local_id(0)] = value;
barrier(CLK_LOCAL_MEM_FENCE);
for (int stride = GROUP_SIZE / 2; stride >= 1; stride /= 2) {
if (get_local_id(0) < stride) {
value = localHist[get_local_id(0) + stride];
}
barrier(CLK_LOCAL_MEM_FENCE);
if (get_local_id(0) < stride) {
localHist[get_local_id(0)] += value;
}
barrier(CLK_LOCAL_MEM_FENCE);
}
// write reduction to final result
if (get_local_id(0) == 0) {
histResult[get_group_id(0)] = localHist[0];
}
} // kernel_HistogramRectAllChannels
)
KERNEL(
// NUM_CHANNELS = 1
__attribute__((reqd_work_group_size(256, 1, 1))) __kernel void kernel_HistogramRectOneChannelReduction(int n, // unused pixel redundancy
__global uint *histBuffer, __global int *histResult) {
// declare variables
// int channel = get_group_id(0)/HIST_SIZE;
int bin = get_group_id(0) % HIST_SIZE;
int value = 0;
// accumulate in register
for (int i = get_local_id(0); i < HIST_REDUNDANCY; i += GROUP_SIZE) {
value += histBuffer[bin * HIST_REDUNDANCY + i];
}
// reduction in local memory
__local int localHist[GROUP_SIZE];
localHist[get_local_id(0)] = value;
barrier(CLK_LOCAL_MEM_FENCE);
for (int stride = GROUP_SIZE / 2; stride >= 1; stride /= 2) {
if (get_local_id(0) < stride) {
value = localHist[get_local_id(0) + stride];
}
barrier(CLK_LOCAL_MEM_FENCE);
if (get_local_id(0) < stride) {
localHist[get_local_id(0)] += value;
}
barrier(CLK_LOCAL_MEM_FENCE);
}
// write reduction to final result
if (get_local_id(0) == 0) {
histResult[get_group_id(0)] = localHist[0];
}
} // kernel_HistogramRectOneChannelReduction
)
// ThresholdRectToPix Kernel
// only supports 4 channels
// imageData is input image (24-bits/pixel)
// pix is output image (1-bit/pixel)
KERNEL(
\n #define CHAR_VEC_WIDTH 4 \n
\n #define PIXELS_PER_WORD 32 \n
\n #define PIXELS_PER_BURST 8 \n
\n #define BURSTS_PER_WORD (PIXELS_PER_WORD)/(PIXELS_PER_BURST) \n
\n typedef union {
uchar s[PIXELS_PER_BURST * NUM_CHANNELS];
uchar4 v[(PIXELS_PER_BURST * NUM_CHANNELS) / CHAR_VEC_WIDTH];
} charVec;
__attribute__((reqd_work_group_size(256, 1, 1))) __kernel void kernel_ThresholdRectToPix(__global const uchar4 *imageData, int height, int width,
int wpl, // words per line
__global int *thresholds, __global int *hi_values, __global int *pix) {
// declare variables
int pThresholds[NUM_CHANNELS];
int pHi_Values[NUM_CHANNELS];
for (int i = 0; i < NUM_CHANNELS; i++) {
pThresholds[i] = thresholds[i];
pHi_Values[i] = hi_values[i];
}
// for each word (32 pixels) in output image
for (uint w = get_global_id(0); w < wpl * height; w += get_global_size(0)) {
unsigned int word = 0; // all bits start at zero
// for each burst in word
for (int b = 0; b < BURSTS_PER_WORD; b++) {
// load burst
charVec pixels;
int offset = (w / wpl) * width;
offset += (w % wpl) * PIXELS_PER_WORD;
offset += b * PIXELS_PER_BURST;
for (int i = 0; i < PIXELS_PER_BURST; ++i)
pixels.v[i] = imageData[offset + i];
// for each pixel in burst
for (int p = 0; p < PIXELS_PER_BURST; p++) {
for (int c = 0; c < NUM_CHANNELS; c++) {
unsigned char pixChan = pixels.s[p * NUM_CHANNELS + c];
if (pHi_Values[c] >= 0 && (pixChan > pThresholds[c]) == (pHi_Values[c] == 0)) {
const uint kTopBit = 0x80000000;
word |= (kTopBit >> ((b * PIXELS_PER_BURST + p) & 31));
}
}
}
}
pix[w] = word;
}
}
\n #define CHAR_VEC_WIDTH 8 \n
\n #define PIXELS_PER_WORD 32 \n
\n #define PIXELS_PER_BURST 8 \n
\n #define BURSTS_PER_WORD (PIXELS_PER_WORD) / (PIXELS_PER_BURST) \n
\n typedef union {
uchar s[PIXELS_PER_BURST * 1];
uchar8 v[(PIXELS_PER_BURST * 1) / CHAR_VEC_WIDTH];
} charVec1;
__attribute__((reqd_work_group_size(256, 1, 1))) __kernel void kernel_ThresholdRectToPix_OneChan(__global const uchar8 *imageData, int height, int width,
int wpl, // words per line of output image
__global int *thresholds, __global int *hi_values, __global int *pix) {
// declare variables
int pThresholds[1];
int pHi_Values[1];
for (int i = 0; i < 1; i++) {
pThresholds[i] = thresholds[i];
pHi_Values[i] = hi_values[i];
}
// for each word (32 pixels) in output image
for (uint w = get_global_id(0); w < wpl * height; w += get_global_size(0)) {
unsigned int word = 0; // all bits start at zero
// for each burst in word
for (int b = 0; b < BURSTS_PER_WORD; b++) {
// load burst
charVec1 pixels;
// for each char8 in burst
pixels.v[0] = imageData[w * BURSTS_PER_WORD + b + 0];
// for each pixel in burst
for (int p = 0; p < PIXELS_PER_BURST; p++) {
//int littleEndianIdx = p ^ 3;
//int bigEndianIdx = p;
int idx =
\n #ifdef __ENDIAN_LITTLE__\n
p ^
3;
\n #else \n
p;
\n #endif \n unsigned char pixChan = pixels.s[idx];
if (pHi_Values[0] >= 0 && (pixChan > pThresholds[0]) == (pHi_Values[0] == 0)) {
const uint kTopBit = 0x80000000;
word |= (kTopBit >> ((b * PIXELS_PER_BURST + p) & 31));
}
}
}
pix[w] = word;
}
})
; // close char*
# endif // USE_EXTERNAL_KERNEL
#endif // TESSERACT_OPENCL_OCLKERNELS_H_
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */

File diff suppressed because it is too large Load Diff

View File

@ -1,179 +0,0 @@
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef TESSERACT_OPENCL_OPENCLWRAPPER_H_
#define TESSERACT_OPENCL_OPENCLWRAPPER_H_
#include <allheaders.h>
#include <cstdio>
#include "pix.h"
#include "tprintf.h"
// including CL/cl.h doesn't occur until USE_OPENCL defined below
/**************************************************************************
* enable/disable use of OpenCL
**************************************************************************/
#ifdef USE_OPENCL
# ifdef __APPLE__
# include <OpenCL/cl.h>
# else
# include <CL/cl.h>
# endif
namespace tesseract {
class Image;
struct TessDeviceScore;
// device type
enum ds_device_type { DS_DEVICE_NATIVE_CPU = 0, DS_DEVICE_OPENCL_DEVICE };
struct ds_device {
ds_device_type type;
cl_device_id oclDeviceID;
char *oclDeviceName;
char *oclDriverVersion;
// a pointer to the score data, the content/format is application defined.
TessDeviceScore *score;
};
# ifndef strcasecmp
# define strcasecmp strcmp
# endif
# define MAX_KERNEL_STRING_LEN 64
# define MAX_CLFILE_NUM 50
# define MAX_CLKERNEL_NUM 200
# define MAX_KERNEL_NAME_LEN 64
# define CL_QUEUE_THREAD_HANDLE_AMD 0x403E
# define GROUPSIZE_X 16
# define GROUPSIZE_Y 16
# define GROUPSIZE_HMORX 256
# define GROUPSIZE_HMORY 1
struct KernelEnv {
cl_context mpkContext;
cl_command_queue mpkCmdQueue;
cl_program mpkProgram;
cl_kernel mpkKernel;
char mckKernelName[150];
};
struct OpenCLEnv {
cl_platform_id mpOclPlatformID;
cl_context mpOclContext;
cl_device_id mpOclDevsID;
cl_command_queue mpOclCmdQueue;
};
typedef int (*cl_kernel_function)(void **userdata, KernelEnv *kenv);
# define CHECK_OPENCL(status, name) \
if (status != CL_SUCCESS) { \
tprintf("OpenCL error code is %d at when %s .\n", status, name); \
}
struct GPUEnv {
// share vb in all modules in hb library
cl_platform_id mpPlatformID;
cl_device_type mDevType;
cl_context mpContext;
cl_device_id *mpArryDevsID;
cl_device_id mpDevID;
cl_command_queue mpCmdQueue;
cl_kernel mpArryKernels[MAX_CLFILE_NUM];
cl_program mpArryPrograms[MAX_CLFILE_NUM]; // one program object maps one
// kernel source file
char mArryKnelSrcFile[MAX_CLFILE_NUM][256], // the max len of kernel file name is 256
mArrykernelNames[MAX_CLKERNEL_NUM][MAX_KERNEL_STRING_LEN + 1];
cl_kernel_function mpArryKnelFuncs[MAX_CLKERNEL_NUM];
int mnKernelCount, mnFileCount, // only one kernel file
mnIsUserCreated; // 1: created , 0:no create and needed to create by
// opencl wrapper
int mnKhrFp64Flag;
int mnAmdFp64Flag;
};
class OpenclDevice {
public:
static GPUEnv gpuEnv;
static int isInited;
OpenclDevice();
~OpenclDevice();
static int InitEnv(); // load dll, call InitOpenclRunEnv(0)
static int InitOpenclRunEnv(int argc); // RegistOpenclKernel, double flags, compile kernels
static int InitOpenclRunEnv_DeviceSelection(
int argc); // RegistOpenclKernel, double flags, compile kernels
static int RegistOpenclKernel();
static int ReleaseOpenclRunEnv();
static int ReleaseOpenclEnv(GPUEnv *gpuInfo);
static int CompileKernelFile(GPUEnv *gpuInfo, const char *buildOption);
static int CachedOfKernerPrg(const GPUEnv *gpuEnvCached, const char *clFileName);
static int GeneratBinFromKernelSource(cl_program program, const char *clFileName);
static int WriteBinaryToFile(const char *fileName, const char *birary, size_t numBytes);
static int BinaryGenerated(const char *clFileName, FILE **fhandle);
// static int CompileKernelFile( const char *filename, GPUEnv *gpuInfo, const
// char *buildOption );
static l_uint32 *pixReadFromTiffKernel(l_uint32 *tiffdata, l_int32 w, l_int32 h, l_int32 wpl,
l_uint32 *line);
static int composeRGBPixelCl(int *tiffdata, int *line, int h, int w);
/* OpenCL implementations of Morphological operations*/
// Initialization of OCL buffers used in Morph operations
static int initMorphCLAllocations(l_int32 wpl, l_int32 h, Image pixs);
static void releaseMorphCLBuffers();
static void pixGetLinesCL(Image pixd, Image pixs, Image *pix_vline, Image *pix_hline, Image *pixClosed,
bool getpixClosed, l_int32 close_hsize, l_int32 close_vsize,
l_int32 open_hsize, l_int32 open_vsize, l_int32 line_hsize,
l_int32 line_vsize);
// int InitOpenclAttr( OpenCLEnv * env );
// int ReleaseKernel( KernelEnv * env );
static int SetKernelEnv(KernelEnv *envInfo);
// int CreateKernel( char * kernelname, KernelEnv * env );
// int RunKernel( const char *kernelName, void **userdata );
// int ConvertToString( const char *filename, char **source );
// int CheckKernelName( KernelEnv *envInfo, const char *kernelName );
// int RegisterKernelWrapper( const char *kernelName, cl_kernel_function
// function ); int RunKernelWrapper( cl_kernel_function function, const char *
// kernelName, void **usrdata ); int GetKernelEnvAndFunc( const char
// *kernelName, KernelEnv *env, cl_kernel_function *function );
static int LoadOpencl();
# ifdef WIN32
// static int OpenclInite();
static void FreeOpenclDll();
# endif
inline static int AddKernelConfig(int kCount, const char *kName);
/* for binarization */
static int HistogramRectOCL(void *imagedata, int bytes_per_pixel, int bytes_per_line, int left,
int top, int width, int height, int kHistogramSize,
int *histogramAllChannels);
static int ThresholdRectToPixOCL(unsigned char *imagedata, int bytes_per_pixel,
int bytes_per_line, int *thresholds, int *hi_values, Image *pix,
int rect_height, int rect_width, int rect_top, int rect_left);
static ds_device getDeviceSelection();
static ds_device selectedDevice;
static bool deviceIsSelected;
static bool selectedDeviceIsOpenCL();
};
}
#endif // USE_OPENCL
#endif // TESSERACT_OPENCL_OPENCLWRAPPER_H_

View File

@ -34,9 +34,6 @@
#include <allheaders.h>
#include <tesseract/baseapi.h>
#include "dict.h"
#if defined(USE_OPENCL)
# include "openclwrapper.h" // for OpenclDevice
#endif
#include <tesseract/renderer.h>
#include "simddetect.h"
#include "tesseractclass.h" // for AnyTessLang
@ -112,34 +109,6 @@ static void PrintVersionInfo() {
printf(" %s\n", versionStrP);
lept_free(versionStrP);
#ifdef USE_OPENCL
cl_platform_id platform[4];
cl_uint num_platforms;
printf(" OpenCL info:\n");
if (clGetPlatformIDs(4, platform, &num_platforms) == CL_SUCCESS) {
printf(" Found %u platform(s).\n", num_platforms);
for (unsigned n = 0; n < num_platforms; n++) {
char info[256];
if (clGetPlatformInfo(platform[n], CL_PLATFORM_NAME, 256, info, 0) == CL_SUCCESS) {
printf(" Platform %u name: %s.\n", n + 1, info);
}
if (clGetPlatformInfo(platform[n], CL_PLATFORM_VERSION, 256, info, 0) == CL_SUCCESS) {
printf(" Version: %s.\n", info);
}
cl_device_id devices[2];
cl_uint num_devices;
if (clGetDeviceIDs(platform[n], CL_DEVICE_TYPE_ALL, 2, devices, &num_devices) == CL_SUCCESS) {
printf(" Found %u device(s).\n", num_devices);
for (unsigned i = 0; i < num_devices; ++i) {
if (clGetDeviceInfo(devices[i], CL_DEVICE_NAME, 256, info, 0) == CL_SUCCESS) {
printf(" Device %u name: %s.\n", i + 1, info);
}
}
}
}
}
#endif
#if defined(HAVE_NEON) || defined(__aarch64__)
if (tesseract::SIMDDetect::IsNEONAvailable())
printf(" Found NEON\n");

View File

@ -27,9 +27,6 @@
#include "edgblob.h"
#include "linefind.h"
#include "tabvector.h"
#if defined(USE_OPENCL)
# include "openclwrapper.h" // for OpenclDevice
#endif
#include <algorithm>
@ -469,18 +466,6 @@ static void GetLineMasks(int resolution, Image src_pix, Image *pix_vline, Image
}
int closing_brick = max_line_width / 3;
// only use opencl if compiled w/ OpenCL and selected device is opencl
#ifdef USE_OPENCL
if (OpenclDevice::selectedDeviceIsOpenCL()) {
// OpenCL pixGetLines Operation
int clStatus =
OpenclDevice::initMorphCLAllocations(pixGetWpl(src_pix), pixGetHeight(src_pix), src_pix);
bool getpixclosed = pix_music_mask != nullptr;
OpenclDevice::pixGetLinesCL(nullptr, src_pix, pix_vline, pix_hline, &pix_closed, getpixclosed,
closing_brick, closing_brick, max_line_width, max_line_width,
min_line_length, min_line_length);
} else {
#endif
// Close up small holes, making it less likely that false alarms are found
// in thickened text (as it will become more solid) and also smoothing over
// some line breaks and nicks in the edges of the lines.
@ -508,9 +493,6 @@ static void GetLineMasks(int resolution, Image src_pix, Image *pix_vline, Image
*pix_hline = pixOpenBrick(nullptr, pix_hollow, min_line_length, 1);
pix_hollow.destroy();
#ifdef USE_OPENCL
}
#endif
// Lines are sufficiently rare, that it is worth checking for a zero image.
bool v_empty = pix_vline->isZero();

1
sw.cpp
View File

@ -21,7 +21,6 @@ void build(Solution &s)
libtesseract.Public += "include"_idir;
libtesseract.Protected +=
"src/opencl"_id,
"src/ccmain"_id,
"src/api"_id,
"src/dict"_id,

View File

@ -12,5 +12,5 @@ URL: https://github.com/tesseract-ocr/tesseract
Version: @VERSION@
Requires.private: lept
Libs: -L${libdir} -ltesseract @libarchive_LIBS@ @libcurl_LIBS@ @TENSORFLOW_LIBS@
Libs.private: -lpthread @OPENCL_LDFLAGS@
Libs.private: -lpthread
Cflags: -I${includedir}