mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2024-11-23 18:49:08 +08:00
Remove unsupported OpenCL code and related API functions (#4220)
Signed-off-by: Stefan Weil <sw@weilnetz.de>
This commit is contained in:
parent
912deb3978
commit
d5e000bc58
4
.gitignore
vendored
4
.gitignore
vendored
@ -83,10 +83,6 @@ __pycache__
|
||||
*.traineddata
|
||||
tessdata_*
|
||||
|
||||
# OpenCL
|
||||
tesseract_opencl_profile_devices.dat
|
||||
kernel*.bin
|
||||
|
||||
# build dirs
|
||||
/build*
|
||||
/*.dll
|
||||
|
@ -88,7 +88,6 @@ option(GRAPHICS_DISABLED "Disable disable graphics (ScrollView)" OFF)
|
||||
option(DISABLED_LEGACY_ENGINE "Disable the legacy OCR engine" OFF)
|
||||
option(ENABLE_LTO "Enable link-time optimization" OFF)
|
||||
option(FAST_FLOAT "Enable float for LSTM" ON)
|
||||
option(ENABLE_OPENCL "Enable unsupported experimental OpenCL support" OFF)
|
||||
option(ENABLE_NATIVE
|
||||
"Enable optimization for host CPU (could break HW compatibility)" OFF)
|
||||
# see
|
||||
@ -460,18 +459,6 @@ else()
|
||||
endif(DISABLE_CURL)
|
||||
endif()
|
||||
|
||||
if(ENABLE_OPENCL)
|
||||
find_package(OpenCL)
|
||||
if(OpenCL_FOUND)
|
||||
include_directories(${OpenCL_INCLUDE_DIRS})
|
||||
message(STATUS "OpenCL_INCLUDE_DIRS: ${OpenCL_INCLUDE_DIRS}")
|
||||
message(STATUS "OpenCL_LIBRARY: ${OpenCL_LIBRARY}")
|
||||
set(USE_OPENCL ON)
|
||||
else()
|
||||
set(USE_OPENCL OFF)
|
||||
endif(OpenCL_FOUND)
|
||||
endif(ENABLE_OPENCL)
|
||||
|
||||
# ##############################################################################
|
||||
#
|
||||
# configure
|
||||
@ -565,11 +552,6 @@ message(STATUS "Disable the legacy OCR engine [DISABLED_LEGACY_ENGINE]: "
|
||||
message(STATUS "Build training tools [BUILD_TRAINING_TOOLS]: "
|
||||
"${BUILD_TRAINING_TOOLS}")
|
||||
message(STATUS "Build tests [BUILD_TESTS]: ${BUILD_TESTS}")
|
||||
if(ENABLE_OPENCL)
|
||||
message(
|
||||
STATUS
|
||||
"Enable unsupported experimental OpenCL [ENABLE_OPENCL]: ${USE_OPENCL}")
|
||||
endif(ENABLE_OPENCL)
|
||||
message(STATUS "Use system ICU Library [USE_SYSTEM_ICU]: ${USE_SYSTEM_ICU}")
|
||||
message(
|
||||
STATUS "Install tesseract configs [INSTALL_CONFIGS]: ${INSTALL_CONFIGS}")
|
||||
@ -608,7 +590,6 @@ file(
|
||||
src/cutil/*.cpp
|
||||
src/dict/*.cpp
|
||||
src/lstm/*.cpp
|
||||
src/opencl/*.cpp
|
||||
src/textord/*.cpp
|
||||
src/viewer/*.cpp
|
||||
src/wordrec/*.cpp)
|
||||
@ -749,7 +730,6 @@ file(
|
||||
src/cutil/*.h
|
||||
src/dict/*.h
|
||||
src/lstm/*.h
|
||||
src/opencl/*.h
|
||||
src/textord/*.h
|
||||
src/viewer/*.h
|
||||
src/wordrec/*.h)
|
||||
@ -816,7 +796,6 @@ target_include_directories(
|
||||
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/src/cutil>
|
||||
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/src/dict>
|
||||
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/src/lstm>
|
||||
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/src/opencl>
|
||||
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/src/textord>
|
||||
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/src/viewer>
|
||||
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/src/wordrec>
|
||||
@ -832,9 +811,6 @@ target_link_libraries(libtesseract PRIVATE ${LIB_Ws2_32} ${LIB_pthread})
|
||||
if(OpenMP_CXX_FOUND)
|
||||
target_link_libraries(libtesseract PUBLIC OpenMP::OpenMP_CXX)
|
||||
endif()
|
||||
if(OpenCL_FOUND)
|
||||
target_link_libraries(libtesseract PUBLIC OpenCL::OpenCL)
|
||||
endif()
|
||||
if(LibArchive_FOUND)
|
||||
target_link_libraries(libtesseract PUBLIC ${LibArchive_LIBRARIES})
|
||||
endif(LibArchive_FOUND)
|
||||
|
37
Makefile.am
37
Makefile.am
@ -78,7 +78,6 @@ if VISIBILITY
|
||||
AM_CPPFLAGS += -DTESS_EXPORTS
|
||||
AM_CPPFLAGS += -fvisibility=hidden -fvisibility-inlines-hidden -fPIC
|
||||
endif
|
||||
AM_CPPFLAGS += $(OPENCL_CPPFLAGS)
|
||||
|
||||
AM_CXXFLAGS = $(OPENMP_CXXFLAGS)
|
||||
|
||||
@ -94,9 +93,6 @@ libtesseract_la_CPPFLAGS += -I$(top_srcdir)/src/classify
|
||||
libtesseract_la_CPPFLAGS += -I$(top_srcdir)/src/cutil
|
||||
libtesseract_la_CPPFLAGS += -I$(top_srcdir)/src/dict
|
||||
libtesseract_la_CPPFLAGS += -I$(top_srcdir)/src/lstm
|
||||
if OPENCL
|
||||
libtesseract_la_CPPFLAGS += -I$(top_srcdir)/src/opencl
|
||||
endif
|
||||
libtesseract_la_CPPFLAGS += -I$(top_srcdir)/src/textord
|
||||
libtesseract_la_CPPFLAGS += -I$(top_srcdir)/src/training/common
|
||||
libtesseract_la_CPPFLAGS += -I$(top_srcdir)/src/viewer
|
||||
@ -104,7 +100,7 @@ libtesseract_la_CPPFLAGS += -I$(top_srcdir)/src/wordrec
|
||||
libtesseract_la_CPPFLAGS += $(libcurl_CFLAGS)
|
||||
|
||||
lib_LTLIBRARIES = libtesseract.la
|
||||
libtesseract_la_LDFLAGS = $(LEPTONICA_LIBS) $(OPENCL_LDFLAGS)
|
||||
libtesseract_la_LDFLAGS = $(LEPTONICA_LIBS)
|
||||
libtesseract_la_LDFLAGS += $(libarchive_LIBS)
|
||||
libtesseract_la_LDFLAGS += $(libcurl_LIBS)
|
||||
libtesseract_la_LDFLAGS += $(TENSORFLOW_LIBS)
|
||||
@ -127,9 +123,6 @@ libtesseract_la_SOURCES += src/api/wordstrboxrenderer.cpp
|
||||
libtesseract_la_LIBADD = libtesseract_ccutil.la
|
||||
libtesseract_la_LIBADD += libtesseract_lstm.la
|
||||
libtesseract_la_LIBADD += libtesseract_native.la
|
||||
if OPENCL
|
||||
libtesseract_la_LIBADD += libtesseract_opencl.la
|
||||
endif
|
||||
|
||||
# Rules for src/arch.
|
||||
|
||||
@ -561,23 +554,6 @@ if TENSORFLOW
|
||||
libtesseract_lstm_la_SOURCES += src/lstm/tfnetwork.pb.cc
|
||||
endif
|
||||
|
||||
# Rules for src/opencl.
|
||||
|
||||
if OPENCL
|
||||
libtesseract_opencl_la_CPPFLAGS = $(AM_CPPFLAGS)
|
||||
libtesseract_opencl_la_CPPFLAGS += $(OPENCL_CFLAGS)
|
||||
libtesseract_opencl_la_CPPFLAGS += -I$(top_srcdir)/src/ccutil
|
||||
libtesseract_opencl_la_CPPFLAGS += -I$(top_srcdir)/src/ccstruct
|
||||
libtesseract_opencl_la_CPPFLAGS += -I$(top_srcdir)/src/ccmain
|
||||
|
||||
noinst_HEADERS += src/opencl/openclwrapper.h
|
||||
noinst_HEADERS += src/opencl/oclkernels.h
|
||||
|
||||
noinst_LTLIBRARIES += libtesseract_opencl.la
|
||||
|
||||
libtesseract_opencl_la_SOURCES = src/opencl/openclwrapper.cpp
|
||||
endif
|
||||
|
||||
# Rules for src/textord.
|
||||
|
||||
noinst_HEADERS += src/textord/alignedblob.h
|
||||
@ -743,16 +719,12 @@ tesseract_CPPFLAGS += -I$(top_srcdir)/src/dict
|
||||
tesseract_CPPFLAGS += -I$(top_srcdir)/src/textord
|
||||
tesseract_CPPFLAGS += -I$(top_srcdir)/src/viewer
|
||||
tesseract_CPPFLAGS += -I$(top_srcdir)/src/wordrec
|
||||
if OPENCL
|
||||
tesseract_CPPFLAGS += -I$(top_srcdir)/src/opencl
|
||||
endif
|
||||
tesseract_CPPFLAGS += $(AM_CPPFLAGS)
|
||||
if VISIBILITY
|
||||
tesseract_CPPFLAGS += -DTESS_IMPORTS
|
||||
endif
|
||||
|
||||
tesseract_LDFLAGS = $(OPENCL_LDFLAGS)
|
||||
tesseract_LDFLAGS += $(OPENMP_CXXFLAGS)
|
||||
tesseract_LDFLAGS = $(OPENMP_CXXFLAGS)
|
||||
|
||||
tesseract_LDADD = libtesseract.la
|
||||
tesseract_LDADD += $(LEPTONICA_LIBS)
|
||||
@ -927,7 +899,6 @@ EXTRA_PROGRAMS += $(trainingtools)
|
||||
extralib = libtesseract.la
|
||||
extralib += $(libarchive_LIBS)
|
||||
extralib += $(LEPTONICA_LIBS)
|
||||
extralib += $(OPENCL_LDFLAGS)
|
||||
extralib += $(TENSORFLOW_LIBS)
|
||||
if T_WIN
|
||||
extralib += -lws2_32
|
||||
@ -1308,7 +1279,7 @@ TESTS = $(check_PROGRAMS)
|
||||
|
||||
apiexample_test_SOURCES = unittest/apiexample_test.cc
|
||||
apiexample_test_CPPFLAGS = $(unittest_CPPFLAGS)
|
||||
apiexample_test_LDFLAGS = $(OPENCL_LDFLAGS) $(LEPTONICA_LIBS)
|
||||
apiexample_test_LDFLAGS = $(LEPTONICA_LIBS)
|
||||
apiexample_test_LDADD = $(TESS_LIBS) $(LEPTONICA_LIBS)
|
||||
|
||||
if !DISABLED_LEGACY_ENGINE
|
||||
@ -1494,7 +1465,7 @@ endif # !DISABLED_LEGACY_ENGINE
|
||||
|
||||
progress_test_SOURCES = unittest/progress_test.cc
|
||||
progress_test_CPPFLAGS = $(unittest_CPPFLAGS)
|
||||
progress_test_LDFLAGS = $(OPENCL_LDFLAGS) $(LEPTONICA_LIBS)
|
||||
progress_test_LDFLAGS = $(LEPTONICA_LIBS)
|
||||
progress_test_LDADD = $(GTEST_LIBS) $(GMOCK_LIBS) $(TESS_LIBS) $(LEPTONICA_LIBS)
|
||||
|
||||
qrsequence_test_SOURCES = unittest/qrsequence_test.cc
|
||||
|
@ -91,8 +91,6 @@ set(include_files_list
|
||||
unistd.h
|
||||
|
||||
cairo/cairo-version.h
|
||||
CL/cl.h
|
||||
OpenCL/cl.h
|
||||
pango-1.0/pango/pango-features.h
|
||||
unicode/uchar.h
|
||||
)
|
||||
@ -121,7 +119,6 @@ file(APPEND ${AUTOCONFIG_SRC} "
|
||||
#cmakedefine HAVE_NEON ${HAVE_NEON}
|
||||
#cmakedefine HAVE_LIBARCHIVE ${HAVE_LIBARCHIVE}
|
||||
#cmakedefine HAVE_LIBCURL ${HAVE_LIBCURL}
|
||||
#cmakedefine USE_OPENCL ${USE_OPENCL}
|
||||
")
|
||||
|
||||
if(TESSDATA_PREFIX)
|
||||
|
@ -31,7 +31,6 @@ source_group("classify" "${SSRC}/classify/${H_CPP}")
|
||||
source_group("cutil" "${SSRC}/cutil/${H_CPP}")
|
||||
source_group("dict" "${SSRC}/dict/${H_CPP}")
|
||||
source_group("lstm" "${SSRC}/lstm/${H_CPP}")
|
||||
source_group("opencl" "${SSRC}/opencl/${H_CPP}")
|
||||
source_group("textord" "${SSRC}/textord/${H_CPP}")
|
||||
source_group("viewer" "${SSRC}/viewer/${H_CPP}")
|
||||
source_group("wordrec" "${SSRC}/wordrec/${H_CPP}")
|
||||
|
46
configure.ac
46
configure.ac
@ -79,8 +79,6 @@ AC_SUBST([AM_CPPFLAGS])
|
||||
# Can be overridden with `configure --disable-silent-rules` or with `make V=1`.
|
||||
AM_SILENT_RULES([yes])
|
||||
|
||||
OPENCL_INC="/opt/AMDAPP/include"
|
||||
OPENCL_LIBS="-lOpenCL"
|
||||
#############################
|
||||
#
|
||||
# Platform specific setup
|
||||
@ -102,15 +100,12 @@ case "${host_os}" in
|
||||
AM_CONDITIONAL([ADD_RT], true)
|
||||
;;
|
||||
*darwin*)
|
||||
OPENCL_LIBS=""
|
||||
OPENCL_INC=""
|
||||
AM_CONDITIONAL([ADD_RT], false)
|
||||
;;
|
||||
*android*|openbsd*)
|
||||
AM_CONDITIONAL([ADD_RT], false)
|
||||
;;
|
||||
powerpc-*-darwin*)
|
||||
OPENCL_LIBS=""
|
||||
;;
|
||||
*)
|
||||
# default
|
||||
@ -263,19 +258,6 @@ have_tiff=false
|
||||
# Note that the first usage of AC_CHECK_HEADERS must be unconditional.
|
||||
AC_CHECK_HEADERS([tiffio.h], [have_tiff=true], [have_tiff=false])
|
||||
|
||||
# check whether to build opencl version
|
||||
AC_MSG_CHECKING([--enable-opencl argument])
|
||||
AC_ARG_ENABLE([opencl],
|
||||
AS_HELP_STRING([--enable-opencl], [enable opencl build [default=no]]))
|
||||
AC_MSG_RESULT([$enable_opencl])
|
||||
# check for opencl header
|
||||
have_opencl=false
|
||||
if test "$enable_opencl" = "yes"; then
|
||||
AC_CHECK_HEADERS([CL/cl.h], [have_opencl=true], [
|
||||
AC_CHECK_HEADERS(OpenCL/cl.h, [have_opencl=true], [have_opencl=false])
|
||||
])
|
||||
fi
|
||||
|
||||
# Configure arguments which allow disabling some optional libraries.
|
||||
AC_ARG_WITH([archive],
|
||||
AS_HELP_STRING([--with-archive],
|
||||
@ -323,9 +305,6 @@ m4_define([MY_CHECK_FRAMEWORK],
|
||||
fi]
|
||||
)
|
||||
|
||||
have_opencl_lib=false
|
||||
OPENCL_CPPFLAGS=''
|
||||
OPENCL_LDFLAGS=''
|
||||
case "${host_os}" in
|
||||
*darwin* | *-macos10*)
|
||||
MY_CHECK_FRAMEWORK([Accelerate])
|
||||
@ -333,36 +312,11 @@ case "${host_os}" in
|
||||
AM_CPPFLAGS="-DHAVE_FRAMEWORK_ACCELERATE $AM_CPPFLAGS"
|
||||
AM_LDFLAGS="$AM_LDFLAGS -framework Accelerate"
|
||||
fi
|
||||
MY_CHECK_FRAMEWORK([OpenCL])
|
||||
if test "$enable_opencl" = "yes"; then
|
||||
if test $my_cv_framework_OpenCL = no; then
|
||||
AC_MSG_ERROR([Required OpenCL library not found!])
|
||||
fi
|
||||
AM_CPPFLAGS="-DUSE_OPENCL $AM_CPPFLAGS"
|
||||
OPENCL_CPPFLAGS=""
|
||||
OPENCL_LDFLAGS="-framework OpenCL"
|
||||
fi
|
||||
;;
|
||||
*)
|
||||
# default
|
||||
if test "$enable_opencl" = "yes"; then
|
||||
AC_CHECK_LIB([OpenCL], [clGetPlatformIDs],
|
||||
[have_opencl_lib=true], [have_opencl_lib=false])
|
||||
if !($have_opencl); then
|
||||
AC_MSG_ERROR([Required OpenCL headers not found!])
|
||||
fi
|
||||
if !($have_opencl_lib); then
|
||||
AC_MSG_ERROR([Required OpenCL library not found!])
|
||||
fi
|
||||
AM_CPPFLAGS="-DUSE_OPENCL $AM_CPPFLAGS"
|
||||
OPENCL_CPPFLAGS="-I${OPENCL_INC}"
|
||||
OPENCL_LDFLAGS="${OPENCL_LIBS}"
|
||||
fi
|
||||
;;
|
||||
esac
|
||||
AM_CONDITIONAL([OPENCL], [test "$enable_opencl" = "yes"])
|
||||
AC_SUBST([OPENCL_CPPFLAGS])
|
||||
AC_SUBST([OPENCL_LDFLAGS])
|
||||
|
||||
# check whether to build tesseract with -fvisibility=hidden -fvisibility-inlines-hidden
|
||||
# http://gcc.gnu.org/wiki/Visibility
|
||||
|
@ -86,15 +86,6 @@ public:
|
||||
*/
|
||||
static const char *Version();
|
||||
|
||||
/**
|
||||
* If compiled with OpenCL AND an available OpenCL
|
||||
* device is deemed faster than serial code, then
|
||||
* "device" is populated with the cl_device_id
|
||||
* and returns sizeof(cl_device_id)
|
||||
* otherwise *device=nullptr and returns 0.
|
||||
*/
|
||||
static size_t getOpenCLDevice(void **device);
|
||||
|
||||
/**
|
||||
* Set the name of the input file. Needed for training and
|
||||
* reading a UNLV zone file, and for searchable PDF output.
|
||||
|
@ -186,8 +186,6 @@ TESS_API int TessResultRendererImageNum(TessResultRenderer *renderer);
|
||||
TESS_API TessBaseAPI *TessBaseAPICreate();
|
||||
TESS_API void TessBaseAPIDelete(TessBaseAPI *handle);
|
||||
|
||||
TESS_API size_t TessBaseAPIGetOpenCLDevice(TessBaseAPI *handle, void **device);
|
||||
|
||||
TESS_API void TessBaseAPISetInputName(TessBaseAPI *handle, const char *name);
|
||||
TESS_API const char *TessBaseAPIGetInputName(TessBaseAPI *handle);
|
||||
|
||||
|
@ -41,9 +41,6 @@
|
||||
#endif
|
||||
#include "mutableiterator.h" // for MutableIterator
|
||||
#include "normalis.h" // for kBlnBaselineOffset, kBlnXHeight
|
||||
#if defined(USE_OPENCL)
|
||||
# include "openclwrapper.h" // for OpenclDevice
|
||||
#endif
|
||||
#include "pageres.h" // for PAGE_RES_IT, WERD_RES, PAGE_RES, CR_DE...
|
||||
#include "paragraphs.h" // for DetectParagraphs
|
||||
#include "params.h" // for BoolParam, IntParam, DoubleParam, Stri...
|
||||
@ -243,27 +240,6 @@ const char *TessBaseAPI::Version() {
|
||||
return TESSERACT_VERSION_STR;
|
||||
}
|
||||
|
||||
/**
|
||||
* If compiled with OpenCL AND an available OpenCL
|
||||
* device is deemed faster than serial code, then
|
||||
* "device" is populated with the cl_device_id
|
||||
* and returns sizeof(cl_device_id)
|
||||
* otherwise *device=nullptr and returns 0.
|
||||
*/
|
||||
size_t TessBaseAPI::getOpenCLDevice(void **data) {
|
||||
#ifdef USE_OPENCL
|
||||
ds_device device = OpenclDevice::getDeviceSelection();
|
||||
if (device.type == DS_DEVICE_OPENCL_DEVICE) {
|
||||
*data = new cl_device_id;
|
||||
memcpy(*data, &device.oclDeviceID, sizeof(cl_device_id));
|
||||
return sizeof(cl_device_id);
|
||||
}
|
||||
#endif
|
||||
|
||||
*data = nullptr;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the name of the input file. Needed only for training and
|
||||
* loading a UNLV zone file.
|
||||
@ -398,10 +374,6 @@ int TessBaseAPI::Init(const char *data, int data_size, const char *language, Ocr
|
||||
delete tesseract_;
|
||||
tesseract_ = nullptr;
|
||||
}
|
||||
#ifdef USE_OPENCL
|
||||
OpenclDevice od;
|
||||
od.InitEnv();
|
||||
#endif
|
||||
bool reset_classifier = true;
|
||||
if (tesseract_ == nullptr) {
|
||||
reset_classifier = false;
|
||||
|
@ -137,10 +137,6 @@ void TessBaseAPIDelete(TessBaseAPI *handle) {
|
||||
delete handle;
|
||||
}
|
||||
|
||||
size_t TessBaseAPIGetOpenCLDevice(TessBaseAPI * /*handle*/, void **device) {
|
||||
return TessBaseAPI::getOpenCLDevice(device);
|
||||
}
|
||||
|
||||
void TessBaseAPISetInputName(TessBaseAPI *handle, const char *name) {
|
||||
handle->SetInputName(name);
|
||||
}
|
||||
|
@ -25,10 +25,6 @@
|
||||
#include "thresholder.h"
|
||||
#include "tprintf.h" // for tprintf
|
||||
|
||||
#if defined(USE_OPENCL)
|
||||
# include "openclwrapper.h" // for OpenclDevice
|
||||
#endif
|
||||
|
||||
#include <allheaders.h>
|
||||
#include <tesseract/baseapi.h> // for api->GetIntVariable()
|
||||
|
||||
@ -388,19 +384,7 @@ void ImageThresholder::OtsuThresholdRectToPix(Image src_pix, Image *out_pix) con
|
||||
|
||||
int num_channels = OtsuThreshold(src_pix, rect_left_, rect_top_, rect_width_, rect_height_,
|
||||
thresholds, hi_values);
|
||||
// only use opencl if compiled w/ OpenCL and selected device is opencl
|
||||
#ifdef USE_OPENCL
|
||||
OpenclDevice od;
|
||||
if (num_channels == 4 && od.selectedDeviceIsOpenCL() && rect_top_ == 0 && rect_left_ == 0) {
|
||||
od.ThresholdRectToPixOCL((unsigned char *)pixGetData(src_pix), num_channels,
|
||||
pixGetWpl(src_pix) * 4, &thresholds[0], &hi_values[0], out_pix /*pix_OCL*/,
|
||||
rect_height_, rect_width_, rect_top_, rect_left_);
|
||||
} else {
|
||||
#endif
|
||||
ThresholdRectToPix(src_pix, num_channels, thresholds, hi_values, out_pix);
|
||||
#ifdef USE_OPENCL
|
||||
}
|
||||
#endif
|
||||
ThresholdRectToPix(src_pix, num_channels, thresholds, hi_values, out_pix);
|
||||
}
|
||||
|
||||
/// Threshold the rectangle, taking everything except the src_pix
|
||||
|
@ -21,9 +21,6 @@
|
||||
#include <allheaders.h>
|
||||
#include <cstring>
|
||||
#include "helpers.h"
|
||||
#if defined(USE_OPENCL)
|
||||
# include "openclwrapper.h" // for OpenclDevice
|
||||
#endif
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
@ -47,90 +44,40 @@ int OtsuThreshold(Image src_pix, int left, int top, int width, int height, std::
|
||||
thresholds.resize(num_channels);
|
||||
hi_values.resize(num_channels);
|
||||
|
||||
// only use opencl if compiled w/ OpenCL and selected device is opencl
|
||||
#ifdef USE_OPENCL
|
||||
// all of channel 0 then all of channel 1...
|
||||
std::vector<int> histogramAllChannels(kHistogramSize * num_channels);
|
||||
|
||||
// Calculate Histogram on GPU
|
||||
OpenclDevice od;
|
||||
if (od.selectedDeviceIsOpenCL() && (num_channels == 1 || num_channels == 4) && top == 0 &&
|
||||
left == 0) {
|
||||
od.HistogramRectOCL(pixGetData(src_pix), num_channels, pixGetWpl(src_pix) * 4, left, top, width,
|
||||
height, kHistogramSize, &histogramAllChannels[0]);
|
||||
|
||||
// Calculate Threshold from Histogram on cpu
|
||||
for (int ch = 0; ch < num_channels; ++ch) {
|
||||
thresholds[ch] = -1;
|
||||
hi_values[ch] = -1;
|
||||
int *histogram = &histogramAllChannels[kHistogramSize * ch];
|
||||
int H;
|
||||
int best_omega_0;
|
||||
int best_t = OtsuStats(histogram, &H, &best_omega_0);
|
||||
if (best_omega_0 == 0 || best_omega_0 == H) {
|
||||
// This channel is empty.
|
||||
continue;
|
||||
}
|
||||
// To be a convincing foreground we must have a small fraction of H
|
||||
// or to be a convincing background we must have a large fraction of H.
|
||||
// In between we assume this channel contains no thresholding information.
|
||||
int hi_value = best_omega_0 < H * 0.5;
|
||||
thresholds[ch] = best_t;
|
||||
if (best_omega_0 > H * 0.75) {
|
||||
any_good_hivalue = true;
|
||||
hi_values[ch] = 0;
|
||||
} else if (best_omega_0 < H * 0.25) {
|
||||
any_good_hivalue = true;
|
||||
hi_values[ch] = 1;
|
||||
} else {
|
||||
// In case all channels are like this, keep the best of the bad lot.
|
||||
double hi_dist = hi_value ? (H - best_omega_0) : best_omega_0;
|
||||
if (hi_dist > best_hi_dist) {
|
||||
best_hi_dist = hi_dist;
|
||||
best_hi_value = hi_value;
|
||||
best_hi_index = ch;
|
||||
}
|
||||
for (int ch = 0; ch < num_channels; ++ch) {
|
||||
thresholds[ch] = -1;
|
||||
hi_values[ch] = -1;
|
||||
// Compute the histogram of the image rectangle.
|
||||
int histogram[kHistogramSize];
|
||||
HistogramRect(src_pix, ch, left, top, width, height, histogram);
|
||||
int H;
|
||||
int best_omega_0;
|
||||
int best_t = OtsuStats(histogram, &H, &best_omega_0);
|
||||
if (best_omega_0 == 0 || best_omega_0 == H) {
|
||||
// This channel is empty.
|
||||
continue;
|
||||
}
|
||||
// To be a convincing foreground we must have a small fraction of H
|
||||
// or to be a convincing background we must have a large fraction of H.
|
||||
// In between we assume this channel contains no thresholding information.
|
||||
int hi_value = best_omega_0 < H * 0.5;
|
||||
thresholds[ch] = best_t;
|
||||
if (best_omega_0 > H * 0.75) {
|
||||
any_good_hivalue = true;
|
||||
hi_values[ch] = 0;
|
||||
} else if (best_omega_0 < H * 0.25) {
|
||||
any_good_hivalue = true;
|
||||
hi_values[ch] = 1;
|
||||
} else {
|
||||
// In case all channels are like this, keep the best of the bad lot.
|
||||
double hi_dist = hi_value ? (H - best_omega_0) : best_omega_0;
|
||||
if (hi_dist > best_hi_dist) {
|
||||
best_hi_dist = hi_dist;
|
||||
best_hi_value = hi_value;
|
||||
best_hi_index = ch;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
#endif
|
||||
for (int ch = 0; ch < num_channels; ++ch) {
|
||||
thresholds[ch] = -1;
|
||||
hi_values[ch] = -1;
|
||||
// Compute the histogram of the image rectangle.
|
||||
int histogram[kHistogramSize];
|
||||
HistogramRect(src_pix, ch, left, top, width, height, histogram);
|
||||
int H;
|
||||
int best_omega_0;
|
||||
int best_t = OtsuStats(histogram, &H, &best_omega_0);
|
||||
if (best_omega_0 == 0 || best_omega_0 == H) {
|
||||
// This channel is empty.
|
||||
continue;
|
||||
}
|
||||
// To be a convincing foreground we must have a small fraction of H
|
||||
// or to be a convincing background we must have a large fraction of H.
|
||||
// In between we assume this channel contains no thresholding information.
|
||||
int hi_value = best_omega_0 < H * 0.5;
|
||||
thresholds[ch] = best_t;
|
||||
if (best_omega_0 > H * 0.75) {
|
||||
any_good_hivalue = true;
|
||||
hi_values[ch] = 0;
|
||||
} else if (best_omega_0 < H * 0.25) {
|
||||
any_good_hivalue = true;
|
||||
hi_values[ch] = 1;
|
||||
} else {
|
||||
// In case all channels are like this, keep the best of the bad lot.
|
||||
double hi_dist = hi_value ? (H - best_omega_0) : best_omega_0;
|
||||
if (hi_dist > best_hi_dist) {
|
||||
best_hi_dist = hi_dist;
|
||||
best_hi_value = hi_value;
|
||||
best_hi_index = ch;
|
||||
}
|
||||
}
|
||||
}
|
||||
#ifdef USE_OPENCL
|
||||
}
|
||||
#endif // USE_OPENCL
|
||||
|
||||
if (!any_good_hivalue) {
|
||||
// Use the best of the ones that were not good enough.
|
||||
|
@ -1,926 +0,0 @@
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef TESSERACT_OPENCL_OCLKERNELS_H_
|
||||
# define TESSERACT_OPENCL_OCLKERNELS_H_
|
||||
|
||||
# ifndef USE_EXTERNAL_KERNEL
|
||||
# define KERNEL(...) # __VA_ARGS__ "\n"
|
||||
// Double precision is a default of spreadsheets
|
||||
// cl_khr_fp64: Khronos extension
|
||||
// cl_amd_fp64: AMD extension
|
||||
// use build option outside to define fp_t
|
||||
/////////////////////////////////////////////
|
||||
static const char *kernel_src = KERNEL(
|
||||
\n #ifdef KHR_DP_EXTENSION\n
|
||||
\n #pragma OPENCL EXTENSION cl_khr_fp64
|
||||
: enable\n
|
||||
\n #elif AMD_DP_EXTENSION\n
|
||||
\n #pragma OPENCL EXTENSION cl_amd_fp64
|
||||
: enable\n
|
||||
\n #else \n
|
||||
\n #endif \n
|
||||
__kernel void composeRGBPixel(__global uint *tiffdata, int w, int h, int wpl, __global uint *output) {
|
||||
int i = get_global_id(1);
|
||||
int j = get_global_id(0);
|
||||
int tiffword, rval, gval, bval;
|
||||
|
||||
//Ignore the excess
|
||||
if ((i >= h) || (j >= w))
|
||||
return;
|
||||
|
||||
tiffword = tiffdata[i * w + j];
|
||||
rval = ((tiffword)&0xff);
|
||||
gval = (((tiffword) >> 8) & 0xff);
|
||||
bval = (((tiffword) >> 16) & 0xff);
|
||||
output[i * wpl + j] = (rval << (8 * (sizeof(uint) - 1 - 0))) | (gval << (8 * (sizeof(uint) - 1 - 1))) | (bval << (8 * (sizeof(uint) - 1 - 2)));
|
||||
})
|
||||
|
||||
KERNEL(
|
||||
\n__kernel void pixSubtract_inplace(__global int *dword, __global int *sword, const int wpl, const int h) {
|
||||
const unsigned int row = get_global_id(1);
|
||||
const unsigned int col = get_global_id(0);
|
||||
const unsigned int pos = row * wpl + col;
|
||||
|
||||
//Ignore the execss
|
||||
if (row >= h || col >= wpl)
|
||||
return;
|
||||
|
||||
*(dword + pos) &= ~(*(sword + pos));
|
||||
}\n)
|
||||
|
||||
KERNEL(
|
||||
\n__kernel void morphoDilateHor_5x5(__global int *sword, __global int *dword, const int wpl, const int h) {
|
||||
const unsigned int pos = get_global_id(0);
|
||||
unsigned int prevword, nextword, currword, tempword;
|
||||
unsigned int destword;
|
||||
const int col = pos % wpl;
|
||||
|
||||
//Ignore the execss
|
||||
if (pos >= (wpl * h))
|
||||
return;
|
||||
|
||||
currword = *(sword + pos);
|
||||
destword = currword;
|
||||
|
||||
//Handle boundary conditions
|
||||
if (col == 0)
|
||||
prevword = 0;
|
||||
else
|
||||
prevword = *(sword + pos - 1);
|
||||
|
||||
if (col == (wpl - 1))
|
||||
nextword = 0;
|
||||
else
|
||||
nextword = *(sword + pos + 1);
|
||||
|
||||
//Loop unrolled
|
||||
|
||||
//1 bit to left and 1 bit to right
|
||||
//Get the max value on LHS of every pixel
|
||||
tempword = (prevword << (31)) | ((currword >> 1));
|
||||
destword |= tempword;
|
||||
//Get max value on RHS of every pixel
|
||||
tempword = (currword << 1) | (nextword >> (31));
|
||||
destword |= tempword;
|
||||
|
||||
//2 bit to left and 2 bit to right
|
||||
//Get the max value on LHS of every pixel
|
||||
tempword = (prevword << (30)) | ((currword >> 2));
|
||||
destword |= tempword;
|
||||
//Get max value on RHS of every pixel
|
||||
tempword = (currword << 2) | (nextword >> (30));
|
||||
destword |= tempword;
|
||||
|
||||
*(dword + pos) = destword;
|
||||
}\n)
|
||||
|
||||
KERNEL(
|
||||
\n__kernel void morphoDilateVer_5x5(__global int *sword, __global int *dword, const int wpl, const int h) {
|
||||
const int col = get_global_id(0);
|
||||
const int row = get_global_id(1);
|
||||
const unsigned int pos = row * wpl + col;
|
||||
unsigned int tempword;
|
||||
unsigned int destword;
|
||||
int i;
|
||||
|
||||
//Ignore the execss
|
||||
if (row >= h || col >= wpl)
|
||||
return;
|
||||
|
||||
destword = *(sword + pos);
|
||||
|
||||
//2 words above
|
||||
i = (row - 2) < 0 ? row : (row - 2);
|
||||
tempword = *(sword + i * wpl + col);
|
||||
destword |= tempword;
|
||||
|
||||
//1 word above
|
||||
i = (row - 1) < 0 ? row : (row - 1);
|
||||
tempword = *(sword + i * wpl + col);
|
||||
destword |= tempword;
|
||||
|
||||
//1 word below
|
||||
i = (row >= (h - 1)) ? row : (row + 1);
|
||||
tempword = *(sword + i * wpl + col);
|
||||
destword |= tempword;
|
||||
|
||||
//2 words below
|
||||
i = (row >= (h - 2)) ? row : (row + 2);
|
||||
tempword = *(sword + i * wpl + col);
|
||||
destword |= tempword;
|
||||
|
||||
*(dword + pos) = destword;
|
||||
}\n)
|
||||
|
||||
KERNEL(
|
||||
\n__kernel void morphoDilateHor(__global int *sword, __global int *dword, const int xp, const int xn, const int wpl, const int h) {
|
||||
const int col = get_global_id(0);
|
||||
const int row = get_global_id(1);
|
||||
const unsigned int pos = row * wpl + col;
|
||||
unsigned int parbitsxp, parbitsxn, nwords;
|
||||
unsigned int destword, tempword, lastword, currword;
|
||||
unsigned int lnextword, lprevword, rnextword, rprevword, firstword, secondword;
|
||||
int i, j, siter, eiter;
|
||||
|
||||
//Ignore the execss
|
||||
if (pos >= (wpl * h) || (xn < 1 && xp < 1))
|
||||
return;
|
||||
|
||||
currword = *(sword + pos);
|
||||
destword = currword;
|
||||
|
||||
parbitsxp = xp & 31;
|
||||
parbitsxn = xn & 31;
|
||||
nwords = xp >> 5;
|
||||
|
||||
if (parbitsxp > 0)
|
||||
nwords += 1;
|
||||
else
|
||||
parbitsxp = 31;
|
||||
|
||||
siter = (col - nwords);
|
||||
eiter = (col + nwords);
|
||||
|
||||
//Get prev word
|
||||
if (col == 0)
|
||||
firstword = 0x0;
|
||||
else
|
||||
firstword = *(sword + pos - 1);
|
||||
|
||||
//Get next word
|
||||
if (col == (wpl - 1))
|
||||
secondword = 0x0;
|
||||
else
|
||||
secondword = *(sword + pos + 1);
|
||||
|
||||
//Last partial bits on either side
|
||||
for (i = 1; i <= parbitsxp; i++) {
|
||||
//Get the max value on LHS of every pixel
|
||||
tempword = ((i == parbitsxp) && (parbitsxp != parbitsxn)) ? 0x0 : (firstword << (32 - i)) | ((currword >> i));
|
||||
|
||||
destword |= tempword;
|
||||
|
||||
//Get max value on RHS of every pixel
|
||||
tempword = (currword << i) | (secondword >> (32 - i));
|
||||
destword |= tempword;
|
||||
}
|
||||
|
||||
//Return if halfwidth <= 1 word
|
||||
if (nwords == 1) {
|
||||
if (xn == 32) {
|
||||
destword |= firstword;
|
||||
}
|
||||
if (xp == 32) {
|
||||
destword |= secondword;
|
||||
}
|
||||
|
||||
*(dword + pos) = destword;
|
||||
return;
|
||||
}
|
||||
|
||||
if (siter < 0)
|
||||
firstword = 0x0;
|
||||
else
|
||||
firstword = *(sword + row * wpl + siter);
|
||||
|
||||
if (eiter >= wpl)
|
||||
lastword = 0x0;
|
||||
else
|
||||
lastword = *(sword + row * wpl + eiter);
|
||||
|
||||
for (i = 1; i < nwords; i++) {
|
||||
//Gets LHS words
|
||||
if ((siter + i) < 0)
|
||||
secondword = 0x0;
|
||||
else
|
||||
secondword = *(sword + row * wpl + siter + i);
|
||||
|
||||
lprevword = firstword << (32 - parbitsxn) | secondword >> parbitsxn;
|
||||
|
||||
firstword = secondword;
|
||||
|
||||
if ((siter + i + 1) < 0)
|
||||
secondword = 0x0;
|
||||
else
|
||||
secondword = *(sword + row * wpl + siter + i + 1);
|
||||
|
||||
lnextword = firstword << (32 - parbitsxn) | secondword >> parbitsxn;
|
||||
|
||||
//Gets RHS words
|
||||
if ((eiter - i) >= wpl)
|
||||
firstword = 0x0;
|
||||
else
|
||||
firstword = *(sword + row * wpl + eiter - i);
|
||||
|
||||
rnextword = firstword << parbitsxp | lastword >> (32 - parbitsxp);
|
||||
|
||||
lastword = firstword;
|
||||
if ((eiter - i - 1) >= wpl)
|
||||
firstword = 0x0;
|
||||
else
|
||||
firstword = *(sword + row * wpl + eiter - i - 1);
|
||||
|
||||
rprevword = firstword << parbitsxp | lastword >> (32 - parbitsxp);
|
||||
|
||||
for (j = 1; j < 32; j++) {
|
||||
//OR LHS full words
|
||||
tempword = (lprevword << j) | (lnextword >> (32 - j));
|
||||
destword |= tempword;
|
||||
|
||||
//OR RHS full words
|
||||
tempword = (rprevword << j) | (rnextword >> (32 - j));
|
||||
destword |= tempword;
|
||||
}
|
||||
|
||||
destword |= lprevword;
|
||||
destword |= lnextword;
|
||||
destword |= rprevword;
|
||||
destword |= rnextword;
|
||||
|
||||
lastword = firstword;
|
||||
firstword = secondword;
|
||||
}
|
||||
|
||||
*(dword + pos) = destword;
|
||||
}\n)
|
||||
|
||||
KERNEL(
|
||||
\n__kernel void morphoDilateHor_32word(__global int *sword, __global int *dword, const int halfwidth, const int wpl, const int h, const char isEven) {
|
||||
const int col = get_global_id(0);
|
||||
const int row = get_global_id(1);
|
||||
const unsigned int pos = row * wpl + col;
|
||||
unsigned int prevword, nextword, currword, tempword;
|
||||
unsigned int destword;
|
||||
int i;
|
||||
|
||||
//Ignore the execss
|
||||
if (pos >= (wpl * h))
|
||||
return;
|
||||
|
||||
currword = *(sword + pos);
|
||||
destword = currword;
|
||||
|
||||
//Handle boundary conditions
|
||||
if (col == 0)
|
||||
prevword = 0;
|
||||
else
|
||||
prevword = *(sword + pos - 1);
|
||||
|
||||
if (col == (wpl - 1))
|
||||
nextword = 0;
|
||||
else
|
||||
nextword = *(sword + pos + 1);
|
||||
|
||||
for (i = 1; i <= halfwidth; i++) {
|
||||
//Get the max value on LHS of every pixel
|
||||
if (i == halfwidth && isEven) {
|
||||
tempword = 0x0;
|
||||
} else {
|
||||
tempword = (prevword << (32 - i)) | ((currword >> i));
|
||||
}
|
||||
|
||||
destword |= tempword;
|
||||
|
||||
//Get max value on RHS of every pixel
|
||||
tempword = (currword << i) | (nextword >> (32 - i));
|
||||
|
||||
destword |= tempword;
|
||||
}
|
||||
|
||||
*(dword + pos) = destword;
|
||||
}\n)
|
||||
|
||||
KERNEL(
|
||||
\n__kernel void morphoDilateVer(__global int *sword, __global int *dword, const int yp, const int wpl, const int h, const int yn) {
|
||||
const int col = get_global_id(0);
|
||||
const int row = get_global_id(1);
|
||||
const unsigned int pos = row * wpl + col;
|
||||
unsigned int tempword;
|
||||
unsigned int destword;
|
||||
int i, siter, eiter;
|
||||
|
||||
//Ignore the execss
|
||||
if (row >= h || col >= wpl)
|
||||
return;
|
||||
|
||||
destword = *(sword + pos);
|
||||
|
||||
//Set start position and end position considering the boundary conditions
|
||||
siter = (row - yn) < 0 ? 0 : (row - yn);
|
||||
eiter = (row >= (h - yp)) ? (h - 1) : (row + yp);
|
||||
|
||||
for (i = siter; i <= eiter; i++) {
|
||||
tempword = *(sword + i * wpl + col);
|
||||
|
||||
destword |= tempword;
|
||||
}
|
||||
|
||||
*(dword + pos) = destword;
|
||||
}\n)
|
||||
|
||||
KERNEL(
|
||||
\n__kernel void morphoErodeHor_5x5(__global int *sword, __global int *dword, const int wpl, const int h) {
|
||||
const unsigned int pos = get_global_id(0);
|
||||
unsigned int prevword, nextword, currword, tempword;
|
||||
unsigned int destword;
|
||||
const int col = pos % wpl;
|
||||
|
||||
//Ignore the execss
|
||||
if (pos >= (wpl * h))
|
||||
return;
|
||||
|
||||
currword = *(sword + pos);
|
||||
destword = currword;
|
||||
|
||||
//Handle boundary conditions
|
||||
if (col == 0)
|
||||
prevword = 0xffffffff;
|
||||
else
|
||||
prevword = *(sword + pos - 1);
|
||||
|
||||
if (col == (wpl - 1))
|
||||
nextword = 0xffffffff;
|
||||
else
|
||||
nextword = *(sword + pos + 1);
|
||||
|
||||
//Loop unrolled
|
||||
|
||||
//1 bit to left and 1 bit to right
|
||||
//Get the min value on LHS of every pixel
|
||||
tempword = (prevword << (31)) | ((currword >> 1));
|
||||
destword &= tempword;
|
||||
//Get min value on RHS of every pixel
|
||||
tempword = (currword << 1) | (nextword >> (31));
|
||||
destword &= tempword;
|
||||
|
||||
//2 bit to left and 2 bit to right
|
||||
//Get the min value on LHS of every pixel
|
||||
tempword = (prevword << (30)) | ((currword >> 2));
|
||||
destword &= tempword;
|
||||
//Get min value on RHS of every pixel
|
||||
tempword = (currword << 2) | (nextword >> (30));
|
||||
destword &= tempword;
|
||||
|
||||
*(dword + pos) = destword;
|
||||
}\n)
|
||||
|
||||
KERNEL(
|
||||
\n__kernel void morphoErodeVer_5x5(__global int *sword, __global int *dword, const int wpl, const int h, const int fwmask, const int lwmask) {
|
||||
const int col = get_global_id(0);
|
||||
const int row = get_global_id(1);
|
||||
const unsigned int pos = row * wpl + col;
|
||||
unsigned int tempword;
|
||||
unsigned int destword;
|
||||
int i;
|
||||
|
||||
//Ignore the execss
|
||||
if (row >= h || col >= wpl)
|
||||
return;
|
||||
|
||||
destword = *(sword + pos);
|
||||
|
||||
if (row < 2 || row >= (h - 2)) {
|
||||
destword = 0x0;
|
||||
} else {
|
||||
//2 words above
|
||||
//i = (row - 2) < 0 ? row : (row - 2);
|
||||
i = (row - 2);
|
||||
tempword = *(sword + i * wpl + col);
|
||||
destword &= tempword;
|
||||
|
||||
//1 word above
|
||||
//i = (row - 1) < 0 ? row : (row - 1);
|
||||
i = (row - 1);
|
||||
tempword = *(sword + i * wpl + col);
|
||||
destword &= tempword;
|
||||
|
||||
//1 word below
|
||||
//i = (row >= (h - 1)) ? row : (row + 1);
|
||||
i = (row + 1);
|
||||
tempword = *(sword + i * wpl + col);
|
||||
destword &= tempword;
|
||||
|
||||
//2 words below
|
||||
//i = (row >= (h - 2)) ? row : (row + 2);
|
||||
i = (row + 2);
|
||||
tempword = *(sword + i * wpl + col);
|
||||
destword &= tempword;
|
||||
|
||||
if (col == 0) {
|
||||
destword &= fwmask;
|
||||
}
|
||||
if (col == (wpl - 1)) {
|
||||
destword &= lwmask;
|
||||
}
|
||||
}
|
||||
|
||||
*(dword + pos) = destword;
|
||||
}\n)
|
||||
|
||||
KERNEL(
|
||||
\n__kernel void morphoErodeHor(__global int *sword, __global int *dword, const int xp, const int xn, const int wpl, const int h, const char isAsymmetric, const int rwmask, const int lwmask) {
|
||||
const int col = get_global_id(0);
|
||||
const int row = get_global_id(1);
|
||||
const unsigned int pos = row * wpl + col;
|
||||
unsigned int parbitsxp, parbitsxn, nwords;
|
||||
unsigned int destword, tempword, lastword, currword;
|
||||
unsigned int lnextword, lprevword, rnextword, rprevword, firstword, secondword;
|
||||
int i, j, siter, eiter;
|
||||
|
||||
//Ignore the execss
|
||||
if (pos >= (wpl * h) || (xn < 1 && xp < 1))
|
||||
return;
|
||||
|
||||
currword = *(sword + pos);
|
||||
destword = currword;
|
||||
|
||||
parbitsxp = xp & 31;
|
||||
parbitsxn = xn & 31;
|
||||
nwords = xp >> 5;
|
||||
|
||||
if (parbitsxp > 0)
|
||||
nwords += 1;
|
||||
else
|
||||
parbitsxp = 31;
|
||||
|
||||
siter = (col - nwords);
|
||||
eiter = (col + nwords);
|
||||
|
||||
//Get prev word
|
||||
if (col == 0)
|
||||
firstword = 0xffffffff;
|
||||
else
|
||||
firstword = *(sword + pos - 1);
|
||||
|
||||
//Get next word
|
||||
if (col == (wpl - 1))
|
||||
secondword = 0xffffffff;
|
||||
else
|
||||
secondword = *(sword + pos + 1);
|
||||
|
||||
//Last partial bits on either side
|
||||
for (i = 1; i <= parbitsxp; i++) {
|
||||
//Get the max value on LHS of every pixel
|
||||
tempword = (firstword << (32 - i)) | ((currword >> i));
|
||||
destword &= tempword;
|
||||
|
||||
//Get max value on RHS of every pixel
|
||||
tempword = ((i == parbitsxp) && (parbitsxp != parbitsxn)) ? 0xffffffff : (currword << i) | (secondword >> (32 - i));
|
||||
|
||||
//tempword = (currword << i) | (secondword >> (32 - i));
|
||||
destword &= tempword;
|
||||
}
|
||||
|
||||
//Return if halfwidth <= 1 word
|
||||
if (nwords == 1) {
|
||||
if (xp == 32) {
|
||||
destword &= firstword;
|
||||
}
|
||||
if (xn == 32) {
|
||||
destword &= secondword;
|
||||
}
|
||||
|
||||
//Clear boundary pixels
|
||||
if (isAsymmetric) {
|
||||
if (col == 0)
|
||||
destword &= rwmask;
|
||||
if (col == (wpl - 1))
|
||||
destword &= lwmask;
|
||||
}
|
||||
|
||||
*(dword + pos) = destword;
|
||||
return;
|
||||
}
|
||||
|
||||
if (siter < 0)
|
||||
firstword = 0xffffffff;
|
||||
else
|
||||
firstword = *(sword + row * wpl + siter);
|
||||
|
||||
if (eiter >= wpl)
|
||||
lastword = 0xffffffff;
|
||||
else
|
||||
lastword = *(sword + row * wpl + eiter);
|
||||
|
||||
for (i = 1; i < nwords; i++) {
|
||||
//Gets LHS words
|
||||
if ((siter + i) < 0)
|
||||
secondword = 0xffffffff;
|
||||
else
|
||||
secondword = *(sword + row * wpl + siter + i);
|
||||
|
||||
lprevword = firstword << (32 - parbitsxp) | secondword >> (parbitsxp);
|
||||
|
||||
firstword = secondword;
|
||||
|
||||
if ((siter + i + 1) < 0)
|
||||
secondword = 0xffffffff;
|
||||
else
|
||||
secondword = *(sword + row * wpl + siter + i + 1);
|
||||
|
||||
lnextword = firstword << (32 - parbitsxp) | secondword >> (parbitsxp);
|
||||
|
||||
//Gets RHS words
|
||||
if ((eiter - i) >= wpl)
|
||||
firstword = 0xffffffff;
|
||||
else
|
||||
firstword = *(sword + row * wpl + eiter - i);
|
||||
|
||||
rnextword = firstword << parbitsxn | lastword >> (32 - parbitsxn);
|
||||
|
||||
lastword = firstword;
|
||||
if ((eiter - i - 1) >= wpl)
|
||||
firstword = 0xffffffff;
|
||||
else
|
||||
firstword = *(sword + row * wpl + eiter - i - 1);
|
||||
|
||||
rprevword = firstword << parbitsxn | lastword >> (32 - parbitsxn);
|
||||
|
||||
for (j = 0; j < 32; j++) {
|
||||
//OR LHS full words
|
||||
tempword = (lprevword << j) | (lnextword >> (32 - j));
|
||||
destword &= tempword;
|
||||
|
||||
//OR RHS full words
|
||||
tempword = (rprevword << j) | (rnextword >> (32 - j));
|
||||
destword &= tempword;
|
||||
}
|
||||
|
||||
destword &= lprevword;
|
||||
destword &= lnextword;
|
||||
destword &= rprevword;
|
||||
destword &= rnextword;
|
||||
|
||||
lastword = firstword;
|
||||
firstword = secondword;
|
||||
}
|
||||
|
||||
if (isAsymmetric) {
|
||||
//Clear boundary pixels
|
||||
if (col < (nwords - 1))
|
||||
destword = 0x0;
|
||||
else if (col == (nwords - 1))
|
||||
destword &= rwmask;
|
||||
else if (col > (wpl - nwords))
|
||||
destword = 0x0;
|
||||
else if (col == (wpl - nwords))
|
||||
destword &= lwmask;
|
||||
}
|
||||
|
||||
*(dword + pos) = destword;
|
||||
}\n)
|
||||
|
||||
KERNEL(
|
||||
\n__kernel void morphoErodeHor_32word(__global int *sword, __global int *dword, const int halfwidth, const int wpl, const int h, const char clearBoundPixH, const int rwmask, const int lwmask, const char isEven) {
|
||||
const int col = get_global_id(0);
|
||||
const int row = get_global_id(1);
|
||||
const unsigned int pos = row * wpl + col;
|
||||
unsigned int prevword, nextword, currword, tempword, destword;
|
||||
int i;
|
||||
|
||||
//Ignore the execss
|
||||
if (pos >= (wpl * h))
|
||||
return;
|
||||
|
||||
currword = *(sword + pos);
|
||||
destword = currword;
|
||||
|
||||
//Handle boundary conditions
|
||||
if (col == 0)
|
||||
prevword = 0xffffffff;
|
||||
else
|
||||
prevword = *(sword + pos - 1);
|
||||
|
||||
if (col == (wpl - 1))
|
||||
nextword = 0xffffffff;
|
||||
else
|
||||
nextword = *(sword + pos + 1);
|
||||
|
||||
for (i = 1; i <= halfwidth; i++) {
|
||||
//Get the min value on LHS of every pixel
|
||||
tempword = (prevword << (32 - i)) | ((currword >> i));
|
||||
|
||||
destword &= tempword;
|
||||
|
||||
//Get min value on RHS of every pixel
|
||||
if (i == halfwidth && isEven) {
|
||||
tempword = 0xffffffff;
|
||||
} else {
|
||||
tempword = (currword << i) | (nextword >> (32 - i));
|
||||
}
|
||||
|
||||
destword &= tempword;
|
||||
}
|
||||
|
||||
if (clearBoundPixH) {
|
||||
if (col == 0) {
|
||||
destword &= rwmask;
|
||||
} else if (col == (wpl - 1)) {
|
||||
destword &= lwmask;
|
||||
}
|
||||
}
|
||||
|
||||
*(dword + pos) = destword;
|
||||
}\n)
|
||||
|
||||
KERNEL(
|
||||
\n__kernel void morphoErodeVer(__global int *sword, __global int *dword, const int yp, const int wpl, const int h, const char clearBoundPixV, const int yn) {
|
||||
const int col = get_global_id(0);
|
||||
const int row = get_global_id(1);
|
||||
const unsigned int pos = row * wpl + col;
|
||||
unsigned int tempword, destword;
|
||||
int i, siter, eiter;
|
||||
|
||||
//Ignore the execss
|
||||
if (row >= h || col >= wpl)
|
||||
return;
|
||||
|
||||
destword = *(sword + pos);
|
||||
|
||||
//Set start position and end position considering the boundary conditions
|
||||
siter = (row - yp) < 0 ? 0 : (row - yp);
|
||||
eiter = (row >= (h - yn)) ? (h - 1) : (row + yn);
|
||||
|
||||
for (i = siter; i <= eiter; i++) {
|
||||
tempword = *(sword + i * wpl + col);
|
||||
|
||||
destword &= tempword;
|
||||
}
|
||||
|
||||
//Clear boundary pixels
|
||||
if (clearBoundPixV && ((row < yp) || ((h - row) <= yn))) {
|
||||
destword = 0x0;
|
||||
}
|
||||
|
||||
*(dword + pos) = destword;
|
||||
}\n)
|
||||
|
||||
// HistogramRect Kernel: Accumulate
|
||||
// assumes 4 channels, i.e., bytes_per_pixel = 4
|
||||
// assumes number of pixels is multiple of 8
|
||||
// data is laid out as
|
||||
// ch0 ch1 ...
|
||||
// bin0 bin1 bin2... bin0...
|
||||
// rpt0,1,2...256 rpt0,1,2...
|
||||
KERNEL(
|
||||
\n #define HIST_REDUNDANCY 256\n
|
||||
\n #define GROUP_SIZE 256\n
|
||||
\n #define HIST_SIZE 256\n
|
||||
\n #define NUM_CHANNELS 4\n
|
||||
\n #define HR_UNROLL_SIZE 8 \n
|
||||
\n #define HR_UNROLL_TYPE uchar8 \n
|
||||
|
||||
__attribute__((reqd_work_group_size(256, 1, 1))) __kernel void kernel_HistogramRectAllChannels(__global const uchar8 *data, uint numPixels, __global uint *histBuffer) {
|
||||
// declare variables
|
||||
uchar8 pixels;
|
||||
int threadOffset = get_global_id(0) % HIST_REDUNDANCY;
|
||||
|
||||
// for each pixel/channel, accumulate in global memory
|
||||
for (uint pc = get_global_id(0); pc < numPixels * NUM_CHANNELS / HR_UNROLL_SIZE; pc += get_global_size(0)) {
|
||||
pixels = data[pc];
|
||||
// channel bin thread
|
||||
atomic_inc(&histBuffer[0 * HIST_SIZE * HIST_REDUNDANCY + pixels.s0 * HIST_REDUNDANCY + threadOffset]); // ch0
|
||||
atomic_inc(&histBuffer[0 * HIST_SIZE * HIST_REDUNDANCY + pixels.s4 * HIST_REDUNDANCY + threadOffset]); // ch0
|
||||
atomic_inc(&histBuffer[1 * HIST_SIZE * HIST_REDUNDANCY + pixels.s1 * HIST_REDUNDANCY + threadOffset]); // ch1
|
||||
atomic_inc(&histBuffer[1 * HIST_SIZE * HIST_REDUNDANCY + pixels.s5 * HIST_REDUNDANCY + threadOffset]); // ch1
|
||||
atomic_inc(&histBuffer[2 * HIST_SIZE * HIST_REDUNDANCY + pixels.s2 * HIST_REDUNDANCY + threadOffset]); // ch2
|
||||
atomic_inc(&histBuffer[2 * HIST_SIZE * HIST_REDUNDANCY + pixels.s6 * HIST_REDUNDANCY + threadOffset]); // ch2
|
||||
atomic_inc(&histBuffer[3 * HIST_SIZE * HIST_REDUNDANCY + pixels.s3 * HIST_REDUNDANCY + threadOffset]); // ch3
|
||||
atomic_inc(&histBuffer[3 * HIST_SIZE * HIST_REDUNDANCY + pixels.s7 * HIST_REDUNDANCY + threadOffset]); // ch3
|
||||
}
|
||||
})
|
||||
|
||||
KERNEL(
|
||||
// NUM_CHANNELS = 1
|
||||
__attribute__((reqd_work_group_size(256, 1, 1))) __kernel void kernel_HistogramRectOneChannel(__global const uchar8 *data, uint numPixels, __global uint *histBuffer) {
|
||||
// declare variables
|
||||
uchar8 pixels;
|
||||
int threadOffset = get_global_id(0) % HIST_REDUNDANCY;
|
||||
|
||||
// for each pixel/channel, accumulate in global memory
|
||||
for (uint pc = get_global_id(0); pc < numPixels / HR_UNROLL_SIZE; pc += get_global_size(0)) {
|
||||
pixels = data[pc];
|
||||
// bin thread
|
||||
atomic_inc(&histBuffer[pixels.s0 * HIST_REDUNDANCY + threadOffset]);
|
||||
atomic_inc(&histBuffer[pixels.s1 * HIST_REDUNDANCY + threadOffset]);
|
||||
atomic_inc(&histBuffer[pixels.s2 * HIST_REDUNDANCY + threadOffset]);
|
||||
atomic_inc(&histBuffer[pixels.s3 * HIST_REDUNDANCY + threadOffset]);
|
||||
atomic_inc(&histBuffer[pixels.s4 * HIST_REDUNDANCY + threadOffset]);
|
||||
atomic_inc(&histBuffer[pixels.s5 * HIST_REDUNDANCY + threadOffset]);
|
||||
atomic_inc(&histBuffer[pixels.s6 * HIST_REDUNDANCY + threadOffset]);
|
||||
atomic_inc(&histBuffer[pixels.s7 * HIST_REDUNDANCY + threadOffset]);
|
||||
}
|
||||
})
|
||||
|
||||
// HistogramRect Kernel: Reduction
|
||||
// only supports 4 channels
|
||||
// each work group handles a single channel of a single histogram bin
|
||||
KERNEL(__attribute__((reqd_work_group_size(256, 1, 1))) __kernel void kernel_HistogramRectAllChannelsReduction(int n, // unused pixel redundancy
|
||||
__global uint *histBuffer, __global int *histResult) {
|
||||
// declare variables
|
||||
int channel = get_group_id(0) / HIST_SIZE;
|
||||
int bin = get_group_id(0) % HIST_SIZE;
|
||||
int value = 0;
|
||||
|
||||
// accumulate in register
|
||||
for (uint i = get_local_id(0); i < HIST_REDUNDANCY; i += GROUP_SIZE) {
|
||||
value += histBuffer[channel * HIST_SIZE * HIST_REDUNDANCY + bin * HIST_REDUNDANCY + i];
|
||||
}
|
||||
|
||||
// reduction in local memory
|
||||
__local int localHist[GROUP_SIZE];
|
||||
localHist[get_local_id(0)] = value;
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
for (int stride = GROUP_SIZE / 2; stride >= 1; stride /= 2) {
|
||||
if (get_local_id(0) < stride) {
|
||||
value = localHist[get_local_id(0) + stride];
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
if (get_local_id(0) < stride) {
|
||||
localHist[get_local_id(0)] += value;
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
}
|
||||
|
||||
// write reduction to final result
|
||||
if (get_local_id(0) == 0) {
|
||||
histResult[get_group_id(0)] = localHist[0];
|
||||
}
|
||||
} // kernel_HistogramRectAllChannels
|
||||
)
|
||||
|
||||
KERNEL(
|
||||
// NUM_CHANNELS = 1
|
||||
__attribute__((reqd_work_group_size(256, 1, 1))) __kernel void kernel_HistogramRectOneChannelReduction(int n, // unused pixel redundancy
|
||||
__global uint *histBuffer, __global int *histResult) {
|
||||
// declare variables
|
||||
// int channel = get_group_id(0)/HIST_SIZE;
|
||||
int bin = get_group_id(0) % HIST_SIZE;
|
||||
int value = 0;
|
||||
|
||||
// accumulate in register
|
||||
for (int i = get_local_id(0); i < HIST_REDUNDANCY; i += GROUP_SIZE) {
|
||||
value += histBuffer[bin * HIST_REDUNDANCY + i];
|
||||
}
|
||||
|
||||
// reduction in local memory
|
||||
__local int localHist[GROUP_SIZE];
|
||||
localHist[get_local_id(0)] = value;
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
for (int stride = GROUP_SIZE / 2; stride >= 1; stride /= 2) {
|
||||
if (get_local_id(0) < stride) {
|
||||
value = localHist[get_local_id(0) + stride];
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
if (get_local_id(0) < stride) {
|
||||
localHist[get_local_id(0)] += value;
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
}
|
||||
|
||||
// write reduction to final result
|
||||
if (get_local_id(0) == 0) {
|
||||
histResult[get_group_id(0)] = localHist[0];
|
||||
}
|
||||
} // kernel_HistogramRectOneChannelReduction
|
||||
)
|
||||
|
||||
// ThresholdRectToPix Kernel
|
||||
// only supports 4 channels
|
||||
// imageData is input image (24-bits/pixel)
|
||||
// pix is output image (1-bit/pixel)
|
||||
KERNEL(
|
||||
\n #define CHAR_VEC_WIDTH 4 \n
|
||||
\n #define PIXELS_PER_WORD 32 \n
|
||||
\n #define PIXELS_PER_BURST 8 \n
|
||||
\n #define BURSTS_PER_WORD (PIXELS_PER_WORD)/(PIXELS_PER_BURST) \n
|
||||
\n typedef union {
|
||||
uchar s[PIXELS_PER_BURST * NUM_CHANNELS];
|
||||
uchar4 v[(PIXELS_PER_BURST * NUM_CHANNELS) / CHAR_VEC_WIDTH];
|
||||
} charVec;
|
||||
|
||||
__attribute__((reqd_work_group_size(256, 1, 1))) __kernel void kernel_ThresholdRectToPix(__global const uchar4 *imageData, int height, int width,
|
||||
int wpl, // words per line
|
||||
__global int *thresholds, __global int *hi_values, __global int *pix) {
|
||||
// declare variables
|
||||
int pThresholds[NUM_CHANNELS];
|
||||
int pHi_Values[NUM_CHANNELS];
|
||||
for (int i = 0; i < NUM_CHANNELS; i++) {
|
||||
pThresholds[i] = thresholds[i];
|
||||
pHi_Values[i] = hi_values[i];
|
||||
}
|
||||
|
||||
// for each word (32 pixels) in output image
|
||||
for (uint w = get_global_id(0); w < wpl * height; w += get_global_size(0)) {
|
||||
unsigned int word = 0; // all bits start at zero
|
||||
// for each burst in word
|
||||
for (int b = 0; b < BURSTS_PER_WORD; b++) {
|
||||
// load burst
|
||||
charVec pixels;
|
||||
int offset = (w / wpl) * width;
|
||||
offset += (w % wpl) * PIXELS_PER_WORD;
|
||||
offset += b * PIXELS_PER_BURST;
|
||||
|
||||
for (int i = 0; i < PIXELS_PER_BURST; ++i)
|
||||
pixels.v[i] = imageData[offset + i];
|
||||
|
||||
// for each pixel in burst
|
||||
for (int p = 0; p < PIXELS_PER_BURST; p++) {
|
||||
for (int c = 0; c < NUM_CHANNELS; c++) {
|
||||
unsigned char pixChan = pixels.s[p * NUM_CHANNELS + c];
|
||||
if (pHi_Values[c] >= 0 && (pixChan > pThresholds[c]) == (pHi_Values[c] == 0)) {
|
||||
const uint kTopBit = 0x80000000;
|
||||
word |= (kTopBit >> ((b * PIXELS_PER_BURST + p) & 31));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
pix[w] = word;
|
||||
}
|
||||
}
|
||||
|
||||
\n #define CHAR_VEC_WIDTH 8 \n
|
||||
\n #define PIXELS_PER_WORD 32 \n
|
||||
\n #define PIXELS_PER_BURST 8 \n
|
||||
\n #define BURSTS_PER_WORD (PIXELS_PER_WORD) / (PIXELS_PER_BURST) \n
|
||||
\n typedef union {
|
||||
uchar s[PIXELS_PER_BURST * 1];
|
||||
uchar8 v[(PIXELS_PER_BURST * 1) / CHAR_VEC_WIDTH];
|
||||
} charVec1;
|
||||
|
||||
__attribute__((reqd_work_group_size(256, 1, 1))) __kernel void kernel_ThresholdRectToPix_OneChan(__global const uchar8 *imageData, int height, int width,
|
||||
int wpl, // words per line of output image
|
||||
__global int *thresholds, __global int *hi_values, __global int *pix) {
|
||||
// declare variables
|
||||
int pThresholds[1];
|
||||
int pHi_Values[1];
|
||||
for (int i = 0; i < 1; i++) {
|
||||
pThresholds[i] = thresholds[i];
|
||||
pHi_Values[i] = hi_values[i];
|
||||
}
|
||||
|
||||
// for each word (32 pixels) in output image
|
||||
for (uint w = get_global_id(0); w < wpl * height; w += get_global_size(0)) {
|
||||
unsigned int word = 0; // all bits start at zero
|
||||
|
||||
// for each burst in word
|
||||
for (int b = 0; b < BURSTS_PER_WORD; b++) {
|
||||
// load burst
|
||||
charVec1 pixels;
|
||||
// for each char8 in burst
|
||||
pixels.v[0] = imageData[w * BURSTS_PER_WORD + b + 0];
|
||||
|
||||
// for each pixel in burst
|
||||
for (int p = 0; p < PIXELS_PER_BURST; p++) {
|
||||
//int littleEndianIdx = p ^ 3;
|
||||
//int bigEndianIdx = p;
|
||||
int idx =
|
||||
\n #ifdef __ENDIAN_LITTLE__\n
|
||||
p ^
|
||||
3;
|
||||
\n #else \n
|
||||
p;
|
||||
\n #endif \n unsigned char pixChan = pixels.s[idx];
|
||||
if (pHi_Values[0] >= 0 && (pixChan > pThresholds[0]) == (pHi_Values[0] == 0)) {
|
||||
const uint kTopBit = 0x80000000;
|
||||
word |= (kTopBit >> ((b * PIXELS_PER_BURST + p) & 31));
|
||||
}
|
||||
}
|
||||
}
|
||||
pix[w] = word;
|
||||
}
|
||||
})
|
||||
|
||||
; // close char*
|
||||
|
||||
# endif // USE_EXTERNAL_KERNEL
|
||||
#endif // TESSERACT_OPENCL_OCLKERNELS_H_
|
||||
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|
File diff suppressed because it is too large
Load Diff
@ -1,179 +0,0 @@
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef TESSERACT_OPENCL_OPENCLWRAPPER_H_
|
||||
#define TESSERACT_OPENCL_OPENCLWRAPPER_H_
|
||||
|
||||
#include <allheaders.h>
|
||||
#include <cstdio>
|
||||
#include "pix.h"
|
||||
#include "tprintf.h"
|
||||
|
||||
// including CL/cl.h doesn't occur until USE_OPENCL defined below
|
||||
|
||||
/**************************************************************************
|
||||
* enable/disable use of OpenCL
|
||||
**************************************************************************/
|
||||
|
||||
#ifdef USE_OPENCL
|
||||
|
||||
# ifdef __APPLE__
|
||||
# include <OpenCL/cl.h>
|
||||
# else
|
||||
# include <CL/cl.h>
|
||||
# endif
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
class Image;
|
||||
struct TessDeviceScore;
|
||||
|
||||
// device type
|
||||
enum ds_device_type { DS_DEVICE_NATIVE_CPU = 0, DS_DEVICE_OPENCL_DEVICE };
|
||||
|
||||
struct ds_device {
|
||||
ds_device_type type;
|
||||
cl_device_id oclDeviceID;
|
||||
char *oclDeviceName;
|
||||
char *oclDriverVersion;
|
||||
// a pointer to the score data, the content/format is application defined.
|
||||
TessDeviceScore *score;
|
||||
};
|
||||
|
||||
# ifndef strcasecmp
|
||||
# define strcasecmp strcmp
|
||||
# endif
|
||||
|
||||
# define MAX_KERNEL_STRING_LEN 64
|
||||
# define MAX_CLFILE_NUM 50
|
||||
# define MAX_CLKERNEL_NUM 200
|
||||
# define MAX_KERNEL_NAME_LEN 64
|
||||
# define CL_QUEUE_THREAD_HANDLE_AMD 0x403E
|
||||
# define GROUPSIZE_X 16
|
||||
# define GROUPSIZE_Y 16
|
||||
# define GROUPSIZE_HMORX 256
|
||||
# define GROUPSIZE_HMORY 1
|
||||
|
||||
struct KernelEnv {
|
||||
cl_context mpkContext;
|
||||
cl_command_queue mpkCmdQueue;
|
||||
cl_program mpkProgram;
|
||||
cl_kernel mpkKernel;
|
||||
char mckKernelName[150];
|
||||
};
|
||||
|
||||
struct OpenCLEnv {
|
||||
cl_platform_id mpOclPlatformID;
|
||||
cl_context mpOclContext;
|
||||
cl_device_id mpOclDevsID;
|
||||
cl_command_queue mpOclCmdQueue;
|
||||
};
|
||||
typedef int (*cl_kernel_function)(void **userdata, KernelEnv *kenv);
|
||||
|
||||
# define CHECK_OPENCL(status, name) \
|
||||
if (status != CL_SUCCESS) { \
|
||||
tprintf("OpenCL error code is %d at when %s .\n", status, name); \
|
||||
}
|
||||
|
||||
struct GPUEnv {
|
||||
// share vb in all modules in hb library
|
||||
cl_platform_id mpPlatformID;
|
||||
cl_device_type mDevType;
|
||||
cl_context mpContext;
|
||||
cl_device_id *mpArryDevsID;
|
||||
cl_device_id mpDevID;
|
||||
cl_command_queue mpCmdQueue;
|
||||
cl_kernel mpArryKernels[MAX_CLFILE_NUM];
|
||||
cl_program mpArryPrograms[MAX_CLFILE_NUM]; // one program object maps one
|
||||
// kernel source file
|
||||
char mArryKnelSrcFile[MAX_CLFILE_NUM][256], // the max len of kernel file name is 256
|
||||
mArrykernelNames[MAX_CLKERNEL_NUM][MAX_KERNEL_STRING_LEN + 1];
|
||||
cl_kernel_function mpArryKnelFuncs[MAX_CLKERNEL_NUM];
|
||||
int mnKernelCount, mnFileCount, // only one kernel file
|
||||
mnIsUserCreated; // 1: created , 0:no create and needed to create by
|
||||
// opencl wrapper
|
||||
int mnKhrFp64Flag;
|
||||
int mnAmdFp64Flag;
|
||||
};
|
||||
|
||||
class OpenclDevice {
|
||||
public:
|
||||
static GPUEnv gpuEnv;
|
||||
static int isInited;
|
||||
OpenclDevice();
|
||||
~OpenclDevice();
|
||||
static int InitEnv(); // load dll, call InitOpenclRunEnv(0)
|
||||
static int InitOpenclRunEnv(int argc); // RegistOpenclKernel, double flags, compile kernels
|
||||
static int InitOpenclRunEnv_DeviceSelection(
|
||||
int argc); // RegistOpenclKernel, double flags, compile kernels
|
||||
static int RegistOpenclKernel();
|
||||
static int ReleaseOpenclRunEnv();
|
||||
static int ReleaseOpenclEnv(GPUEnv *gpuInfo);
|
||||
static int CompileKernelFile(GPUEnv *gpuInfo, const char *buildOption);
|
||||
static int CachedOfKernerPrg(const GPUEnv *gpuEnvCached, const char *clFileName);
|
||||
static int GeneratBinFromKernelSource(cl_program program, const char *clFileName);
|
||||
static int WriteBinaryToFile(const char *fileName, const char *birary, size_t numBytes);
|
||||
static int BinaryGenerated(const char *clFileName, FILE **fhandle);
|
||||
// static int CompileKernelFile( const char *filename, GPUEnv *gpuInfo, const
|
||||
// char *buildOption );
|
||||
static l_uint32 *pixReadFromTiffKernel(l_uint32 *tiffdata, l_int32 w, l_int32 h, l_int32 wpl,
|
||||
l_uint32 *line);
|
||||
static int composeRGBPixelCl(int *tiffdata, int *line, int h, int w);
|
||||
|
||||
/* OpenCL implementations of Morphological operations*/
|
||||
|
||||
// Initialization of OCL buffers used in Morph operations
|
||||
static int initMorphCLAllocations(l_int32 wpl, l_int32 h, Image pixs);
|
||||
static void releaseMorphCLBuffers();
|
||||
|
||||
static void pixGetLinesCL(Image pixd, Image pixs, Image *pix_vline, Image *pix_hline, Image *pixClosed,
|
||||
bool getpixClosed, l_int32 close_hsize, l_int32 close_vsize,
|
||||
l_int32 open_hsize, l_int32 open_vsize, l_int32 line_hsize,
|
||||
l_int32 line_vsize);
|
||||
|
||||
// int InitOpenclAttr( OpenCLEnv * env );
|
||||
// int ReleaseKernel( KernelEnv * env );
|
||||
static int SetKernelEnv(KernelEnv *envInfo);
|
||||
// int CreateKernel( char * kernelname, KernelEnv * env );
|
||||
// int RunKernel( const char *kernelName, void **userdata );
|
||||
// int ConvertToString( const char *filename, char **source );
|
||||
// int CheckKernelName( KernelEnv *envInfo, const char *kernelName );
|
||||
// int RegisterKernelWrapper( const char *kernelName, cl_kernel_function
|
||||
// function ); int RunKernelWrapper( cl_kernel_function function, const char *
|
||||
// kernelName, void **usrdata ); int GetKernelEnvAndFunc( const char
|
||||
// *kernelName, KernelEnv *env, cl_kernel_function *function );
|
||||
|
||||
static int LoadOpencl();
|
||||
# ifdef WIN32
|
||||
// static int OpenclInite();
|
||||
static void FreeOpenclDll();
|
||||
# endif
|
||||
|
||||
inline static int AddKernelConfig(int kCount, const char *kName);
|
||||
|
||||
/* for binarization */
|
||||
static int HistogramRectOCL(void *imagedata, int bytes_per_pixel, int bytes_per_line, int left,
|
||||
int top, int width, int height, int kHistogramSize,
|
||||
int *histogramAllChannels);
|
||||
|
||||
static int ThresholdRectToPixOCL(unsigned char *imagedata, int bytes_per_pixel,
|
||||
int bytes_per_line, int *thresholds, int *hi_values, Image *pix,
|
||||
int rect_height, int rect_width, int rect_top, int rect_left);
|
||||
|
||||
static ds_device getDeviceSelection();
|
||||
static ds_device selectedDevice;
|
||||
static bool deviceIsSelected;
|
||||
static bool selectedDeviceIsOpenCL();
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif // USE_OPENCL
|
||||
#endif // TESSERACT_OPENCL_OPENCLWRAPPER_H_
|
@ -34,9 +34,6 @@
|
||||
#include <allheaders.h>
|
||||
#include <tesseract/baseapi.h>
|
||||
#include "dict.h"
|
||||
#if defined(USE_OPENCL)
|
||||
# include "openclwrapper.h" // for OpenclDevice
|
||||
#endif
|
||||
#include <tesseract/renderer.h>
|
||||
#include "simddetect.h"
|
||||
#include "tesseractclass.h" // for AnyTessLang
|
||||
@ -112,34 +109,6 @@ static void PrintVersionInfo() {
|
||||
printf(" %s\n", versionStrP);
|
||||
lept_free(versionStrP);
|
||||
|
||||
#ifdef USE_OPENCL
|
||||
cl_platform_id platform[4];
|
||||
cl_uint num_platforms;
|
||||
|
||||
printf(" OpenCL info:\n");
|
||||
if (clGetPlatformIDs(4, platform, &num_platforms) == CL_SUCCESS) {
|
||||
printf(" Found %u platform(s).\n", num_platforms);
|
||||
for (unsigned n = 0; n < num_platforms; n++) {
|
||||
char info[256];
|
||||
if (clGetPlatformInfo(platform[n], CL_PLATFORM_NAME, 256, info, 0) == CL_SUCCESS) {
|
||||
printf(" Platform %u name: %s.\n", n + 1, info);
|
||||
}
|
||||
if (clGetPlatformInfo(platform[n], CL_PLATFORM_VERSION, 256, info, 0) == CL_SUCCESS) {
|
||||
printf(" Version: %s.\n", info);
|
||||
}
|
||||
cl_device_id devices[2];
|
||||
cl_uint num_devices;
|
||||
if (clGetDeviceIDs(platform[n], CL_DEVICE_TYPE_ALL, 2, devices, &num_devices) == CL_SUCCESS) {
|
||||
printf(" Found %u device(s).\n", num_devices);
|
||||
for (unsigned i = 0; i < num_devices; ++i) {
|
||||
if (clGetDeviceInfo(devices[i], CL_DEVICE_NAME, 256, info, 0) == CL_SUCCESS) {
|
||||
printf(" Device %u name: %s.\n", i + 1, info);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAVE_NEON) || defined(__aarch64__)
|
||||
if (tesseract::SIMDDetect::IsNEONAvailable())
|
||||
printf(" Found NEON\n");
|
||||
|
@ -27,9 +27,6 @@
|
||||
#include "edgblob.h"
|
||||
#include "linefind.h"
|
||||
#include "tabvector.h"
|
||||
#if defined(USE_OPENCL)
|
||||
# include "openclwrapper.h" // for OpenclDevice
|
||||
#endif
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
@ -469,48 +466,33 @@ static void GetLineMasks(int resolution, Image src_pix, Image *pix_vline, Image
|
||||
}
|
||||
int closing_brick = max_line_width / 3;
|
||||
|
||||
// only use opencl if compiled w/ OpenCL and selected device is opencl
|
||||
#ifdef USE_OPENCL
|
||||
if (OpenclDevice::selectedDeviceIsOpenCL()) {
|
||||
// OpenCL pixGetLines Operation
|
||||
int clStatus =
|
||||
OpenclDevice::initMorphCLAllocations(pixGetWpl(src_pix), pixGetHeight(src_pix), src_pix);
|
||||
bool getpixclosed = pix_music_mask != nullptr;
|
||||
OpenclDevice::pixGetLinesCL(nullptr, src_pix, pix_vline, pix_hline, &pix_closed, getpixclosed,
|
||||
closing_brick, closing_brick, max_line_width, max_line_width,
|
||||
min_line_length, min_line_length);
|
||||
} else {
|
||||
#endif
|
||||
// Close up small holes, making it less likely that false alarms are found
|
||||
// in thickened text (as it will become more solid) and also smoothing over
|
||||
// some line breaks and nicks in the edges of the lines.
|
||||
pix_closed = pixCloseBrick(nullptr, src_pix, closing_brick, closing_brick);
|
||||
if (pixa_display != nullptr) {
|
||||
pixaAddPix(pixa_display, pix_closed, L_CLONE);
|
||||
}
|
||||
// Open up with a big box to detect solid areas, which can then be
|
||||
// subtracted. This is very generous and will leave in even quite wide
|
||||
// lines.
|
||||
Image pix_solid = pixOpenBrick(nullptr, pix_closed, max_line_width, max_line_width);
|
||||
if (pixa_display != nullptr) {
|
||||
pixaAddPix(pixa_display, pix_solid, L_CLONE);
|
||||
}
|
||||
pix_hollow = pixSubtract(nullptr, pix_closed, pix_solid);
|
||||
|
||||
pix_solid.destroy();
|
||||
|
||||
// Now open up in both directions independently to find lines of at least
|
||||
// 1 inch/kMinLineLengthFraction in length.
|
||||
if (pixa_display != nullptr) {
|
||||
pixaAddPix(pixa_display, pix_hollow, L_CLONE);
|
||||
}
|
||||
*pix_vline = pixOpenBrick(nullptr, pix_hollow, 1, min_line_length);
|
||||
*pix_hline = pixOpenBrick(nullptr, pix_hollow, min_line_length, 1);
|
||||
|
||||
pix_hollow.destroy();
|
||||
#ifdef USE_OPENCL
|
||||
// Close up small holes, making it less likely that false alarms are found
|
||||
// in thickened text (as it will become more solid) and also smoothing over
|
||||
// some line breaks and nicks in the edges of the lines.
|
||||
pix_closed = pixCloseBrick(nullptr, src_pix, closing_brick, closing_brick);
|
||||
if (pixa_display != nullptr) {
|
||||
pixaAddPix(pixa_display, pix_closed, L_CLONE);
|
||||
}
|
||||
#endif
|
||||
// Open up with a big box to detect solid areas, which can then be
|
||||
// subtracted. This is very generous and will leave in even quite wide
|
||||
// lines.
|
||||
Image pix_solid = pixOpenBrick(nullptr, pix_closed, max_line_width, max_line_width);
|
||||
if (pixa_display != nullptr) {
|
||||
pixaAddPix(pixa_display, pix_solid, L_CLONE);
|
||||
}
|
||||
pix_hollow = pixSubtract(nullptr, pix_closed, pix_solid);
|
||||
|
||||
pix_solid.destroy();
|
||||
|
||||
// Now open up in both directions independently to find lines of at least
|
||||
// 1 inch/kMinLineLengthFraction in length.
|
||||
if (pixa_display != nullptr) {
|
||||
pixaAddPix(pixa_display, pix_hollow, L_CLONE);
|
||||
}
|
||||
*pix_vline = pixOpenBrick(nullptr, pix_hollow, 1, min_line_length);
|
||||
*pix_hline = pixOpenBrick(nullptr, pix_hollow, min_line_length, 1);
|
||||
|
||||
pix_hollow.destroy();
|
||||
|
||||
// Lines are sufficiently rare, that it is worth checking for a zero image.
|
||||
bool v_empty = pix_vline->isZero();
|
||||
|
1
sw.cpp
1
sw.cpp
@ -21,7 +21,6 @@ void build(Solution &s)
|
||||
|
||||
libtesseract.Public += "include"_idir;
|
||||
libtesseract.Protected +=
|
||||
"src/opencl"_id,
|
||||
"src/ccmain"_id,
|
||||
"src/api"_id,
|
||||
"src/dict"_id,
|
||||
|
@ -12,5 +12,5 @@ URL: https://github.com/tesseract-ocr/tesseract
|
||||
Version: @VERSION@
|
||||
Requires.private: lept
|
||||
Libs: -L${libdir} -ltesseract @libarchive_LIBS@ @libcurl_LIBS@ @TENSORFLOW_LIBS@
|
||||
Libs.private: -lpthread @OPENCL_LDFLAGS@
|
||||
Libs.private: -lpthread
|
||||
Cflags: -I${includedir}
|
||||
|
Loading…
Reference in New Issue
Block a user