mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2025-01-18 14:41:36 +08:00
Merge branch 'master' of github.com-egorpugin:tesseract-ocr/tesseract
This commit is contained in:
commit
742a087507
@ -127,8 +127,8 @@ configure_file(${AUTOCONFIG_SRC} ${AUTOCONFIG} @ONLY)
|
||||
set(INCLUDE_DIR "${CMAKE_INSTALL_PREFIX}/include" "${CMAKE_INSTALL_PREFIX}/include/tesseract")
|
||||
|
||||
configure_file(
|
||||
${CMAKE_SOURCE_DIR}/api/version.h.in
|
||||
${CMAKE_BINARY_DIR}/api/version.h @ONLY)
|
||||
${CMAKE_SOURCE_DIR}/api/tess_version.h.in
|
||||
${CMAKE_BINARY_DIR}/api/tess_version.h @ONLY)
|
||||
configure_file(
|
||||
${CMAKE_SOURCE_DIR}/vs2010/tesseract/tesseract.rc.in
|
||||
${CMAKE_BINARY_DIR}/vs2010/tesseract/tesseract.rc @ONLY)
|
||||
@ -160,6 +160,7 @@ include_directories(${Leptonica_INCLUDE_DIRS})
|
||||
include_directories(${CMAKE_BINARY_DIR})
|
||||
|
||||
include_directories(api)
|
||||
include_directories(${CMAKE_BINARY_DIR}/api)
|
||||
include_directories(arch)
|
||||
include_directories(ccmain)
|
||||
include_directories(ccstruct)
|
||||
@ -328,6 +329,7 @@ install(FILES
|
||||
api/baseapi.h
|
||||
api/capi.h
|
||||
api/renderer.h
|
||||
${CMAKE_CURRENT_BINARY_DIR}/api/tess_version.h
|
||||
|
||||
#from arch/makefile.am
|
||||
arch/dotproductavx.h
|
||||
|
@ -5,7 +5,7 @@ AM_CPPFLAGS += -DLOCALEDIR=\"$(localedir)\"\
|
||||
-I$(top_srcdir)/textord -I$(top_srcdir)/dict \
|
||||
-I$(top_srcdir)/classify -I$(top_srcdir)/ccmain \
|
||||
-I$(top_srcdir)/wordrec -I$(top_srcdir)/cutil \
|
||||
-I$(top_srcdir)/opencl
|
||||
-I$(top_srcdir)/opencl -I$(top_builddir)/api
|
||||
|
||||
AM_CPPFLAGS += $(OPENCL_CPPFLAGS)
|
||||
|
||||
@ -13,7 +13,7 @@ if VISIBILITY
|
||||
AM_CPPFLAGS += -fvisibility=hidden -fvisibility-inlines-hidden
|
||||
endif
|
||||
|
||||
include_HEADERS = apitypes.h baseapi.h capi.h renderer.h
|
||||
include_HEADERS = apitypes.h baseapi.h capi.h renderer.h tess_version.h
|
||||
lib_LTLIBRARIES =
|
||||
|
||||
noinst_LTLIBRARIES = libtesseract_api.la
|
||||
|
@ -24,6 +24,7 @@
|
||||
// To avoid collision with other typenames include the ABSOLUTE MINIMUM
|
||||
// complexity of includes here. Use forward declarations wherever possible
|
||||
// and hide includes of complex types in baseapi.cpp.
|
||||
#include "tess_version.h"
|
||||
#include "apitypes.h"
|
||||
#include "pageiterator.h"
|
||||
#include "platform.h"
|
||||
|
30
api/tess_version.h.in
Normal file
30
api/tess_version.h.in
Normal file
@ -0,0 +1,30 @@
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
// File: tess_version.h
|
||||
// Description: Version information
|
||||
//
|
||||
// (C) Copyright 2018, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef TESSERACT_API_VERSION_H_
|
||||
#define TESSERACT_API_VERSION_H_
|
||||
|
||||
#define TESSERACT_MAJOR_VERSION @GENERIC_MAJOR_VERSION@
|
||||
#define TESSERACT_MINOR_VERSION @GENERIC_MINOR_VERSION@
|
||||
#define TESSERACT_MICRO_VERSION @GENERIC_MICRO_VERSION@
|
||||
#define TESSERACT_VERSION \
|
||||
(TESSERACT_MAJOR_VERSION << 16 | \
|
||||
TESSERACT_MINOR_VERSION << 8 | \
|
||||
TESSERACT_MICRO_VERSION)
|
||||
#define TESSERACT_VERSION_STR "@PACKAGE_VERSION@"
|
||||
|
||||
#endif // TESSERACT_API_VERSION_H_
|
@ -1,8 +0,0 @@
|
||||
#define TESSERACT_MAJOR_VERSION @GENERIC_MAJOR_VERSION@
|
||||
#define TESSERACT_MINOR_VERSION @GENERIC_MINOR_VERSION@
|
||||
#define TESSERACT_MICRO_VERSION @GENERIC_MICRO_VERSION@
|
||||
#define TESSERACT_VERSION \
|
||||
(TESSERACT_MAJOR_VERSION << 16 | \
|
||||
TESSERACT_MINOR_VERSION << 8 | \
|
||||
TESSERACT_MICRO_VERSION)
|
||||
#define TESSERACT_VERSION_STR "@PACKAGE_VERSION@"
|
24
configure.ac
24
configure.ac
@ -65,7 +65,6 @@ AC_SUBST([GENERIC_RELEASE])
|
||||
AC_SUBST([GENERIC_VERSION])
|
||||
|
||||
AC_CONFIG_HEADERS([config_auto.h:config/config.h.in])
|
||||
AM_MAINTAINER_MODE
|
||||
|
||||
# default conditional
|
||||
AM_CONDITIONAL([T_WIN], false)
|
||||
@ -415,6 +414,17 @@ AC_SYS_LARGEFILE
|
||||
|
||||
AC_CHECK_FUNCS([getline])
|
||||
|
||||
# ----------------------------------------
|
||||
# Check for programs needed to build documentation.
|
||||
# ----------------------------------------
|
||||
|
||||
AC_CHECK_PROG([have_asciidoc], asciidoc, true, false)
|
||||
if $have_asciidoc; then
|
||||
AM_CONDITIONAL([ASCIIDOC], true)
|
||||
else
|
||||
AM_CONDITIONAL([ASCIIDOC], false)
|
||||
fi
|
||||
|
||||
# ----------------------------------------
|
||||
# Checks for typedefs, structures, and compiler characteristics.
|
||||
# ----------------------------------------
|
||||
@ -479,7 +489,7 @@ fi
|
||||
# Output files
|
||||
AC_CONFIG_FILES([Makefile tesseract.pc])
|
||||
AC_CONFIG_FILES([api/Makefile])
|
||||
AC_CONFIG_FILES([api/version.h])
|
||||
AC_CONFIG_FILES([api/tess_version.h])
|
||||
AC_CONFIG_FILES([arch/Makefile])
|
||||
AC_CONFIG_FILES([ccmain/Makefile])
|
||||
AC_CONFIG_FILES([opencl/Makefile])
|
||||
@ -514,6 +524,16 @@ echo "You can now build and install $PACKAGE_NAME by running:"
|
||||
echo ""
|
||||
echo "$ make"
|
||||
echo "$ sudo make install"
|
||||
echo ""
|
||||
|
||||
AM_COND_IF([ASCIIDOC],
|
||||
[
|
||||
echo "This will also build the documentation."
|
||||
], [
|
||||
echo "Documentation will not be built because asciidoc is missing."
|
||||
]
|
||||
)
|
||||
|
||||
# echo "$ sudo make install LANGS=\"eng ara deu\""
|
||||
# echo " Or:"
|
||||
# echo "$ sudo make install-langs"
|
||||
|
@ -1,4 +1,6 @@
|
||||
if MAINTAINER_MODE
|
||||
# doc/Makefile.am
|
||||
|
||||
if ASCIIDOC
|
||||
|
||||
asciidoc=asciidoc -d manpage
|
||||
|
||||
@ -29,4 +31,4 @@ man_MANS = \
|
||||
|
||||
MAINTAINERCLEANFILES = $(man_MANS) Doxyfile
|
||||
|
||||
endif # MAINTAINER_MODE
|
||||
endif
|
||||
|
@ -147,15 +147,12 @@ void FullyConnected::Forward(bool debug, const NetworkIO& input,
|
||||
int thread_id = 0;
|
||||
#endif
|
||||
double* temp_line = temp_lines[thread_id];
|
||||
const double* d_input = nullptr;
|
||||
const int8_t* i_input = nullptr;
|
||||
if (input.int_mode()) {
|
||||
i_input = input.i(t);
|
||||
ForwardTimeStep(input.i(t), t, temp_line);
|
||||
} else {
|
||||
input.ReadTimeStep(t, curr_input[thread_id]);
|
||||
d_input = curr_input[thread_id];
|
||||
ForwardTimeStep(curr_input[thread_id], t, temp_line);
|
||||
}
|
||||
ForwardTimeStep(d_input, i_input, t, temp_line);
|
||||
output->WriteTimeStep(t, temp_line);
|
||||
if (IsTraining() && type_ != NT_SOFTMAX) {
|
||||
acts_.CopyTimeStepFrom(t, *output, t);
|
||||
@ -188,15 +185,7 @@ void FullyConnected::SetupForward(const NetworkIO& input,
|
||||
}
|
||||
}
|
||||
|
||||
void FullyConnected::ForwardTimeStep(const double* d_input, const int8_t* i_input,
|
||||
int t, double* output_line) {
|
||||
// input is copied to source_ line-by-line for cache coherency.
|
||||
if (IsTraining() && external_source_ == nullptr && d_input != nullptr)
|
||||
source_t_.WriteStrided(t, d_input);
|
||||
if (d_input != nullptr)
|
||||
weights_.MatrixDotVector(d_input, output_line);
|
||||
else
|
||||
weights_.MatrixDotVector(i_input, output_line);
|
||||
void FullyConnected::ForwardTimeStep(int t, double* output_line) {
|
||||
if (type_ == NT_TANH) {
|
||||
FuncInplace<GFunc>(no_, output_line);
|
||||
} else if (type_ == NT_LOGISTIC) {
|
||||
@ -214,6 +203,22 @@ void FullyConnected::ForwardTimeStep(const double* d_input, const int8_t* i_inpu
|
||||
}
|
||||
}
|
||||
|
||||
void FullyConnected::ForwardTimeStep(const double* d_input,
|
||||
int t, double* output_line) {
|
||||
// input is copied to source_ line-by-line for cache coherency.
|
||||
if (IsTraining() && external_source_ == NULL)
|
||||
source_t_.WriteStrided(t, d_input);
|
||||
weights_.MatrixDotVector(d_input, output_line);
|
||||
ForwardTimeStep(t, output_line);
|
||||
}
|
||||
|
||||
void FullyConnected::ForwardTimeStep(const int8_t* i_input,
|
||||
int t, double* output_line) {
|
||||
// Integer input is not copied to source_; only the matrix-vector product is computed.
|
||||
weights_.MatrixDotVector(i_input, output_line);
|
||||
ForwardTimeStep(t, output_line);
|
||||
}
|
||||
|
||||
// Runs backward propagation of errors on the deltas line.
|
||||
// See Network for a detailed discussion of the arguments.
|
||||
bool FullyConnected::Backward(bool debug, const NetworkIO& fwd_deltas,
|
||||
|
@ -91,8 +91,9 @@ class FullyConnected : public Network {
|
||||
// Components of Forward so FullyConnected can be reused inside LSTM.
|
||||
void SetupForward(const NetworkIO& input,
|
||||
const TransposedArray* input_transpose);
|
||||
void ForwardTimeStep(const double* d_input, const int8_t* i_input, int t,
|
||||
double* output_line);
|
||||
void ForwardTimeStep(int t, double* output_line);
|
||||
void ForwardTimeStep(const double* d_input, int t, double* output_line);
|
||||
void ForwardTimeStep(const int8_t* i_input, int t, double* output_line);
|
||||
|
||||
// Runs backward propagation of errors on the deltas line.
|
||||
// See Network for a detailed discussion of the arguments.
|
||||
|
@ -396,9 +396,9 @@ void LSTM::Forward(bool debug, const NetworkIO& input,
|
||||
if (softmax_ != nullptr) {
|
||||
if (input.int_mode()) {
|
||||
int_output->WriteTimeStepPart(0, 0, ns_, curr_output);
|
||||
softmax_->ForwardTimeStep(nullptr, int_output->i(0), t, softmax_output);
|
||||
softmax_->ForwardTimeStep(int_output->i(0), t, softmax_output);
|
||||
} else {
|
||||
softmax_->ForwardTimeStep(curr_output, nullptr, t, softmax_output);
|
||||
softmax_->ForwardTimeStep(curr_output, t, softmax_output);
|
||||
}
|
||||
output->WriteTimeStep(t, softmax_output);
|
||||
if (type_ == NT_LSTM_SOFTMAX_ENCODED) {
|
||||
|
@ -31,8 +31,6 @@ namespace tesseract {
|
||||
// therefore only used during construction of the regions.
|
||||
class WorkingPartSet : public ELIST_LINK {
|
||||
public:
|
||||
WorkingPartSet() {
|
||||
}
|
||||
explicit WorkingPartSet(ColPartition* column)
|
||||
: column_(column), latest_part_(NULL), part_it_(&part_set_) {
|
||||
}
|
||||
|
@ -7,7 +7,8 @@ AM_CPPFLAGS += \
|
||||
-I$(top_srcdir)/viewer \
|
||||
-I$(top_srcdir)/textord -I$(top_srcdir)/dict \
|
||||
-I$(top_srcdir)/classify -I$(top_srcdir)/display \
|
||||
-I$(top_srcdir)/wordrec -I$(top_srcdir)/cutil
|
||||
-I$(top_srcdir)/wordrec -I$(top_srcdir)/cutil \
|
||||
-I$(top_builddir)/api
|
||||
|
||||
EXTRA_DIST = language-specific.sh tesstrain.sh tesstrain_utils.sh
|
||||
|
||||
|
@ -192,7 +192,11 @@ parse_flags() {
|
||||
|
||||
# Function initializes font config with a unique font cache dir.
|
||||
initialize_fontconfig() {
|
||||
export FONT_CONFIG_CACHE=$(mktemp -d --tmpdir font_tmp.XXXXXXXXXX)
|
||||
if [[ "$OSTYPE" == "darwin"* ]]; then
|
||||
export FONT_CONFIG_CACHE=$(mktemp -d -t font_tmp.XXXXXXXXXX)
|
||||
else
|
||||
export FONT_CONFIG_CACHE=$(mktemp -d --tmpdir font_tmp.XXXXXXXXXX)
|
||||
fi
|
||||
local sample_path=${FONT_CONFIG_CACHE}/sample_text.txt
|
||||
echo "Text" >${sample_path}
|
||||
run_command text2image --fonts_dir=${FONTS_DIR} \
|
||||
@ -265,6 +269,7 @@ phase_I_generate_image() {
|
||||
|
||||
local counter=0
|
||||
for font in "${FONTS[@]}"; do
|
||||
sleep 1
|
||||
generate_font_image "${font}" &
|
||||
let counter=counter+1
|
||||
let rem=counter%par_factor
|
||||
@ -566,4 +571,3 @@ make__traineddata() {
|
||||
tlog "Moving ${TRAINING_DIR}/${LANG_CODE}.traineddata to ${OUTPUT_DIR}"
|
||||
cp -f ${TRAINING_DIR}/${LANG_CODE}.traineddata ${destfile}
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user