Merge branch 'master' of github.com-egorpugin:tesseract-ocr/tesseract

This commit is contained in:
Egor Pugin 2018-04-11 00:16:35 +03:00
commit 742a087507
13 changed files with 95 additions and 39 deletions

View File

@ -127,8 +127,8 @@ configure_file(${AUTOCONFIG_SRC} ${AUTOCONFIG} @ONLY)
set(INCLUDE_DIR "${CMAKE_INSTALL_PREFIX}/include" "${CMAKE_INSTALL_PREFIX}/include/tesseract")
configure_file(
${CMAKE_SOURCE_DIR}/api/version.h.in
${CMAKE_BINARY_DIR}/api/version.h @ONLY)
${CMAKE_SOURCE_DIR}/api/tess_version.h.in
${CMAKE_BINARY_DIR}/api/tess_version.h @ONLY)
configure_file(
${CMAKE_SOURCE_DIR}/vs2010/tesseract/tesseract.rc.in
${CMAKE_BINARY_DIR}/vs2010/tesseract/tesseract.rc @ONLY)
@ -160,6 +160,7 @@ include_directories(${Leptonica_INCLUDE_DIRS})
include_directories(${CMAKE_BINARY_DIR})
include_directories(api)
include_directories(${CMAKE_BINARY_DIR}/api)
include_directories(arch)
include_directories(ccmain)
include_directories(ccstruct)
@ -328,6 +329,7 @@ install(FILES
api/baseapi.h
api/capi.h
api/renderer.h
${CMAKE_CURRENT_BINARY_DIR}/api/tess_version.h
#from arch/makefile.am
arch/dotproductavx.h

View File

@ -5,7 +5,7 @@ AM_CPPFLAGS += -DLOCALEDIR=\"$(localedir)\"\
-I$(top_srcdir)/textord -I$(top_srcdir)/dict \
-I$(top_srcdir)/classify -I$(top_srcdir)/ccmain \
-I$(top_srcdir)/wordrec -I$(top_srcdir)/cutil \
-I$(top_srcdir)/opencl
-I$(top_srcdir)/opencl -I$(top_builddir)/api
AM_CPPFLAGS += $(OPENCL_CPPFLAGS)
@ -13,7 +13,7 @@ if VISIBILITY
AM_CPPFLAGS += -fvisibility=hidden -fvisibility-inlines-hidden
endif
include_HEADERS = apitypes.h baseapi.h capi.h renderer.h
include_HEADERS = apitypes.h baseapi.h capi.h renderer.h tess_version.h
lib_LTLIBRARIES =
noinst_LTLIBRARIES = libtesseract_api.la

View File

@ -24,6 +24,7 @@
// To avoid collision with other typenames include the ABSOLUTE MINIMUM
// complexity of includes here. Use forward declarations wherever possible
// and hide includes of complex types in baseapi.cpp.
#include "tess_version.h"
#include "apitypes.h"
#include "pageiterator.h"
#include "platform.h"

30
api/tess_version.h.in Normal file
View File

@ -0,0 +1,30 @@
///////////////////////////////////////////////////////////////////////
// File: tess_version.h
// Description: Version information
//
// (C) Copyright 2018, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#ifndef TESSERACT_API_VERSION_H_
#define TESSERACT_API_VERSION_H_
// The placeholders delimited by at-signs below are substituted when the
// build system generates tess_version.h from this template.
#define TESSERACT_MAJOR_VERSION @GENERIC_MAJOR_VERSION@
#define TESSERACT_MINOR_VERSION @GENERIC_MINOR_VERSION@
#define TESSERACT_MICRO_VERSION @GENERIC_MICRO_VERSION@
// Single integer combining the three components: major shifted left by
// 16 bits, minor shifted left by 8 bits, micro unshifted.
#define TESSERACT_VERSION \
(TESSERACT_MAJOR_VERSION << 16 | \
TESSERACT_MINOR_VERSION << 8 | \
TESSERACT_MICRO_VERSION)
// Package version as a string literal.
#define TESSERACT_VERSION_STR "@PACKAGE_VERSION@"
#endif // TESSERACT_API_VERSION_H_

View File

@ -1,8 +0,0 @@
#define TESSERACT_MAJOR_VERSION @GENERIC_MAJOR_VERSION@
#define TESSERACT_MINOR_VERSION @GENERIC_MINOR_VERSION@
#define TESSERACT_MICRO_VERSION @GENERIC_MICRO_VERSION@
#define TESSERACT_VERSION \
(TESSERACT_MAJOR_VERSION << 16 | \
TESSERACT_MINOR_VERSION << 8 | \
TESSERACT_MICRO_VERSION)
#define TESSERACT_VERSION_STR "@PACKAGE_VERSION@"

View File

@ -65,7 +65,6 @@ AC_SUBST([GENERIC_RELEASE])
AC_SUBST([GENERIC_VERSION])
AC_CONFIG_HEADERS([config_auto.h:config/config.h.in])
AM_MAINTAINER_MODE
# default conditional
AM_CONDITIONAL([T_WIN], false)
@ -415,6 +414,17 @@ AC_SYS_LARGEFILE
AC_CHECK_FUNCS([getline])
# ----------------------------------------
# Check for programs needed to build documentation.
# ----------------------------------------
# have_asciidoc is set to the command "true" or "false", so it can be used
# directly as the condition. AM_CONDITIONAL is invoked exactly once and
# unconditionally, as Automake requires.
AC_CHECK_PROG([have_asciidoc], asciidoc, true, false)
AM_CONDITIONAL([ASCIIDOC], [$have_asciidoc])
# ----------------------------------------
# Checks for typedefs, structures, and compiler characteristics.
# ----------------------------------------
@ -479,7 +489,7 @@ fi
# Output files
AC_CONFIG_FILES([Makefile tesseract.pc])
AC_CONFIG_FILES([api/Makefile])
AC_CONFIG_FILES([api/version.h])
AC_CONFIG_FILES([api/tess_version.h])
AC_CONFIG_FILES([arch/Makefile])
AC_CONFIG_FILES([ccmain/Makefile])
AC_CONFIG_FILES([opencl/Makefile])
@ -514,6 +524,16 @@ echo "You can now build and install $PACKAGE_NAME by running:"
echo ""
echo "$ make"
echo "$ sudo make install"
echo ""
AM_COND_IF([ASCIIDOC],
[
echo "This will also build the documentation."
], [
echo "Documentation will not be built because asciidoc is missing."
]
)
# echo "$ sudo make install LANGS=\"eng ara deu\""
# echo " Or:"
# echo "$ sudo make install-langs"

View File

@ -1,4 +1,6 @@
if MAINTAINER_MODE
# doc/Makefile.am
if ASCIIDOC
asciidoc=asciidoc -d manpage
@ -29,4 +31,4 @@ man_MANS = \
MAINTAINERCLEANFILES = $(man_MANS) Doxyfile
endif # MAINTAINER_MODE
endif

View File

@ -147,15 +147,12 @@ void FullyConnected::Forward(bool debug, const NetworkIO& input,
int thread_id = 0;
#endif
double* temp_line = temp_lines[thread_id];
const double* d_input = nullptr;
const int8_t* i_input = nullptr;
if (input.int_mode()) {
i_input = input.i(t);
ForwardTimeStep(input.i(t), t, temp_line);
} else {
input.ReadTimeStep(t, curr_input[thread_id]);
d_input = curr_input[thread_id];
ForwardTimeStep(curr_input[thread_id], t, temp_line);
}
ForwardTimeStep(d_input, i_input, t, temp_line);
output->WriteTimeStep(t, temp_line);
if (IsTraining() && type_ != NT_SOFTMAX) {
acts_.CopyTimeStepFrom(t, *output, t);
@ -188,15 +185,7 @@ void FullyConnected::SetupForward(const NetworkIO& input,
}
}
void FullyConnected::ForwardTimeStep(const double* d_input, const int8_t* i_input,
int t, double* output_line) {
// input is copied to source_ line-by-line for cache coherency.
if (IsTraining() && external_source_ == nullptr && d_input != nullptr)
source_t_.WriteStrided(t, d_input);
if (d_input != nullptr)
weights_.MatrixDotVector(d_input, output_line);
else
weights_.MatrixDotVector(i_input, output_line);
void FullyConnected::ForwardTimeStep(int t, double* output_line) {
if (type_ == NT_TANH) {
FuncInplace<GFunc>(no_, output_line);
} else if (type_ == NT_LOGISTIC) {
@ -214,6 +203,22 @@ void FullyConnected::ForwardTimeStep(const double* d_input, const int8_t* i_inpu
}
}
// Forward step for timestep t with double-precision input: multiplies
// d_input by the weights into output_line, then finishes with
// ForwardTimeStep(t, output_line).
void FullyConnected::ForwardTimeStep(const double* d_input,
                                     int t, double* output_line) {
  // When training without an external source, the input is copied to
  // source_t_ line-by-line for cache coherency.
  if (IsTraining() && external_source_ == nullptr)
    source_t_.WriteStrided(t, d_input);
  weights_.MatrixDotVector(d_input, output_line);
  ForwardTimeStep(t, output_line);
}
// Forward step for timestep t with int8_t input: multiplies i_input by the
// weights into output_line, then finishes with
// ForwardTimeStep(t, output_line). Nothing is copied to source_t_ here.
void FullyConnected::ForwardTimeStep(const int8_t* i_input,
int t, double* output_line) {
weights_.MatrixDotVector(i_input, output_line);
ForwardTimeStep(t, output_line);
}
// Runs backward propagation of errors on the deltas line.
// See NetworkCpp for a detailed discussion of the arguments.
bool FullyConnected::Backward(bool debug, const NetworkIO& fwd_deltas,

View File

@ -91,8 +91,9 @@ class FullyConnected : public Network {
// Components of Forward so FullyConnected can be reused inside LSTM.
void SetupForward(const NetworkIO& input,
const TransposedArray* input_transpose);
void ForwardTimeStep(const double* d_input, const int8_t* i_input, int t,
double* output_line);
void ForwardTimeStep(int t, double* output_line);
void ForwardTimeStep(const double* d_input, int t, double* output_line);
void ForwardTimeStep(const int8_t* i_input, int t, double* output_line);
// Runs backward propagation of errors on the deltas line.
// See Network for a detailed discussion of the arguments.

View File

@ -396,9 +396,9 @@ void LSTM::Forward(bool debug, const NetworkIO& input,
if (softmax_ != nullptr) {
if (input.int_mode()) {
int_output->WriteTimeStepPart(0, 0, ns_, curr_output);
softmax_->ForwardTimeStep(nullptr, int_output->i(0), t, softmax_output);
softmax_->ForwardTimeStep(int_output->i(0), t, softmax_output);
} else {
softmax_->ForwardTimeStep(curr_output, nullptr, t, softmax_output);
softmax_->ForwardTimeStep(curr_output, t, softmax_output);
}
output->WriteTimeStep(t, softmax_output);
if (type_ == NT_LSTM_SOFTMAX_ENCODED) {

View File

@ -31,8 +31,6 @@ namespace tesseract {
// therefore only used during construction of the regions.
class WorkingPartSet : public ELIST_LINK {
public:
WorkingPartSet() {
}
explicit WorkingPartSet(ColPartition* column)
: column_(column), latest_part_(NULL), part_it_(&part_set_) {
}

View File

@ -7,7 +7,8 @@ AM_CPPFLAGS += \
-I$(top_srcdir)/viewer \
-I$(top_srcdir)/textord -I$(top_srcdir)/dict \
-I$(top_srcdir)/classify -I$(top_srcdir)/display \
-I$(top_srcdir)/wordrec -I$(top_srcdir)/cutil
-I$(top_srcdir)/wordrec -I$(top_srcdir)/cutil \
-I$(top_builddir)/api
EXTRA_DIST = language-specific.sh tesstrain.sh tesstrain_utils.sh

View File

@ -192,7 +192,11 @@ parse_flags() {
# Function initializes font config with a unique font cache dir.
initialize_fontconfig() {
if [[ "$OSTYPE" == "darwin"* ]]; then
export FONT_CONFIG_CACHE=$(mktemp -d -t font_tmp.XXXXXXXXXX)
else
export FONT_CONFIG_CACHE=$(mktemp -d --tmpdir font_tmp.XXXXXXXXXX)
fi
local sample_path=${FONT_CONFIG_CACHE}/sample_text.txt
echo "Text" >${sample_path}
run_command text2image --fonts_dir=${FONTS_DIR} \
@ -265,6 +269,7 @@ phase_I_generate_image() {
local counter=0
for font in "${FONTS[@]}"; do
sleep 1
generate_font_image "${font}" &
let counter=counter+1
let rem=counter%par_factor
@ -566,4 +571,3 @@ make__traineddata() {
tlog "Moving ${TRAINING_DIR}/${LANG_CODE}.traineddata to ${OUTPUT_DIR}"
cp -f ${TRAINING_DIR}/${LANG_CODE}.traineddata ${destfile}
}