mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2025-06-07 18:02:40 +08:00
Added intsimdmatrix as a generic integer matrixdotvector function with AVX2 and SSE specializations
This commit is contained in:
parent
ad74e8a69c
commit
fc6a390c6c
@ -30,6 +30,7 @@ libtesseract_api_la_LIBADD = \
|
|||||||
../dict/libtesseract_dict.la \
|
../dict/libtesseract_dict.la \
|
||||||
../arch/libtesseract_arch.la \
|
../arch/libtesseract_arch.la \
|
||||||
../arch/libtesseract_avx.la \
|
../arch/libtesseract_avx.la \
|
||||||
|
../arch/libtesseract_avx2.la \
|
||||||
../arch/libtesseract_sse.la \
|
../arch/libtesseract_sse.la \
|
||||||
../lstm/libtesseract_lstm.la \
|
../lstm/libtesseract_lstm.la \
|
||||||
../ccstruct/libtesseract_ccstruct.la \
|
../ccstruct/libtesseract_ccstruct.la \
|
||||||
@ -60,6 +61,7 @@ libtesseract_la_LIBADD = \
|
|||||||
../dict/libtesseract_dict.la \
|
../dict/libtesseract_dict.la \
|
||||||
../arch/libtesseract_arch.la \
|
../arch/libtesseract_arch.la \
|
||||||
../arch/libtesseract_avx.la \
|
../arch/libtesseract_avx.la \
|
||||||
|
../arch/libtesseract_avx2.la \
|
||||||
../arch/libtesseract_sse.la \
|
../arch/libtesseract_sse.la \
|
||||||
../lstm/libtesseract_lstm.la \
|
../lstm/libtesseract_lstm.la \
|
||||||
../ccstruct/libtesseract_ccstruct.la \
|
../ccstruct/libtesseract_ccstruct.la \
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
AM_CPPFLAGS += -I$(top_srcdir)/ccutil -I$(top_srcdir)/viewer -DUSE_STD_NAMESPACE
|
AM_CPPFLAGS += -I$(top_srcdir)/ccstruct -I$(top_srcdir)/ccutil -I$(top_srcdir)/viewer -DUSE_STD_NAMESPACE
|
||||||
AUTOMAKE_OPTIONS = subdir-objects
|
AUTOMAKE_OPTIONS = subdir-objects
|
||||||
SUBDIRS =
|
SUBDIRS =
|
||||||
AM_CXXFLAGS =
|
AM_CXXFLAGS =
|
||||||
@ -8,31 +8,37 @@ AM_CXXFLAGS += -fvisibility=hidden -fvisibility-inlines-hidden
|
|||||||
AM_CPPFLAGS += -DTESS_EXPORTS
|
AM_CPPFLAGS += -DTESS_EXPORTS
|
||||||
endif
|
endif
|
||||||
|
|
||||||
include_HEADERS = dotproductavx.h dotproductsse.h simddetect.h
|
include_HEADERS = dotproductavx.h dotproductsse.h intsimdmatrix.h intsimdmatrixavx2.h intsimdmatrixsse.h simddetect.h
|
||||||
|
|
||||||
noinst_HEADERS =
|
noinst_HEADERS =
|
||||||
|
|
||||||
if !USING_MULTIPLELIBS
|
if !USING_MULTIPLELIBS
|
||||||
noinst_LTLIBRARIES = libtesseract_avx.la libtesseract_sse.la
|
noinst_LTLIBRARIES = libtesseract_avx.la libtesseract_avx2.la libtesseract_sse.la
|
||||||
noinst_LTLIBRARIES += libtesseract_arch.la
|
noinst_LTLIBRARIES += libtesseract_arch.la
|
||||||
else
|
else
|
||||||
lib_LTLIBRARIES = libtesseract_avx.la libtesseract_sse.la
|
lib_LTLIBRARIES = libtesseract_avx.la libtesseract_avx2.la libtesseract_sse.la
|
||||||
lib_LTLIBRARIES += libtesseract_arch.la
|
lib_LTLIBRARIES += libtesseract_arch.la
|
||||||
libtesseract_arch_la_LDFLAGS = -version-info $(GENERIC_LIBRARY_VERSION)
|
libtesseract_arch_la_LDFLAGS = -version-info $(GENERIC_LIBRARY_VERSION)
|
||||||
libtesseract_avx_la_LDFLAGS = -version-info $(GENERIC_LIBRARY_VERSION)
|
libtesseract_avx_la_LDFLAGS = -version-info $(GENERIC_LIBRARY_VERSION)
|
||||||
|
libtesseract_avx2_la_LDFLAGS = -version-info $(GENERIC_LIBRARY_VERSION)
|
||||||
libtesseract_sse_la_LDFLAGS = -version-info $(GENERIC_LIBRARY_VERSION)
|
libtesseract_sse_la_LDFLAGS = -version-info $(GENERIC_LIBRARY_VERSION)
|
||||||
endif
|
endif
|
||||||
|
|
||||||
if AVX_OPT
|
if AVX_OPT
|
||||||
libtesseract_avx_la_CXXFLAGS = -mavx
|
libtesseract_avx_la_CXXFLAGS = -mavx
|
||||||
endif
|
endif
|
||||||
|
if AVX2_OPT
|
||||||
|
libtesseract_avx2_la_CXXFLAGS = -mavx2
|
||||||
|
endif
|
||||||
if SSE41_OPT
|
if SSE41_OPT
|
||||||
libtesseract_sse_la_CXXFLAGS = -msse4.1
|
libtesseract_sse_la_CXXFLAGS = -msse4.1
|
||||||
endif
|
endif
|
||||||
|
|
||||||
libtesseract_arch_la_SOURCES = simddetect.cpp
|
libtesseract_arch_la_SOURCES = intsimdmatrix.cpp simddetect.cpp
|
||||||
|
|
||||||
libtesseract_avx_la_SOURCES = dotproductavx.cpp
|
libtesseract_avx_la_SOURCES = dotproductavx.cpp
|
||||||
|
|
||||||
libtesseract_sse_la_SOURCES = dotproductsse.cpp
|
libtesseract_avx2_la_SOURCES = intsimdmatrixavx2.cpp
|
||||||
|
|
||||||
|
libtesseract_sse_la_SOURCES = dotproductsse.cpp intsimdmatrixsse.cpp
|
||||||
|
|
||||||
|
133
arch/intsimdmatrix.cpp
Normal file
133
arch/intsimdmatrix.cpp
Normal file
@ -0,0 +1,133 @@
|
|||||||
|
///////////////////////////////////////////////////////////////////////
|
||||||
|
// File: intsimdmatrix.cpp
|
||||||
|
// Description: Base class for 8-bit int SIMD matrix multipliers.
|
||||||
|
// Author: Ray Smith
|
||||||
|
// Created: Tue Aug 15 08:01:32 PST 2017
|
||||||
|
//
|
||||||
|
// (C) Copyright 2017, Google Inc.
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
///////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
#include "intsimdmatrix.h"
|
||||||
|
#include "intsimdmatrixavx2.h"
|
||||||
|
#include "intsimdmatrixsse.h"
|
||||||
|
#include "simddetect.h"
|
||||||
|
|
||||||
|
namespace tesseract {
|
||||||
|
|
||||||
|
// Factory makes and returns an IntSimdMatrix (sub)class of the best
|
||||||
|
// available type for the current architecture.
|
||||||
|
/* static */
|
||||||
|
IntSimdMatrix* IntSimdMatrix::GetFastestMultiplier() {
|
||||||
|
IntSimdMatrix* multiplier = nullptr;
|
||||||
|
if (SIMDDetect::IsAVX2Available()) {
|
||||||
|
multiplier = new IntSimdMatrixAVX2();
|
||||||
|
} else if (SIMDDetect::IsSSEAvailable()) {
|
||||||
|
multiplier = new IntSimdMatrixSSE();
|
||||||
|
} else {
|
||||||
|
// Default c++ implementation.
|
||||||
|
multiplier = new IntSimdMatrix();
|
||||||
|
}
|
||||||
|
return multiplier;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Computes a reshaped copy of the weight matrix w. If there are no
|
||||||
|
// partial_funcs_, it does nothing.
|
||||||
|
void IntSimdMatrix::Init(const GENERIC_2D_ARRAY<int8_t>& w) {
|
||||||
|
if (partial_funcs_.empty()) return;
|
||||||
|
int num_out = w.dim1();
|
||||||
|
int num_in = w.dim2() - 1;
|
||||||
|
// The rounded-up sizes of the reshaped weight matrix, excluding biases.
|
||||||
|
int rounded_num_in = Roundup(num_in, num_inputs_per_group_);
|
||||||
|
int rounded_num_out = RoundOutputs(num_out);
|
||||||
|
// Add the bias and compute the required size.
|
||||||
|
shaped_w_.resize((rounded_num_in + 1) * rounded_num_out, 0);
|
||||||
|
int shaped_index = 0;
|
||||||
|
int output = 0;
|
||||||
|
// Each number of registers needs a different format! Iterates over the
|
||||||
|
// different numbers of registers (each a power of 2).
|
||||||
|
for (int num_registers = max_output_registers_; num_registers >= 1;
|
||||||
|
num_registers /= 2) {
|
||||||
|
// The number of outputs that we will generate with this many registers.
|
||||||
|
int num_outputs_per_register_set =
|
||||||
|
num_registers * num_outputs_per_register_;
|
||||||
|
// Use the max number of registers until we have to go fewer.
|
||||||
|
while (output + num_outputs_per_register_set <= rounded_num_out) {
|
||||||
|
// Accumulating outputs in registers saves iterating over the inputs, so
|
||||||
|
// we only have to do it once per output register set.
|
||||||
|
for (int input = 0; input < num_in; input += num_inputs_per_group_) {
|
||||||
|
// Iterate over the number of outputs in a register set.
|
||||||
|
for (int j = 0; j < num_outputs_per_register_set; ++j) {
|
||||||
|
// Inner-most loop corresponds to the number of inputs in an input
|
||||||
|
// group.
|
||||||
|
for (int i = 0; i < num_inputs_per_group_; ++i) {
|
||||||
|
int8_t weight = 0;
|
||||||
|
if (output + j < num_out && input + i < num_in)
|
||||||
|
weight = w(output + j, input + i);
|
||||||
|
shaped_w_[shaped_index++] = weight;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Append the bias weights for the register set.
|
||||||
|
for (int j = 0; j < num_outputs_per_register_set; ++j) {
|
||||||
|
int8_t weight = 0;
|
||||||
|
if (output + j < num_out) weight = w(output + j, num_in);
|
||||||
|
shaped_w_[shaped_index++] = weight;
|
||||||
|
}
|
||||||
|
output += num_outputs_per_register_set;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Computes matrix.vector v = Wu.
|
||||||
|
// u is of size W.dim2() - 1 and the output v is of size W.dim1().
|
||||||
|
// u is imagined to have an extra element at the end with value 1, to
|
||||||
|
// implement the bias, but it doesn't actually have it.
|
||||||
|
void IntSimdMatrix::MatrixDotVector(const GENERIC_2D_ARRAY<int8_t>& w,
|
||||||
|
const GenericVector<double>& scales,
|
||||||
|
const int8_t* u, double* v) const {
|
||||||
|
int num_out = w.dim1();
|
||||||
|
int num_in = w.dim2() - 1;
|
||||||
|
if (partial_funcs_.empty()) {
|
||||||
|
// Base implementation.
|
||||||
|
for (int i = 0; i < num_out; ++i) {
|
||||||
|
const int8_t* wi = w[i];
|
||||||
|
int total = 0;
|
||||||
|
for (int j = 0; j < num_in; ++j) total += wi[j] * u[j];
|
||||||
|
// Add in the bias and correct for integer values.
|
||||||
|
v[i] = (static_cast<double>(total) / MAX_INT8 + wi[num_in]) * scales[i];
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
const int8_t* w_data = shaped_w_.data();
|
||||||
|
const double* scales_data = &scales[0];
|
||||||
|
// Each call to a partial_func_ produces group_size outputs, except the
|
||||||
|
// last one, which can produce less.
|
||||||
|
int group_size = num_outputs_per_register_ * max_output_registers_;
|
||||||
|
int rounded_num_in = Roundup(num_in, num_inputs_per_group_);
|
||||||
|
int rounded_num_out = RoundOutputs(num_out);
|
||||||
|
int output = 0;
|
||||||
|
for (auto fn : partial_funcs_) {
|
||||||
|
// The amount of w_data consumed by each call to fn.
|
||||||
|
int w_step = (rounded_num_in + 1) * group_size;
|
||||||
|
// Run with this group size, until it would produce too much output, then
|
||||||
|
// switch to a smaller size.
|
||||||
|
for (; output + group_size <= rounded_num_out; output += group_size) {
|
||||||
|
(*fn)(w_data, scales_data, u, rounded_num_in, num_out - output, v);
|
||||||
|
w_data += w_step;
|
||||||
|
scales_data += group_size;
|
||||||
|
v += group_size;
|
||||||
|
}
|
||||||
|
group_size /= 2;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace tesseract
|
135
arch/intsimdmatrix.h
Normal file
135
arch/intsimdmatrix.h
Normal file
@ -0,0 +1,135 @@
|
|||||||
|
///////////////////////////////////////////////////////////////////////
|
||||||
|
// File: intsimdmatrix.h
|
||||||
|
// Description: Base class for 8-bit int SIMD matrix multipliers.
|
||||||
|
// Author: Ray Smith
|
||||||
|
// Created: Tue Aug 15 07:37:20 PST 2017
|
||||||
|
//
|
||||||
|
// (C) Copyright 2017, Google Inc.
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
///////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
#ifndef TESSERACT_ARCH_INTSIMDMATRIX_H_
|
||||||
|
#define TESSERACT_ARCH_INTSIMDMATRIX_H_
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <vector>
|
||||||
|
#include "genericvector.h"
|
||||||
|
#include "matrix.h"
|
||||||
|
|
||||||
|
namespace tesseract {
|
||||||
|
|
||||||
|
// Base class for a SIMD function to multiply a matrix by a vector, with sources
|
||||||
|
// of 8-bit signed integer, and result in a double, after appropriate scaling.
|
||||||
|
// Assumes a specific method of multiplication that can be applied to any size
|
||||||
|
// and number of SIMD registers as follows:
|
||||||
|
// int32_t results are computed with num_outputs_per_register_ in each of
|
||||||
|
// max_output_registers_ result registers, repeatedly until it would make too
|
||||||
|
// many results, then the number of registers is halved, and so-on down to a
|
||||||
|
// single result register. The last calculation only outputs the required number
|
||||||
|
// of results instead of writing beyond the bounds. Eg: matrix has 75 outputs,
|
||||||
|
// num_outputs_per_register_ = 4, and max_output_registers_ = 8,
|
||||||
|
// Step 1: 8x4=32 results are computed,
|
||||||
|
// Step 2: 8x4=32 again, total 64,
|
||||||
|
// Step 3: 2x4=8 (since 8x4 is too many, so is 4x4), total 72,
|
||||||
|
// Step 4: 1x3, total 75.
|
||||||
|
// Each step above is computed using a PartialFunc, which runs over the input
|
||||||
|
// vector once. The input is read one registerful of num_inputs_per_register_
|
||||||
|
// at a time (presumably 4x num_outputs_per_register_ since they are int8_t)
|
||||||
|
// so the inputs MUST BE PADDED to a multiple of num_inputs_per_register_.
|
||||||
|
// Since it is slow (on Intel at least) to horizontally add in a register,
|
||||||
|
// provision is made to process num_inputs_per_group_ inputs at a time, with
|
||||||
|
// the group being replicated num_input_groups_ times and multiplied by a
|
||||||
|
// num_inputs_per_group_ by num_input_groups_ rectangle of the weights matrix.
|
||||||
|
// This is most convenient if num_inputs_per_group_ is 4, and the product
|
||||||
|
// sign-extends and sums 8x8=16 bit results to 32 bits, adding 4 adjacent
|
||||||
|
// results in the process, but it doesn't have to be implemented that way.
|
||||||
|
// The weights are re-ordered by Init() to be used sequentially by the above
|
||||||
|
// algorithm, followed by the biases, so they can be added at the end.
|
||||||
|
// The base class computes the base C++ implementation.
|
||||||
|
// NOTE that, although the subclasses execute on different SIMD hardware, no
|
||||||
|
// virtual methods are needed, as the constructor sets up everything that
|
||||||
|
// is required to allow the base class implementation to do all the work.
|
||||||
|
class IntSimdMatrix {
|
||||||
|
public:
|
||||||
|
// Constructor should set the data members to indicate the sizes.
|
||||||
|
// NOTE: Base constructor public only for test purposes.
|
||||||
|
IntSimdMatrix()
|
||||||
|
: num_outputs_per_register_(1),
|
||||||
|
max_output_registers_(1),
|
||||||
|
num_inputs_per_register_(1),
|
||||||
|
num_inputs_per_group_(1),
|
||||||
|
num_input_groups_(1) {}
|
||||||
|
|
||||||
|
// Factory makes and returns an IntSimdMatrix (sub)class of the best
|
||||||
|
// available type for the current architecture.
|
||||||
|
static IntSimdMatrix* GetFastestMultiplier();
|
||||||
|
|
||||||
|
// Computes a reshaped copy of the weight matrix w. If there are no
|
||||||
|
// partial_funcs_, it does nothing.
|
||||||
|
void Init(const GENERIC_2D_ARRAY<int8_t>& w);
|
||||||
|
|
||||||
|
// Rounds the size up to a multiple of the input register size (in int8_t).
|
||||||
|
int RoundInputs(int size) const {
|
||||||
|
return Roundup(size, num_inputs_per_register_);
|
||||||
|
}
|
||||||
|
// Rounds the size up to a multiple of the output register size (in int32_t).
|
||||||
|
int RoundOutputs(int size) const {
|
||||||
|
return Roundup(size, num_outputs_per_register_);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Computes matrix.vector v = Wu.
|
||||||
|
// u is of size W.dim2() - 1 and the output v is of size W.dim1().
|
||||||
|
// u is imagined to have an extra element at the end with value 1, to
|
||||||
|
// implement the bias, but it doesn't actually have it.
|
||||||
|
// Computes the base C++ implementation, if there are no partial_funcs_.
|
||||||
|
// NOTE: The size of the input vector (u) must be padded using
|
||||||
|
// RoundInputs above.
|
||||||
|
// The input will be over-read to the extent of the padding. There are no
|
||||||
|
// alignment requirements.
|
||||||
|
void MatrixDotVector(const GENERIC_2D_ARRAY<int8_t>& w,
|
||||||
|
const GenericVector<double>& scales, const int8_t* u,
|
||||||
|
double* v) const;
|
||||||
|
|
||||||
|
protected:
|
||||||
|
// Function to compute part of a matrix.vector multiplication. The weights
|
||||||
|
// are in a very specific order (see above) in w, which is multiplied by
|
||||||
|
// u of length num_in, to produce output v after scaling the integer results
|
||||||
|
// by the corresponding member of scales.
|
||||||
|
// The amount of w and scales consumed is fixed and not available to the
|
||||||
|
// caller. The number of outputs written to v will be at most num_out.
|
||||||
|
typedef void (*PartialFunc)(const int8_t* w, const double* scales,
|
||||||
|
const int8_t* u, int num_in, int num_out,
|
||||||
|
double* v);
|
||||||
|
|
||||||
|
// Rounds the input up to a multiple of the given factor.
|
||||||
|
static int Roundup(int input, int factor) {
|
||||||
|
return (input + factor - 1) / factor * factor;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Number of 32 bit outputs held in each register.
|
||||||
|
int num_outputs_per_register_;
|
||||||
|
// Maximum number of registers that we will use to hold outputs.
|
||||||
|
int max_output_registers_;
|
||||||
|
// Number of 8 bit inputs in the inputs register.
|
||||||
|
int num_inputs_per_register_;
|
||||||
|
// Number of inputs in each weight group.
|
||||||
|
int num_inputs_per_group_;
|
||||||
|
// Number of groups of inputs to be broadcast.
|
||||||
|
int num_input_groups_;
|
||||||
|
// The weights matrix reorganized in whatever way suits this instance.
|
||||||
|
std::vector<int8_t> shaped_w_;
|
||||||
|
// A series of functions to compute a partial result.
|
||||||
|
std::vector<PartialFunc> partial_funcs_;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace tesseract
|
||||||
|
|
||||||
|
#endif // TESSERACT_ARCH_INTSIMDMATRIX_H_
|
275
arch/intsimdmatrixavx2.cpp
Normal file
275
arch/intsimdmatrixavx2.cpp
Normal file
@ -0,0 +1,275 @@
|
|||||||
|
///////////////////////////////////////////////////////////////////////
|
||||||
|
// File: intsimdmatrixavx2.cpp
|
||||||
|
// Description: matrix-vector product for 8-bit data on avx2.
|
||||||
|
// Author: Ray Smith
|
||||||
|
// Created: Fri Aug 04 13:26:20 PST 2017
|
||||||
|
//
|
||||||
|
// (C) Copyright 2017, Google Inc.
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
///////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
#include "intsimdmatrixavx2.h"
|
||||||
|
|
||||||
|
#ifdef __AVX2__
|
||||||
|
#include <immintrin.h>
#include <stdint.h>
#include <algorithm>
#include <vector>
|
||||||
|
|
||||||
|
namespace tesseract {
|
||||||
|
|
||||||
|
// Sizing constants for the AVX2 implementation (256-bit registers).

// A register holds 8 x 32-bit integer outputs.
constexpr int kNumOutputsPerRegister = 8;
// Upper limit on the number of output registers used by one partial function.
constexpr int kMaxOutputRegisters = 8;
// A register holds 32 x 8-bit inputs.
constexpr int kNumInputsPerRegister = 32;
// Inputs per weight group; each group is broadcast across a whole register.
constexpr int kNumInputsPerGroup = 4;
// How many input groups fit in one input register.
constexpr int kNumInputGroups = kNumInputsPerRegister / kNumInputsPerGroup;
|
||||||
|
|
||||||
|
// Computes one set of 4x8 products of inputs and weights, adding to result.
|
||||||
|
// Horizontally adds 4 adjacent results, making 8x32-bit results.
|
||||||
|
// rep_input is assumed to be an 8x replicated set of 4x8-bit signed integers.
|
||||||
|
// Note that wi must previously have been re-organized with blocks of 4x8
|
||||||
|
// weights in contiguous memory.
|
||||||
|
// ones is a register of 16x16-bit values all equal to 1.
|
||||||
|
// Note: wi is incremented by the amount of data read.
|
||||||
|
// weights and reps are scratch registers.
|
||||||
|
// This function must be inlined with references in order for the compiler to
|
||||||
|
// correctly use the registers declared in the caller.
|
||||||
|
inline void MultiplyGroup(const __m256i& rep_input, const __m256i& ones,
|
||||||
|
const int8_t*& wi, __m256i& weights, __m256i& reps,
|
||||||
|
__m256i& result) {
|
||||||
|
// Load a 4x8 block of weights.
|
||||||
|
weights = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(wi));
|
||||||
|
wi += kNumInputsPerRegister;
|
||||||
|
// Normalize the signs on rep_input, weights, so weights is always +ve.
|
||||||
|
reps = _mm256_sign_epi8(rep_input, weights);
|
||||||
|
weights = _mm256_sign_epi8(weights, weights);
|
||||||
|
// Multiply 32x8-bit reps by 32x8-bit weights to make 16x16-bit results,
|
||||||
|
// with adjacent pairs added.
|
||||||
|
weights = _mm256_maddubs_epi16(weights, reps);
|
||||||
|
// Multiply 16x16-bit result by 16x16-bit ones to make 8x32-bit results,
|
||||||
|
// with adjacent pairs added. What we really want is a horizontal add of
|
||||||
|
// 16+16=32 bit result, but there is no such instruction, so multiply by
|
||||||
|
// 16-bit ones instead. It is probably faster than all the sign-extending,
|
||||||
|
// permuting and adding that would otherwise be required.
|
||||||
|
weights = _mm256_madd_epi16(weights, ones);
|
||||||
|
result = _mm256_add_epi32(result, weights);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Extracts and converts 8x32-bit results from result, adding the bias from wi
|
||||||
|
// and scaling by scales, before storing in *v. Note that wi, scales and v are
|
||||||
|
// expected to contain 8 consecutive elements or num_out if less.
|
||||||
|
inline void ExtractResults(__m256i& result, __m256i& shift_id,
|
||||||
|
const int8_t*& wi, const double*& scales,
|
||||||
|
int num_out, double*& v) {
|
||||||
|
for (int out = 0; out < num_out; ++out) {
|
||||||
|
int32_t res = _mm256_extract_epi32(result, 0);
|
||||||
|
*v++ = (static_cast<double>(res) / MAX_INT8 + *wi++) * *scales++;
|
||||||
|
// Rotate the results in int32_t units, so the next result is ready.
|
||||||
|
result = _mm256_permutevar8x32_epi32(result, shift_id);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Computes part of matrix.vector v = Wu. Computes N=64 results.
|
||||||
|
// The weights *must* be arranged so that consecutive reads from wi
|
||||||
|
// provides (num_in/kNumInputsPerGroup groups of (N output dim groups of
|
||||||
|
// (kNumInputsPerGroup inputs))). After that there must be N consecutive
|
||||||
|
// bias weights, before continuing with any more weights.
|
||||||
|
// u must be padded out with zeros to
|
||||||
|
// kNumInputsPerGroup*ceil(num_in/kNumInputsPerGroup) elements.
|
||||||
|
static void PartialMatrixDotVector64(const int8_t* wi, const double* scales,
|
||||||
|
const int8_t* u, int num_in, int num_out,
|
||||||
|
double* v) {
|
||||||
|
// Register containing 16-bit ones for horizontal add with 16->32 bit
|
||||||
|
// conversion.
|
||||||
|
__m256i ones =
|
||||||
|
_mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
|
||||||
|
__m256i shift_id = _mm256_set_epi32(0, 7, 6, 5, 4, 3, 2, 1);
|
||||||
|
// Initialize all the results to 0.
|
||||||
|
__m256i result0 = _mm256_setzero_si256();
|
||||||
|
__m256i result1 = _mm256_setzero_si256();
|
||||||
|
__m256i result2 = _mm256_setzero_si256();
|
||||||
|
__m256i result3 = _mm256_setzero_si256();
|
||||||
|
__m256i result4 = _mm256_setzero_si256();
|
||||||
|
__m256i result5 = _mm256_setzero_si256();
|
||||||
|
__m256i result6 = _mm256_setzero_si256();
|
||||||
|
__m256i result7 = _mm256_setzero_si256();
|
||||||
|
// Iterate over the input (u), one registerful at a time.
|
||||||
|
for (int j = 0; j < num_in;) {
|
||||||
|
__m256i inputs =
|
||||||
|
_mm256_loadu_si256(reinterpret_cast<const __m256i*>(u + j));
|
||||||
|
// Inputs are processed in groups of kNumInputsPerGroup, replicated
|
||||||
|
// kNumInputGroups times.
|
||||||
|
for (int ig = 0; ig < kNumInputGroups && j < num_in;
|
||||||
|
++ig, j += kNumInputsPerGroup) {
|
||||||
|
// Replicate the low 32 bits (4 inputs) 8 times.
|
||||||
|
__m256i rep_input =
|
||||||
|
_mm256_broadcastd_epi32(_mm256_castsi256_si128(inputs));
|
||||||
|
// Rotate the inputs in groups of 4, so the next 4 inputs are ready.
|
||||||
|
inputs = _mm256_permutevar8x32_epi32(inputs, shift_id);
|
||||||
|
__m256i weights, reps;
|
||||||
|
// Mul-add, with horizontal add of the 4 inputs to each of the results.
|
||||||
|
MultiplyGroup(rep_input, ones, wi, weights, reps, result0);
|
||||||
|
MultiplyGroup(rep_input, ones, wi, weights, reps, result1);
|
||||||
|
MultiplyGroup(rep_input, ones, wi, weights, reps, result2);
|
||||||
|
MultiplyGroup(rep_input, ones, wi, weights, reps, result3);
|
||||||
|
MultiplyGroup(rep_input, ones, wi, weights, reps, result4);
|
||||||
|
MultiplyGroup(rep_input, ones, wi, weights, reps, result5);
|
||||||
|
MultiplyGroup(rep_input, ones, wi, weights, reps, result6);
|
||||||
|
MultiplyGroup(rep_input, ones, wi, weights, reps, result7);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
ExtractResults(result0, shift_id, wi, scales, kNumOutputsPerRegister, v);
|
||||||
|
ExtractResults(result1, shift_id, wi, scales, kNumOutputsPerRegister, v);
|
||||||
|
ExtractResults(result2, shift_id, wi, scales, kNumOutputsPerRegister, v);
|
||||||
|
ExtractResults(result3, shift_id, wi, scales, kNumOutputsPerRegister, v);
|
||||||
|
ExtractResults(result4, shift_id, wi, scales, kNumOutputsPerRegister, v);
|
||||||
|
ExtractResults(result5, shift_id, wi, scales, kNumOutputsPerRegister, v);
|
||||||
|
ExtractResults(result6, shift_id, wi, scales, kNumOutputsPerRegister, v);
|
||||||
|
num_out -= kNumOutputsPerRegister * 7;
|
||||||
|
ExtractResults(result7, shift_id, wi, scales,
|
||||||
|
std::min(kNumOutputsPerRegister, num_out), v);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Computes part of matrix.vector v = Wu. Computes N=32 results.
|
||||||
|
// For details see PartialMatrixDotVector64 with N=32.
|
||||||
|
static void PartialMatrixDotVector32(const int8_t* wi, const double* scales,
|
||||||
|
const int8_t* u, int num_in, int num_out,
|
||||||
|
double* v) {
|
||||||
|
// Register containing 16-bit ones for horizontal add with 16->32 bit
|
||||||
|
// conversion.
|
||||||
|
__m256i ones =
|
||||||
|
_mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
|
||||||
|
__m256i shift_id = _mm256_set_epi32(0, 7, 6, 5, 4, 3, 2, 1);
|
||||||
|
// Initialize all the results to 0.
|
||||||
|
__m256i result0 = _mm256_setzero_si256();
|
||||||
|
__m256i result1 = _mm256_setzero_si256();
|
||||||
|
__m256i result2 = _mm256_setzero_si256();
|
||||||
|
__m256i result3 = _mm256_setzero_si256();
|
||||||
|
// Iterate over the input (u), one registerful at a time.
|
||||||
|
for (int j = 0; j < num_in;) {
|
||||||
|
__m256i inputs =
|
||||||
|
_mm256_loadu_si256(reinterpret_cast<const __m256i*>(u + j));
|
||||||
|
// Inputs are processed in groups of kNumInputsPerGroup, replicated
|
||||||
|
// kNumInputGroups times.
|
||||||
|
for (int ig = 0; ig < kNumInputGroups && j < num_in;
|
||||||
|
++ig, j += kNumInputsPerGroup) {
|
||||||
|
// Replicate the low 32 bits (4 inputs) 8 times.
|
||||||
|
__m256i rep_input =
|
||||||
|
_mm256_broadcastd_epi32(_mm256_castsi256_si128(inputs));
|
||||||
|
// Rotate the inputs in groups of 4, so the next 4 inputs are ready.
|
||||||
|
inputs = _mm256_permutevar8x32_epi32(inputs, shift_id);
|
||||||
|
__m256i weights, reps;
|
||||||
|
// Mul-add, with horizontal add of the 4 inputs to each of the results.
|
||||||
|
MultiplyGroup(rep_input, ones, wi, weights, reps, result0);
|
||||||
|
MultiplyGroup(rep_input, ones, wi, weights, reps, result1);
|
||||||
|
MultiplyGroup(rep_input, ones, wi, weights, reps, result2);
|
||||||
|
MultiplyGroup(rep_input, ones, wi, weights, reps, result3);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
ExtractResults(result0, shift_id, wi, scales, kNumOutputsPerRegister, v);
|
||||||
|
ExtractResults(result1, shift_id, wi, scales, kNumOutputsPerRegister, v);
|
||||||
|
ExtractResults(result2, shift_id, wi, scales, kNumOutputsPerRegister, v);
|
||||||
|
num_out -= kNumOutputsPerRegister * 3;
|
||||||
|
ExtractResults(result3, shift_id, wi, scales,
|
||||||
|
std::min(kNumOutputsPerRegister, num_out), v);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Computes part of matrix.vector v = Wu. Computes N=16 results.
|
||||||
|
// For details see PartialMatrixDotVector64 with N=16.
|
||||||
|
static void PartialMatrixDotVector16(const int8_t* wi, const double* scales,
|
||||||
|
const int8_t* u, int num_in, int num_out,
|
||||||
|
double* v) {
|
||||||
|
// Register containing 16-bit ones for horizontal add with 16->32 bit
|
||||||
|
// conversion.
|
||||||
|
__m256i ones =
|
||||||
|
_mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
|
||||||
|
__m256i shift_id = _mm256_set_epi32(0, 7, 6, 5, 4, 3, 2, 1);
|
||||||
|
// Initialize all the results to 0.
|
||||||
|
__m256i result0 = _mm256_setzero_si256();
|
||||||
|
__m256i result1 = _mm256_setzero_si256();
|
||||||
|
// Iterate over the input (u), one registerful at a time.
|
||||||
|
for (int j = 0; j < num_in;) {
|
||||||
|
__m256i inputs =
|
||||||
|
_mm256_loadu_si256(reinterpret_cast<const __m256i*>(u + j));
|
||||||
|
// Inputs are processed in groups of kNumInputsPerGroup, replicated
|
||||||
|
// kNumInputGroups times.
|
||||||
|
for (int ig = 0; ig < kNumInputGroups && j < num_in;
|
||||||
|
++ig, j += kNumInputsPerGroup) {
|
||||||
|
// Replicate the low 32 bits (4 inputs) 8 times.
|
||||||
|
__m256i rep_input =
|
||||||
|
_mm256_broadcastd_epi32(_mm256_castsi256_si128(inputs));
|
||||||
|
// Rotate the inputs in groups of 4, so the next 4 inputs are ready.
|
||||||
|
inputs = _mm256_permutevar8x32_epi32(inputs, shift_id);
|
||||||
|
__m256i weights, reps;
|
||||||
|
// Mul-add, with horizontal add of the 4 inputs to each of the results.
|
||||||
|
MultiplyGroup(rep_input, ones, wi, weights, reps, result0);
|
||||||
|
MultiplyGroup(rep_input, ones, wi, weights, reps, result1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
ExtractResults(result0, shift_id, wi, scales, kNumOutputsPerRegister, v);
|
||||||
|
num_out -= kNumOutputsPerRegister;
|
||||||
|
ExtractResults(result1, shift_id, wi, scales,
|
||||||
|
std::min(kNumOutputsPerRegister, num_out), v);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Computes part of matrix.vector v = Wu. Computes N=8 results.
|
||||||
|
// For details see PartialMatrixDotVector64 with N=8.
|
||||||
|
static void PartialMatrixDotVector8(const int8_t* wi, const double* scales,
|
||||||
|
const int8_t* u, int num_in, int num_out,
|
||||||
|
double* v) {
|
||||||
|
// Register containing 16-bit ones for horizontal add with 16->32 bit
|
||||||
|
// conversion.
|
||||||
|
__m256i ones =
|
||||||
|
_mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
|
||||||
|
__m256i shift_id = _mm256_set_epi32(0, 7, 6, 5, 4, 3, 2, 1);
|
||||||
|
// Initialize all the results to 0.
|
||||||
|
__m256i result0 = _mm256_setzero_si256();
|
||||||
|
// Iterate over the input (u), one registerful at a time.
|
||||||
|
for (int j = 0; j < num_in;) {
|
||||||
|
__m256i inputs =
|
||||||
|
_mm256_loadu_si256(reinterpret_cast<const __m256i*>(u + j));
|
||||||
|
// Inputs are processed in groups of kNumInputsPerGroup, replicated
|
||||||
|
// kNumInputGroups times.
|
||||||
|
for (int ig = 0; ig < kNumInputGroups && j < num_in;
|
||||||
|
++ig, j += kNumInputsPerGroup) {
|
||||||
|
// Replicate the low 32 bits (4 inputs) 8 times.
|
||||||
|
__m256i rep_input =
|
||||||
|
_mm256_broadcastd_epi32(_mm256_castsi256_si128(inputs));
|
||||||
|
// Rotate the inputs in groups of 4, so the next 4 inputs are ready.
|
||||||
|
inputs = _mm256_permutevar8x32_epi32(inputs, shift_id);
|
||||||
|
__m256i weights, reps;
|
||||||
|
// Mul-add, with horizontal add of the 4 inputs to each of the results.
|
||||||
|
MultiplyGroup(rep_input, ones, wi, weights, reps, result0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
ExtractResults(result0, shift_id, wi, scales, num_out, v);
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
namespace tesseract {
|
||||||
|
#endif // __AVX2__
|
||||||
|
|
||||||
|
// Sets up the AVX2 multiplier. When the file is not compiled with __AVX2__
// the body is empty and no member is assigned here.
IntSimdMatrixAVX2::IntSimdMatrixAVX2() {
#if defined(__AVX2__)
  // Partial kernels, listed from the widest (64 results) to the narrowest (8).
  partial_funcs_ = {PartialMatrixDotVector64, PartialMatrixDotVector32,
                    PartialMatrixDotVector16, PartialMatrixDotVector8};
  // Input-side geometry.
  num_inputs_per_register_ = kNumInputsPerRegister;
  num_inputs_per_group_ = kNumInputsPerGroup;
  num_input_groups_ = kNumInputGroups;
  // Output-side geometry.
  num_outputs_per_register_ = kNumOutputsPerRegister;
  max_output_registers_ = kMaxOutputRegisters;
#endif  // __AVX2__
}
|
||||||
|
|
||||||
|
} // namespace tesseract.
|
33
arch/intsimdmatrixavx2.h
Normal file
33
arch/intsimdmatrixavx2.h
Normal file
@ -0,0 +1,33 @@
|
|||||||
|
///////////////////////////////////////////////////////////////////////
|
||||||
|
// File: intsimdmatrixavx2.h
|
||||||
|
// Description: AVX2 implementation of 8-bit int SIMD matrix multiply.
|
||||||
|
// Author: Ray Smith
|
||||||
|
// Created: Wed Aug 16 10:21:42 PST 2017
|
||||||
|
//
|
||||||
|
// (C) Copyright 2017, Google Inc.
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
///////////////////////////////////////////////////////////////////////
|
||||||
|
#ifndef TESSERACT_ARCH_INTSIMDMATRIXAVX2_H_
|
||||||
|
#define TESSERACT_ARCH_INTSIMDMATRIXAVX2_H_
|
||||||
|
|
||||||
|
#include "intsimdmatrix.h"
|
||||||
|
|
||||||
|
namespace tesseract {
|
||||||
|
|
||||||
|
// AVX2 implementation of IntSimdMatrix.
|
||||||
|
// IntSimdMatrix specialization for AVX2. It declares nothing beyond its
// default constructor; everything else comes from the IntSimdMatrix base.
class IntSimdMatrixAVX2 : public IntSimdMatrix {
|
||||||
|
public:
|
||||||
|
// Defined out of line. The definition assigns the register geometry and the
// partial-kernel table only when compiled with __AVX2__; otherwise its body
// is empty.
IntSimdMatrixAVX2();
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace tesseract
|
||||||
|
|
||||||
|
#endif // TESSERACT_ARCH_INTSIMDMATRIXAVX2_H_
|
44
arch/intsimdmatrixsse.cpp
Normal file
44
arch/intsimdmatrixsse.cpp
Normal file
@ -0,0 +1,44 @@
|
|||||||
|
///////////////////////////////////////////////////////////////////////
|
||||||
|
// File: intsimdmatrixsse.cpp
|
||||||
|
// Description: SSE implementation of 8-bit int SIMD matrix multiply.
|
||||||
|
// Author: Ray Smith
|
||||||
|
// Created: Tue Aug 23 13:58:49 PST 2017
|
||||||
|
//
|
||||||
|
// (C) Copyright 2017, Google Inc.
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
///////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
#include "intsimdmatrixsse.h"
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <vector>
|
||||||
|
#include "dotproductsse.h"
|
||||||
|
|
||||||
|
namespace tesseract {
|
||||||
|
|
||||||
|
#ifdef __SSE4_1__
|
||||||
|
// Computes part of matrix.vector v = Wu. Computes 1 result.
|
||||||
|
static void PartialMatrixDotVector1(const int8_t* wi, const double* scales,
|
||||||
|
const int8_t* u, int num_in, int num_out,
|
||||||
|
double* v) {
|
||||||
|
int total = IntDotProductSSE(u, wi, num_in);
|
||||||
|
// Add in the bias and correct for integer values.
|
||||||
|
*v = (static_cast<double>(total) / MAX_INT8 + wi[num_in]) * *scales;
|
||||||
|
}
|
||||||
|
#endif // __SSE4_1__
|
||||||
|
|
||||||
|
// Sets up the SSE multiplier. Only the kernel table is assigned, and only
// when the file is compiled with __SSE4_1__; otherwise the body is empty.
IntSimdMatrixSSE::IntSimdMatrixSSE() {
#if defined(__SSE4_1__)
  // A single kernel that produces one result per call.
  partial_funcs_ = {PartialMatrixDotVector1};
#endif  // __SSE4_1__
}
|
||||||
|
|
||||||
|
} // namespace tesseract.
|
33
arch/intsimdmatrixsse.h
Normal file
33
arch/intsimdmatrixsse.h
Normal file
@ -0,0 +1,33 @@
|
|||||||
|
///////////////////////////////////////////////////////////////////////
|
||||||
|
// File: intsimdmatrixsse.h
|
||||||
|
// Description: SSE implementation of 8-bit int SIMD matrix multiply.
|
||||||
|
// Author: Ray Smith
|
||||||
|
// Created: Tue Aug 23 13:58:21 PST 2017
|
||||||
|
//
|
||||||
|
// (C) Copyright 2017, Google Inc.
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
///////////////////////////////////////////////////////////////////////
|
||||||
|
#ifndef TESSERACT_ARCH_INTSIMDMATRIXSSE_H_
|
||||||
|
#define TESSERACT_ARCH_INTSIMDMATRIXSSE_H_
|
||||||
|
|
||||||
|
#include "intsimdmatrix.h"
|
||||||
|
|
||||||
|
namespace tesseract {
|
||||||
|
|
||||||
|
// SSE implementation of IntSimdMatrix.
|
||||||
|
// IntSimdMatrix specialization for SSE. It declares nothing beyond its
// default constructor; everything else comes from the IntSimdMatrix base.
class IntSimdMatrixSSE : public IntSimdMatrix {
|
||||||
|
public:
|
||||||
|
// Defined out of line. The definition installs the single-result kernel only
// when compiled with __SSE4_1__; otherwise its body is empty.
IntSimdMatrixSSE();
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace tesseract
|
||||||
|
|
||||||
|
#endif // TESSERACT_ARCH_INTSIMDMATRIXSSE_H_
|
@ -81,10 +81,12 @@ class GENERIC_2D_ARRAY {
|
|||||||
memcpy(array_, src.array_, num_elements() * sizeof(array_[0]));
|
memcpy(array_, src.array_, num_elements() * sizeof(array_[0]));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Reallocate the array to the given size. Does not keep old data, but does
|
// Reallocates the array to the given size. Does not keep old data, but does
|
||||||
// not initialize the array either.
|
// not initialize the array either.
|
||||||
void ResizeNoInit(int size1, int size2) {
|
// The allocated memory is expanded on the end by pad, allowing deliberate
|
||||||
int new_size = size1 * size2;
|
// access beyond the bounds of the array.
|
||||||
|
void ResizeNoInit(int size1, int size2, int pad = 0) {
|
||||||
|
int new_size = size1 * size2 + pad;
|
||||||
if (new_size > size_allocated_) {
|
if (new_size > size_allocated_) {
|
||||||
delete [] array_;
|
delete [] array_;
|
||||||
array_ = new T[new_size];
|
array_ = new T[new_size];
|
||||||
@ -92,6 +94,8 @@ class GENERIC_2D_ARRAY {
|
|||||||
}
|
}
|
||||||
dim1_ = size1;
|
dim1_ = size1;
|
||||||
dim2_ = size2;
|
dim2_ = size2;
|
||||||
|
// Fill the padding data so it isn't uninitialized.
|
||||||
|
for (int i = size1 * size2; i < new_size; ++i) array_[i] = empty_;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Reallocate the array to the given size. Does not keep old data.
|
// Reallocate the array to the given size. Does not keep old data.
|
||||||
|
@ -117,6 +117,7 @@ esac
|
|||||||
|
|
||||||
## Checks for supported compiler options.
|
## Checks for supported compiler options.
|
||||||
AM_CONDITIONAL([AVX_OPT], false)
|
AM_CONDITIONAL([AVX_OPT], false)
|
||||||
|
AM_CONDITIONAL([AVX2_OPT], false)
|
||||||
AM_CONDITIONAL([SSE41_OPT], false)
|
AM_CONDITIONAL([SSE41_OPT], false)
|
||||||
|
|
||||||
AX_CHECK_COMPILE_FLAG([-mavx], [avx=true], [avx=false])
|
AX_CHECK_COMPILE_FLAG([-mavx], [avx=true], [avx=false])
|
||||||
@ -124,6 +125,11 @@ if $avx; then
|
|||||||
AM_CONDITIONAL([AVX_OPT], true)
|
AM_CONDITIONAL([AVX_OPT], true)
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
AX_CHECK_COMPILE_FLAG([-mavx2], [avx2=true], [avx2=false])
|
||||||
|
if $avx2; then
|
||||||
|
AM_CONDITIONAL([AVX2_OPT], true)
|
||||||
|
fi
|
||||||
|
|
||||||
AX_CHECK_COMPILE_FLAG([-msse4.1], [sse41=true], [sse41=false])
|
AX_CHECK_COMPILE_FLAG([-msse4.1], [sse41=true], [sse41=false])
|
||||||
if $sse41; then
|
if $sse41; then
|
||||||
AM_CONDITIONAL([SSE41_OPT], true)
|
AM_CONDITIONAL([SSE41_OPT], true)
|
||||||
|
@ -260,7 +260,9 @@ void LSTM::Forward(bool debug, const NetworkIO& input,
|
|||||||
if (softmax_ != NULL) {
|
if (softmax_ != NULL) {
|
||||||
softmax_output.Init(no_, scratch);
|
softmax_output.Init(no_, scratch);
|
||||||
ZeroVector<double>(no_, softmax_output);
|
ZeroVector<double>(no_, softmax_output);
|
||||||
if (input.int_mode()) int_output.Resize2d(true, 1, ns_, scratch);
|
int rounded_softmax_inputs = gate_weights_[CI].RoundInputs(ns_);
|
||||||
|
if (input.int_mode())
|
||||||
|
int_output.Resize2d(true, 1, rounded_softmax_inputs, scratch);
|
||||||
softmax_->SetupForward(input, NULL);
|
softmax_->SetupForward(input, NULL);
|
||||||
}
|
}
|
||||||
NetworkScratch::FloatVec curr_input;
|
NetworkScratch::FloatVec curr_input;
|
||||||
@ -364,7 +366,7 @@ void LSTM::Forward(bool debug, const NetworkIO& input,
|
|||||||
if (IsTraining()) state_.WriteTimeStep(t, curr_state);
|
if (IsTraining()) state_.WriteTimeStep(t, curr_state);
|
||||||
if (softmax_ != NULL) {
|
if (softmax_ != NULL) {
|
||||||
if (input.int_mode()) {
|
if (input.int_mode()) {
|
||||||
int_output->WriteTimeStep(0, curr_output);
|
int_output->WriteTimeStepPart(0, 0, ns_, curr_output);
|
||||||
softmax_->ForwardTimeStep(NULL, int_output->i(0), t, softmax_output);
|
softmax_->ForwardTimeStep(NULL, int_output->i(0), t, softmax_output);
|
||||||
} else {
|
} else {
|
||||||
softmax_->ForwardTimeStep(curr_output, NULL, t, softmax_output);
|
softmax_->ForwardTimeStep(curr_output, NULL, t, softmax_output);
|
||||||
@ -720,7 +722,8 @@ void LSTM::PrintDW() {
|
|||||||
|
|
||||||
// Resizes forward data to cope with an input image of the given width.
|
// Resizes forward data to cope with an input image of the given width.
|
||||||
void LSTM::ResizeForward(const NetworkIO& input) {
|
void LSTM::ResizeForward(const NetworkIO& input) {
|
||||||
source_.Resize(input, na_);
|
int rounded_inputs = gate_weights_[CI].RoundInputs(na_);
|
||||||
|
source_.Resize(input, rounded_inputs);
|
||||||
which_fg_.ResizeNoInit(input.Width(), ns_);
|
which_fg_.ResizeNoInit(input.Width(), ns_);
|
||||||
if (IsTraining()) {
|
if (IsTraining()) {
|
||||||
state_.ResizeFloat(input, ns_);
|
state_.ResizeFloat(input, ns_);
|
||||||
|
@ -30,12 +30,17 @@ const float kMinCertainty = -20.0f;
|
|||||||
// Probability corresponding to kMinCertainty.
|
// Probability corresponding to kMinCertainty.
|
||||||
const float kMinProb = exp(kMinCertainty);
|
const float kMinProb = exp(kMinCertainty);
|
||||||
|
|
||||||
|
// Holds the optimal integer multiplier for this machine.
|
||||||
|
// This is a leaked, lazily initialized singleton, and is used for computing
|
||||||
|
// padding to apply to i_ for SIMD use.
|
||||||
|
IntSimdMatrix* NetworkIO::multiplier_ = nullptr;
|
||||||
|
|
||||||
// Resizes to a specific size as a 2-d temp buffer. No batches, no y-dim.
|
// Resizes to a specific size as a 2-d temp buffer. No batches, no y-dim.
|
||||||
void NetworkIO::Resize2d(bool int_mode, int width, int num_features) {
|
void NetworkIO::Resize2d(bool int_mode, int width, int num_features) {
|
||||||
stride_map_ = StrideMap();
|
stride_map_ = StrideMap();
|
||||||
int_mode_ = int_mode;
|
int_mode_ = int_mode;
|
||||||
if (int_mode_) {
|
if (int_mode_) {
|
||||||
i_.ResizeNoInit(width, num_features);
|
i_.ResizeNoInit(width, num_features, GetPadding(num_features));
|
||||||
} else {
|
} else {
|
||||||
f_.ResizeNoInit(width, num_features);
|
f_.ResizeNoInit(width, num_features);
|
||||||
}
|
}
|
||||||
@ -51,7 +56,7 @@ void NetworkIO::ResizeToMap(bool int_mode, const StrideMap& stride_map,
|
|||||||
stride_map_ = stride_map;
|
stride_map_ = stride_map;
|
||||||
int_mode_ = int_mode;
|
int_mode_ = int_mode;
|
||||||
if (int_mode_) {
|
if (int_mode_) {
|
||||||
i_.ResizeNoInit(stride_map.Width(), num_features);
|
i_.ResizeNoInit(stride_map.Width(), num_features, GetPadding(num_features));
|
||||||
} else {
|
} else {
|
||||||
f_.ResizeNoInit(stride_map.Width(), num_features);
|
f_.ResizeNoInit(stride_map.Width(), num_features);
|
||||||
}
|
}
|
||||||
@ -976,4 +981,17 @@ void NetworkIO::ClipVector(int t, float range) {
|
|||||||
v[i] = ClipToRange(v[i], -range, range);
|
v[i] = ClipToRange(v[i], -range, range);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Returns the padding required for the given number of features in order
|
||||||
|
// for the SIMD operations to be safe.
|
||||||
|
/* static */
|
||||||
|
int NetworkIO::GetPadding(int num_features) {
|
||||||
|
if (multiplier_ == nullptr)
|
||||||
|
multiplier_ = IntSimdMatrix::GetFastestMultiplier();
|
||||||
|
int pad = 0;
|
||||||
|
if (multiplier_ != nullptr) {
|
||||||
|
pad = multiplier_->RoundInputs(num_features) - num_features;
|
||||||
|
}
|
||||||
|
return pad;
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace tesseract.
|
} // namespace tesseract.
|
||||||
|
@ -327,6 +327,10 @@ class NetworkIO {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
// Returns the padding required for the given number of features in order
|
||||||
|
// for the SIMD operations to be safe.
|
||||||
|
static int GetPadding(int num_features);
|
||||||
|
|
||||||
// Choice of float vs 8 bit int for data.
|
// Choice of float vs 8 bit int for data.
|
||||||
GENERIC_2D_ARRAY<float> f_;
|
GENERIC_2D_ARRAY<float> f_;
|
||||||
GENERIC_2D_ARRAY<inT8> i_;
|
GENERIC_2D_ARRAY<inT8> i_;
|
||||||
@ -334,6 +338,10 @@ class NetworkIO {
|
|||||||
bool int_mode_;
|
bool int_mode_;
|
||||||
// Stride for 2d input data.
|
// Stride for 2d input data.
|
||||||
StrideMap stride_map_;
|
StrideMap stride_map_;
|
||||||
|
// Holds the optimal integer multiplier for this machine.
|
||||||
|
// This is a leaked, lazily initialized singleton, and is used for computing
|
||||||
|
// padding to apply to i_ for SIMD use.
|
||||||
|
static IntSimdMatrix* multiplier_;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace tesseract.
|
} // namespace tesseract.
|
||||||
|
@ -20,6 +20,7 @@
|
|||||||
|
|
||||||
#include "dotproductavx.h"
|
#include "dotproductavx.h"
|
||||||
#include "dotproductsse.h"
|
#include "dotproductsse.h"
|
||||||
|
#include "intsimdmatrix.h"
|
||||||
#include "simddetect.h"
|
#include "simddetect.h"
|
||||||
#include "statistc.h"
|
#include "statistc.h"
|
||||||
#include "tprintf.h"
|
#include "tprintf.h"
|
||||||
@ -61,10 +62,7 @@ int WeightMatrix::InitWeightsFloat(int no, int ni, bool use_adam,
|
|||||||
// the old weight matrix entries for each output from code_map[output] where
|
// the old weight matrix entries for each output from code_map[output] where
|
||||||
// non-negative, and uses the mean (over all outputs) of the existing weights
|
// non-negative, and uses the mean (over all outputs) of the existing weights
|
||||||
// for all outputs with negative code_map entries. Returns the new number of
|
// for all outputs with negative code_map entries. Returns the new number of
|
||||||
// weights. Can be used to change the character set addressed by an output
|
// weights.
|
||||||
// softmax.
|
|
||||||
// TODO(rays) A RemapInputs would also be useful, so a change can be made
|
|
||||||
// in the middle of a network.
|
|
||||||
int WeightMatrix::RemapOutputs(const std::vector<int>& code_map) {
|
int WeightMatrix::RemapOutputs(const std::vector<int>& code_map) {
|
||||||
GENERIC_2D_ARRAY<double> old_wf(wf_);
|
GENERIC_2D_ARRAY<double> old_wf(wf_);
|
||||||
int old_no = wf_.dim1();
|
int old_no = wf_.dim1();
|
||||||
@ -114,6 +112,8 @@ void WeightMatrix::ConvertToInt() {
|
|||||||
}
|
}
|
||||||
wf_.Resize(1, 1, 0.0);
|
wf_.Resize(1, 1, 0.0);
|
||||||
int_mode_ = true;
|
int_mode_ = true;
|
||||||
|
multiplier_.reset(IntSimdMatrix::GetFastestMultiplier());
|
||||||
|
if (multiplier_ != nullptr) multiplier_->Init(wi_);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Allocates any needed memory for running Backward, and zeroes the deltas,
|
// Allocates any needed memory for running Backward, and zeroes the deltas,
|
||||||
@ -165,6 +165,8 @@ bool WeightMatrix::DeSerialize(bool training, TFile* fp) {
|
|||||||
if (int_mode_) {
|
if (int_mode_) {
|
||||||
if (!wi_.DeSerialize(fp)) return false;
|
if (!wi_.DeSerialize(fp)) return false;
|
||||||
if (!scales_.DeSerialize(fp)) return false;
|
if (!scales_.DeSerialize(fp)) return false;
|
||||||
|
multiplier_.reset(IntSimdMatrix::GetFastestMultiplier());
|
||||||
|
if (multiplier_ != nullptr) multiplier_->Init(wi_);
|
||||||
} else {
|
} else {
|
||||||
if (!wf_.DeSerialize(fp)) return false;
|
if (!wf_.DeSerialize(fp)) return false;
|
||||||
if (training) {
|
if (training) {
|
||||||
@ -212,19 +214,8 @@ void WeightMatrix::MatrixDotVector(const double* u, double* v) const {
|
|||||||
|
|
||||||
void WeightMatrix::MatrixDotVector(const inT8* u, double* v) const {
|
void WeightMatrix::MatrixDotVector(const inT8* u, double* v) const {
|
||||||
ASSERT_HOST(int_mode_);
|
ASSERT_HOST(int_mode_);
|
||||||
int num_out = wi_.dim1();
|
ASSERT_HOST(multiplier_ != nullptr);
|
||||||
int num_in = wi_.dim2() - 1;
|
multiplier_->MatrixDotVector(wi_, scales_, u, v);
|
||||||
for (int i = 0; i < num_out; ++i) {
|
|
||||||
const inT8* Wi = wi_[i];
|
|
||||||
int total = 0;
|
|
||||||
if (SIMDDetect::IsSSEAvailable()) {
|
|
||||||
total = IntDotProductSSE(u, Wi, num_in);
|
|
||||||
} else {
|
|
||||||
for (int j = 0; j < num_in; ++j) total += Wi[j] * u[j];
|
|
||||||
}
|
|
||||||
// Add in the bias and correct for integer values.
|
|
||||||
v[i] = (static_cast<double>(total) / MAX_INT8 + Wi[num_in]) * scales_[i];
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// MatrixDotVector for peep weights, MultiplyAccumulate adds the
|
// MatrixDotVector for peep weights, MultiplyAccumulate adds the
|
||||||
|
@ -19,7 +19,9 @@
|
|||||||
#ifndef TESSERACT_LSTM_WEIGHTMATRIX_H_
|
#ifndef TESSERACT_LSTM_WEIGHTMATRIX_H_
|
||||||
#define TESSERACT_LSTM_WEIGHTMATRIX_H_
|
#define TESSERACT_LSTM_WEIGHTMATRIX_H_
|
||||||
|
|
||||||
|
#include <memory>
|
||||||
#include "genericvector.h"
|
#include "genericvector.h"
|
||||||
|
#include "intsimdmatrix.h"
|
||||||
#include "matrix.h"
|
#include "matrix.h"
|
||||||
#include "tprintf.h"
|
#include "tprintf.h"
|
||||||
|
|
||||||
@ -74,10 +76,7 @@ class WeightMatrix {
|
|||||||
// the old weight matrix entries for each output from code_map[output] where
|
// the old weight matrix entries for each output from code_map[output] where
|
||||||
// non-negative, and uses the mean (over all outputs) of the existing weights
|
// non-negative, and uses the mean (over all outputs) of the existing weights
|
||||||
// for all outputs with negative code_map entries. Returns the new number of
|
// for all outputs with negative code_map entries. Returns the new number of
|
||||||
// weights. Can be used to change the character set addressed by an output
|
// weights.
|
||||||
// softmax.
|
|
||||||
// TODO(rays) A RemapInputs would also be useful, so a change can be made
|
|
||||||
// in the middle of a network.
|
|
||||||
int RemapOutputs(const std::vector<int>& code_map);
|
int RemapOutputs(const std::vector<int>& code_map);
|
||||||
|
|
||||||
// Converts a float network to an int network. Each set of input weights that
|
// Converts a float network to an int network. Each set of input weights that
|
||||||
@ -88,6 +87,12 @@ class WeightMatrix {
|
|||||||
// Store a multiplicative scale factor (as a float) that will reproduce
|
// Store a multiplicative scale factor (as a float) that will reproduce
|
||||||
// the original value, subject to rounding errors.
|
// the original value, subject to rounding errors.
|
||||||
void ConvertToInt();
|
void ConvertToInt();
|
||||||
|
// Returns the size rounded up to an internal factor used by the SIMD
|
||||||
|
// implementation for its input.
|
||||||
|
int RoundInputs(int size) const {
|
||||||
|
if (multiplier_ == nullptr) return size;
|
||||||
|
return multiplier_->RoundInputs(size);
|
||||||
|
}
|
||||||
|
|
||||||
// Accessors.
|
// Accessors.
|
||||||
bool is_int_mode() const {
|
bool is_int_mode() const {
|
||||||
@ -184,6 +189,8 @@ class WeightMatrix {
|
|||||||
// Iff use_adam_, the sum of squares of dw_. The number of samples is
|
// Iff use_adam_, the sum of squares of dw_. The number of samples is
|
||||||
// given to Update(). Serialized iff use_adam_.
|
// given to Update(). Serialized iff use_adam_.
|
||||||
GENERIC_2D_ARRAY<double> dw_sq_sum_;
|
GENERIC_2D_ARRAY<double> dw_sq_sum_;
|
||||||
|
// Holds the optimal integer multiplier for this machine.
|
||||||
|
std::unique_ptr<IntSimdMatrix> multiplier_;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace tesseract.
|
} // namespace tesseract.
|
||||||
|
228
training/combine_lang_model
Executable file
228
training/combine_lang_model
Executable file
@ -0,0 +1,228 @@
|
|||||||
|
#! /bin/sh
|
||||||
|
|
||||||
|
# combine_lang_model - temporary wrapper script for .libs/combine_lang_model
|
||||||
|
# Generated by libtool (GNU libtool) 2.4.2 Debian-2.4.2-1.7ubuntu1
|
||||||
|
#
|
||||||
|
# The combine_lang_model program cannot be directly executed until all the libtool
|
||||||
|
# libraries that it depends on are installed.
|
||||||
|
#
|
||||||
|
# This wrapper script should never be moved out of the build directory.
|
||||||
|
# If it is, it will not operate correctly.
|
||||||
|
|
||||||
|
# Sed substitution that helps us do robust quoting. It backslashifies
|
||||||
|
# metacharacters that are still active within double-quoted strings.
|
||||||
|
sed_quote_subst='s/\([`"$\\]\)/\\\1/g'
|
||||||
|
|
||||||
|
# Be Bourne compatible
|
||||||
|
if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then
|
||||||
|
emulate sh
|
||||||
|
NULLCMD=:
|
||||||
|
# Zsh 3.x and 4.x performs word splitting on ${1+"$@"}, which
|
||||||
|
# is contrary to our usage. Disable this feature.
|
||||||
|
alias -g '${1+"$@"}'='"$@"'
|
||||||
|
setopt NO_GLOB_SUBST
|
||||||
|
else
|
||||||
|
case `(set -o) 2>/dev/null` in *posix*) set -o posix;; esac
|
||||||
|
fi
|
||||||
|
BIN_SH=xpg4; export BIN_SH # for Tru64
|
||||||
|
DUALCASE=1; export DUALCASE # for MKS sh
|
||||||
|
|
||||||
|
# The HP-UX ksh and POSIX shell print the target directory to stdout
|
||||||
|
# if CDPATH is set.
|
||||||
|
(unset CDPATH) >/dev/null 2>&1 && unset CDPATH
|
||||||
|
|
||||||
|
relink_command="(cd /usr/local/google/home/rays/opensrc/git/tesseract/training; { test -z \"\${LIBRARY_PATH+set}\" || unset LIBRARY_PATH || { LIBRARY_PATH=; export LIBRARY_PATH; }; }; { test -z \"\${COMPILER_PATH+set}\" || unset COMPILER_PATH || { COMPILER_PATH=; export COMPILER_PATH; }; }; { test -z \"\${GCC_EXEC_PREFIX+set}\" || unset GCC_EXEC_PREFIX || { GCC_EXEC_PREFIX=; export GCC_EXEC_PREFIX; }; }; { test -z \"\${LD_RUN_PATH+set}\" || unset LD_RUN_PATH || { LD_RUN_PATH=; export LD_RUN_PATH; }; }; { test -z \"\${LD_LIBRARY_PATH+set}\" || unset LD_LIBRARY_PATH || { LD_LIBRARY_PATH=; export LD_LIBRARY_PATH; }; }; PATH=/usr/local/google/home/rays/bin:/usr/lib/google-golang/bin:/usr/local/buildtools/java/jdk/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/local/google/home/rays/bin; export PATH; g++ -g -O2 -std=c++11 -o \$progdir/\$file combine_lang_model.o ./.libs/libtesseract_training.a ./.libs/libtesseract_tessopt.a -licui18n -licuuc -licudata ../api/.libs/libtesseract.so -lpthread -fopenmp -Wl,-rpath -Wl,/usr/local/google/home/rays/opensrc/git/tesseract/api/.libs)"
|
||||||
|
|
||||||
|
# This environment variable determines our operation mode.
|
||||||
|
if test "$libtool_install_magic" = "%%%MAGIC variable%%%"; then
|
||||||
|
# install mode needs the following variables:
|
||||||
|
generated_by_libtool_version='2.4.2'
|
||||||
|
notinst_deplibs=' ../api/libtesseract.la'
|
||||||
|
else
|
||||||
|
# When we are sourced in execute mode, $file and $ECHO are already set.
|
||||||
|
if test "$libtool_execute_magic" != "%%%MAGIC variable%%%"; then
|
||||||
|
file="$0"
|
||||||
|
|
||||||
|
# A function that is used when there is no print builtin or printf.
|
||||||
|
# Writes its first argument ($1) followed by a newline, by feeding a
# here-document to cat. The here-document lives inside an eval'd
# single-quoted string, so it is only parsed when the function runs.
func_fallback_echo ()
|
||||||
|
{
|
||||||
|
eval 'cat <<_LTECHO_EOF
|
||||||
|
$1
|
||||||
|
_LTECHO_EOF'
|
||||||
|
}
|
||||||
|
ECHO="printf %s\\n"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Very basic option parsing. These options are (a) specific to
|
||||||
|
# the libtool wrapper, (b) are identical between the wrapper
|
||||||
|
# /script/ and the wrapper /executable/ which is used only on
|
||||||
|
# windows platforms, and (c) all begin with the string --lt-
|
||||||
|
# (application programs are unlikely to have options which match
|
||||||
|
# this pattern).
|
||||||
|
#
|
||||||
|
# There are only two supported options: --lt-debug and
|
||||||
|
# --lt-dump-script. There is, deliberately, no --lt-help.
|
||||||
|
#
|
||||||
|
# The first argument to this parsing function should be the
|
||||||
|
# script's ../libtool value, followed by no.
|
||||||
|
lt_option_debug=
|
||||||
|
# Scans the wrapper's arguments for --lt-* options.
#   --lt-debug        sets lt_option_debug=1
#   --lt-dump-script  prints this wrapper script with cat and exits 0
#   any other --lt-*  prints an error on stderr and exits 1
# Arguments that do not start with --lt- are left alone.
func_parse_lt_options ()
|
||||||
|
{
|
||||||
|
# Remember the script name, then drop the first argument so the loop below
# only sees the remaining ones.
lt_script_arg0=$0
|
||||||
|
shift
|
||||||
|
for lt_opt
|
||||||
|
do
|
||||||
|
case "$lt_opt" in
|
||||||
|
--lt-debug) lt_option_debug=1 ;;
|
||||||
|
--lt-dump-script)
|
||||||
|
# Split the script path into directory (lt_dump_D) and file (lt_dump_F);
# a path without a directory part falls back to ".".
lt_dump_D=`$ECHO "X$lt_script_arg0" | /bin/sed -e 's/^X//' -e 's%/[^/]*$%%'`
|
||||||
|
test "X$lt_dump_D" = "X$lt_script_arg0" && lt_dump_D=.
|
||||||
|
lt_dump_F=`$ECHO "X$lt_script_arg0" | /bin/sed -e 's/^X//' -e 's%^.*/%%'`
|
||||||
|
cat "$lt_dump_D/$lt_dump_F"
|
||||||
|
exit 0
|
||||||
|
;;
|
||||||
|
--lt-*)
|
||||||
|
$ECHO "Unrecognized --lt- option: '$lt_opt'" 1>&2
|
||||||
|
exit 1
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
done
|
||||||
|
|
||||||
|
# Print the debug banner immediately:
|
||||||
|
if test -n "$lt_option_debug"; then
|
||||||
|
echo "combine_lang_model:combine_lang_model:${LINENO}: libtool wrapper (GNU libtool) 2.4.2 Debian-2.4.2-1.7ubuntu1" 1>&2
|
||||||
|
fi
|
||||||
|
}
|
||||||
|
|
||||||
|
# Used when --lt-debug. Prints its arguments to stdout
|
||||||
|
# (redirection is the responsibility of the caller)
|
||||||
|
# Prints each argument on its own line as "newargv[N]: value", with N
# counting up from 1. Output goes to stdout; the caller redirects it.
func_lt_dump_args ()
|
||||||
|
{
|
||||||
|
lt_dump_args_N=1;
|
||||||
|
for lt_arg
|
||||||
|
do
|
||||||
|
$ECHO "combine_lang_model:combine_lang_model:${LINENO}: newargv[$lt_dump_args_N]: $lt_arg"
|
||||||
|
# Advance the counter with expr.
lt_dump_args_N=`expr $lt_dump_args_N + 1`
|
||||||
|
done
|
||||||
|
}
|
||||||
|
|
||||||
|
# Core function for launching the target application
|
||||||
|
# Replaces the wrapper process with "$progdir/$program", passing on the
# arguments it was given. When lt_option_debug is set, the program path and
# the arguments are first written to stderr.
func_exec_program_core ()
|
||||||
|
{
|
||||||
|
|
||||||
|
if test -n "$lt_option_debug"; then
|
||||||
|
$ECHO "combine_lang_model:combine_lang_model:${LINENO}: newargv[0]: $progdir/$program" 1>&2
|
||||||
|
func_lt_dump_args ${1+"$@"} 1>&2
|
||||||
|
fi
|
||||||
|
exec "$progdir/$program" ${1+"$@"}
|
||||||
|
|
||||||
|
# Only reached if the exec above failed.
$ECHO "$0: cannot exec $program $*" 1>&2
|
||||||
|
exit 1
|
||||||
|
}
|
||||||
|
|
||||||
|
# A function to encapsulate launching the target application
|
||||||
|
# Strips options in the --lt-* namespace from $@ and
|
||||||
|
# launches target application with the remaining arguments.
|
||||||
|
# Removes every --lt-* option from the argument list, then calls
# func_exec_program_core with what is left.
func_exec_program ()
|
||||||
|
{
|
||||||
|
# The filtering loop runs only when some argument starts with --lt-.
case " $* " in
|
||||||
|
*\ --lt-*)
|
||||||
|
for lt_wr_arg
|
||||||
|
do
|
||||||
|
case $lt_wr_arg in
|
||||||
|
--lt-*) ;;
|
||||||
|
# Keep this argument: append it to the end of the positional parameters.
# The x placeholder is removed again by the shift on the same line.
*) set x "$@" "$lt_wr_arg"; shift;;
|
||||||
|
esac
|
||||||
|
# Drop the original copy of the argument just examined from the front.
shift
|
||||||
|
done ;;
|
||||||
|
esac
|
||||||
|
func_exec_program_core ${1+"$@"}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Parse options
|
||||||
|
func_parse_lt_options "$0" ${1+"$@"}
|
||||||
|
|
||||||
|
# Find the directory that this script lives in.
|
||||||
|
thisdir=`$ECHO "$file" | /bin/sed 's%/[^/]*$%%'`
|
||||||
|
test "x$thisdir" = "x$file" && thisdir=.
|
||||||
|
|
||||||
|
# Follow symbolic links until we get to the real thisdir.
|
||||||
|
file=`ls -ld "$file" | /bin/sed -n 's/.*-> //p'`
|
||||||
|
while test -n "$file"; do
|
||||||
|
destdir=`$ECHO "$file" | /bin/sed 's%/[^/]*$%%'`
|
||||||
|
|
||||||
|
# If there was a directory component, then change thisdir.
|
||||||
|
if test "x$destdir" != "x$file"; then
|
||||||
|
case "$destdir" in
|
||||||
|
[\\/]* | [A-Za-z]:[\\/]*) thisdir="$destdir" ;;
|
||||||
|
*) thisdir="$thisdir/$destdir" ;;
|
||||||
|
esac
|
||||||
|
fi
|
||||||
|
|
||||||
|
file=`$ECHO "$file" | /bin/sed 's%^.*/%%'`
|
||||||
|
file=`ls -ld "$thisdir/$file" | /bin/sed -n 's/.*-> //p'`
|
||||||
|
done
|
||||||
|
|
||||||
|
# Usually 'no', except on cygwin/mingw when embedded into
|
||||||
|
# the cwrapper.
|
||||||
|
WRAPPER_SCRIPT_BELONGS_IN_OBJDIR=no
|
||||||
|
if test "$WRAPPER_SCRIPT_BELONGS_IN_OBJDIR" = "yes"; then
|
||||||
|
# special case for '.'
|
||||||
|
if test "$thisdir" = "."; then
|
||||||
|
thisdir=`pwd`
|
||||||
|
fi
|
||||||
|
# remove .libs from thisdir
|
||||||
|
case "$thisdir" in
|
||||||
|
*[\\/].libs ) thisdir=`$ECHO "$thisdir" | /bin/sed 's%[\\/][^\\/]*$%%'` ;;
|
||||||
|
.libs ) thisdir=. ;;
|
||||||
|
esac
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Try to get the absolute directory name.
|
||||||
|
absdir=`cd "$thisdir" && pwd`
|
||||||
|
test -n "$absdir" && thisdir="$absdir"
|
||||||
|
|
||||||
|
program=lt-'combine_lang_model'
|
||||||
|
progdir="$thisdir/.libs"
|
||||||
|
|
||||||
|
if test ! -f "$progdir/$program" ||
|
||||||
|
{ file=`ls -1dt "$progdir/$program" "$progdir/../$program" 2>/dev/null | /bin/sed 1q`; \
|
||||||
|
test "X$file" != "X$progdir/$program"; }; then
|
||||||
|
|
||||||
|
file="$$-$program"
|
||||||
|
|
||||||
|
if test ! -d "$progdir"; then
|
||||||
|
mkdir "$progdir"
|
||||||
|
else
|
||||||
|
rm -f "$progdir/$file"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# relink executable if necessary
|
||||||
|
# relink executable if necessary
# Runs $relink_command, capturing stdout and stderr. On failure the captured
# output is written to stderr, the temporary output file is removed and the
# wrapper exits with status 1.
if test -n "$relink_command"; then
  if relink_command_output=`eval $relink_command 2>&1`; then :
  else
    # The format must be quoted: with an unquoted %s\n the shell strips the
    # backslash, so printf sees "%sn" and prints a stray "n" with no newline.
    printf '%s\n' "$relink_command_output" >&2
    rm -f "$progdir/$file"
    exit 1
  fi
fi
|
||||||
|
|
||||||
|
mv -f "$progdir/$file" "$progdir/$program" 2>/dev/null ||
|
||||||
|
{ rm -f "$progdir/$program";
|
||||||
|
mv -f "$progdir/$file" "$progdir/$program"; }
|
||||||
|
rm -f "$progdir/$file"
|
||||||
|
fi
|
||||||
|
|
||||||
|
if test -f "$progdir/$program"; then
|
||||||
|
if test "$libtool_execute_magic" != "%%%MAGIC variable%%%"; then
|
||||||
|
# Run the actual program with our arguments.
|
||||||
|
func_exec_program ${1+"$@"}
|
||||||
|
fi
|
||||||
|
else
|
||||||
|
# The program doesn't exist.
|
||||||
|
$ECHO "$0: error: \`$progdir/$program' does not exist" 1>&2
|
||||||
|
$ECHO "This script is just a wrapper for $program." 1>&2
|
||||||
|
$ECHO "See the libtool documentation for more information." 1>&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
fi
|
228
training/lstmeval
Executable file
228
training/lstmeval
Executable file
@ -0,0 +1,228 @@
|
|||||||
|
#! /bin/sh
|
||||||
|
|
||||||
|
# lstmeval - temporary wrapper script for .libs/lstmeval
|
||||||
|
# Generated by libtool (GNU libtool) 2.4.2 Debian-2.4.2-1.7ubuntu1
|
||||||
|
#
|
||||||
|
# The lstmeval program cannot be directly executed until all the libtool
|
||||||
|
# libraries that it depends on are installed.
|
||||||
|
#
|
||||||
|
# This wrapper script should never be moved out of the build directory.
|
||||||
|
# If it is, it will not operate correctly.
|
||||||
|
|
||||||
|
# Sed substitution that helps us do robust quoting. It backslashifies
|
||||||
|
# metacharacters that are still active within double-quoted strings.
|
||||||
|
sed_quote_subst='s/\([`"$\\]\)/\\\1/g'
|
||||||
|
|
||||||
|
# Be Bourne compatible
|
||||||
|
if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then
|
||||||
|
emulate sh
|
||||||
|
NULLCMD=:
|
||||||
|
# Zsh 3.x and 4.x performs word splitting on ${1+"$@"}, which
|
||||||
|
# is contrary to our usage. Disable this feature.
|
||||||
|
alias -g '${1+"$@"}'='"$@"'
|
||||||
|
setopt NO_GLOB_SUBST
|
||||||
|
else
|
||||||
|
case `(set -o) 2>/dev/null` in *posix*) set -o posix;; esac
|
||||||
|
fi
|
||||||
|
BIN_SH=xpg4; export BIN_SH # for Tru64
|
||||||
|
DUALCASE=1; export DUALCASE # for MKS sh
|
||||||
|
|
||||||
|
# The HP-UX ksh and POSIX shell print the target directory to stdout
|
||||||
|
# if CDPATH is set.
|
||||||
|
(unset CDPATH) >/dev/null 2>&1 && unset CDPATH
|
||||||
|
|
||||||
|
relink_command="(cd /usr/local/google/home/rays/opensrc/git/tesseract/training; { test -z \"\${LIBRARY_PATH+set}\" || unset LIBRARY_PATH || { LIBRARY_PATH=; export LIBRARY_PATH; }; }; { test -z \"\${COMPILER_PATH+set}\" || unset COMPILER_PATH || { COMPILER_PATH=; export COMPILER_PATH; }; }; { test -z \"\${GCC_EXEC_PREFIX+set}\" || unset GCC_EXEC_PREFIX || { GCC_EXEC_PREFIX=; export GCC_EXEC_PREFIX; }; }; { test -z \"\${LD_RUN_PATH+set}\" || unset LD_RUN_PATH || { LD_RUN_PATH=; export LD_RUN_PATH; }; }; { test -z \"\${LD_LIBRARY_PATH+set}\" || unset LD_LIBRARY_PATH || { LD_LIBRARY_PATH=; export LD_LIBRARY_PATH; }; }; PATH=/usr/local/google/home/rays/bin:/usr/lib/google-golang/bin:/usr/local/buildtools/java/jdk/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/local/google/home/rays/bin; export PATH; g++ -g -O2 -std=c++11 -o \$progdir/\$file lstmeval.o ./.libs/libtesseract_training.a ./.libs/libtesseract_tessopt.a -licuuc -licudata ../api/.libs/libtesseract.so -L/usr/local/lib /usr/local/lib/liblept.so -lpthread -fopenmp -Wl,-rpath -Wl,/usr/local/google/home/rays/opensrc/git/tesseract/api/.libs)"
|
||||||
|
|
||||||
|
# This environment variable determines our operation mode.
|
||||||
|
if test "$libtool_install_magic" = "%%%MAGIC variable%%%"; then
|
||||||
|
# install mode needs the following variables:
|
||||||
|
generated_by_libtool_version='2.4.2'
|
||||||
|
notinst_deplibs=' ../api/libtesseract.la'
|
||||||
|
else
|
||||||
|
# When we are sourced in execute mode, $file and $ECHO are already set.
|
||||||
|
if test "$libtool_execute_magic" != "%%%MAGIC variable%%%"; then
|
||||||
|
file="$0"
|
||||||
|
|
||||||
|
# A function that is used when there is no print builtin or printf.
|
||||||
|
func_fallback_echo ()
|
||||||
|
{
|
||||||
|
eval 'cat <<_LTECHO_EOF
|
||||||
|
$1
|
||||||
|
_LTECHO_EOF'
|
||||||
|
}
|
||||||
|
ECHO="printf %s\\n"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Very basic option parsing. These options are (a) specific to
|
||||||
|
# the libtool wrapper, (b) are identical between the wrapper
|
||||||
|
# /script/ and the wrapper /executable/ which is used only on
|
||||||
|
# windows platforms, and (c) all begin with the string --lt-
|
||||||
|
# (application programs are unlikely to have options which match
|
||||||
|
# this pattern).
|
||||||
|
#
|
||||||
|
# There are only two supported options: --lt-debug and
|
||||||
|
# --lt-dump-script. There is, deliberately, no --lt-help.
|
||||||
|
#
|
||||||
|
# The first argument to this parsing function should be the
|
||||||
|
# script's ../libtool value, followed by no.
|
||||||
|
lt_option_debug=
|
||||||
|
func_parse_lt_options ()
|
||||||
|
{
|
||||||
|
lt_script_arg0=$0
|
||||||
|
shift
|
||||||
|
for lt_opt
|
||||||
|
do
|
||||||
|
case "$lt_opt" in
|
||||||
|
--lt-debug) lt_option_debug=1 ;;
|
||||||
|
--lt-dump-script)
|
||||||
|
lt_dump_D=`$ECHO "X$lt_script_arg0" | /bin/sed -e 's/^X//' -e 's%/[^/]*$%%'`
|
||||||
|
test "X$lt_dump_D" = "X$lt_script_arg0" && lt_dump_D=.
|
||||||
|
lt_dump_F=`$ECHO "X$lt_script_arg0" | /bin/sed -e 's/^X//' -e 's%^.*/%%'`
|
||||||
|
cat "$lt_dump_D/$lt_dump_F"
|
||||||
|
exit 0
|
||||||
|
;;
|
||||||
|
--lt-*)
|
||||||
|
$ECHO "Unrecognized --lt- option: '$lt_opt'" 1>&2
|
||||||
|
exit 1
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
done
|
||||||
|
|
||||||
|
# Print the debug banner immediately:
|
||||||
|
if test -n "$lt_option_debug"; then
|
||||||
|
echo "lstmeval:lstmeval:${LINENO}: libtool wrapper (GNU libtool) 2.4.2 Debian-2.4.2-1.7ubuntu1" 1>&2
|
||||||
|
fi
|
||||||
|
}
|
||||||
|
|
||||||
|
# Used when --lt-debug. Prints its arguments to stdout
|
||||||
|
# (redirection is the responsibility of the caller)
|
||||||
|
func_lt_dump_args ()
|
||||||
|
{
|
||||||
|
lt_dump_args_N=1;
|
||||||
|
for lt_arg
|
||||||
|
do
|
||||||
|
$ECHO "lstmeval:lstmeval:${LINENO}: newargv[$lt_dump_args_N]: $lt_arg"
|
||||||
|
lt_dump_args_N=`expr $lt_dump_args_N + 1`
|
||||||
|
done
|
||||||
|
}
|
||||||
|
|
||||||
|
# Core function for launching the target application
|
||||||
|
func_exec_program_core ()
|
||||||
|
{
|
||||||
|
|
||||||
|
if test -n "$lt_option_debug"; then
|
||||||
|
$ECHO "lstmeval:lstmeval:${LINENO}: newargv[0]: $progdir/$program" 1>&2
|
||||||
|
func_lt_dump_args ${1+"$@"} 1>&2
|
||||||
|
fi
|
||||||
|
exec "$progdir/$program" ${1+"$@"}
|
||||||
|
|
||||||
|
$ECHO "$0: cannot exec $program $*" 1>&2
|
||||||
|
exit 1
|
||||||
|
}
|
||||||
|
|
||||||
|
# A function to encapsulate launching the target application
|
||||||
|
# Strips options in the --lt-* namespace from $@ and
|
||||||
|
# launches target application with the remaining arguments.
|
||||||
|
func_exec_program ()
|
||||||
|
{
|
||||||
|
case " $* " in
|
||||||
|
*\ --lt-*)
|
||||||
|
for lt_wr_arg
|
||||||
|
do
|
||||||
|
case $lt_wr_arg in
|
||||||
|
--lt-*) ;;
|
||||||
|
*) set x "$@" "$lt_wr_arg"; shift;;
|
||||||
|
esac
|
||||||
|
shift
|
||||||
|
done ;;
|
||||||
|
esac
|
||||||
|
func_exec_program_core ${1+"$@"}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Parse options
|
||||||
|
func_parse_lt_options "$0" ${1+"$@"}
|
||||||
|
|
||||||
|
# Find the directory that this script lives in.
|
||||||
|
thisdir=`$ECHO "$file" | /bin/sed 's%/[^/]*$%%'`
|
||||||
|
test "x$thisdir" = "x$file" && thisdir=.
|
||||||
|
|
||||||
|
# Follow symbolic links until we get to the real thisdir.
|
||||||
|
file=`ls -ld "$file" | /bin/sed -n 's/.*-> //p'`
|
||||||
|
while test -n "$file"; do
|
||||||
|
destdir=`$ECHO "$file" | /bin/sed 's%/[^/]*$%%'`
|
||||||
|
|
||||||
|
# If there was a directory component, then change thisdir.
|
||||||
|
if test "x$destdir" != "x$file"; then
|
||||||
|
case "$destdir" in
|
||||||
|
[\\/]* | [A-Za-z]:[\\/]*) thisdir="$destdir" ;;
|
||||||
|
*) thisdir="$thisdir/$destdir" ;;
|
||||||
|
esac
|
||||||
|
fi
|
||||||
|
|
||||||
|
file=`$ECHO "$file" | /bin/sed 's%^.*/%%'`
|
||||||
|
file=`ls -ld "$thisdir/$file" | /bin/sed -n 's/.*-> //p'`
|
||||||
|
done
|
||||||
|
|
||||||
|
# Usually 'no', except on cygwin/mingw when embedded into
|
||||||
|
# the cwrapper.
|
||||||
|
WRAPPER_SCRIPT_BELONGS_IN_OBJDIR=no
|
||||||
|
if test "$WRAPPER_SCRIPT_BELONGS_IN_OBJDIR" = "yes"; then
|
||||||
|
# special case for '.'
|
||||||
|
if test "$thisdir" = "."; then
|
||||||
|
thisdir=`pwd`
|
||||||
|
fi
|
||||||
|
# remove .libs from thisdir
|
||||||
|
case "$thisdir" in
|
||||||
|
*[\\/].libs ) thisdir=`$ECHO "$thisdir" | /bin/sed 's%[\\/][^\\/]*$%%'` ;;
|
||||||
|
.libs ) thisdir=. ;;
|
||||||
|
esac
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Try to get the absolute directory name.
|
||||||
|
absdir=`cd "$thisdir" && pwd`
|
||||||
|
test -n "$absdir" && thisdir="$absdir"
|
||||||
|
|
||||||
|
program=lt-'lstmeval'
|
||||||
|
progdir="$thisdir/.libs"
|
||||||
|
|
||||||
|
if test ! -f "$progdir/$program" ||
|
||||||
|
{ file=`ls -1dt "$progdir/$program" "$progdir/../$program" 2>/dev/null | /bin/sed 1q`; \
|
||||||
|
test "X$file" != "X$progdir/$program"; }; then
|
||||||
|
|
||||||
|
file="$$-$program"
|
||||||
|
|
||||||
|
if test ! -d "$progdir"; then
|
||||||
|
mkdir "$progdir"
|
||||||
|
else
|
||||||
|
rm -f "$progdir/$file"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# relink executable if necessary
|
||||||
|
if test -n "$relink_command"; then
  # Capture the relink output so that it is only shown when relinking fails.
  if relink_command_output=`eval $relink_command 2>&1`; then :
  else
    # The format string must be quoted: written bare as %s\n the shell removes
    # the backslash, and printf would then print a literal "n" instead of
    # terminating the message with a newline.
    printf '%s\n' "$relink_command_output" >&2
    # Do not leave a partially linked temporary behind.
    rm -f "$progdir/$file"
    exit 1
  fi
fi
|
||||||
|
|
||||||
|
mv -f "$progdir/$file" "$progdir/$program" 2>/dev/null ||
|
||||||
|
{ rm -f "$progdir/$program";
|
||||||
|
mv -f "$progdir/$file" "$progdir/$program"; }
|
||||||
|
rm -f "$progdir/$file"
|
||||||
|
fi
|
||||||
|
|
||||||
|
if test -f "$progdir/$program"; then
|
||||||
|
if test "$libtool_execute_magic" != "%%%MAGIC variable%%%"; then
|
||||||
|
# Run the actual program with our arguments.
|
||||||
|
func_exec_program ${1+"$@"}
|
||||||
|
fi
|
||||||
|
else
|
||||||
|
# The program doesn't exist.
|
||||||
|
$ECHO "$0: error: \`$progdir/$program' does not exist" 1>&2
|
||||||
|
$ECHO "This script is just a wrapper for $program." 1>&2
|
||||||
|
$ECHO "See the libtool documentation for more information." 1>&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
fi
|
228
training/lstmtraining
Executable file
228
training/lstmtraining
Executable file
@ -0,0 +1,228 @@
|
|||||||
|
#! /bin/sh
|
||||||
|
|
||||||
|
# lstmtraining - temporary wrapper script for .libs/lstmtraining
|
||||||
|
# Generated by libtool (GNU libtool) 2.4.2 Debian-2.4.2-1.7ubuntu1
|
||||||
|
#
|
||||||
|
# The lstmtraining program cannot be directly executed until all the libtool
|
||||||
|
# libraries that it depends on are installed.
|
||||||
|
#
|
||||||
|
# This wrapper script should never be moved out of the build directory.
|
||||||
|
# If it is, it will not operate correctly.
|
||||||
|
|
||||||
|
# Sed substitution that helps us do robust quoting. It backslashifies
|
||||||
|
# metacharacters that are still active within double-quoted strings.
|
||||||
|
sed_quote_subst='s/\([`"$\\]\)/\\\1/g'
|
||||||
|
|
||||||
|
# Be Bourne compatible
|
||||||
|
if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then
|
||||||
|
emulate sh
|
||||||
|
NULLCMD=:
|
||||||
|
# Zsh 3.x and 4.x performs word splitting on ${1+"$@"}, which
|
||||||
|
# is contrary to our usage. Disable this feature.
|
||||||
|
alias -g '${1+"$@"}'='"$@"'
|
||||||
|
setopt NO_GLOB_SUBST
|
||||||
|
else
|
||||||
|
case `(set -o) 2>/dev/null` in *posix*) set -o posix;; esac
|
||||||
|
fi
|
||||||
|
BIN_SH=xpg4; export BIN_SH # for Tru64
|
||||||
|
DUALCASE=1; export DUALCASE # for MKS sh
|
||||||
|
|
||||||
|
# The HP-UX ksh and POSIX shell print the target directory to stdout
|
||||||
|
# if CDPATH is set.
|
||||||
|
(unset CDPATH) >/dev/null 2>&1 && unset CDPATH
|
||||||
|
|
||||||
|
relink_command="(cd /usr/local/google/home/rays/opensrc/git/tesseract/training; { test -z \"\${LIBRARY_PATH+set}\" || unset LIBRARY_PATH || { LIBRARY_PATH=; export LIBRARY_PATH; }; }; { test -z \"\${COMPILER_PATH+set}\" || unset COMPILER_PATH || { COMPILER_PATH=; export COMPILER_PATH; }; }; { test -z \"\${GCC_EXEC_PREFIX+set}\" || unset GCC_EXEC_PREFIX || { GCC_EXEC_PREFIX=; export GCC_EXEC_PREFIX; }; }; { test -z \"\${LD_RUN_PATH+set}\" || unset LD_RUN_PATH || { LD_RUN_PATH=; export LD_RUN_PATH; }; }; { test -z \"\${LD_LIBRARY_PATH+set}\" || unset LD_LIBRARY_PATH || { LD_LIBRARY_PATH=; export LD_LIBRARY_PATH; }; }; PATH=/usr/local/google/home/rays/bin:/usr/lib/google-golang/bin:/usr/local/buildtools/java/jdk/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/local/google/home/rays/bin; export PATH; g++ -g -O2 -std=c++11 -o \$progdir/\$file lstmtraining.o ./.libs/libtesseract_training.a ./.libs/libtesseract_tessopt.a -licui18n -licuuc -licudata ../api/.libs/libtesseract.so -L/usr/local/lib /usr/local/lib/liblept.so -lpthread -fopenmp -Wl,-rpath -Wl,/usr/local/google/home/rays/opensrc/git/tesseract/api/.libs)"
|
||||||
|
|
||||||
|
# This environment variable determines our operation mode.
|
||||||
|
if test "$libtool_install_magic" = "%%%MAGIC variable%%%"; then
|
||||||
|
# install mode needs the following variables:
|
||||||
|
generated_by_libtool_version='2.4.2'
|
||||||
|
notinst_deplibs=' ../api/libtesseract.la'
|
||||||
|
else
|
||||||
|
# When we are sourced in execute mode, $file and $ECHO are already set.
|
||||||
|
if test "$libtool_execute_magic" != "%%%MAGIC variable%%%"; then
|
||||||
|
file="$0"
|
||||||
|
|
||||||
|
# A function that is used when there is no print builtin or printf.
|
||||||
|
func_fallback_echo ()
|
||||||
|
{
|
||||||
|
eval 'cat <<_LTECHO_EOF
|
||||||
|
$1
|
||||||
|
_LTECHO_EOF'
|
||||||
|
}
|
||||||
|
ECHO="printf %s\\n"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Very basic option parsing. These options are (a) specific to
|
||||||
|
# the libtool wrapper, (b) are identical between the wrapper
|
||||||
|
# /script/ and the wrapper /executable/ which is used only on
|
||||||
|
# windows platforms, and (c) all begin with the string --lt-
|
||||||
|
# (application programs are unlikely to have options which match
|
||||||
|
# this pattern).
|
||||||
|
#
|
||||||
|
# There are only two supported options: --lt-debug and
|
||||||
|
# --lt-dump-script. There is, deliberately, no --lt-help.
|
||||||
|
#
|
||||||
|
# The first argument to this parsing function should be the
|
||||||
|
# script's ../libtool value, followed by no.
|
||||||
|
lt_option_debug=
|
||||||
|
func_parse_lt_options ()
|
||||||
|
{
|
||||||
|
lt_script_arg0=$0
|
||||||
|
shift
|
||||||
|
for lt_opt
|
||||||
|
do
|
||||||
|
case "$lt_opt" in
|
||||||
|
--lt-debug) lt_option_debug=1 ;;
|
||||||
|
--lt-dump-script)
|
||||||
|
lt_dump_D=`$ECHO "X$lt_script_arg0" | /bin/sed -e 's/^X//' -e 's%/[^/]*$%%'`
|
||||||
|
test "X$lt_dump_D" = "X$lt_script_arg0" && lt_dump_D=.
|
||||||
|
lt_dump_F=`$ECHO "X$lt_script_arg0" | /bin/sed -e 's/^X//' -e 's%^.*/%%'`
|
||||||
|
cat "$lt_dump_D/$lt_dump_F"
|
||||||
|
exit 0
|
||||||
|
;;
|
||||||
|
--lt-*)
|
||||||
|
$ECHO "Unrecognized --lt- option: '$lt_opt'" 1>&2
|
||||||
|
exit 1
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
done
|
||||||
|
|
||||||
|
# Print the debug banner immediately:
|
||||||
|
if test -n "$lt_option_debug"; then
|
||||||
|
echo "lstmtraining:lstmtraining:${LINENO}: libtool wrapper (GNU libtool) 2.4.2 Debian-2.4.2-1.7ubuntu1" 1>&2
|
||||||
|
fi
|
||||||
|
}
|
||||||
|
|
||||||
|
# Used when --lt-debug. Prints its arguments to stdout
|
||||||
|
# (redirection is the responsibility of the caller)
|
||||||
|
func_lt_dump_args ()
|
||||||
|
{
|
||||||
|
lt_dump_args_N=1;
|
||||||
|
for lt_arg
|
||||||
|
do
|
||||||
|
$ECHO "lstmtraining:lstmtraining:${LINENO}: newargv[$lt_dump_args_N]: $lt_arg"
|
||||||
|
lt_dump_args_N=`expr $lt_dump_args_N + 1`
|
||||||
|
done
|
||||||
|
}
|
||||||
|
|
||||||
|
# Core function for launching the target application
|
||||||
|
func_exec_program_core ()
|
||||||
|
{
|
||||||
|
|
||||||
|
if test -n "$lt_option_debug"; then
|
||||||
|
$ECHO "lstmtraining:lstmtraining:${LINENO}: newargv[0]: $progdir/$program" 1>&2
|
||||||
|
func_lt_dump_args ${1+"$@"} 1>&2
|
||||||
|
fi
|
||||||
|
exec "$progdir/$program" ${1+"$@"}
|
||||||
|
|
||||||
|
$ECHO "$0: cannot exec $program $*" 1>&2
|
||||||
|
exit 1
|
||||||
|
}
|
||||||
|
|
||||||
|
# A function to encapsulate launching the target application
|
||||||
|
# Strips options in the --lt-* namespace from $@ and
|
||||||
|
# launches target application with the remaining arguments.
|
||||||
|
func_exec_program ()
|
||||||
|
{
|
||||||
|
case " $* " in
|
||||||
|
*\ --lt-*)
|
||||||
|
for lt_wr_arg
|
||||||
|
do
|
||||||
|
case $lt_wr_arg in
|
||||||
|
--lt-*) ;;
|
||||||
|
*) set x "$@" "$lt_wr_arg"; shift;;
|
||||||
|
esac
|
||||||
|
shift
|
||||||
|
done ;;
|
||||||
|
esac
|
||||||
|
func_exec_program_core ${1+"$@"}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Parse options
|
||||||
|
func_parse_lt_options "$0" ${1+"$@"}
|
||||||
|
|
||||||
|
# Find the directory that this script lives in.
|
||||||
|
thisdir=`$ECHO "$file" | /bin/sed 's%/[^/]*$%%'`
|
||||||
|
test "x$thisdir" = "x$file" && thisdir=.
|
||||||
|
|
||||||
|
# Follow symbolic links until we get to the real thisdir.
|
||||||
|
file=`ls -ld "$file" | /bin/sed -n 's/.*-> //p'`
|
||||||
|
while test -n "$file"; do
|
||||||
|
destdir=`$ECHO "$file" | /bin/sed 's%/[^/]*$%%'`
|
||||||
|
|
||||||
|
# If there was a directory component, then change thisdir.
|
||||||
|
if test "x$destdir" != "x$file"; then
|
||||||
|
case "$destdir" in
|
||||||
|
[\\/]* | [A-Za-z]:[\\/]*) thisdir="$destdir" ;;
|
||||||
|
*) thisdir="$thisdir/$destdir" ;;
|
||||||
|
esac
|
||||||
|
fi
|
||||||
|
|
||||||
|
file=`$ECHO "$file" | /bin/sed 's%^.*/%%'`
|
||||||
|
file=`ls -ld "$thisdir/$file" | /bin/sed -n 's/.*-> //p'`
|
||||||
|
done
|
||||||
|
|
||||||
|
# Usually 'no', except on cygwin/mingw when embedded into
|
||||||
|
# the cwrapper.
|
||||||
|
WRAPPER_SCRIPT_BELONGS_IN_OBJDIR=no
|
||||||
|
if test "$WRAPPER_SCRIPT_BELONGS_IN_OBJDIR" = "yes"; then
|
||||||
|
# special case for '.'
|
||||||
|
if test "$thisdir" = "."; then
|
||||||
|
thisdir=`pwd`
|
||||||
|
fi
|
||||||
|
# remove .libs from thisdir
|
||||||
|
case "$thisdir" in
|
||||||
|
*[\\/].libs ) thisdir=`$ECHO "$thisdir" | /bin/sed 's%[\\/][^\\/]*$%%'` ;;
|
||||||
|
.libs ) thisdir=. ;;
|
||||||
|
esac
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Try to get the absolute directory name.
|
||||||
|
absdir=`cd "$thisdir" && pwd`
|
||||||
|
test -n "$absdir" && thisdir="$absdir"
|
||||||
|
|
||||||
|
program=lt-'lstmtraining'
|
||||||
|
progdir="$thisdir/.libs"
|
||||||
|
|
||||||
|
if test ! -f "$progdir/$program" ||
|
||||||
|
{ file=`ls -1dt "$progdir/$program" "$progdir/../$program" 2>/dev/null | /bin/sed 1q`; \
|
||||||
|
test "X$file" != "X$progdir/$program"; }; then
|
||||||
|
|
||||||
|
file="$$-$program"
|
||||||
|
|
||||||
|
if test ! -d "$progdir"; then
|
||||||
|
mkdir "$progdir"
|
||||||
|
else
|
||||||
|
rm -f "$progdir/$file"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# relink executable if necessary
|
||||||
|
if test -n "$relink_command"; then
  # Capture the relink output so that it is only shown when relinking fails.
  if relink_command_output=`eval $relink_command 2>&1`; then :
  else
    # The format string must be quoted: written bare as %s\n the shell removes
    # the backslash, and printf would then print a literal "n" instead of
    # terminating the message with a newline.
    printf '%s\n' "$relink_command_output" >&2
    # Do not leave a partially linked temporary behind.
    rm -f "$progdir/$file"
    exit 1
  fi
fi
|
||||||
|
|
||||||
|
mv -f "$progdir/$file" "$progdir/$program" 2>/dev/null ||
|
||||||
|
{ rm -f "$progdir/$program";
|
||||||
|
mv -f "$progdir/$file" "$progdir/$program"; }
|
||||||
|
rm -f "$progdir/$file"
|
||||||
|
fi
|
||||||
|
|
||||||
|
if test -f "$progdir/$program"; then
|
||||||
|
if test "$libtool_execute_magic" != "%%%MAGIC variable%%%"; then
|
||||||
|
# Run the actual program with our arguments.
|
||||||
|
func_exec_program ${1+"$@"}
|
||||||
|
fi
|
||||||
|
else
|
||||||
|
# The program doesn't exist.
|
||||||
|
$ECHO "$0: error: \`$progdir/$program' does not exist" 1>&2
|
||||||
|
$ECHO "This script is just a wrapper for $program." 1>&2
|
||||||
|
$ECHO "See the libtool documentation for more information." 1>&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
fi
|
@ -28,22 +28,25 @@ AM_CPPFLAGS += -isystem $(top_srcdir)/googletest/googletest/include
|
|||||||
|
|
||||||
check_PROGRAMS = \
|
check_PROGRAMS = \
|
||||||
apiexample_test \
|
apiexample_test \
|
||||||
|
intsimdmatrix_test \
|
||||||
tesseracttests \
|
tesseracttests \
|
||||||
matrix_test
|
matrix_test
|
||||||
|
|
||||||
TESTS = $(check_PROGRAMS)
|
TESTS = $(check_PROGRAMS)
|
||||||
|
|
||||||
#List of source files needed to build the executable:
|
#List of source files needed to build the executable:
|
||||||
|
|
||||||
|
apiexample_test_SOURCES = apiexample_test.cc
|
||||||
|
apiexample_test_LDFLAGS = $(OPENCL_LDFLAGS)
|
||||||
|
|
||||||
tesseracttests_SOURCES = ../tests/tesseracttests.cpp
|
intsimdmatrix_test_SOURCES = intsimdmatrix_test.cc
|
||||||
tesseracttests_LDADD = $(GTEST_LIBS)
|
intsimdmatrix_test_LDADD = $(GTEST_LIBS)
|
||||||
|
|
||||||
matrix_test_SOURCES = matrix_test.cc
|
matrix_test_SOURCES = matrix_test.cc
|
||||||
matrix_test_LDADD = $(GTEST_LIBS)
|
matrix_test_LDADD = $(GTEST_LIBS)
|
||||||
|
|
||||||
apiexample_test_SOURCES = apiexample_test.cc
|
tesseracttests_SOURCES = ../tests/tesseracttests.cpp
|
||||||
#apiexample_test_LDFLAGS = -static
|
tesseracttests_LDADD = $(GTEST_LIBS)
|
||||||
apiexample_test_LDFLAGS = $(OPENCL_LDFLAGS)
|
|
||||||
|
|
||||||
if USING_MULTIPLELIBS
|
if USING_MULTIPLELIBS
|
||||||
apiexample_test_LDADD = \
|
apiexample_test_LDADD = \
|
||||||
@ -60,6 +63,7 @@ apiexample_test_LDADD += $(GTEST_LIBS)
|
|||||||
# for windows
|
# for windows
|
||||||
if T_WIN
|
if T_WIN
|
||||||
apiexample_test_LDADD += -lws2_32
|
apiexample_test_LDADD += -lws2_32
|
||||||
|
intsimdmatrix_test_LDADD += -lws2_32
|
||||||
matrix_test_LDADD += -lws2_32
|
matrix_test_LDADD += -lws2_32
|
||||||
tesseracttests_LDADD += -lws2_32
|
tesseracttests_LDADD += -lws2_32
|
||||||
|
|
||||||
|
17
unittest/include_gunit.h
Normal file
17
unittest/include_gunit.h
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
// (C) Copyright 2017, Google Inc.
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
// Portability include to match the Google test environment.
|
||||||
|
#ifndef TESSERACT_UNITTEST_INCLUDE_GUNIT_H_
|
||||||
|
#define TESSERACT_UNITTEST_INCLUDE_GUNIT_H_
|
||||||
|
|
||||||
|
#include "gtest/gtest.h"
|
||||||
|
|
||||||
|
#endif // TESSERACT_UNITTEST_INCLUDE_GUNIT_H_
|
105
unittest/intsimdmatrix_test.cc
Normal file
105
unittest/intsimdmatrix_test.cc
Normal file
@ -0,0 +1,105 @@
|
|||||||
|
///////////////////////////////////////////////////////////////////////
|
||||||
|
// File: intsimdmatrix_test.cc
|
||||||
|
// Author: rays@google.com (Ray Smith)
|
||||||
|
//
|
||||||
|
// Copyright 2017 Google Inc. All Rights Reserved.
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
///////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
#include "intsimdmatrix.h"
|
||||||
|
#include <memory>
|
||||||
|
#include "genericvector.h"
|
||||||
|
#include "include_gunit.h"
|
||||||
|
#include "intsimdmatrixavx2.h"
|
||||||
|
#include "intsimdmatrixsse.h"
|
||||||
|
#include "simddetect.h"
|
||||||
|
#include "tprintf.h"
|
||||||
|
|
||||||
|
namespace tesseract {
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
class IntSimdMatrixTest : public ::testing::Test {
|
||||||
|
protected:
|
||||||
|
// Makes a random weights matrix of the given size.
|
||||||
|
GENERIC_2D_ARRAY<int8_t> InitRandom(int no, int ni) {
|
||||||
|
GENERIC_2D_ARRAY<int8_t> a(no, ni, 0);
|
||||||
|
for (int i = 0; i < no; ++i) {
|
||||||
|
for (int j = 0; j < ni; ++j) {
|
||||||
|
a(i, j) = static_cast<int8_t>(random_.SignedRand(MAX_INT8));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return a;
|
||||||
|
}
|
||||||
|
// Makes a random input vector of the given size, with rounding up.
|
||||||
|
std::vector<int8_t> RandomVector(int size, const IntSimdMatrix& matrix) {
|
||||||
|
int rounded_size = matrix.RoundInputs(size);
|
||||||
|
std::vector<int8_t> v(rounded_size, 0);
|
||||||
|
for (int i = 0; i < size; ++i) {
|
||||||
|
v[i] = static_cast<int8_t>(random_.SignedRand(MAX_INT8));
|
||||||
|
}
|
||||||
|
return v;
|
||||||
|
}
|
||||||
|
// Makes a random scales vector of the given size.
|
||||||
|
GenericVector<double> RandomScales(int size) {
|
||||||
|
GenericVector<double> v(size, 0.0);
|
||||||
|
for (int i = 0; i < size; ++i) {
|
||||||
|
v[i] = 1.0 + random_.SignedRand(1.0);
|
||||||
|
}
|
||||||
|
return v;
|
||||||
|
}
|
||||||
|
// Tests a range of sizes and compares the results against the base_ version.
|
||||||
|
void ExpectEqualResults(IntSimdMatrix* matrix) {
|
||||||
|
for (int num_out = 1; num_out < 130; ++num_out) {
|
||||||
|
for (int num_in = 1; num_in < 130; ++num_in) {
|
||||||
|
GENERIC_2D_ARRAY<int8_t> w = InitRandom(num_out, num_in + 1);
|
||||||
|
matrix->Init(w);
|
||||||
|
std::vector<int8_t> u = RandomVector(num_in, *matrix);
|
||||||
|
GenericVector<double> scales = RandomScales(num_out);
|
||||||
|
std::vector<double> base_result(num_out);
|
||||||
|
base_.MatrixDotVector(w, scales, u.data(), base_result.data());
|
||||||
|
std::vector<double> test_result(num_out);
|
||||||
|
matrix->MatrixDotVector(w, scales, u.data(), test_result.data());
|
||||||
|
for (int i = 0; i < num_out; ++i)
|
||||||
|
EXPECT_FLOAT_EQ(base_result[i], test_result[i]) << "i=" << i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
TRand random_;
|
||||||
|
IntSimdMatrix base_;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Tests that the SSE implementation gets the same result as the vanilla.
|
||||||
|
TEST_F(IntSimdMatrixTest, SSE) {
  // Without SSE support there is nothing to compare, so the test ends early
  // and counts as passed.
  // Status messages go through tprintf (from tprintf.h): include_gunit.h only
  // pulls in gtest/gtest.h, which does not provide a LOG(INFO) macro.
  if (!SIMDDetect::IsSSEAvailable()) {
    tprintf("No SSE found! Not Tested!\n");
    return;
  }
  tprintf("SSE found! Continuing...\n");
  std::unique_ptr<IntSimdMatrix> matrix(new IntSimdMatrixSSE());
  ExpectEqualResults(matrix.get());
}
|
||||||
|
|
||||||
|
// Tests that the AVX2 implementation gets the same result as the vanilla.
|
||||||
|
TEST_F(IntSimdMatrixTest, AVX2) {
  // Without AVX2 support there is nothing to compare, so the test ends early
  // and counts as passed.
  // Status messages go through tprintf (from tprintf.h): include_gunit.h only
  // pulls in gtest/gtest.h, which does not provide a LOG(INFO) macro.
  if (!SIMDDetect::IsAVX2Available()) {
    tprintf("No AVX2 found! Not Tested!\n");
    return;
  }
  tprintf("AVX2 found! Continuing...\n");
  std::unique_ptr<IntSimdMatrix> matrix(new IntSimdMatrixAVX2());
  ExpectEqualResults(matrix.get());
}
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
} // namespace tesseract
|
Loading…
Reference in New Issue
Block a user