mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2025-01-22 01:30:49 +08:00
Round output buffers for intSimdMatrix.
To allow intSimdMatrix implementations to 'overwrite' their outputs (i.e. write past the requested number of outputs), ensure that the output buffers are always padded up to the next block-size boundary. This makes no functional difference yet, but it enables optimisations further down the line, especially once the biasing is pulled into the SIMD code.
This commit is contained in:
parent
9dfdac51c6
commit
aba1800f69
@ -132,8 +132,11 @@ void FullyConnected::Forward(bool debug, const NetworkIO& input,
|
||||
temp_lines.init_to_size(kNumThreads, NetworkScratch::FloatVec());
|
||||
GenericVector<NetworkScratch::FloatVec> curr_input;
|
||||
curr_input.init_to_size(kNumThreads, NetworkScratch::FloatVec());
|
||||
int ro = no_;
|
||||
if (IntSimdMatrix::intSimdMatrix)
|
||||
ro = IntSimdMatrix::intSimdMatrix->RoundOutputs(ro);
|
||||
for (int i = 0; i < kNumThreads; ++i) {
|
||||
temp_lines[i].Init(no_, scratch);
|
||||
temp_lines[i].Init(no_, ro, scratch);
|
||||
curr_input[i].Init(ni_, scratch);
|
||||
}
|
||||
#ifdef _OPENMP
|
||||
|
@ -264,7 +264,10 @@ void LSTM::Forward(bool debug, const NetworkIO& input,
|
||||
ResizeForward(input);
|
||||
// Temporary storage of forward computation for each gate.
|
||||
NetworkScratch::FloatVec temp_lines[WT_COUNT];
|
||||
for (auto & temp_line : temp_lines) temp_line.Init(ns_, scratch);
|
||||
int ro = ns_;
|
||||
if (source_.int_mode() && IntSimdMatrix::intSimdMatrix)
|
||||
ro = IntSimdMatrix::intSimdMatrix->RoundOutputs(ro);
|
||||
for (auto & temp_line : temp_lines) temp_line.Init(ns_, ro, scratch);
|
||||
// Single timestep buffers for the current/recurrent output and state.
|
||||
NetworkScratch::FloatVec curr_state, curr_output;
|
||||
curr_state.Init(ns_, scratch);
|
||||
|
@ -144,15 +144,24 @@ class NetworkScratch {
|
||||
if (scratch_space_ != nullptr) scratch_space_->vec_stack_.Return(vec_);
|
||||
}
|
||||
|
||||
void Init(int size, NetworkScratch* scratch) {
|
||||
void Init(int size, int reserve, NetworkScratch* scratch) {
|
||||
if (scratch_space_ != nullptr && vec_ != nullptr)
|
||||
scratch_space_->vec_stack_.Return(vec_);
|
||||
scratch_space_ = scratch;
|
||||
vec_ = scratch_space_->vec_stack_.Borrow();
|
||||
// Abuse vec_ here; first resize to 'reserve', which is no smaller
|
||||
// than 'size' (i.e. it's size rounded up), then resize down again
|
||||
// to the desired size. This assumes that the implementation does
|
||||
// not shrink the storage on a resize.
|
||||
vec_->resize_no_init(reserve);
|
||||
vec_->resize_no_init(size);
|
||||
data_ = &(*vec_)[0];
|
||||
}
|
||||
|
||||
void Init(int size, NetworkScratch *scratch) {
|
||||
Init(size, size, scratch);
|
||||
}
|
||||
|
||||
// Use the cast operator instead of operator[] so the FloatVec can be used
|
||||
// as a double* argument to a function call.
|
||||
operator double*() const { return data_; }
|
||||
|
Loading…
Reference in New Issue
Block a user