// Mirror of https://github.com/tesseract-ocr/tesseract.git
// (synced 2025-01-06 09:17:49 +08:00).
syntax = "proto3";

package tesseract;

// Supplies the tensorflow.GraphDef message type used by TFNetworkModel.graph.
// TODO(rays) How to make this usable both in Google and open source?
import "third_party/tensorflow/core/framework/graph.proto";

// This proto is the interface between a Python TF graph builder/trainer and
// the C++ world. The writer of this proto must provide fields as documented
// by the comments below.
// The graph must have a placeholder for NetworkIO, Widths and Heights. The
// following Python code creates the appropriate placeholders:
//
// input_layer = tf.placeholder(tf.float32,
//                              shape=[batch_size, xsize, ysize, depth_dim],
//                              name='NetworkIO')
// widths = tf.placeholder(tf.int32, shape=[batch_size], name='Widths')
// heights = tf.placeholder(tf.int32, shape=[batch_size], name='Heights')
// # Flip x and y to the TF convention.
// input_layer = tf.transpose(input_layer, [0, 2, 1, 3])
//
// The widths and heights will be set to indicate the post-scaling size of the
// input image(s).
// For now batch_size is ignored and set to 1.
// The graph should return a 2-dimensional float32 tensor called 'softmax' of
// shape [sequence_length, num_classes], where sequence_length is allowed to
// be variable, given by the tensor itself.
// TODO(rays) determine whether it is worth providing for batch_size > 1 and,
// if so, how.
message TFNetworkModel {
  // Field number 7 is not assigned to any field (the numbering jumps from 6
  // to 8). It is reserved so that it cannot be picked up by accident.
  // NOTE(review): presumably a field that was removed at some point -- confirm
  // against the file history before ever assigning 7 to a new field.
  reserved 7;

  // The TF graph definition. Required.
  tensorflow.GraphDef graph = 1;

  // The training index. Required to be > 0.
  int64 global_step = 2;

  // The original network definition for reference. Optional.
  string spec = 3;

  // ---- Input tensor parameters. ----

  // Values per pixel. Required to be 1 or 3. Inputs assumed to be float32.
  int32 depth = 4;

  // Image size in x. Required. Zero implies a flexible size, fixed if
  // non-zero. If x_size > 0, images will be cropped/padded to the given size,
  // after any scaling required by the y_size.
  int32 x_size = 5;

  // Image size in y. Required. Zero implies a flexible size, fixed if
  // non-zero. If y_size > 0, images will be scaled isotropically to the given
  // height.
  int32 y_size = 6;

  // Number of images in a batch. Optional.
  int32 batch_size = 8;

  // ---- Output tensor parameters. ----

  // Number of output classes. Required to match the depth of the softmax.
  int32 num_classes = 9;

  // True if this network needs CTC-like decoding, dropping duplicated labels.
  // The decoder always drops the null character.
  bool using_ctc = 10;

  // ---- Tensor names within the graph. ----

  // Name of input image tensor.
  string image_input = 11;

  // Name of the image widths tensor.
  string image_widths = 12;

  // Name of the image heights tensor.
  string image_heights = 13;

  // Name of output (softmax) tensor.
  string output_layer = 14;
}
|