tesseract/lstm/tfnetwork.proto

syntax = "proto3";

package tesseract;

// TODO(rays) How to make this usable both in Google and open source?
import "third_party/tensorflow/core/framework/graph.proto";

// This proto is the interface between a python TF graph builder/trainer and
// the C++ world. The writer of this proto must provide fields as documented
// by the comments below.
// The graph must have a placeholder for NetworkIO, Widths and Heights. The
// following python code creates the appropriate placeholders:
//
//   input_layer = tf.placeholder(tf.float32,
//                                shape=[batch_size, xsize, ysize, depth_dim],
//                                name='NetworkIO')
//   widths = tf.placeholder(tf.int32, shape=[batch_size], name='Widths')
//   heights = tf.placeholder(tf.int32, shape=[batch_size], name='Heights')
//   # Flip x and y to the TF convention.
//   input_layer = tf.transpose(input_layer, [0, 2, 1, 3])
//
// The widths and heights will be set to indicate the post-scaling size of the
// input image(s).
// For now batch_size is ignored and set to 1.
// The graph should return a 2-dimensional float32 tensor called 'softmax' of
// shape [sequence_length, num_classes], where sequence_length is allowed to
// be variable, given by the tensor itself.
// TODO(rays) determine whether it is worth providing for batch_size >1 and if
// so, how.
message TFNetworkModel {
  // Field number 7 is not assigned to any field in this message. It is
  // reserved so that a future field cannot pick it up by accident.
  // NOTE(review): presumably the number of a removed field - confirm against
  // the file history.
  reserved 7;

  // The TF graph definition. Required.
  tensorflow.GraphDef graph = 1;

  // The training index. Required to be > 0.
  int64 global_step = 2;

  // The original network definition for reference. Optional.
  string spec = 3;

  // Input tensor parameters.

  // Values per pixel. Required to be 1 or 3. Inputs assumed to be float32.
  int32 depth = 4;

  // Image width. Required. Zero implies a flexible width, fixed if non-zero.
  // If x_size > 0, images will be cropped/padded to the given size, after
  // any scaling required by the y_size.
  int32 x_size = 5;

  // Image height. Required. Zero implies a flexible height, fixed if
  // non-zero.
  // If y_size > 0, images will be scaled isotropically to the given height.
  int32 y_size = 6;

  // Number of images in a batch. Optional.
  int32 batch_size = 8;

  // Output tensor parameters.

  // Number of output classes. Required to match the depth of the softmax.
  int32 num_classes = 9;

  // True if this network needs CTC-like decoding, dropping duplicated labels.
  // The decoder always drops the null character.
  bool using_ctc = 10;

  // Name of input image tensor.
  string image_input = 11;

  // Name of the tensor holding the image width(s).
  string image_widths = 12;

  // Name of the tensor holding the image height(s).
  string image_heights = 13;

  // Name of output (softmax) tensor.
  string output_layer = 14;
}
Added new LSTM-based neural network line recognizer 2016-11-08 07:38:07 +08:00			`syntax = "proto3";`

			`package tesseract;`

			`// TODO(rays) How to make this usable both in Google and open source?`
			`import "third_party/tensorflow/core/framework/graph.proto";`

			`// This proto is the interface between a python TF graph builder/trainer and`
			`// the C++ world. The writer of this proto must provide fields as documented`
			`// by the comments below.`
			`// The graph must have a placeholder for NetworkIO, Widths and Heights. The`
			`// following python code creates the appropriate placeholders:`
			`//`
			`// input_layer = tf.placeholder(tf.float32,`
			`// shape=[batch_size, xsize, ysize, depth_dim],`
			`// name='NetworkIO')`
			`// widths = tf.placeholder(tf.int32, shape=[batch_size], name='Widths')`
			`// heights = tf.placeholder(tf.int32, shape=[batch_size], name='Heights')`
			`// # Flip x and y to the TF convention.`
			`// input_layer = tf.transpose(input_layer, [0, 2, 1, 3])`
			`//`
			`// The widths and heights will be set to indicate the post-scaling size of the`
			`// input image(s).`
			`// For now batch_size is ignored and set to 1.`
			`// The graph should return a 2-dimensional float32 tensor called 'softmax' of`
			`// shape [sequence_length, num_classes], where sequence_length is allowed to`
			`// be variable, given by the tensor itself.`
			`// TODO(rays) determine whether it is worth providing for batch_size >1 and if`
			`// so, how.`
			`message TFNetworkModel {`
			`// The TF graph definition. Required.`
			`tensorflow.GraphDef graph = 1;`
			`// The training index. Required to be > 0.`
			`int64 global_step = 2;`
			`// The original network definition for reference. Optional`
			`string spec = 3;`
			`// Input tensor parameters.`
			`// Values per pixel. Required to be 1 or 3. Inputs assumed to be float32.`
			`int32 depth = 4;`
			`// Image size. Required. Zero implies flexible sizes, fixed if non-zero.`
			`// If x_size > 0, images will be cropped/padded to the given size, after`
			`// any scaling required by the y_size.`
			`// If y_size > 0, images will be scaled isotropically to the given height.`
			`int32 x_size = 5;`
			`int32 y_size = 6;`
			`// Number of images in a batch. Optional.`
			`int32 batch_size = 8;`
			`// Output tensor parameters.`
			`// Number of output classes. Required to match the depth of the softmax.`
			`int32 num_classes = 9;`
			`// True if this network needs CTC-like decoding, dropping duplicated labels.`
			`// The decoder always drops the null character.`
			`bool using_ctc = 10;`
			`// Name of input image tensor.`
			`string image_input = 11;`
			`// Name of image height and width tensors.`
			`string image_widths = 12;`
			`string image_heights = 13;`
			`// Name of output (softmax) tensor.`
			`string output_layer = 14;`
			`}`