diff options
Diffstat (limited to 'tesseract/src/lstm/tfnetwork.proto')
-rw-r--r-- | tesseract/src/lstm/tfnetwork.proto | 74 |
1 files changed, 74 insertions, 0 deletions
diff --git a/tesseract/src/lstm/tfnetwork.proto b/tesseract/src/lstm/tfnetwork.proto new file mode 100644 index 00000000..d9cf3390 --- /dev/null +++ b/tesseract/src/lstm/tfnetwork.proto @@ -0,0 +1,74 @@ +// Protocol description for Tesseract + +// Compile this file with the Protocol Compiler protoc to generate +// the files tfnetwork.pb.cc and tfnetwork.pb.h. + +// This requires the protocol descriptions for TensorFlow +// (included in the TensorFlow sources). + +// With TensorFlow sources installed in /usr/src/tensorflow/tensorflow-1.10.1, +// this command was used on Debian to generate the files: + +// protoc --cpp_out=$PWD --proto_path=/usr/src/tensorflow/tensorflow-1.10.1 \ +// --proto_path=$PWD src/lstm/tfnetwork.proto + +syntax = "proto3"; + +package tesseract; + +import "tensorflow/core/framework/graph.proto"; + +// This proto is the interface between a python TF graph builder/trainer and +// the C++ world. The writer of this proto must provide fields as documented +// by the comments below. +// The graph must have a placeholder for NetworkIO, Widths and Heights. The +// following python code creates the appropriate placeholders: +// +// input_layer = tf.placeholder(tf.float32, +// shape=[batch_size, xsize, ysize, depth_dim], +// name='NetworkIO') +// widths = tf.placeholder(tf.int32, shape=[batch_size], name='Widths') +// heights = tf.placeholder(tf.int32, shape=[batch_size], name='Heights') +// # Flip x and y to the TF convention. +// input_layer = tf.transpose(input_layer, [0, 2, 1, 3]) +// +// The widths and heights will be set to indicate the post-scaling size of the +// input image(s). +// For now batch_size is ignored and set to 1. +// The graph should return a 2-dimensional float32 tensor called 'softmax' of +// shape [sequence_length, num_classes], where sequence_length is allowed to +// be variable, given by the tensor itself. +// TODO(rays) determine whether it is worth providing for batch_size >1 and if +// so, how. +message TFNetworkModel { + // The TF graph definition. Required. + tensorflow.GraphDef graph = 1; + // The training index. Required to be > 0. + int64 global_step = 2; + // The original network definition for reference. Optional + string spec = 3; + // Input tensor parameters. + // Values per pixel. Required to be 1 or 3. Inputs assumed to be float32. + int32 depth = 4; + // Image size. Required. Zero implies flexible sizes, fixed if non-zero. + // If x_size > 0, images will be cropped/padded to the given size, after + // any scaling required by the y_size. + // If y_size > 0, images will be scaled isotropically to the given height. + int32 x_size = 5; + int32 y_size = 6; + // Number of images in a batch. Optional. + int32 batch_size = 8; + // Output tensor parameters. + // Number of output classes. Required to match the depth of the softmax. + int32 num_classes = 9; + // True if this network needs CTC-like decoding, dropping duplicated labels. + // The decoder always drops the null character. + bool using_ctc = 10; + // Name of input image tensor. + string image_input = 11; + // Name of image height and width tensors. + string image_widths = 12; + string image_heights = 13; + // Name of output (softmax) tensor. + string output_layer = 14; +} |