Tesseract  3.02
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
devanagari_processing.h
Go to the documentation of this file.
1 // Copyright 2008 Google Inc. All Rights Reserved.
2 // Author: shobhitsaxena@google.com (Shobhit Saxena)
3 
4 #ifndef TESSERACT_TEXTORD_DEVNAGARI_PROCESSING_H_
5 #define TESSERACT_TEXTORD_DEVNAGARI_PROCESSING_H_
6 
7 #include "ocrblock.h"
8 #include "params.h"
9 
10 struct Pix;
11 struct Box;
12 struct Boxa;
13 
14 extern
16  "Debug level for split shiro-rekha process.");
17 
18 extern
20  "Whether to create a debug image for split shiro-rekha process.");
21 
22 class TBOX;
23 class IMAGE;
24 
25 namespace tesseract {
26 
28  public:
30  hist_ = NULL;
31  length_ = 0;
32  }
33 
35  Clear();
36  }
37 
38  void Clear() {
39  if (hist_) {
40  delete[] hist_;
41  }
42  length_ = 0;
43  }
44 
45  int* const hist() const {
46  return hist_;
47  }
48 
49  int length() const {
50  return length_;
51  }
52 
53  // Methods to construct histograms from images. These clear any existing data.
54  void ConstructVerticalCountHist(Pix* pix);
55  void ConstructHorizontalCountHist(Pix* pix);
56 
57  // This method returns the global maximum of the histogram. The frequency of
58  // the global maximum is returned in count, if specified.
59  int GetHistogramMaximum(int* count) const;
60 
61  private:
62  int* hist_;
63  int length_;
64 };
65 
67  public:
69  NO_SPLIT = 0, // No splitting is performed for the phase.
70  MINIMAL_SPLIT, // Blobs are split minimally.
71  MAXIMAL_SPLIT // Blobs are split maximally.
72  };
73 
75  virtual ~ShiroRekhaSplitter();
76 
77  // Top-level method to perform splitting based on current settings.
78  // Returns true if a split was actually performed.
79  // If split_for_pageseg is true, the pageseg_split_strategy_ is used for
80  // splitting. If false, the ocr_split_strategy_ is used.
81  bool Split(bool split_for_pageseg);
82 
83  // Clears the memory held by this object.
84  void Clear();
85 
86  // Refreshes the words in the segmentation block list by using blobs in the
87  // input blob list.
88  // The segmentation block list must be set.
89  void RefreshSegmentationWithNewBlobs(C_BLOB_LIST* new_blobs);
90 
91  // Returns true if the split strategies for pageseg and ocr are different.
93  return pageseg_split_strategy_ != ocr_split_strategy_;
94  }
95 
96  // This only keeps a copy of the block list pointer. At split call, the list
97  // object should still be alive. This block list is used as a golden
98  // segmentation when performing splitting.
99  void set_segmentation_block_list(BLOCK_LIST* block_list) {
100  segmentation_block_list_ = block_list;
101  }
102 
103  static const int kUnspecifiedXheight = -1;
104 
105  void set_global_xheight(int xheight) {
106  global_xheight_ = xheight;
107  }
108 
109  void set_perform_close(bool perform) {
110  perform_close_ = perform;
111  }
112 
113  // Returns the image obtained from shiro-rekha splitting. The returned object
114  // is owned by this class. Callers may want to clone the returned pix to keep
115  // it alive beyond the life of ShiroRekhaSplitter object.
116  Pix* splitted_image() {
117  return splitted_image_;
118  }
119 
120  // On setting the input image, a clone of it is owned by this class.
121  void set_orig_pix(Pix* pix);
122 
123  // Returns the input image provided to the object. This object is owned by
124  // this class. Callers may want to clone the returned pix to work with it.
125  Pix* orig_pix() {
126  return orig_pix_;
127  }
128 
130  return ocr_split_strategy_;
131  }
132 
134  ocr_split_strategy_ = strategy;
135  }
136 
138  return pageseg_split_strategy_;
139  }
140 
142  pageseg_split_strategy_ = strategy;
143  }
144 
145  BLOCK_LIST* segmentation_block_list() {
146  return segmentation_block_list_;
147  }
148 
149  // This method dumps a debug image to the specified location.
150  void DumpDebugImage(const char* filename) const;
151 
152  // This method returns the computed mode-height of blobs in the pix.
153  // It also prunes very small blobs from calculation. Could be used to provide
154  // a global xheight estimate for images which have the same point-size text.
155  static int GetModeHeight(Pix* pix);
156 
157  private:
158  // Method to perform a close operation on the input image. The xheight
159  // estimate decides the size of sel used.
160  static void PerformClose(Pix* pix, int xheight_estimate);
161 
162  // This method resolves the cc bbox to a particular row and returns the row's
163  // xheight. This uses segmentation_block_list_ if available, else just returns
164  // the global_xheight_ estimate currently set in the object.
165  int GetXheightForCC(Box* cc_bbox);
166 
167  // Returns a list of regions (boxes) which should be cleared in the original
168  // image so as to perform shiro-rekha splitting. Pix is assumed to carry at
169  // most one word. The xheight measure could be the global estimate, the row
170  // estimate, or unspecified. If unspecified, over splitting may occur, since a
171  // conservative estimate of stroke width along with an associated multiplier
172  // is used in its place. It is advisable to have a specified xheight when
173  // splitting for classification/training.
174  void SplitWordShiroRekha(SplitStrategy split_strategy,
175  Pix* pix,
176  int xheight,
177  int word_left,
178  int word_top,
179  Boxa* regions_to_clear);
180 
181  // Returns a new box object for the corresponding TBOX, based on the original
182  // image's coordinate system.
183  Box* GetBoxForTBOX(const TBOX& tbox) const;
184 
185  // This method returns y-extents of the shiro-rekha computed from the input
186  // word image.
187  static void GetShiroRekhaYExtents(Pix* word_pix,
188  int* shirorekha_top,
189  int* shirorekha_bottom,
190  int* shirorekha_ylevel);
191 
192  Pix* orig_pix_; // Just a clone of the input image passed.
193  Pix* splitted_image_; // Image produced after the last splitting round. The
194  // object is owned by this class.
195  SplitStrategy pageseg_split_strategy_;
196  SplitStrategy ocr_split_strategy_;
197  Pix* debug_image_;
198  // This block list is used as a golden segmentation when performing splitting.
199  BLOCK_LIST* segmentation_block_list_;
200  int global_xheight_;
201  bool perform_close_; // Whether a morphological close operation should be
202  // performed before CCs are run through splitting.
203 };
204 
205 } // namespace tesseract.
206 
207 #endif // TESSERACT_TEXTORD_DEVNAGARI_PROCESSING_H_