Tesseract  3.02
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
imagefind.cpp
Go to the documentation of this file.
1 
2 // File: imagefind.cpp
3 // Description: Function to find image and drawing regions in an image
4 // and create a corresponding list of empty blobs.
5 // Author: Ray Smith
6 // Created: Thu Mar 20 09:49:01 PDT 2008
7 //
8 // (C) Copyright 2008, Google Inc.
9 // Licensed under the Apache License, Version 2.0 (the "License");
10 // you may not use this file except in compliance with the License.
11 // You may obtain a copy of the License at
12 // http://www.apache.org/licenses/LICENSE-2.0
13 // Unless required by applicable law or agreed to in writing, software
14 // distributed under the License is distributed on an "AS IS" BASIS,
15 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 // See the License for the specific language governing permissions and
17 // limitations under the License.
18 //
20 
21 #ifdef _MSC_VER
22 #pragma warning(disable:4244) // Conversion warnings
23 #endif
24 
25 #include "imagefind.h"
26 #include "colpartitiongrid.h"
27 #include "linlsq.h"
28 #include "ndminx.h"
29 #include "statistc.h"
30 #include "params.h"
31 
32 // This entire file is dependent upon leptonica. If you don't have it,
33 // you don't get this functionality.
34 #ifdef HAVE_CONFIG_H
35 #include "config_auto.h"
36 #endif
37 #include "allheaders.h"
38 
39 INT_VAR(textord_tabfind_show_images, false, "Show image blobs");
40 
41 namespace tesseract {
42 
43 // Fraction of width or height of on pixels that can be discarded from a
44 // roughly rectangular image.
45 const double kMinRectangularFraction = 0.125;
46 // Fraction of width or height to consider image completely used.
47 const double kMaxRectangularFraction = 0.75;
48 // Fraction of width or height to allow transition from kMinRectangularFraction
49 // to kMaxRectangularFraction, equivalent to a dy/dx skew.
50 const double kMaxRectangularGradient = 0.1; // About 6 degrees.
51 // Minimum image size to be worth looking for images on.
52 const int kMinImageFindSize = 100;
53 // Scale factor for the rms color fit error.
54 const double kRMSFitScaling = 8.0;
55 // Min color difference to call it two colors.
56 const int kMinColorDifference = 16;
57 // Pixel padding for noise blobs and partitions when rendering on the image
58 // mask to encourage them to join together. Make it too big and images
59 // will fatten out too much and have to be clipped to text.
60 const int kNoisePadding = 4;
61 
62 // Finds image regions within the BINARY source pix (page image) and returns
63 // the image regions as a mask image.
64 // The returned pix may be NULL, meaning no images found.
65 // If not NULL, it must be PixDestroyed by the caller.
66 Pix* ImageFind::FindImages(Pix* pix) {
67  // Not worth looking at small images.
68  if (pixGetWidth(pix) < kMinImageFindSize ||
69  pixGetHeight(pix) < kMinImageFindSize)
70  return pixCreate(pixGetWidth(pix), pixGetHeight(pix), 1);
71  // Reduce by factor 2.
72  Pix *pixr = pixReduceRankBinaryCascade(pix, 1, 0, 0, 0);
73  pixDisplayWrite(pixr, textord_tabfind_show_images);
74 
75  // Get the halftone mask directly from Leptonica.
76  l_int32 ht_found = 0;
77  Pix *pixht2 = pixGenHalftoneMask(pixr, NULL, &ht_found,
79  pixDestroy(&pixr);
80  if (!ht_found && pixht2 != NULL)
81  pixDestroy(&pixht2);
82  if (pixht2 == NULL)
83  return pixCreate(pixGetWidth(pix), pixGetHeight(pix), 1);
84 
85  // Expand back up again.
86  Pix *pixht = pixExpandReplicate(pixht2, 2);
87  pixDisplayWrite(pixht, textord_tabfind_show_images);
88  pixDestroy(&pixht2);
89 
90  // Fill to capture pixels near the mask edges that were missed
91  Pix *pixt = pixSeedfillBinary(NULL, pixht, pix, 8);
92  pixOr(pixht, pixht, pixt);
93  pixDestroy(&pixt);
94 
95  // Eliminate lines and bars that may be joined to images.
96  Pix* pixfinemask = pixReduceRankBinaryCascade(pixht, 1, 1, 3, 3);
97  pixDilateBrick(pixfinemask, pixfinemask, 5, 5);
98  pixDisplayWrite(pixfinemask, textord_tabfind_show_images);
99  Pix* pixreduced = pixReduceRankBinaryCascade(pixht, 1, 1, 1, 1);
100  Pix* pixreduced2 = pixReduceRankBinaryCascade(pixreduced, 3, 3, 3, 0);
101  pixDestroy(&pixreduced);
102  pixDilateBrick(pixreduced2, pixreduced2, 5, 5);
103  Pix* pixcoarsemask = pixExpandReplicate(pixreduced2, 8);
104  pixDestroy(&pixreduced2);
105  pixDisplayWrite(pixcoarsemask, textord_tabfind_show_images);
106  // Combine the coarse and fine image masks.
107  pixAnd(pixcoarsemask, pixcoarsemask, pixfinemask);
108  pixDestroy(&pixfinemask);
109  // Dilate a bit to make sure we get everything.
110  pixDilateBrick(pixcoarsemask, pixcoarsemask, 3, 3);
111  Pix* pixmask = pixExpandReplicate(pixcoarsemask, 16);
112  pixDestroy(&pixcoarsemask);
114  pixWrite("junkexpandedcoarsemask.png", pixmask, IFF_PNG);
115  // And the image mask with the line and bar remover.
116  pixAnd(pixht, pixht, pixmask);
117  pixDestroy(&pixmask);
119  pixWrite("junkfinalimagemask.png", pixht, IFF_PNG);
120  // Make the result image the same size as the input.
121  Pix* result = pixCreate(pixGetWidth(pix), pixGetHeight(pix), 1);
122  pixOr(result, result, pixht);
123  pixDestroy(&pixht);
124  return result;
125 }
126 
127 // Generates a Boxa, Pixa pair from the input binary (image mask) pix,
128 // analgous to pixConnComp, except that connected components which are nearly
129 // rectangular are replaced with solid rectangles.
130 // The returned boxa, pixa may be NULL, meaning no images found.
131 // If not NULL, they must be destroyed by the caller.
132 // Resolution of pix should match the source image (Tesseract::pix_binary_)
133 // so the output coordinate systems match.
134 void ImageFind::ConnCompAndRectangularize(Pix* pix, Boxa** boxa, Pixa** pixa) {
135  *boxa = NULL;
136  *pixa = NULL;
137 
139  pixWrite("junkconncompimage.png", pix, IFF_PNG);
140  // Find the individual image regions in the mask image.
141  *boxa = pixConnComp(pix, pixa, 8);
142  // Rectangularize the individual images. If a sharp edge in vertical and/or
143  // horizontal occupancy can be found, it indicates a probably rectangular
144  // image with unwanted bits merged on, so clip to the approximate rectangle.
145  int npixes = pixaGetCount(*pixa);
146  for (int i = 0; i < npixes; ++i) {
147  int x_start, x_end, y_start, y_end;
148  Pix* img_pix = pixaGetPix(*pixa, i, L_CLONE);
149  pixDisplayWrite(img_pix, textord_tabfind_show_images);
153  &x_start, &y_start, &x_end, &y_end)) {
154  Pix* simple_pix = pixCreate(x_end - x_start, y_end - y_start, 1);
155  pixSetAll(simple_pix);
156  pixDestroy(&img_pix);
157  // pixaReplacePix takes ownership of the simple_pix.
158  pixaReplacePix(*pixa, i, simple_pix, NULL);
159  img_pix = pixaGetPix(*pixa, i, L_CLONE);
160  // Fix the box to match the new pix.
161  l_int32 x, y, width, height;
162  boxaGetBoxGeometry(*boxa, i, &x, &y, &width, &height);
163  Box* simple_box = boxCreate(x + x_start, y + y_start,
164  x_end - x_start, y_end - y_start);
165  boxaReplaceBox(*boxa, i, simple_box);
166  }
167  pixDestroy(&img_pix);
168  }
169 }
170 
171 // Scans horizontally on x=[x_start,x_end), starting with y=*y_start,
172 // stepping y+=y_step, until y=y_end. *ystart is input/output.
173 // If the number of black pixels in a row, pix_count fits this pattern:
174 // 0 or more rows with pix_count < min_count then
175 // <= mid_width rows with min_count <= pix_count <= max_count then
176 // a row with pix_count > max_count then
177 // true is returned, and *y_start = the first y with pix_count >= min_count.
178 static bool HScanForEdge(uinT32* data, int wpl, int x_start, int x_end,
179  int min_count, int mid_width, int max_count,
180  int y_end, int y_step, int* y_start) {
181  int mid_rows = 0;
182  for (int y = *y_start; y != y_end; y += y_step) {
183  // Need pixCountPixelsInRow(pix, y, &pix_count, NULL) to count in a subset.
184  int pix_count = 0;
185  uinT32* line = data + wpl * y;
186  for (int x = x_start; x < x_end; ++x) {
187  if (GET_DATA_BIT(line, x))
188  ++pix_count;
189  }
190  if (mid_rows == 0 && pix_count < min_count)
191  continue; // In the min phase.
192  if (mid_rows == 0)
193  *y_start = y; // Save the y_start where we came out of the min phase.
194  if (pix_count > max_count)
195  return true; // Found the pattern.
196  ++mid_rows;
197  if (mid_rows > mid_width)
198  break; // Middle too big.
199  }
200  return false; // Never found max_count.
201 }
202 
203 // Scans vertically on y=[y_start,y_end), starting with x=*x_start,
204 // stepping x+=x_step, until x=x_end. *x_start is input/output.
205 // If the number of black pixels in a column, pix_count fits this pattern:
206 // 0 or more cols with pix_count < min_count then
207 // <= mid_width cols with min_count <= pix_count <= max_count then
208 // a column with pix_count > max_count then
209 // true is returned, and *x_start = the first x with pix_count >= min_count.
210 static bool VScanForEdge(uinT32* data, int wpl, int y_start, int y_end,
211  int min_count, int mid_width, int max_count,
212  int x_end, int x_step, int* x_start) {
213  int mid_cols = 0;
214  for (int x = *x_start; x != x_end; x += x_step) {
215  int pix_count = 0;
216  uinT32* line = data + y_start * wpl;
217  for (int y = y_start; y < y_end; ++y, line += wpl) {
218  if (GET_DATA_BIT(line, x))
219  ++pix_count;
220  }
221  if (mid_cols == 0 && pix_count < min_count)
222  continue; // In the min phase.
223  if (mid_cols == 0)
224  *x_start = x; // Save the place where we came out of the min phase.
225  if (pix_count > max_count)
226  return true; // found the pattern.
227  ++mid_cols;
228  if (mid_cols > mid_width)
229  break; // Middle too big.
230  }
231  return false; // Never found max_count.
232 }
233 
234 // Returns true if there is a rectangle in the source pix, such that all
235 // pixel rows and column slices outside of it have less than
236 // min_fraction of the pixels black, and within max_skew_gradient fraction
237 // of the pixels on the inside, there are at least max_fraction of the
238 // pixels black. In other words, the inside of the rectangle looks roughly
239 // rectangular, and the outside of it looks like extra bits.
240 // On return, the rectangle is defined by x_start, y_start, x_end and y_end.
241 // Note: the algorithm is iterative, allowing it to slice off pixels from
242 // one edge, allowing it to then slice off more pixels from another edge.
244  double min_fraction, double max_fraction,
245  double max_skew_gradient,
246  int* x_start, int* y_start,
247  int* x_end, int* y_end) {
248  ASSERT_HOST(pix != NULL);
249  *x_start = 0;
250  *x_end = pixGetWidth(pix);
251  *y_start = 0;
252  *y_end = pixGetHeight(pix);
253 
254  uinT32* data = pixGetData(pix);
255  int wpl = pixGetWpl(pix);
256  bool any_cut = false;
257  bool left_done = false;
258  bool right_done = false;
259  bool top_done = false;
260  bool bottom_done = false;
261  do {
262  any_cut = false;
263  // Find the top/bottom edges.
264  int width = *x_end - *x_start;
265  int min_count = static_cast<int>(width * min_fraction);
266  int max_count = static_cast<int>(width * max_fraction);
267  int edge_width = static_cast<int>(width * max_skew_gradient);
268  if (HScanForEdge(data, wpl, *x_start, *x_end, min_count, edge_width,
269  max_count, *y_end, 1, y_start) && !top_done) {
270  top_done = true;
271  any_cut = true;
272  }
273  --(*y_end);
274  if (HScanForEdge(data, wpl, *x_start, *x_end, min_count, edge_width,
275  max_count, *y_start, -1, y_end) && !bottom_done) {
276  bottom_done = true;
277  any_cut = true;
278  }
279  ++(*y_end);
280 
281  // Find the left/right edges.
282  int height = *y_end - *y_start;
283  min_count = static_cast<int>(height * min_fraction);
284  max_count = static_cast<int>(height * max_fraction);
285  edge_width = static_cast<int>(height * max_skew_gradient);
286  if (VScanForEdge(data, wpl, *y_start, *y_end, min_count, edge_width,
287  max_count, *x_end, 1, x_start) && !left_done) {
288  left_done = true;
289  any_cut = true;
290  }
291  --(*x_end);
292  if (VScanForEdge(data, wpl, *y_start, *y_end, min_count, edge_width,
293  max_count, *x_start, -1, x_end) && !right_done) {
294  right_done = true;
295  any_cut = true;
296  }
297  ++(*x_end);
298  } while (any_cut);
299 
300  // All edges must satisfy the condition of sharp gradient in pixel density
301  // in order for the full rectangle to be present.
302  return left_done && right_done && top_done && bottom_done;
303 }
304 
305 // Given an input pix, and a bounding rectangle, the sides of the rectangle
306 // are shrunk inwards until they bound any black pixels found within the
307 // original rectangle. Returns false if the rectangle contains no black
308 // pixels at all.
309 bool ImageFind::BoundsWithinRect(Pix* pix, int* x_start, int* y_start,
310  int* x_end, int* y_end) {
311  Box* input_box = boxCreate(*x_start, *y_start, *x_end - *x_start,
312  *y_end - *y_start);
313  Box* output_box = NULL;
314  pixClipBoxToForeground(pix, input_box, NULL, &output_box);
315  bool result = output_box != NULL;
316  if (result) {
317  l_int32 x, y, width, height;
318  boxGetGeometry(output_box, &x, &y, &width, &height);
319  *x_start = x;
320  *y_start = y;
321  *x_end = x + width;
322  *y_end = y + height;
323  boxDestroy(&output_box);
324  }
325  boxDestroy(&input_box);
326  return result;
327 }
328 
329 // Given a point in 3-D (RGB) space, returns the squared Euclidean distance
330 // of the point from the given line, defined by a pair of points in the 3-D
331 // (RGB) space, line1 and line2.
333  const uinT8* line2,
334  const uinT8* point) {
335  int line_vector[kRGBRMSColors];
336  int point_vector[kRGBRMSColors];
337  for (int i = 0; i < kRGBRMSColors; ++i) {
338  line_vector[i] = static_cast<int>(line2[i]) - static_cast<int>(line1[i]);
339  point_vector[i] = static_cast<int>(point[i]) - static_cast<int>(line1[i]);
340  }
341  line_vector[L_ALPHA_CHANNEL] = 0;
342  // Now the cross product in 3d.
343  int cross[kRGBRMSColors];
344  cross[COLOR_RED] = line_vector[COLOR_GREEN] * point_vector[COLOR_BLUE]
345  - line_vector[COLOR_BLUE] * point_vector[COLOR_GREEN];
346  cross[COLOR_GREEN] = line_vector[COLOR_BLUE] * point_vector[COLOR_RED]
347  - line_vector[COLOR_RED] * point_vector[COLOR_BLUE];
348  cross[COLOR_BLUE] = line_vector[COLOR_RED] * point_vector[COLOR_GREEN]
349  - line_vector[COLOR_GREEN] * point_vector[COLOR_RED];
350  cross[L_ALPHA_CHANNEL] = 0;
351  // Now the sums of the squares.
352  double cross_sq = 0.0;
353  double line_sq = 0.0;
354  for (int j = 0; j < kRGBRMSColors; ++j) {
355  cross_sq += static_cast<double>(cross[j]) * cross[j];
356  line_sq += static_cast<double>(line_vector[j]) * line_vector[j];
357  }
358  if (line_sq == 0.0) {
359  return 0.0;
360  }
361  return cross_sq / line_sq; // This is the squared distance.
362 }
363 
364 
365 // Returns the leptonica combined code for the given RGB triplet.
367  l_uint32 result;
368  composeRGBPixel(r, g, b, &result);
369  return result;
370 }
371 
372 // Returns the input value clipped to a uinT8.
374  if (pixel < 0.0)
375  return 0;
376  else if (pixel >= 255.0)
377  return 255;
378  return static_cast<uinT8>(pixel);
379 }
380 
381 // Computes the light and dark extremes of color in the given rectangle of
382 // the given pix, which is factor smaller than the coordinate system in rect.
383 // The light and dark points are taken to be the upper and lower 8th-ile of
384 // the most deviant of R, G and B. The value of the other 2 channels are
385 // computed by linear fit against the most deviant.
386 // The colors of the two points are returned in color1 and color2, with the
387 // alpha channel set to a scaled mean rms of the fits.
388 // If color_map1 is not null then it and color_map2 get rect pasted in them
389 // with the two calculated colors, and rms map gets a pasted rect of the rms.
390 // color_map1, color_map2 and rms_map are assumed to be the same scale as pix.
391 void ImageFind::ComputeRectangleColors(const TBOX& rect, Pix* pix, int factor,
392  Pix* color_map1, Pix* color_map2,
393  Pix* rms_map,
394  uinT8* color1, uinT8* color2) {
395  ASSERT_HOST(pix != NULL && pixGetDepth(pix) == 32);
396  // Pad the rectangle outwards by 2 (scaled) pixels if possible to get more
397  // background.
398  int width = pixGetWidth(pix);
399  int height = pixGetHeight(pix);
400  int left_pad = MAX(rect.left() - 2 * factor, 0) / factor;
401  int top_pad = (rect.top() + 2 * factor + (factor - 1)) / factor;
402  top_pad = MIN(height, top_pad);
403  int right_pad = (rect.right() + 2 * factor + (factor - 1)) / factor;
404  right_pad = MIN(width, right_pad);
405  int bottom_pad = MAX(rect.bottom() - 2 * factor, 0) / factor;
406  int width_pad = right_pad - left_pad;
407  int height_pad = top_pad - bottom_pad;
408  if (width_pad < 1 || height_pad < 1 || width_pad + height_pad < 4)
409  return;
410  // Now crop the pix to the rectangle.
411  Box* scaled_box = boxCreate(left_pad, height - top_pad,
412  width_pad, height_pad);
413  Pix* scaled = pixClipRectangle(pix, scaled_box, NULL);
414 
415  // Compute stats over the whole image.
416  STATS red_stats(0, 256);
417  STATS green_stats(0, 256);
418  STATS blue_stats(0, 256);
419  uinT32* data = pixGetData(scaled);
420  ASSERT_HOST(pixGetWpl(scaled) == width_pad);
421  for (int y = 0; y < height_pad; ++y) {
422  for (int x = 0; x < width_pad; ++x, ++data) {
423  int r = GET_DATA_BYTE(data, COLOR_RED);
424  int g = GET_DATA_BYTE(data, COLOR_GREEN);
425  int b = GET_DATA_BYTE(data, COLOR_BLUE);
426  red_stats.add(r, 1);
427  green_stats.add(g, 1);
428  blue_stats.add(b, 1);
429  }
430  }
431  // Find the RGB component with the greatest 8th-ile-range.
432  // 8th-iles are used instead of quartiles to get closer to the true
433  // foreground color, which is going to be faint at best because of the
434  // pre-scaling of the input image.
435  int best_l8 = static_cast<int>(red_stats.ile(0.125f));
436  int best_u8 = static_cast<int>(ceil(red_stats.ile(0.875f)));
437  int best_i8r = best_u8 - best_l8;
438  int x_color = COLOR_RED;
439  int y1_color = COLOR_GREEN;
440  int y2_color = COLOR_BLUE;
441  int l8 = static_cast<int>(green_stats.ile(0.125f));
442  int u8 = static_cast<int>(ceil(green_stats.ile(0.875f)));
443  if (u8 - l8 > best_i8r) {
444  best_i8r = u8 - l8;
445  best_l8 = l8;
446  best_u8 = u8;
447  x_color = COLOR_GREEN;
448  y1_color = COLOR_RED;
449  }
450  l8 = static_cast<int>(blue_stats.ile(0.125f));
451  u8 = static_cast<int>(ceil(blue_stats.ile(0.875f)));
452  if (u8 - l8 > best_i8r) {
453  best_i8r = u8 - l8;
454  best_l8 = l8;
455  best_u8 = u8;
456  x_color = COLOR_BLUE;
457  y1_color = COLOR_GREEN;
458  y2_color = COLOR_RED;
459  }
460  if (best_i8r >= kMinColorDifference) {
461  LLSQ line1;
462  LLSQ line2;
463  uinT32* data = pixGetData(scaled);
464  for (int im_y = 0; im_y < height_pad; ++im_y) {
465  for (int im_x = 0; im_x < width_pad; ++im_x, ++data) {
466  int x = GET_DATA_BYTE(data, x_color);
467  int y1 = GET_DATA_BYTE(data, y1_color);
468  int y2 = GET_DATA_BYTE(data, y2_color);
469  line1.add(x, y1);
470  line2.add(x, y2);
471  }
472  }
473  double m1 = line1.m();
474  double c1 = line1.c(m1);
475  double m2 = line2.m();
476  double c2 = line2.c(m2);
477  double rms = line1.rms(m1, c1) + line2.rms(m2, c2);
478  rms *= kRMSFitScaling;
479  // Save the results.
480  color1[x_color] = ClipToByte(best_l8);
481  color1[y1_color] = ClipToByte(m1 * best_l8 + c1 + 0.5);
482  color1[y2_color] = ClipToByte(m2 * best_l8 + c2 + 0.5);
483  color1[L_ALPHA_CHANNEL] = ClipToByte(rms);
484  color2[x_color] = ClipToByte(best_u8);
485  color2[y1_color] = ClipToByte(m1 * best_u8 + c1 + 0.5);
486  color2[y2_color] = ClipToByte(m2 * best_u8 + c2 + 0.5);
487  color2[L_ALPHA_CHANNEL] = ClipToByte(rms);
488  } else {
489  // There is only one color.
490  color1[COLOR_RED] = ClipToByte(red_stats.median());
491  color1[COLOR_GREEN] = ClipToByte(green_stats.median());
492  color1[COLOR_BLUE] = ClipToByte(blue_stats.median());
493  color1[L_ALPHA_CHANNEL] = 0;
494  memcpy(color2, color1, 4);
495  }
496  if (color_map1 != NULL) {
497  pixSetInRectArbitrary(color_map1, scaled_box,
498  ComposeRGB(color1[COLOR_RED],
499  color1[COLOR_GREEN],
500  color1[COLOR_BLUE]));
501  pixSetInRectArbitrary(color_map2, scaled_box,
502  ComposeRGB(color2[COLOR_RED],
503  color2[COLOR_GREEN],
504  color2[COLOR_BLUE]));
505  pixSetInRectArbitrary(rms_map, scaled_box, color1[L_ALPHA_CHANNEL]);
506  }
507  pixDestroy(&scaled);
508  boxDestroy(&scaled_box);
509 }
510 
511 // ================ CUTTING POLYGONAL IMAGES FROM A RECTANGLE ================
512 // The following functions are responsible for cutting a polygonal image from
513 // a rectangle: CountPixelsInRotatedBox, AttemptToShrinkBox, CutChunkFromParts
514 // with DivideImageIntoParts as the master.
515 // Problem statement:
516 // We start with a single connected component from the image mask: we get
517 // a Pix of the component, and its location on the page (im_box).
518 // The objective of cutting a polygonal image from its rectangle is to avoid
519 // interfering text, but not text that completely overlaps the image.
520 // ------------------------------ ------------------------------
521 // | Single input partition | | 1 Cut up output partitions |
522 // | | ------------------------------
523 // Av|oid | Avoid | |
524 // | | |________________________|
525 // Int|erfering | Interfering | |
526 // | | _____|__________________|
527 // T|ext | Text | |
528 // | Text-on-image | | Text-on-image |
529 // ------------------------------ --------------------------
530 // DivideImageIntoParts does this by building a ColPartition_LIST (not in the
531 // grid) with each ColPartition representing one of the rectangles needed,
532 // starting with a single rectangle for the whole image component, and cutting
533 // bits out of it with CutChunkFromParts as needed to avoid text. The output
534 // ColPartitions are supposed to be ordered from top to bottom.
535 
536 // The problem is complicated by the fact that we have rotated the coordinate
537 // system to make text lines horizontal, so if we need to look at the component
538 // image, we have to rotate the coordinates. Throughout the functions in this
539 // section im_box is the rectangle representing the image component in the
540 // rotated page coordinates (where we are building our output ColPartitions),
541 // rotation is the rotation that we used to get there, and rerotation is the
542 // rotation required to get back to original page image coordinates.
543 // To get to coordinates in the component image, pix, we rotate the im_box,
544 // the point we want to locate, and subtract the rotated point from the top-left
545 // of the rotated im_box.
546 // im_box is therefore essential to calculating coordinates within the pix.
547 
548 // Returns true if there are no black pixels in between the boxes.
549 // The im_box must represent the bounding box of the pix in tesseract
550 // coordinates, which may be negative, due to rotations to make the textlines
551 // horizontal. The boxes are rotated by rotation, which should undo such
552 // rotations, before mapping them onto the pix.
553 bool ImageFind::BlankImageInBetween(const TBOX& box1, const TBOX& box2,
554  const TBOX& im_box, const FCOORD& rotation,
555  Pix* pix) {
556  TBOX search_box(box1);
557  search_box += box2;
558  if (box1.x_gap(box2) >= box1.y_gap(box2)) {
559  if (box1.x_gap(box2) <= 0)
560  return true;
561  search_box.set_left(MIN(box1.right(), box2.right()));
562  search_box.set_right(MAX(box1.left(), box2.left()));
563  } else {
564  if (box1.y_gap(box2) <= 0)
565  return true;
566  search_box.set_top(MAX(box1.bottom(), box2.bottom()));
567  search_box.set_bottom(MIN(box1.top(), box2.top()));
568  }
569  return CountPixelsInRotatedBox(search_box, im_box, rotation, pix) == 0;
570 }
571 
572 // Returns the number of pixels in box in the pix.
573 // rotation, pix and im_box are defined in the large comment above.
575  const FCOORD& rotation, Pix* pix) {
576  // Intersect it with the image box.
577  box &= im_box; // This is in-place box intersection.
578  if (box.null_box())
579  return 0;
580  box.rotate(rotation);
581  TBOX rotated_im_box(im_box);
582  rotated_im_box.rotate(rotation);
583  Pix* rect_pix = pixCreate(box.width(), box.height(), 1);
584  pixRasterop(rect_pix, 0, 0, box.width(), box.height(),
585  PIX_SRC, pix, box.left() - rotated_im_box.left(),
586  rotated_im_box.top() - box.top());
587  l_int32 result;
588  pixCountPixels(rect_pix, &result, NULL);
589  pixDestroy(&rect_pix);
590  return result;
591 }
592 
593 // The box given by slice contains some black pixels, but not necessarily
594 // over the whole box. Shrink the x bounds of slice, but not the y bounds
595 // until there is at least one black pixel in the outermost columns.
596 // rotation, rerotation, pix and im_box are defined in the large comment above.
597 static void AttemptToShrinkBox(const FCOORD& rotation, const FCOORD& rerotation,
598  const TBOX& im_box, Pix* pix, TBOX* slice) {
599  TBOX rotated_box(*slice);
600  rotated_box.rotate(rerotation);
601  TBOX rotated_im_box(im_box);
602  rotated_im_box.rotate(rerotation);
603  int left = rotated_box.left() - rotated_im_box.left();
604  int right = rotated_box.right() - rotated_im_box.left();
605  int top = rotated_im_box.top() - rotated_box.top();
606  int bottom = rotated_im_box.top() - rotated_box.bottom();
607  ImageFind::BoundsWithinRect(pix, &left, &top, &right, &bottom);
608  top = rotated_im_box.top() - top;
609  bottom = rotated_im_box.top() - bottom;
610  left += rotated_im_box.left();
611  right += rotated_im_box.left();
612  rotated_box.set_to_given_coords(left, bottom, right, top);
613  rotated_box.rotate(rotation);
614  slice->set_left(rotated_box.left());
615  slice->set_right(rotated_box.right());
616 }
617 
618 // The meat of cutting a polygonal image around text.
619 // This function covers the general case of cutting a box out of a box
620 // as shown:
621 // Input Output
622 // ------------------------------ ------------------------------
623 // | Single input partition | | 1 Cut up output partitions |
624 // | | ------------------------------
625 // | ---------- | --------- ----------
626 // | | box | | | 2 | box | 3 |
627 // | | | | | | is cut | |
628 // | ---------- | --------- out ----------
629 // | | ------------------------------
630 // | | | 4 |
631 // ------------------------------ ------------------------------
632 // In the context that this function is used, at most 3 of the above output
633 // boxes will be created, as the overlapping box is never contained by the
634 // input.
635 // The above cutting operation is executed for each element of part_list that
636 // is overlapped by the input box. Each modified ColPartition is replaced
637 // in place in the list by the output of the cutting operation in the order
638 // shown above, so iff no holes are ever created, the output will be in
639 // top-to-bottom order, but in extreme cases, hole creation is possible.
640 // In such cases, the output order may cause strange block polygons.
641 // rotation, rerotation, pix and im_box are defined in the large comment above.
642 static void CutChunkFromParts(const TBOX& box, const TBOX& im_box,
643  const FCOORD& rotation, const FCOORD& rerotation,
644  Pix* pix, ColPartition_LIST* part_list) {
645  ASSERT_HOST(!part_list->empty());
646  ColPartition_IT part_it(part_list);
647  do {
648  ColPartition* part = part_it.data();
649  TBOX part_box = part->bounding_box();
650  if (part_box.overlap(box)) {
651  // This part must be cut and replaced with the remains. There are
652  // upto 4 pieces to be made. Start with the first one and use
653  // add_before_stay_put. For each piece if it has no black pixels
654  // left, just don't make the box.
655  // Above box.
656  if (box.top() < part_box.top()) {
657  TBOX slice(part_box);
658  slice.set_bottom(box.top());
659  if (ImageFind::CountPixelsInRotatedBox(slice, im_box, rerotation,
660  pix) > 0) {
661  AttemptToShrinkBox(rotation, rerotation, im_box, pix, &slice);
662  part_it.add_before_stay_put(
664  BTFT_NONTEXT));
665  }
666  }
667  // Left of box.
668  if (box.left() > part_box.left()) {
669  TBOX slice(part_box);
670  slice.set_right(box.left());
671  if (box.top() < part_box.top())
672  slice.set_top(box.top());
673  if (box.bottom() > part_box.bottom())
674  slice.set_bottom(box.bottom());
675  if (ImageFind::CountPixelsInRotatedBox(slice, im_box, rerotation,
676  pix) > 0) {
677  AttemptToShrinkBox(rotation, rerotation, im_box, pix, &slice);
678  part_it.add_before_stay_put(
680  BTFT_NONTEXT));
681  }
682  }
683  // Right of box.
684  if (box.right() < part_box.right()) {
685  TBOX slice(part_box);
686  slice.set_left(box.right());
687  if (box.top() < part_box.top())
688  slice.set_top(box.top());
689  if (box.bottom() > part_box.bottom())
690  slice.set_bottom(box.bottom());
691  if (ImageFind::CountPixelsInRotatedBox(slice, im_box, rerotation,
692  pix) > 0) {
693  AttemptToShrinkBox(rotation, rerotation, im_box, pix, &slice);
694  part_it.add_before_stay_put(
696  BTFT_NONTEXT));
697  }
698  }
699  // Below box.
700  if (box.bottom() > part_box.bottom()) {
701  TBOX slice(part_box);
702  slice.set_top(box.bottom());
703  if (ImageFind::CountPixelsInRotatedBox(slice, im_box, rerotation,
704  pix) > 0) {
705  AttemptToShrinkBox(rotation, rerotation, im_box, pix, &slice);
706  part_it.add_before_stay_put(
708  BTFT_NONTEXT));
709  }
710  }
711  part->DeleteBoxes();
712  delete part_it.extract();
713  }
714  part_it.forward();
715  } while (!part_it.at_first());
716 }
717 
718 // Starts with the bounding box of the image component and cuts it up
719 // so that it doesn't intersect text where possible.
720 // Strong fully contained horizontal text is marked as text on image,
721 // and does not cause a division of the image.
722 // For more detail see the large comment above on cutting polygonal images
723 // from a rectangle.
724 // rotation, rerotation, pix and im_box are defined in the large comment above.
725 static void DivideImageIntoParts(const TBOX& im_box, const FCOORD& rotation,
726  const FCOORD& rerotation, Pix* pix,
727  ColPartitionGridSearch* rectsearch,
728  ColPartition_LIST* part_list) {
729  // Add the full im_box partition to the list to begin with.
730  ColPartition* pix_part = ColPartition::FakePartition(im_box, PT_UNKNOWN,
732  BTFT_NONTEXT);
733  ColPartition_IT part_it(part_list);
734  part_it.add_after_then_move(pix_part);
735 
736  rectsearch->StartRectSearch(im_box);
737  ColPartition* part;
738  while ((part = rectsearch->NextRectSearch()) != NULL) {
739  TBOX part_box = part->bounding_box();
740  if (part_box.contains(im_box) && part->flow() >= BTFT_CHAIN) {
741  // This image is completely covered by an existing text partition.
742  for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) {
743  ColPartition* pix_part = part_it.extract();
744  pix_part->DeleteBoxes();
745  delete pix_part;
746  }
747  } else if (part->flow() == BTFT_STRONG_CHAIN) {
748  // Text intersects the box.
749  TBOX overlap_box = part_box.intersection(im_box);
750  // Intersect it with the image box.
751  int black_area = ImageFind::CountPixelsInRotatedBox(overlap_box, im_box,
752  rerotation, pix);
753  if (black_area * 2 < part_box.area() || !im_box.contains(part_box)) {
754  // Eat a piece out of the image.
755  // Pad it so that pieces eaten out look decent.
756  int padding = part->blob_type() == BRT_VERT_TEXT
757  ? part_box.width() : part_box.height();
758  part_box.set_top(part_box.top() + padding / 2);
759  part_box.set_bottom(part_box.bottom() - padding / 2);
760  CutChunkFromParts(part_box, im_box, rotation, rerotation,
761  pix, part_list);
762  } else {
763  // Strong overlap with the black area, so call it text on image.
764  part->set_flow(BTFT_TEXT_ON_IMAGE);
765  }
766  }
767  if (part_list->empty()) {
768  break;
769  }
770  }
771 }
772 
773 // Search for the rightmost text that overlaps vertically and is to the left
774 // of the given box, but within the given left limit.
775 static int ExpandImageLeft(const TBOX& box, int left_limit,
776  ColPartitionGrid* part_grid) {
777  ColPartitionGridSearch search(part_grid);
778  ColPartition* part;
779  // Search right to left for any text that overlaps.
780  search.StartSideSearch(box.left(), box.bottom(), box.top());
781  while ((part = search.NextSideSearch(true)) != NULL) {
782  if (part->flow() == BTFT_STRONG_CHAIN || part->flow() == BTFT_CHAIN) {
783  const TBOX& part_box(part->bounding_box());
784  if (part_box.y_gap(box) < 0) {
785  if (part_box.right() > left_limit && part_box.right() < box.left())
786  left_limit = part_box.right();
787  break;
788  }
789  }
790  }
791  if (part != NULL) {
792  // Search for the nearest text up to the one we already found.
793  TBOX search_box(left_limit, box.bottom(), box.left(), box.top());
794  search.StartRectSearch(search_box);
795  while ((part = search.NextRectSearch()) != NULL) {
796  if (part->flow() == BTFT_STRONG_CHAIN || part->flow() == BTFT_CHAIN) {
797  const TBOX& part_box(part->bounding_box());
798  if (part_box.y_gap(box) < 0) {
799  if (part_box.right() > left_limit && part_box.right() < box.left()) {
800  left_limit = part_box.right();
801  }
802  }
803  }
804  }
805  }
806  return left_limit;
807 }
808 
809 // Search for the leftmost text that overlaps vertically and is to the right
810 // of the given box, but within the given right limit.
811 static int ExpandImageRight(const TBOX& box, int right_limit,
812  ColPartitionGrid* part_grid) {
813  ColPartitionGridSearch search(part_grid);
814  ColPartition* part;
815  // Search left to right for any text that overlaps.
816  search.StartSideSearch(box.right(), box.bottom(), box.top());
817  while ((part = search.NextSideSearch(false)) != NULL) {
818  if (part->flow() == BTFT_STRONG_CHAIN || part->flow() == BTFT_CHAIN) {
819  const TBOX& part_box(part->bounding_box());
820  if (part_box.y_gap(box) < 0) {
821  if (part_box.left() < right_limit && part_box.left() > box.right())
822  right_limit = part_box.left();
823  break;
824  }
825  }
826  }
827  if (part != NULL) {
828  // Search for the nearest text up to the one we already found.
829  TBOX search_box(box.left(), box.bottom(), right_limit, box.top());
830  search.StartRectSearch(search_box);
831  while ((part = search.NextRectSearch()) != NULL) {
832  if (part->flow() == BTFT_STRONG_CHAIN || part->flow() == BTFT_CHAIN) {
833  const TBOX& part_box(part->bounding_box());
834  if (part_box.y_gap(box) < 0) {
835  if (part_box.left() < right_limit && part_box.left() > box.right())
836  right_limit = part_box.left();
837  }
838  }
839  }
840  }
841  return right_limit;
842 }
843 
844 // Search for the topmost text that overlaps horizontally and is below
845 // the given box, but within the given bottom limit.
846 static int ExpandImageBottom(const TBOX& box, int bottom_limit,
847  ColPartitionGrid* part_grid) {
848  ColPartitionGridSearch search(part_grid);
849  ColPartition* part;
850  // Search right to left for any text that overlaps.
851  search.StartVerticalSearch(box.left(), box.right(), box.bottom());
852  while ((part = search.NextVerticalSearch(true)) != NULL) {
853  if (part->flow() == BTFT_STRONG_CHAIN || part->flow() == BTFT_CHAIN) {
854  const TBOX& part_box(part->bounding_box());
855  if (part_box.x_gap(box) < 0) {
856  if (part_box.top() > bottom_limit && part_box.top() < box.bottom())
857  bottom_limit = part_box.top();
858  break;
859  }
860  }
861  }
862  if (part != NULL) {
863  // Search for the nearest text up to the one we already found.
864  TBOX search_box(box.left(), bottom_limit, box.right(), box.bottom());
865  search.StartRectSearch(search_box);
866  while ((part = search.NextRectSearch()) != NULL) {
867  if (part->flow() == BTFT_STRONG_CHAIN || part->flow() == BTFT_CHAIN) {
868  const TBOX& part_box(part->bounding_box());
869  if (part_box.x_gap(box) < 0) {
870  if (part_box.top() > bottom_limit && part_box.top() < box.bottom())
871  bottom_limit = part_box.top();
872  }
873  }
874  }
875  }
876  return bottom_limit;
877 }
878 
879 // Search for the bottommost text that overlaps horizontally and is above
880 // the given box, but within the given top limit.
881 static int ExpandImageTop(const TBOX& box, int top_limit,
882  ColPartitionGrid* part_grid) {
883  ColPartitionGridSearch search(part_grid);
884  ColPartition* part;
885  // Search right to left for any text that overlaps.
886  search.StartVerticalSearch(box.left(), box.right(), box.top());
887  while ((part = search.NextVerticalSearch(false)) != NULL) {
888  if (part->flow() == BTFT_STRONG_CHAIN || part->flow() == BTFT_CHAIN) {
889  const TBOX& part_box(part->bounding_box());
890  if (part_box.x_gap(box) < 0) {
891  if (part_box.bottom() < top_limit && part_box.bottom() > box.top())
892  top_limit = part_box.bottom();
893  break;
894  }
895  }
896  }
897  if (part != NULL) {
898  // Search for the nearest text up to the one we already found.
899  TBOX search_box(box.left(), box.top(), box.right(), top_limit);
900  search.StartRectSearch(search_box);
901  while ((part = search.NextRectSearch()) != NULL) {
902  if (part->flow() == BTFT_STRONG_CHAIN || part->flow() == BTFT_CHAIN) {
903  const TBOX& part_box(part->bounding_box());
904  if (part_box.x_gap(box) < 0) {
905  if (part_box.bottom() < top_limit && part_box.bottom() > box.top())
906  top_limit = part_box.bottom();
907  }
908  }
909  }
910  }
911  return top_limit;
912 }
913 
914 // Expands the image box in the given direction until it hits text,
915 // limiting the expansion to the given limit box, returning the result
916 // in the expanded box, and
917 // returning the increase in area resulting from the expansion.
918 static int ExpandImageDir(BlobNeighbourDir dir, const TBOX& im_box,
919  const TBOX& limit_box,
920  ColPartitionGrid* part_grid, TBOX* expanded_box) {
921  *expanded_box = im_box;
922  switch (dir) {
923  case BND_LEFT:
924  expanded_box->set_left(ExpandImageLeft(im_box, limit_box.left(),
925  part_grid));
926  break;
927  case BND_RIGHT:
928  expanded_box->set_right(ExpandImageRight(im_box, limit_box.right(),
929  part_grid));
930  break;
931  case BND_ABOVE:
932  expanded_box->set_top(ExpandImageTop(im_box, limit_box.top(), part_grid));
933  break;
934  case BND_BELOW:
935  expanded_box->set_bottom(ExpandImageBottom(im_box, limit_box.bottom(),
936  part_grid));
937  break;
938  default:
939  return 0;
940  }
941  return expanded_box->area() - im_box.area();
942 }
943 
944 // Expands the image partition into any non-text until it touches text.
945 // The expansion proceeds in the order of increasing increase in area
946 // as a heuristic to find the best rectangle by expanding in the most
947 // constrained direction first.
948 static void MaximalImageBoundingBox(ColPartitionGrid* part_grid, TBOX* im_box) {
949  bool dunnit[BND_COUNT];
950  memset(dunnit, 0, sizeof(dunnit));
951  TBOX limit_box(part_grid->bleft().x(), part_grid->bleft().y(),
952  part_grid->tright().x(), part_grid->tright().y());
953  TBOX text_box(*im_box);
954  for (int iteration = 0; iteration < BND_COUNT; ++iteration) {
955  // Find the direction with least area increase.
956  int best_delta = -1;
957  BlobNeighbourDir best_dir = BND_LEFT;
958  TBOX expanded_boxes[BND_COUNT];
959  for (int dir = 0; dir < BND_COUNT; ++dir) {
960  BlobNeighbourDir bnd = static_cast<BlobNeighbourDir>(dir);
961  if (!dunnit[bnd]) {
962  TBOX expanded_box;
963  int area_delta = ExpandImageDir(bnd, text_box, limit_box, part_grid,
964  &expanded_boxes[bnd]);
965  if (best_delta < 0 || area_delta < best_delta) {
966  best_delta = area_delta;
967  best_dir = bnd;
968  }
969  }
970  }
971  // Run the best and remember the direction.
972  dunnit[best_dir] = true;
973  text_box = expanded_boxes[best_dir];
974  }
975  *im_box = text_box;
976 }
977 
978 // Helper deletes the given partition but first marks up all the blobs as
979 // noise, so they get deleted later, and disowns them.
980 // If the initial type of the partition is image, then it actually deletes
981 // the blobs, as the partition owns them in that case.
982 static void DeletePartition(ColPartition* part) {
983  BlobRegionType type = part->blob_type();
984  if (type == BRT_RECTIMAGE || type == BRT_POLYIMAGE) {
985  // The partition owns the boxes of these types, so just delete them.
986  part->DeleteBoxes(); // From a previous iteration.
987  } else {
988  // Once marked, the blobs will be swept up by TidyBlobs.
989  part->set_flow(BTFT_NONTEXT);
990  part->set_blob_type(BRT_NOISE);
991  part->SetBlobTypes();
992  part->DisownBoxes(); // Created before FindImagePartitions.
993  }
994  delete part;
995 }
996 
997 // The meat of joining fragmented images and consuming ColPartitions of
998 // uncertain type.
999 // *part_ptr is an input/output BRT_RECTIMAGE ColPartition that is to be
1000 // expanded to consume overlapping and nearby ColPartitions of uncertain type
1001 // and other BRT_RECTIMAGE partitions, but NOT to be expanded beyond
1002 // max_image_box. *part_ptr is NOT in the part_grid.
1003 // rectsearch is already constructed on the part_grid, and is used for
1004 // searching for overlapping and nearby ColPartitions.
1005 // ExpandImageIntoParts is called iteratively until it returns false. Each
1006 // time it absorbs the nearest non-contained candidate, and everything that
1007 // is fully contained within part_ptr's bounding box.
1008 // TODO(rays) what if it just eats everything inside max_image_box in one go?
1009 static bool ExpandImageIntoParts(const TBOX& max_image_box,
1010  ColPartitionGridSearch* rectsearch,
1011  ColPartitionGrid* part_grid,
1012  ColPartition** part_ptr) {
1013  ColPartition* image_part = *part_ptr;
1014  TBOX im_part_box = image_part->bounding_box();
1015  if (textord_tabfind_show_images > 1) {
1016  tprintf("Searching for merge with image part:");
1017  im_part_box.print();
1018  tprintf("Text box=");
1019  max_image_box.print();
1020  }
1021  rectsearch->StartRectSearch(max_image_box);
1022  ColPartition* part;
1023  ColPartition* best_part = NULL;
1024  int best_dist = 0;
1025  while ((part = rectsearch->NextRectSearch()) != NULL) {
1026  if (textord_tabfind_show_images > 1) {
1027  tprintf("Considering merge with part:");
1028  part->Print();
1029  if (im_part_box.contains(part->bounding_box()))
1030  tprintf("Fully contained\n");
1031  else if (!max_image_box.contains(part->bounding_box()))
1032  tprintf("Not within text box\n");
1033  else if (part->flow() == BTFT_STRONG_CHAIN)
1034  tprintf("Too strong text\n");
1035  else
1036  tprintf("Real candidate\n");
1037  }
1038  if (part->flow() == BTFT_STRONG_CHAIN ||
1039  part->flow() == BTFT_TEXT_ON_IMAGE ||
1040  part->blob_type() == BRT_POLYIMAGE)
1041  continue;
1042  TBOX box = part->bounding_box();
1043  if (max_image_box.contains(box) && part->blob_type() != BRT_NOISE) {
1044  if (im_part_box.contains(box)) {
1045  // Eat it completely.
1046  rectsearch->RemoveBBox();
1047  DeletePartition(part);
1048  continue;
1049  }
1050  int x_dist = MAX(0, box.x_gap(im_part_box));
1051  int y_dist = MAX(0, box.y_gap(im_part_box));
1052  int dist = x_dist * x_dist + y_dist * y_dist;
1053  if (dist > box.area() || dist > im_part_box.area())
1054  continue; // Not close enough.
1055  if (best_part == NULL || dist < best_dist) {
1056  // We keep the nearest qualifier, which is not necessarily the nearest.
1057  best_part = part;
1058  best_dist = dist;
1059  }
1060  }
1061  }
1062  if (best_part != NULL) {
1063  // It needs expanding. We can do it without touching text.
1064  TBOX box = best_part->bounding_box();
1065  if (textord_tabfind_show_images > 1) {
1066  tprintf("Merging image part:");
1067  im_part_box.print();
1068  tprintf("with part:");
1069  box.print();
1070  }
1071  im_part_box += box;
1072  *part_ptr = ColPartition::FakePartition(im_part_box, PT_UNKNOWN,
1073  BRT_RECTIMAGE,
1074  BTFT_NONTEXT);
1075  DeletePartition(image_part);
1076  part_grid->RemoveBBox(best_part);
1077  DeletePartition(best_part);
1078  rectsearch->RepositionIterator();
1079  return true;
1080  }
1081  return false;
1082 }
1083 
1084 // Helper function to compute the overlap area between the box and the
1085 // given list of partitions.
1086 static int IntersectArea(const TBOX& box, ColPartition_LIST* part_list) {
1087  int intersect_area = 0;
1088  ColPartition_IT part_it(part_list);
1089  // Iterate the parts and subtract intersecting area.
1090  for (part_it.mark_cycle_pt(); !part_it.cycled_list();
1091  part_it.forward()) {
1092  ColPartition* image_part = part_it.data();
1093  TBOX intersect = box.intersection(image_part->bounding_box());
1094  intersect_area += intersect.area();
1095  }
1096  return intersect_area;
1097 }
1098 
1099 // part_list is a set of ColPartitions representing a polygonal image, and
1100 // im_box is the union of the bounding boxes of all the parts in part_list.
1101 // Tests whether part is to be consumed by the polygonal image.
1102 // Returns true if part is weak text and more than half of its area is
1103 // intersected by parts from the part_list, and it is contained within im_box.
1104 static bool TestWeakIntersectedPart(const TBOX& im_box,
1105  ColPartition_LIST* part_list,
1106  ColPartition* part) {
1107  if (part->flow() < BTFT_STRONG_CHAIN) {
1108  // A weak partition intersects the box.
1109  TBOX part_box = part->bounding_box();
1110  if (im_box.contains(part_box)) {
1111  int area = part_box.area();
1112  int intersect_area = IntersectArea(part_box, part_list);
1113  if (area < 2 * intersect_area) {
1114  return true;
1115  }
1116  }
1117  }
1118  return false;
1119 }
1120 
1121 // A rectangular or polygonal image has been completed, in part_list, bounding
1122 // box in im_box. We want to eliminate weak text or other uncertain partitions
1123 // (basically anything that is not BRT_STRONG_CHAIN or better) from both the
1124 // part_grid and the big_parts list that are contained within im_box and
1125 // overlapped enough by the possibly polygonal image.
1126 static void EliminateWeakParts(const TBOX& im_box,
1127  ColPartitionGrid* part_grid,
1128  ColPartition_LIST* big_parts,
1129  ColPartition_LIST* part_list) {
1130  ColPartitionGridSearch rectsearch(part_grid);
1131  ColPartition* part;
1132  rectsearch.StartRectSearch(im_box);
1133  while ((part = rectsearch.NextRectSearch()) != NULL) {
1134  if (TestWeakIntersectedPart(im_box, part_list, part)) {
1135  BlobRegionType type = part->blob_type();
1136  if (type == BRT_POLYIMAGE || type == BRT_RECTIMAGE) {
1137  rectsearch.RemoveBBox();
1138  DeletePartition(part);
1139  } else {
1140  // The part is mostly covered, so mark it. Non-image partitions are
1141  // kept hanging around to mark the image for pass2
1142  part->set_flow(BTFT_NONTEXT);
1143  part->set_blob_type(BRT_NOISE);
1144  part->SetBlobTypes();
1145  }
1146  }
1147  }
1148  ColPartition_IT big_it(big_parts);
1149  for (big_it.mark_cycle_pt(); !big_it.cycled_list(); big_it.forward()) {
1150  part = big_it.data();
1151  if (TestWeakIntersectedPart(im_box, part_list, part)) {
1152  // Once marked, the blobs will be swept up by TidyBlobs.
1153  DeletePartition(big_it.extract());
1154  }
1155  }
1156 }
1157 
1158 // Helper scans for good text partitions overlapping the given box.
1159 // If there are no good text partitions overlapping an expanded box, then
1160 // the box is expanded, otherwise, the original box is returned.
1161 // If good text overlaps the box, true is returned.
1162 static bool ScanForOverlappingText(ColPartitionGrid* part_grid, TBOX* box) {
1163  ColPartitionGridSearch rectsearch(part_grid);
1164  TBOX padded_box(*box);
1165  padded_box.pad(kNoisePadding, kNoisePadding);
1166  rectsearch.StartRectSearch(padded_box);
1167  ColPartition* part;
1168  bool any_text_in_padded_rect = false;
1169  while ((part = rectsearch.NextRectSearch()) != NULL) {
1170  if (part->flow() == BTFT_CHAIN ||
1171  part->flow() == BTFT_STRONG_CHAIN) {
1172  // Text intersects the box.
1173  any_text_in_padded_rect = true;
1174  TBOX part_box = part->bounding_box();
1175  if (box->overlap(part_box)) {
1176  return true;
1177  }
1178  }
1179  }
1180  if (!any_text_in_padded_rect)
1181  *box = padded_box;
1182  return false;
1183 }
1184 
1185 // Renders the boxes of image parts from the supplied list onto the image_pix,
1186 // except where they interfere with existing strong text in the part_grid,
1187 // and then deletes them.
1188 // Box coordinates are rotated by rerotate to match the image.
1189 static void MarkAndDeleteImageParts(const FCOORD& rerotate,
1190  ColPartitionGrid* part_grid,
1191  ColPartition_LIST* image_parts,
1192  Pix* image_pix) {
1193  if (image_pix == NULL)
1194  return;
1195  int imageheight = pixGetHeight(image_pix);
1196  ColPartition_IT part_it(image_parts);
1197  for (; !part_it.empty(); part_it.forward()) {
1198  ColPartition* part = part_it.extract();
1199  TBOX part_box = part->bounding_box();
1200  BlobRegionType type = part->blob_type();
1201  if (!ScanForOverlappingText(part_grid, &part_box) ||
1202  type == BRT_RECTIMAGE || type == BRT_POLYIMAGE) {
1203  // Mark the box on the image.
1204  // All coords need to be rotated to match the image.
1205  part_box.rotate(rerotate);
1206  int left = part_box.left();
1207  int top = part_box.top();
1208  pixRasterop(image_pix, left, imageheight - top,
1209  part_box.width(), part_box.height(), PIX_SET, NULL, 0, 0);
1210  }
1211  DeletePartition(part);
1212  }
1213 }
1214 
1215 // Locates all the image partitions in the part_grid, that were found by a
1216 // previous call to FindImagePartitions, marks them in the image_mask,
1217 // removes them from the grid, and deletes them. This makes it possble to
1218 // call FindImagePartitions again to produce less broken-up and less
1219 // overlapping image partitions.
1220 // rerotation specifies how to rotate the partition coords to match
1221 // the image_mask, since this function is used after orientation correction.
1223  ColPartitionGrid* part_grid,
1224  Pix* image_mask) {
1225  // Extract the noise parts from the grid and put them on a temporary list.
1226  ColPartition_LIST parts_list;
1227  ColPartition_IT part_it(&parts_list);
1228  ColPartitionGridSearch gsearch(part_grid);
1229  gsearch.StartFullSearch();
1230  ColPartition* part;
1231  while ((part = gsearch.NextFullSearch()) != NULL) {
1232  BlobRegionType type = part->blob_type();
1233  if (type == BRT_NOISE || type == BRT_RECTIMAGE || type == BRT_POLYIMAGE) {
1234  part_it.add_after_then_move(part);
1235  gsearch.RemoveBBox();
1236  }
1237  }
1238  // Render listed noise partitions to the image mask.
1239  MarkAndDeleteImageParts(rerotation, part_grid, &parts_list, image_mask);
1240 }
1241 
1242 // Removes and deletes all image partitions that are too small to be worth
1243 // keeping. We have to do this as a separate phase after creating the image
1244 // partitions as the small images are needed to join the larger ones together.
1245 static void DeleteSmallImages(ColPartitionGrid* part_grid) {
1246  if (part_grid != NULL) return;
1247  ColPartitionGridSearch gsearch(part_grid);
1248  gsearch.StartFullSearch();
1249  ColPartition* part;
1250  while ((part = gsearch.NextFullSearch()) != NULL) {
1251  // Only delete rectangular images, since if it became a poly image, it
1252  // is more evidence that it is somehow important.
1253  if (part->blob_type() == BRT_RECTIMAGE) {
1254  const TBOX& part_box = part->bounding_box();
1255  if (part_box.width() < kMinImageFindSize ||
1256  part_box.height() < kMinImageFindSize) {
1257  // It is too small to keep. Just make it disappear.
1258  gsearch.RemoveBBox();
1259  DeletePartition(part);
1260  }
1261  }
1262  }
1263 }
1264 
1265 // Runs a CC analysis on the image_pix mask image, and creates
1266 // image partitions from them, cutting out strong text, and merging with
1267 // nearby image regions such that they don't interfere with text.
1268 // Rotation and rerotation specify how to rotate image coords to match
1269 // the blob and partition coords and back again.
1270 // The input/output part_grid owns all the created partitions, and
1271 // the partitions own all the fake blobs that belong in the partitions.
1272 // Since the other blobs in the other partitions will be owned by the block,
1273 // ColPartitionGrid::ReTypeBlobs must be called afterwards to fix this
1274 // situation and collect the image blobs.
1276  const FCOORD& rotation,
1277  const FCOORD& rerotation,
1278  TO_BLOCK* block,
1279  TabFind* tab_grid,
1280  ColPartitionGrid* part_grid,
1281  ColPartition_LIST* big_parts) {
1282  int imageheight = pixGetHeight(image_pix);
1283  Boxa* boxa;
1284  Pixa* pixa;
1285  ConnCompAndRectangularize(image_pix, &boxa, &pixa);
1286  // Iterate the connected components in the image regions mask.
1287  int nboxes = boxaGetCount(boxa);
1288  for (int i = 0; i < nboxes; ++i) {
1289  l_int32 x, y, width, height;
1290  boxaGetBoxGeometry(boxa, i, &x, &y, &width, &height);
1291  Pix* pix = pixaGetPix(pixa, i, L_CLONE);
1292  TBOX im_box(x, imageheight -y - height, x + width, imageheight - y);
1293  im_box.rotate(rotation); // Now matches all partitions and blobs.
1294  ColPartitionGridSearch rectsearch(part_grid);
1295  rectsearch.SetUniqueMode(true);
1296  ColPartition_LIST part_list;
1297  DivideImageIntoParts(im_box, rotation, rerotation, pix,
1298  &rectsearch, &part_list);
1300  pixWrite("junkimagecomponent.png", pix, IFF_PNG);
1301  tprintf("Component has %d parts\n", part_list.length());
1302  }
1303  pixDestroy(&pix);
1304  if (!part_list.empty()) {
1305  ColPartition_IT part_it(&part_list);
1306  if (part_list.singleton()) {
1307  // We didn't have to chop it into a polygon to fit around text, so
1308  // try expanding it to merge fragmented image parts, as long as it
1309  // doesn't touch strong text.
1310  ColPartition* part = part_it.extract();
1311  TBOX text_box(im_box);
1312  MaximalImageBoundingBox(part_grid, &text_box);
1313  while (ExpandImageIntoParts(text_box, &rectsearch, part_grid, &part));
1314  part_it.set_to_list(&part_list);
1315  part_it.add_after_then_move(part);
1316  im_box = part->bounding_box();
1317  }
1318  EliminateWeakParts(im_box, part_grid, big_parts, &part_list);
1319  // Iterate the part_list and put the parts into the grid.
1320  for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) {
1321  ColPartition* image_part = part_it.extract();
1322  im_box = image_part->bounding_box();
1323  part_grid->InsertBBox(true, true, image_part);
1324  if (!part_it.at_last()) {
1325  ColPartition* neighbour = part_it.data_relative(1);
1326  image_part->AddPartner(false, neighbour);
1327  neighbour->AddPartner(true, image_part);
1328  }
1329  }
1330  }
1331  }
1332  boxaDestroy(&boxa);
1333  pixaDestroy(&pixa);
1334  DeleteSmallImages(part_grid);
1336  ScrollView* images_win_ = part_grid->MakeWindow(1000, 400, "With Images");
1337  part_grid->DisplayBoxes(images_win_);
1338  }
1339 }
1340 
1341 
1342 } // namespace tesseract.
1343