90 ColPartition* neighbour;
137 bool any_done =
false;
139 bool merge_done =
false;
149 if (!box_cb->
Run(part, &box))
152 ColPartition_CLIST merge_candidates;
153 FindMergeCandidates(part, box, debug, &merge_candidates);
155 int overlap_increase;
159 if (neighbour !=
NULL && overlap_increase <= 0) {
161 tprintf(
"Merging:hoverlap=%d, voverlap=%d, OLI=%d\n",
174 }
else if (neighbour !=
NULL) {
176 tprintf(
"Overlapped when merged with increase %d: ", overlap_increase);
180 tprintf(
"No candidate neighbour returned\n");
182 }
while (merge_done);
195 if (candidate == part)
202 tprintf(
"Examining merge candidate:");
210 tprintf(
"Too far away: h_dist = %d\n", h_dist);
218 tprintf(
"Too far away: v_dist = %d\n", v_dist);
227 tprintf(
"Candidate fails overlap and diacritic tests!\n");
239 static int IncreaseInOverlap(
const ColPartition* merge1,
240 const ColPartition* merge2,
242 ColPartition_CLIST* parts) {
245 ColPartition_C_IT it(parts);
246 TBOX merged_box(merge1->bounding_box());
247 merged_box += merge2->bounding_box();
248 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
249 ColPartition* part = it.data();
250 if (part == merge1 || part == merge2)
252 TBOX part_box = part->bounding_box();
255 if (overlap_area > 0 && !part->OKMergeOverlap(*merge1, *merge2,
256 ok_overlap,
false)) {
257 total_area += overlap_area;
259 overlap_area = part_box.
intersection(merge1->bounding_box()).area();
260 if (overlap_area > 0)
261 total_area -= overlap_area;
263 overlap_area = intersection_box.
area();
264 if (overlap_area > 0) {
265 total_area -= overlap_area;
267 intersection_box &= merge1->bounding_box();
268 overlap_area = intersection_box.
area();
269 if (overlap_area > 0)
270 total_area += overlap_area;
298 static bool TestCompatibleCandidates(
const ColPartition& part,
bool debug,
299 ColPartition_CLIST* candidates) {
300 ColPartition_C_IT it(candidates);
301 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
302 ColPartition* candidate = it.data();
303 if (!candidate->OKDiacriticMerge(part,
false)) {
304 ColPartition_C_IT it2(it);
305 for (it2.mark_cycle_pt(); !it2.cycled_list(); it2.forward()) {
306 ColPartition* candidate2 = it2.data();
307 if (candidate2 != candidate &&
308 !OKMergeCandidate(candidate, candidate2,
false)) {
310 tprintf(
"NC overlap failed:Candidate:");
311 candidate2->bounding_box().print();
312 tprintf(
"fails to be a good merge with:");
313 candidate->bounding_box().print();
328 ColPartition_CLIST* parts) {
333 if (part != not_this)
334 parts->add_sorted(SortByBoxLeft<ColPartition>,
true, part);
380 const ColPartition* part, ColPartition_CLIST* candidates,
bool debug,
382 int* overlap_increase) {
383 if (overlap_increase !=
NULL)
384 *overlap_increase = 0;
385 if (candidates->empty())
394 ColPartition_C_IT it(candidates);
397 TBOX full_box(part_box);
398 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
403 ColPartition_CLIST neighbours;
408 tprintf(
"Finding best merge candidate from %d, %d neighbours for box:",
409 candidates->length(), neighbours.length());
417 ColPartition_CLIST non_candidate_neighbours;
418 non_candidate_neighbours.set_subtract(SortByBoxLeft<ColPartition>,
true,
419 &neighbours, candidates);
420 int worst_nc_increase = 0;
423 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
425 if (confirm_cb !=
NULL && !confirm_cb->
Run(part, candidate)) {
427 tprintf(
"Candidate not confirmed:");
432 int increase = IncreaseInOverlap(part, candidate, ok_overlap, &neighbours);
434 if (best_candidate ==
NULL || increase < best_increase) {
435 best_candidate = candidate;
436 best_increase = increase;
439 tprintf(
"New best merge candidate has increase %d, area %d, over box:",
440 increase, best_area);
444 }
else if (increase == best_increase) {
446 if (area < best_area) {
448 best_candidate = candidate;
451 increase = IncreaseInOverlap(part, candidate, ok_overlap,
452 &non_candidate_neighbours);
453 if (increase > worst_nc_increase)
454 worst_nc_increase = increase;
456 if (best_increase > 0) {
463 if (worst_nc_increase < best_increase &&
464 TestCompatibleCandidates(*part, debug, candidates)) {
465 best_increase = worst_nc_increase;
468 if (overlap_increase !=
NULL)
469 *overlap_increase = best_increase;
470 return best_candidate;
476 ColPartition_LIST* part_list) {
489 ColPartition_LIST* big_parts) {
502 int unresolved_overlaps = 0;
506 if (neighbour == part)
520 if (!shrunken.
overlap(neighbour_box) &&
525 RemoveBadBox(excluded, part, big_parts);
530 }
else if (box.
contains(neighbour_box)) {
531 ++unresolved_overlaps;
542 RemoveBadBox(excluded, neighbour, big_parts);
551 if (neighbour_overlap_count <= part_overlap_count ||
555 if (split_blob !=
NULL) {
564 if (split_blob !=
NULL) {
571 if (right_part !=
NULL) {
578 if (unresolved_overlaps > 2 && part->
IsSingleton()) {
581 ColPartition_IT big_it(big_parts);
583 big_it.add_to_end(part);
606 bool any_changed =
false;
612 if (SmoothRegionType(nontext_map, im_box, rotation, debug, part))
622 const FCOORD& rerotation) {
623 if (scaled_color ==
NULL)
625 Pix* color_map1 =
NULL;
626 Pix* color_map2 =
NULL;
629 int width = pixGetWidth(scaled_color);
630 int height = pixGetHeight(scaled_color);
631 color_map1 = pixCreate(width, height, 32);
632 color_map2 = pixCreate(width, height, 32);
633 rms_map = pixCreate(width, height, 8);
644 color_map1, color_map2, rms_map,
647 if (color_map1 !=
NULL) {
648 pixWrite(
"swcolorinput.png", scaled_color, IFF_PNG);
649 pixWrite(
"swcolor1.png", color_map1, IFF_PNG);
650 pixWrite(
"swcolor2.png", color_map2, IFF_PNG);
651 pixWrite(
"swrms.png", rms_map, IFF_PNG);
652 pixDestroy(&color_map1);
653 pixDestroy(&color_map2);
654 pixDestroy(&rms_map);
661 ColPartition_LIST parts;
662 ColPartition_IT part_it(&parts);
668 part_it.add_after_then_move(part);
675 for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) {
676 part = part_it.extract();
685 ColPartition_LIST parts;
686 ColPartition_IT part_it(&parts);
692 part_it.add_after_then_move(part);
700 for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) {
701 part = part_it.extract();
733 ColPartition_LIST* part_lists =
new ColPartition_LIST[
gridheight()];
740 bool any_parts_found =
false;
748 ColPartition_IT part_it(&part_lists[grid_y]);
749 part_it.add_to_end(part);
750 any_parts_found =
true;
753 if (any_parts_found) {
754 for (
int grid_y = 0; grid_y <
gridheight(); ++grid_y) {
756 if (!part_lists[grid_y].empty()) {
762 delete [] part_lists;
763 return any_parts_found;
787 if (single_column_part ==
NULL) {
791 single_column_part->
CopyLeftTab(*single_column_part,
false);
792 single_column_part->
CopyRightTab(*single_column_part,
false);
802 if (single_column_part !=
NULL) {
826 BLOBNBOX_IT im_blob_it(im_blobs);
827 ColPartition_LIST dead_parts;
828 ColPartition_IT dead_part_it(&dead_parts);
837 BLOBNBOX_C_IT blob_it(part->
boxes());
838 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
840 im_blob_it.add_after_then_move(blob);
844 BLOBNBOX_C_IT blob_it(part->
boxes());
845 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
861 BLOBNBOX_C_IT blob_it(part->
boxes());
863 dead_part_it.add_to_end(part);
865 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
869 delete blob->
cblob();
883 ColPartition_LIST saved_parts;
884 ColPartition_IT part_it(&saved_parts);
890 part_it.add_to_end(part);
893 Init(gridsize, bleft, tright);
895 for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) {
896 part = part_it.extract();
916 ? best_columns[gsearch.
GridY()]
918 FindPartitionMargins(columns, part);
921 tprintf(
"Computed margins for part:");
933 ColPartition_LIST* parts) {
934 ColPartition_IT part_it(parts);
935 for (part_it.mark_cycle_pt(); !part_it.cycled_list(); part_it.forward()) {
938 if (best_columns !=
NULL) {
943 columns = best_columns[grid_y];
945 FindPartitionMargins(columns, part);
951 ColPartition_LIST dead_parts;
952 ColPartition_IT dead_it(&dead_parts);
958 dead_it.add_to_end(part);
999 for (
int upper = 0; upper < 2; ++upper) {
1003 for (partner_it.mark_cycle_pt(); !partner_it.cycled_list();
1004 partner_it.forward()) {
1010 if (!partner_it.cycled_list())
continue;
1012 for (partner_it.mark_cycle_pt(); !partner_it.cycled_list();
1013 partner_it.forward()) {
1018 tprintf(
"Finding figure captions for image part:");
1020 tprintf(
"Considering partner:");
1021 partner_box.
print();
1023 if (partner_box.
left() >= part_box.
left() &&
1025 int dist = partner_box.
y_gap(part_box);
1026 if (best_caption ==
NULL || dist < best_dist) {
1028 best_caption = partner;
1034 if (best_caption !=
NULL) {
1036 tprintf(
"Best caption candidate:");
1037 best_caption->bounding_box().print();
1043 int biggest_gap = 0;
1045 int total_height = 0;
1046 int mean_height = 0;
1051 partner = next_partner) {
1052 if (!partner->IsTextType()) {
1053 end_partner = partner;
1059 if (next_partner !=
NULL) {
1062 if (gap > biggest_gap) {
1064 end_partner = next_partner;
1065 mean_height = total_height / line_count;
1066 }
else if (gap < smallest_gap) {
1077 tprintf(
"Line count=%d, biggest gap %d, smallest%d, mean height %d\n",
1078 line_count, biggest_gap, smallest_gap, mean_height);
1079 if (end_partner !=
NULL) {
1089 partner != end_partner;
1090 partner = next_partner) {
1092 partner->SetBlobTypes();
1094 tprintf(
"Set caption type for partition:");
1095 partner->bounding_box().print();
1132 int height = top - bottom;
1133 int mid_y = (bottom + top) / 2;
1141 if (neighbour == part || neighbour->
type() ==
PT_NOISE)
1145 int neighbour_y = (neighbour_bottom + neighbour_top) / 2;
1146 if (upper != (neighbour_y > mid_y))
1151 if (best_neighbour ==
NULL)
1152 best_neighbour = neighbour;
1155 int dist = upper ? neighbour_bottom - top : bottom - neighbour_top;
1157 if (dist < best_dist) {
1159 best_neighbour = neighbour;
1165 if (best_neighbour !=
NULL)
1178 int width = right - left;
1179 int mid_x = (left + right) / 2;
1187 if (neighbour == part || neighbour->
type() ==
PT_NOISE)
1191 int neighbour_x = (neighbour_left + neighbour_right) / 2;
1192 if (to_the_left != (neighbour_x < mid_x))
1198 int dist = to_the_left ? left - neighbour_right : neighbour_left - right;
1200 if (dist < best_dist || best_neighbour ==
NULL) {
1202 best_neighbour = neighbour;
1210 if (best_neighbour !=
NULL)
1211 part->
AddPartner(to_the_left, best_neighbour);
1227 get_desperate,
this);
1240 void ColPartitionGrid::FindMergeCandidates(
const ColPartition* part,
1241 const TBOX& search_box,
bool debug,
1242 ColPartition_CLIST* candidates) {
1248 rsearch.SetUniqueMode(
true);
1249 rsearch.StartRectSearch(search_box);
1251 while ((candidate = rsearch.NextRectSearch()) !=
NULL) {
1252 if (!OKMergeCandidate(part, candidate, debug))
1269 TBOX merged_box(part_box);
1270 merged_box += c_box;
1272 msearch.SetUniqueMode(
true);
1273 msearch.StartRectSearch(merged_box);
1275 while ((neighbour = msearch.NextRectSearch()) !=
NULL) {
1276 if (neighbour == part || neighbour == candidate)
1278 if (neighbour->
OKMergeOverlap(*part, *candidate, ok_overlap,
false))
1285 !OKMergeCandidate(part, neighbour,
false) &&
1286 !OKMergeCandidate(candidate, neighbour,
false))
1289 if (neighbour !=
NULL) {
1291 tprintf(
"Combined box overlaps another that is not OK despite"
1292 " allowance of %d:", ok_overlap);
1295 OKMergeCandidate(part, neighbour,
true);
1297 OKMergeCandidate(candidate, neighbour,
true);
1309 candidates->add_sorted(SortByBoxLeft<ColPartition>,
true, candidate);
1324 bool ColPartitionGrid::SmoothRegionType(Pix* nontext_map,
1326 const FCOORD& rerotation,
1328 ColPartition* part) {
1329 const TBOX& part_box = part->bounding_box();
1331 tprintf(
"Smooothing part at:");
1339 bool any_image =
false;
1340 bool all_image =
true;
1344 BlobRegionType type = SmoothInOneDirection(dir, nontext_map, im_box,
1345 rerotation, debug, *part,
1348 tprintf(
"Result in dir %d = %d at dist %d\n", dir, type, dist);
1359 if (best_dist > max_dist)
1366 if (best_type ==
BRT_TEXT && !any_image) {
1376 if (new_type != part->blob_type() || new_flow != part->flow()) {
1377 part->set_flow(new_flow);
1378 part->set_blob_type(new_type);
1379 part->SetBlobTypes();
1394 const TBOX& part_box,
1398 *search_box = part_box;
1402 padding =
MAX(padding, min_padding);
1404 search_box->
pad(padding, padding);
1407 switch (direction) {
1410 *dist_scaling =
ICOORD(2, 1);
1414 *dist_scaling =
ICOORD(1, 2);
1418 *dist_scaling =
ICOORD(2, 1);
1422 *dist_scaling =
ICOORD(1, 2);
1450 const TBOX& im_box,
const FCOORD& rerotation,
1451 bool debug,
const ColPartition& part,
int* best_distance) {
1453 TBOX part_box = part.bounding_box();
1456 ComputeSearchBoxAndScaling(direction, part_box,
gridsize(),
1457 &search_box, &dist_scaling);
1462 AccumulatePartDistances(part, dist_scaling, search_box,
1463 nontext_map, im_box, rerotation, debug, dists);
1468 memset(counts, 0,
sizeof(counts[0]) *
NPT_COUNT);
1478 if (counts[i] < dists[i].size() && dists[i][counts[i]] < min_dist)
1479 min_dist = dists[i][counts[i]];
1483 while (counts[i] < dists[i].size() && dists[i][counts[i]] <= min_dist)
1486 *best_distance = min_dist;
1488 tprintf(
"Totals: htext=%d+%d, vtext=%d+%d, image=%d+%d, at dist=%d\n",
1491 counts[
NPT_IMAGE], image_bias, min_dist);
1499 if (image_count > 0 &&
1531 void ColPartitionGrid::AccumulatePartDistances(
const ColPartition& base_part,
1532 const ICOORD& dist_scaling,
1533 const TBOX& search_box,
1536 const FCOORD& rerotation,
1539 const TBOX& part_box = base_part.bounding_box();
1541 rsearch.SetUniqueMode(
true);
1542 rsearch.StartRectSearch(search_box);
1543 ColPartition* neighbour;
1546 while ((neighbour = rsearch.NextRectSearch()) !=
NULL) {
1547 if (neighbour->IsUnMergeableType() ||
1548 !base_part.ConfirmNoTabViolation(*neighbour) ||
1549 neighbour == &base_part)
1551 TBOX nbox = neighbour->bounding_box();
1559 int x_gap =
MAX(part_box.
x_gap(nbox), 0);
1560 int y_gap =
MAX(part_box.
y_gap(nbox), 0);
1561 int n_dist = x_gap * dist_scaling.
x() + y_gap* dist_scaling.
y();
1563 tprintf(
"Part has x-gap=%d, y=%d, dist=%d at:",
1564 x_gap, y_gap, n_dist);
1586 if (debug)
tprintf(
"Weak %d\n", n_boxes);
1589 if (debug)
tprintf(
"Image %d\n", n_boxes);
1591 if (count_vector !=
NULL) {
1592 for (
int i = 0; i < n_boxes; ++i)
1607 void ColPartitionGrid::FindPartitionMargins(ColPartitionSet* columns,
1608 ColPartition* part) {
1611 int y = part->MidY();
1613 int left_margin =
bleft().
x();
1614 int right_margin =
tright().
x();
1615 if (columns !=
NULL) {
1616 ColPartition* column = columns->ColumnContaining(box.
left(), y);
1618 left_margin = column->LeftAtY(y);
1619 column = columns->ColumnContaining(box.
right(), y);
1621 right_margin = column->RightAtY(y);
1626 left_margin = FindMargin(box.
left() + box.
height(),
true, left_margin,
1628 part->set_left_margin(left_margin);
1630 right_margin = FindMargin(box.
right() - box.
height(),
false, right_margin,
1632 part->set_right_margin(right_margin);
1638 int ColPartitionGrid::FindMargin(
int x,
bool right_to_left,
int x_limit,
1639 int y_bottom,
int y_top,
1640 const ColPartition* not_this) {
1641 int height = y_top - y_bottom;
1644 side_search.SetUniqueMode(
true);
1645 side_search.StartSideSearch(x, y_bottom, y_top);
1647 while ((part = side_search.NextSideSearch(right_to_left)) !=
NULL) {
1649 if (part == not_this)
1653 TBOX box = part->bounding_box();
1654 int min_overlap =
MIN(height, box.
height());
1657 if (y_overlap < min_overlap)
1660 int x_edge = right_to_left ? box.
right() : box.
left();
1661 if ((x_edge < x) != right_to_left)
1664 if ((x_edge < x_limit) == right_to_left)