39 STATE *output_best_state,
43 tprintf(
"Starting SegSearch on ratings matrix:\n");
72 for (col = 0; col < ratings->
dimension(); ++col) {
85 pain_points, chunks_record, blamer_bundle,
89 float pain_point_priority;
91 output_best_state, best_choice, raw_choice, best_char_choices);
102 SEG_SEARCH_PENDING_LIST *pending =
103 new SEG_SEARCH_PENDING_LIST[ratings->
dimension()];
106 for (row = 0; row < ratings->
dimension(); ++row) {
108 pending[0].add_sorted(
114 pain_points, &best_choice_bundle, blamer_bundle);
117 int num_futile_classifications = 0;
120 (blamer_bundle !=
NULL &&
121 blamer_bundle->segsearch_is_looking_for_blame)) {
125 pop =
HeapPop(pain_points, &pain_point_priority, &pain_point);
126 if (pop ==
EMPTY)
break;
127 if (pain_point->
Valid(*ratings) &&
140 chunks_record, pain_points, blamer_bundle);
143 chunks_record, pain_points, &best_choice_bundle,
145 if (!best_choice_bundle.
updated) ++num_futile_classifications;
148 tprintf(
"num_futile_classifications %d\n", num_futile_classifications);
151 best_choice_bundle.
updated =
false;
157 blamer_bundle->incorrect_result_reason ==
IRR_CORRECT &&
158 !blamer_bundle->segsearch_is_looking_for_blame &&
159 blamer_bundle->truth_has_char_boxes &&
161 best_choice, blamer_bundle->truth_text)) {
163 pain_points, blamer_bundle, &blamer_debug);
167 blamer_bundle, &blamer_debug);
170 tprintf(
"Done with SegSearch (AcceptableChoiceFound: %d)\n",
176 delete[] best_path_by_column;
178 for (row = 0; row < ratings->
dimension(); ++row) {
179 for (col = 0; col <= row; ++col) {
180 BLOB_CHOICE_LIST *rating = ratings->
get(col, row);
188 SEG_SEARCH_PENDING_LIST *pending[],
195 for (
int col = starting_col; col < ratings->
dimension(); ++col) {
197 tprintf(
"\n\nUpdateSegSearchNodes: evaluate children in col=%d\n", col);
200 SEG_SEARCH_PENDING_LIST *pending_list = &((*pending)[col]);
201 SEG_SEARCH_PENDING_IT pending_it(pending_list);
203 while (!pending_it.empty()) {
206 if (non_empty_rows.
length() == 0 ||
210 BLOB_CHOICE_LIST *current_node = ratings->
get(col, p->
child_row);
213 current_node, p->
parent, pain_points,
214 best_path_by_column, chunks_record,
215 best_choice_bundle, blamer_bundle);
221 for (
int child_row = child_col;
222 child_row < ratings->
dimension(); ++child_row) {
228 (*pending)[child_col].add_sorted_and_find(
230 if (new_pending != actual_new_pending)
delete new_pending;
231 actual_new_pending->
changed |= new_changed;
233 tprintf(
"Added child(col=%d row=%d) parent(col=%d row=%d)"
234 " changed=0x%x to pending\n", child_col,
242 pending_it.forward();
246 pain_points, best_path_by_column, chunks_record);
249 if (best_choice_bundle->
updated) {
251 pain_points, chunks_record, best_choice_bundle);
260 SEG_SEARCH_PENDING_LIST *pending[],
265 tprintf(
"Classifying pain point priority=%.4f, col=%d, row=%d\n",
266 pain_point_priority, pain_point.
col, pain_point.
row);
272 pain_point.
col, pain_point.
row, blamer_bundle);
273 ratings->
put(pain_point.
col, pain_point.
row, classified);
277 ratings->
get(pain_point.
col, pain_point.
row),
284 if (!classified->empty()) {
285 float worst_piece_cert;
287 if (pain_point.
col > 0) {
290 &worst_piece_cert, &fragmented);
292 pain_point.
col-1, pain_point.
row,
false,
294 worst_piece_cert, fragmented, best_choice->
certainty(),
296 chunks_record, pain_points);
301 &worst_piece_cert, &fragmented);
303 pain_point.
col, pain_point.
row+1,
true,
305 worst_piece_cert, fragmented, best_choice->
certainty(),
307 chunks_record, pain_points);
312 int parent_row = pain_point.
col - 1;
313 if (parent_row < 0) {
314 (*pending)[pain_point.
col].add_sorted(
319 for (
int parent_col = 0; parent_col < pain_point.
col; ++parent_col) {
321 (*pending)[pain_point.
col].add_sorted(
324 ratings->
get(parent_col, parent_row),
338 tprintf(
"segsearch starting to look for blame\n");
342 float pain_point_priority;
344 while ((pop =
HeapPop(pain_points, &pain_point_priority,
345 &pain_point)) !=
EMPTY) {
350 *blamer_debug +=
"Correct segmentation:\n";
357 *blamer_debug +=
"\n";
365 NULL,
NULL, chunks_record, pain_points)) {
367 *blamer_debug +=
"\nFailed to insert pain point\n";
393 *blamer_debug =
"Best choice is: incorrect, top choice, dictionary word";
394 *blamer_debug +=
" with permuter ";
400 *blamer_debug +=
"Correct segmentation state was not explored";
406 *blamer_debug +=
"Correct segmentation paths were pruned by LM\n";
408 char debug_buffer[256];
409 *blamer_debug +=
"Best correct segmentation rating ";
410 sprintf(debug_buffer,
"%g",
412 *blamer_debug += debug_buffer;
413 *blamer_debug +=
" vs. best choice rating ";
414 sprintf(debug_buffer,
"%g", best_choice->
rating());
415 *blamer_debug += debug_buffer;