本文整理汇总了C++中WERD::gblob_list方法的典型用法代码示例。如果您正苦于以下问题:C++ WERD::gblob_list方法的具体用法?C++ WERD::gblob_list怎么用?C++ WERD::gblob_list使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类WERD的用法示例。
在下文中一共展示了WERD::gblob_list方法的5个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: apply_box_training
/**
 * apply_box_training()
 *
 * Visit every word in every row of every block of block_list. A word is used
 * as a training sample only when its text label is exactly one character
 * long and it holds exactly one blob. For each such word a copy is built
 * with make_bln_copy() and its blob is handed to tess_training_tester()
 * together with the label. The number of samples is reported via tprintf.
 *
 * @param block_list  blocks whose rows/words are scanned
 */
void apply_box_training(BLOCK_LIST *block_list) {
  BLOCK_IT blocks(block_list);
  ROW_IT rows;
  WERD_IT words;
  PBLOB_IT blobs;
  WERD denorm_copy;               // copy to denorm
  DENORM denorm;
  INT16 sample_count = 0;
  char label[2] = { '\0', '\0' }; // one-character label, always terminated

  tprintf ("Generating training data\n");
  for (blocks.mark_cycle_pt (); !blocks.cycled_list (); blocks.forward ()) {
    rows.set_to_list (blocks.data ()->row_list ());
    for (rows.mark_cycle_pt (); !rows.cycled_list (); rows.forward ()) {
      ROW *cur_row = rows.data ();

      words.set_to_list (cur_row->word_list ());
      for (words.mark_cycle_pt (); !words.cycled_list (); words.forward ()) {
        WERD *cur_word = words.data ();

        // Skip anything that is not "single char label + single blob".
        if (strlen (cur_word->text ()) != 1)
          continue;
        if (cur_word->gblob_list ()->length () != 1)
          continue;

        WERD *bln_word =
          make_bln_copy (cur_word, cur_row, cur_row->x_height (), &denorm);

        blobs.set_to_list (bln_word->blob_list ());
        label[0] = *cur_word->text ();
        tess_training_tester (blobs.data (),  // the single blob
                              &denorm,
                              TRUE,           // sample is correct
                              label,          // correct ASCII char
                              1,              // ASCII length
                              NULL);

        // Kept from the original flow: denormalise a copy of the word and
        // point the blob iterator at it before the bln word is released.
        denorm_copy = *bln_word;
        denorm_copy.baseline_denormalise (&denorm);
        blobs.set_to_list (denorm_copy.blob_list ());
        label[0] = *cur_word->text ();

        delete bln_word;
        sample_count++;
      }
    }
  }
  tprintf ("Generated training data for %d blobs\n", sample_count);
}
示例2: apply_box_testing
/**
 * apply_box_testing()
 *
 * Visible portion: walks every block, row and word of block_list. Each word
 * that carries a one-character label, whose label is not listed in
 * applybox_test_exclusions, and that holds exactly one blob is copied with
 * make_bln_copy() and passed to tess_segment_pass1(). The returned best
 * choice is then compared with the label:
 *  - an empty or all-space result is counted in rej_count;
 *  - a result that differs from the label is counted in err_count.
 * Both cases are logged as "<row>:<word>:" using the running row_count and
 * the per-row word_count.
 *
 * Only the part of the function reproduced below is documented.
 */
void apply_box_testing(BLOCK_LIST *block_list) {
BLOCK_IT block_it(block_list);
ROW_IT row_it;
ROW *row;
// Running row number; never reset, so it counts rows across all blocks.
INT16 row_count = 0;
WERD_IT word_it;
WERD *word;
WERD *bln_word;
// Word number within the current row; reset to 0 at the start of each row.
INT16 word_count = 0;
PBLOB_IT blob_it;
DENORM denorm;
INT16 count = 0;
// One-character label of the word under test, as a NUL-terminated string.
char ch[2];
WERD *outword; //bln best choice
//segmentation
WERD_CHOICE *best_choice; //tess output
WERD_CHOICE *raw_choice; //top choice permuter
//detailed results
BLOB_CHOICE_LIST_CLIST blob_choices;
// Number of words that qualified for testing.
INT16 char_count = 0;
INT16 correct_count = 0;
// Number of tested words whose result differed from the label.
INT16 err_count = 0;
// Number of tested words for which the result was empty or all spaces.
INT16 rej_count = 0;
#ifndef SECURE_NAMES
WERDSTATS wordstats; //As from newdiff
#endif
// Two-character marker strings plus terminator; tess_rej_str is recorded in
// wordstats when the classifier produced nothing usable.
char tess_rej_str[3];
char tess_long_str[3];
ch[1] = '\0';
strcpy (tess_rej_str, "|A");
strcpy (tess_long_str, "|B");
for (block_it.mark_cycle_pt ();
!block_it.cycled_list (); block_it.forward ()) {
row_it.set_to_list (block_it.data ()->row_list ());
for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
row = row_it.data ();
row_count++;
word_count = 0;
word_it.set_to_list (row->word_list ());
for (word_it.mark_cycle_pt ();
!word_it.cycled_list (); word_it.forward ()) {
word = word_it.data ();
word_count++;
// Test only: single-character label, label not excluded, single blob.
if ((strlen (word->text ()) == 1) &&
!STRING (applybox_test_exclusions).contains (*word->text ())
&& (word->gblob_list ()->length () == 1)) {
/* Here is a word with a single char label and a single blob so test it */
bln_word =
make_bln_copy (word, row, row->x_height (), &denorm);
blob_it.set_to_list (bln_word->blob_list ());
ch[0] = *word->text ();
char_count++;
// NOTE(review): raw_choice and outword are uninitialised at this point;
// presumably tess_segment_pass1 takes them by reference and fills them
// in, since outword is dereferenced below - confirm against its prototype.
best_choice = tess_segment_pass1 (bln_word,
&denorm,
tess_default_matcher,
raw_choice,
&blob_choices, outword);
/*
Test for TESS screw up on word. Recog_word has already ensured that the
choice list, outword blob lists and best_choice string are the same
length. A TESS screw up is indicated by a blank filled or 0 length string.
*/
// strspn(..., " ") equals the string length exactly when every character
// is a space.
if ((best_choice->string ().length () == 0) ||
(strspn (best_choice->string ().string (), " ") ==
best_choice->string ().length ())) {
rej_count++;
tprintf ("%d:%d: \"%s\" -> TESS FAILED\n",
row_count, word_count, ch);
#ifndef SECURE_NAMES
wordstats.word (tess_rej_str, 2, ch, 1);
#endif
}
else {
// Print the three lengths before the ASSERT_HOST checks below so that a
// mismatch is visible in the log.
if ((best_choice->string ().length () !=
outword->blob_list ()->length ()) ||
(best_choice->string ().length () !=
blob_choices.length ())) {
tprintf
("ASSERT FAIL String:\"%s\"; Strlen=%d; #Blobs=%d; #Choices=%d\n",
best_choice->string ().string (),
best_choice->string ().length (),
outword->blob_list ()->length (),
blob_choices.length ());
}
ASSERT_HOST (best_choice->string ().length () ==
outword->blob_list ()->length ());
ASSERT_HOST (best_choice->string ().length () ==
blob_choices.length ());
// The cast hands fix_quotes a writable pointer to the result string.
fix_quotes ((char *) best_choice->string ().string (),
//turn to double
outword, &blob_choices);
// Any difference between the result and the label is an error.
if (strcmp (best_choice->string ().string (), ch) != 0) {
err_count++;
tprintf ("%d:%d: \"%s\" -> \"%s\"\n",
row_count, word_count, ch,
best_choice->string ().string ());
}
//.........这里部分代码省略.........
示例3: resegment_box
/**
 * resegment_box()
 *
 * Visible portion: scans the words of row for outlines whose bounding box has
 * a major overlap with box.
 *  - If such an outline belongs to a word that already has a non-empty text
 *    label, this is treated as a failure: the word's label is cleared and
 *    error_count is increased.
 *  - Otherwise, while no failure has occurred, the outline is extracted from
 *    its blob and appended to the single blob of a newly created word that
 *    is labelled with ch.
 * Blobs left without outlines and words left without blobs are deleted.
 * If any failure occurred, error_count is returned before the new word is
 * added to the row.
 *
 * block_id, row_id, boxfile_lineno and boxfile_charno are used only in
 * diagnostic output in the code shown.
 *
 * Only the part of the function reproduced below is documented.
 */
INT16 resegment_box( //
ROW *row,
BOX box,
char *ch,
INT16 block_id,
INT16 row_id,
INT16 boxfile_lineno,
INT16 boxfile_charno) {
WERD_IT word_it;
WERD *word;
// Word that collects the outlines claimed by box; created on first use.
WERD *new_word = NULL;
BOOL8 polyg = false;
PBLOB_IT blob_it;
PBLOB_IT new_blob_it;
PBLOB *blob;
PBLOB *new_blob;
OUTLINE_IT outline_it;
OUTLINE_LIST dummy; // Just to initialize new_outline_it.
OUTLINE_IT new_outline_it = &dummy;
OUTLINE *outline;
BOX new_word_box;
float word_x_centre;
float baseline;
INT16 error_count = 0; //number of chars lost
word_it.set_to_list (row->word_list ());
for (word_it.mark_cycle_pt (); !word_it.cycled_list (); word_it.forward ()) {
word = word_it.data ();
// polyg is refreshed for every word, including words that do not overlap.
polyg = word->flag (W_POLYGON);
if (word->bounding_box ().overlap (box)) {
blob_it.set_to_list (word->gblob_list ());
for (blob_it.mark_cycle_pt ();
!blob_it.cycled_list (); blob_it.forward ()) {
blob = blob_it.data ();
if (gblob_bounding_box (blob, polyg).overlap (box)) {
outline_it.set_to_list (gblob_out_list (blob, polyg));
for (outline_it.mark_cycle_pt ();
!outline_it.cycled_list (); outline_it.forward ()) {
outline = outline_it.data ();
if (goutline_bounding_box (outline, polyg).
major_overlap (box)) {
// A non-empty label means this word was already claimed.
if (strlen (word->text ()) > 0) {
// First failure only: report the box itself. error_count is set to 1
// here and incremented again below, so the first failure leaves it at 2.
if (error_count == 0) {
error_count = 1;
if (applybox_debug > 4)
report_failed_box (boxfile_lineno,
boxfile_charno,
box, ch,
"FAILURE! box overlaps blob in labelled word");
}
if (applybox_debug > 4)
tprintf
("APPLY_BOXES: ALSO ignoring corrupted char blk:%d row:%d \"%s\"\n",
block_id, row_id,
word_it.data ()->text ());
word_it.data ()->set_text ("");
//UN label it
error_count++;
}
// Outlines are moved only while no failure has been seen.
if (error_count == 0) {
if (new_word == NULL) {
/* Make a new word with a single blob */
new_word = word->shallow_copy ();
new_word->set_text (ch);
// NOTE(review): for non-polygonal words a C_BLOB is stored behind a
// PBLOB pointer; presumably the gblob_* helpers cast back according
// to polyg - confirm.
if (polyg)
new_blob = new PBLOB;
else
new_blob = (PBLOB *) new C_BLOB;
new_blob_it.set_to_list (new_word->
gblob_list ());
new_blob_it.add_to_end (new_blob);
new_outline_it.
set_to_list (gblob_out_list
(new_blob, polyg));
}
// extract() removes the outline from its source blob before it is
// appended to the new blob.
new_outline_it.add_to_end (outline_it.
extract ());
//move blob
}
}
}
//no outlines in blob
if (outline_it.empty ())
//so delete blob
delete blob_it.extract ();
}
}
if (blob_it.empty ()) //no blobs in word
//so delete word
delete word_it.extract ();
}
}
// On failure the new word is not added to the row.
// NOTE(review): if new_word was allocated before a later failure, nothing in
// the code shown releases it - confirm whether this leaks.
if (error_count > 0)
return error_count;
if (new_word != NULL) {
// NOTE(review): polyg here holds the flag of the last word visited by the
// loop above, not necessarily of the word new_word was copied from.
gblob_sort_list (new_word->gblob_list (), polyg);
word_it.add_to_end (new_word);
new_word_box = new_word->bounding_box ();
//.........这里部分代码省略.........
示例4: tidy_up
/*************************************************************************
* tidy_up()
* - report >1 block
* - sort the words in each row.
* - report any rows with no labelled words.
* - report any remaining unlabelled words
* - report total labelled words
*
* In the code shown:
*   ok_char_count     is reset to 0 and counts words with a non-empty label.
*   ok_row_count      is reset to 0 and counts rows (see the note at the
*                     increment for the exact condition).
*   unlabelled_words  is reset to 0 and counts words with an empty label.
*   tgt_char_counts   is read at indices 0..127 and compared with the number
*                     of labelled samples found for each character code.
*   min_samples       is set to 9999 before that comparison loop.
*   rebalance_count, min_char and final_labelled_blob_count are not touched
*   in the part of the function reproduced here.
*
* Only the part of the function reproduced below is documented.
*************************************************************************/
void tidy_up( //
BLOCK_LIST *block_list, //real blocks
INT16 &ok_char_count,
INT16 &ok_row_count,
INT16 &unlabelled_words,
INT16 *tgt_char_counts,
INT16 &rebalance_count,
char &min_char,
INT16 &min_samples,
INT16 &final_labelled_blob_count) {
BLOCK_IT block_it(block_list);
ROW_IT row_it;
ROW *row;
WERD_IT word_it;
WERD *word;
WERD *duplicate_word;
// 1-based block number used in diagnostics.
INT16 block_idx = 0;
// 1-based row number within the current block.
INT16 row_idx;
// 1-based row number across all blocks.
INT16 all_row_idx = 0;
BOOL8 row_ok;
BOOL8 rebalance_needed = FALSE;
//No. of unique labelled samples
INT16 labelled_char_counts[128];
INT16 i;
char ch;
char prev_ch = '\0';
BOOL8 at_dupe_of_prev_word;
ROW *prev_row = NULL;
INT16 left;
INT16 prev_left = -1;
for (i = 0; i < 128; i++)
labelled_char_counts[i] = 0;
ok_char_count = 0;
ok_row_count = 0;
unlabelled_words = 0;
if ((applybox_debug > 4) && (block_it.length () != 1))
tprintf ("APPLY_BOXES: More than one block??\n");
for (block_it.mark_cycle_pt ();
!block_it.cycled_list (); block_it.forward ()) {
block_idx++;
row_idx = 0;
// NOTE(review): row_ok is cleared once per block, not once per row, so a
// labelled word in an earlier row keeps it TRUE for later rows of the
// same block - confirm this is intended.
row_ok = FALSE;
row_it.set_to_list (block_it.data ()->row_list ());
for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
row_idx++;
all_row_idx++;
row = row_it.data ();
word_it.set_to_list (row->word_list ());
word_it.sort (word_comparator);
for (word_it.mark_cycle_pt ();
!word_it.cycled_list (); word_it.forward ()) {
word = word_it.data ();
if (strlen (word->text ()) == 0) {
unlabelled_words++;
if (applybox_debug > 4) {
tprintf
("APPLY_BOXES: Unlabelled word blk:%d row:%d allrows:%d\n",
block_idx, row_idx, all_row_idx);
}
}
else {
// A labelled word with more than one blob is only reported; it is still
// counted below.
if (word->gblob_list ()->length () != 1)
tprintf
("APPLY_BOXES: FATALITY - MULTIBLOB Labelled word blk:%d row:%d allrows:%d\n",
block_idx, row_idx, all_row_idx);
ok_char_count++;
// NOTE(review): the first label character is used directly as an index
// into a 128-entry array with no range check; a char value that is
// negative or above 127 would index outside the array.
labelled_char_counts[*word->text ()]++;
row_ok = TRUE;
}
}
// NOTE(review): the else pairs with the combined condition, so
// ok_row_count is also incremented for a row without labelled words
// whenever applybox_debug <= 4 - confirm this is intended.
if ((applybox_debug > 4) && (!row_ok)) {
tprintf
("APPLY_BOXES: Row with no labelled words blk:%d row:%d allrows:%d\n",
block_idx, row_idx, all_row_idx);
}
else
ok_row_count++;
}
}
// Start value for min_samples before the per-character comparison.
min_samples = 9999;
for (i = 0; i < 128; i++) {
// Only characters with fewer labelled samples than their target are
// examined.
if (tgt_char_counts[i] > labelled_char_counts[i]) {
if (labelled_char_counts[i] <= 1) {
tprintf
("APPLY_BOXES: FATALITY - %d labelled samples of \"%c\" - target is %d\n",
//.........这里部分代码省略.........
示例5: find_row_of_box
/**
 * find_row_of_box()
 *
 * Find the row that box belongs to. A row is a candidate only when one of
 * its outlines has a major overlap with box; overlapping the bounding box
 * of the block, row, word or blob merely narrows the search.
 *
 * @param block_list         blocks to search
 * @param box                box read from the box file
 * @param block_id           output: reset to 0, then incremented for each
 *                           block visited (so it is 1-based for the block
 *                           being examined)
 * @param row_id_to_process  output: 1-based position, within its block, of
 *                           the returned row; left untouched when no row
 *                           qualifies
 * @return the single row containing outlines that majorly overlap box, or
 *         NULL when no outline qualifies or when qualifying outlines are
 *         found in more than one row.
 */
ROW *find_row_of_box(                         //
                     BLOCK_LIST *block_list,  //real blocks
                     BOX box,                 //from boxfile
                     INT16 &block_id,
                     INT16 &row_id_to_process) {
  BLOCK_IT blocks(block_list);
  ROW_IT rows;
  WERD_IT words;
  PBLOB_IT blobs;
  OUTLINE_IT outlines;
  ROW *chosen_row = NULL;
  INT16 row_id;

  block_id = 0;
  for (blocks.mark_cycle_pt (); !blocks.cycled_list (); blocks.forward ()) {
    block_id++;
    row_id = 0;
    BLOCK *cur_block = blocks.data ();
    if (!cur_block->bounding_box ().overlap (box))
      continue;

    rows.set_to_list (cur_block->row_list ());
    for (rows.mark_cycle_pt (); !rows.cycled_list (); rows.forward ()) {
      row_id++;
      ROW *cur_row = rows.data ();
      if (!cur_row->bounding_box ().overlap (box))
        continue;

      words.set_to_list (cur_row->word_list ());
      for (words.mark_cycle_pt (); !words.cycled_list (); words.forward ()) {
        WERD *cur_word = words.data ();
        BOOL8 is_polygonal = cur_word->flag (W_POLYGON);
        if (!cur_word->bounding_box ().overlap (box))
          continue;

        blobs.set_to_list (cur_word->gblob_list ());
        for (blobs.mark_cycle_pt (); !blobs.cycled_list (); blobs.forward ()) {
          PBLOB *cur_blob = blobs.data ();
          if (!gblob_bounding_box (cur_blob, is_polygonal).overlap (box))
            continue;

          outlines.set_to_list (gblob_out_list (cur_blob, is_polygonal));
          for (outlines.mark_cycle_pt ();
               !outlines.cycled_list (); outlines.forward ()) {
            OUTLINE *cur_outline = outlines.data ();
            if (!goutline_bounding_box (cur_outline, is_polygonal).
                major_overlap (box))
              continue;

            // A second, different row also holds a matching outline:
            // the box overlaps blobs in more than one row, which is an error.
            if ((chosen_row != NULL) && (chosen_row != cur_row))
              return NULL;

            chosen_row = cur_row;
            row_id_to_process = row_id;
          }
        }
      }
    }
  }
  return chosen_row;
}