当前位置: 首页>>代码示例>>C++>>正文


C++ WERD::gblob_list方法代码示例

本文整理汇总了C++中WERD::gblob_list方法的典型用法代码示例。如果您正苦于以下问题:C++ WERD::gblob_list方法的具体用法?C++ WERD::gblob_list怎么用?C++ WERD::gblob_list使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在WERD的用法示例。


在下文中一共展示了WERD::gblob_list方法的5个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。

示例1: apply_box_training

/*************************************************************************
 * apply_box_training()
 *   Walks every block -> row -> word of block_list. Each word whose text
 *   label is exactly one character long AND whose gblob list holds exactly
 *   one blob is handed to tess_training_tester() as a "correct" sample.
 *   Prints a start message and, at the end, the number of samples trained.
 *
 *   block_list: the blocks to scan; words are read, not relabelled, here.
 *************************************************************************/
void apply_box_training(BLOCK_LIST *block_list) {
  BLOCK_IT blocks(block_list);
  ROW_IT rows;
  WERD_IT words;
  WERD denormed_copy;            // scratch copy that gets denormalised
  PBLOB_IT blobs;
  DENORM denorm;
  INT16 trained = 0;             // number of words used for training
  char label[2] = { '\0', '\0' };  // single char label, NUL terminated

  tprintf ("Generating training data\n");
  for (blocks.mark_cycle_pt (); !blocks.cycled_list (); blocks.forward ()) {
    rows.set_to_list (blocks.data ()->row_list ());
    for (rows.mark_cycle_pt (); !rows.cycled_list (); rows.forward ()) {
      ROW *const cur_row = rows.data ();
      words.set_to_list (cur_row->word_list ());
      for (words.mark_cycle_pt (); !words.cycled_list (); words.forward ()) {
        WERD *const src_word = words.data ();

        // Only train on words with a one-char label ...
        if (strlen (src_word->text ()) != 1)
          continue;
        // ... that also consist of exactly one blob.
        if (src_word->gblob_list ()->length () != 1)
          continue;

        // make_bln_copy fills in denorm and returns a new word that we
        // own (it is deleted below).
        // NOTE(review): presumably a baseline-normalised copy - confirm.
        WERD *const norm_word =
          make_bln_copy (src_word, cur_row, cur_row->x_height (), &denorm);
        blobs.set_to_list (norm_word->blob_list ());
        label[0] = *src_word->text ();
        tess_training_tester (blobs.data (),  // the single blob
                              &denorm,
                              TRUE,           // label is correct
                              label,          // correct ASCII char
                              1,              // ASCII length
                              NULL);

        // Denormalise a copy of the word and point the blob iterator at it.
        // NOTE(review): nothing visible here reads the result afterwards.
        denormed_copy = *norm_word;
        denormed_copy.baseline_denormalise (&denorm);
        blobs.set_to_list (denormed_copy.blob_list ());
        label[0] = *src_word->text ();

        delete norm_word;
        trained++;
      }
    }
  }
  tprintf ("Generated training data for %d blobs\n", trained);
}
开发者ID:jan-ruzicka,项目名称:tesseract-ocr-sf,代码行数:50,代码来源:applybox.cpp

示例2: apply_box_testing

void apply_box_testing(BLOCK_LIST *block_list) {
  BLOCK_IT block_it(block_list);
  ROW_IT row_it;
  ROW *row;
  INT16 row_count = 0;
  WERD_IT word_it;
  WERD *word;
  WERD *bln_word;
  INT16 word_count = 0;
  PBLOB_IT blob_it;
  DENORM denorm;
  INT16 count = 0;
  char ch[2];
  WERD *outword;                 //bln best choice
  //segmentation
  WERD_CHOICE *best_choice;      //tess output
  WERD_CHOICE *raw_choice;       //top choice permuter
                                 //detailed results
  BLOB_CHOICE_LIST_CLIST blob_choices;
  INT16 char_count = 0;
  INT16 correct_count = 0;
  INT16 err_count = 0;
  INT16 rej_count = 0;
  #ifndef SECURE_NAMES
  WERDSTATS wordstats;           //As from newdiff
  #endif
  char tess_rej_str[3];
  char tess_long_str[3];

  ch[1] = '\0';
  strcpy (tess_rej_str, "|A");
  strcpy (tess_long_str, "|B");

  for (block_it.mark_cycle_pt ();
  !block_it.cycled_list (); block_it.forward ()) {
    row_it.set_to_list (block_it.data ()->row_list ());
    for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
      row = row_it.data ();
      row_count++;
      word_count = 0;
      word_it.set_to_list (row->word_list ());
      for (word_it.mark_cycle_pt ();
      !word_it.cycled_list (); word_it.forward ()) {
        word = word_it.data ();
        word_count++;
        if ((strlen (word->text ()) == 1) &&
          !STRING (applybox_test_exclusions).contains (*word->text ())
        && (word->gblob_list ()->length () == 1)) {
          /* Here is a word with a single char label and a single blob so test it */
          bln_word =
            make_bln_copy (word, row, row->x_height (), &denorm);
          blob_it.set_to_list (bln_word->blob_list ());
          ch[0] = *word->text ();
          char_count++;
          best_choice = tess_segment_pass1 (bln_word,
            &denorm,
            tess_default_matcher,
            raw_choice,
            &blob_choices, outword);

          /*
            Test for TESS screw up on word. Recog_word has already ensured that the
            choice list, outword blob lists and best_choice string are the same
            length. A TESS screw up is indicated by a blank filled or 0 length string.
          */
          if ((best_choice->string ().length () == 0) ||
            (strspn (best_choice->string ().string (), " ") ==
          best_choice->string ().length ())) {
            rej_count++;
            tprintf ("%d:%d: \"%s\" -> TESS FAILED\n",
              row_count, word_count, ch);
            #ifndef SECURE_NAMES
            wordstats.word (tess_rej_str, 2, ch, 1);
            #endif
          }
          else {
            if ((best_choice->string ().length () !=
              outword->blob_list ()->length ()) ||
              (best_choice->string ().length () !=
            blob_choices.length ())) {
              tprintf
                ("ASSERT FAIL String:\"%s\"; Strlen=%d; #Blobs=%d; #Choices=%d\n",
                best_choice->string ().string (),
                best_choice->string ().length (),
                outword->blob_list ()->length (),
                blob_choices.length ());
            }
            ASSERT_HOST (best_choice->string ().length () ==
              outword->blob_list ()->length ());
            ASSERT_HOST (best_choice->string ().length () ==
              blob_choices.length ());
            fix_quotes ((char *) best_choice->string ().string (),
                                 //turn to double
              outword, &blob_choices);
            if (strcmp (best_choice->string ().string (), ch) != 0) {
              err_count++;
              tprintf ("%d:%d: \"%s\" -> \"%s\"\n",
                row_count, word_count, ch,
                best_choice->string ().string ());
            }
//.........这里部分代码省略.........
开发者ID:jan-ruzicka,项目名称:tesseract-ocr-sf,代码行数:101,代码来源:applybox.cpp

示例3: resegment_box

INT16 resegment_box(  //
                    ROW *row,
                    BOX box,
                    char *ch,
                    INT16 block_id,
                    INT16 row_id,
                    INT16 boxfile_lineno,
                    INT16 boxfile_charno) {
  WERD_IT word_it;
  WERD *word;
  WERD *new_word = NULL;
  BOOL8 polyg = false;
  PBLOB_IT blob_it;
  PBLOB_IT new_blob_it;
  PBLOB *blob;
  PBLOB *new_blob;
  OUTLINE_IT outline_it;
  OUTLINE_LIST dummy;  // Just to initialize new_outline_it.
  OUTLINE_IT new_outline_it = &dummy;
  OUTLINE *outline;
  BOX new_word_box;
  float word_x_centre;
  float baseline;
  INT16 error_count = 0;         //number of chars lost

  word_it.set_to_list (row->word_list ());
  for (word_it.mark_cycle_pt (); !word_it.cycled_list (); word_it.forward ()) {
    word = word_it.data ();
    polyg = word->flag (W_POLYGON);
    if (word->bounding_box ().overlap (box)) {
      blob_it.set_to_list (word->gblob_list ());
      for (blob_it.mark_cycle_pt ();
      !blob_it.cycled_list (); blob_it.forward ()) {
        blob = blob_it.data ();
        if (gblob_bounding_box (blob, polyg).overlap (box)) {
          outline_it.set_to_list (gblob_out_list (blob, polyg));
          for (outline_it.mark_cycle_pt ();
          !outline_it.cycled_list (); outline_it.forward ()) {
            outline = outline_it.data ();
            if (goutline_bounding_box (outline, polyg).
            major_overlap (box)) {
              if (strlen (word->text ()) > 0) {
                if (error_count == 0) {
                  error_count = 1;
                  if (applybox_debug > 4)
                    report_failed_box (boxfile_lineno,
                      boxfile_charno,
                      box, ch,
                      "FAILURE! box overlaps blob in labelled word");
                }
                if (applybox_debug > 4)
                  tprintf
                    ("APPLY_BOXES: ALSO ignoring corrupted char blk:%d row:%d \"%s\"\n",
                    block_id, row_id,
                    word_it.data ()->text ());
                word_it.data ()->set_text ("");
                //UN label it
                error_count++;
              }

              if (error_count == 0) {
                if (new_word == NULL) {
                                 /* Make a new word with a single blob */
                  new_word = word->shallow_copy ();
                  new_word->set_text (ch);
                  if (polyg)
                    new_blob = new PBLOB;
                  else
                    new_blob = (PBLOB *) new C_BLOB;
                  new_blob_it.set_to_list (new_word->
                    gblob_list ());
                  new_blob_it.add_to_end (new_blob);
                  new_outline_it.
                    set_to_list (gblob_out_list
                    (new_blob, polyg));
                }
                new_outline_it.add_to_end (outline_it.
                  extract ());
                //move blob
              }
            }
          }
                                 //no outlines in blob
          if (outline_it.empty ())
                                 //so delete blob
            delete blob_it.extract ();
        }
      }
      if (blob_it.empty ())      //no blobs in word
                                 //so delete word
          delete word_it.extract ();
    }
  }
  if (error_count > 0)
    return error_count;

  if (new_word != NULL) {
    gblob_sort_list (new_word->gblob_list (), polyg);
    word_it.add_to_end (new_word);
    new_word_box = new_word->bounding_box ();
//.........这里部分代码省略.........
开发者ID:jan-ruzicka,项目名称:tesseract-ocr-sf,代码行数:101,代码来源:applybox.cpp

示例4: tidy_up

/*************************************************************************
 * tidy_up()
 *   - report >1 block
 *   - sort the words in each row.
 *   - report any rows with no labelled words.
 *   - report any remaining unlabelled words
 *		- report total labelled words
 *
 *************************************************************************/
void tidy_up(                         //
             BLOCK_LIST *block_list,  //real blocks
             INT16 &ok_char_count,
             INT16 &ok_row_count,
             INT16 &unlabelled_words,
             INT16 *tgt_char_counts,
             INT16 &rebalance_count,
             char &min_char,
             INT16 &min_samples,
             INT16 &final_labelled_blob_count) {
  BLOCK_IT block_it(block_list);
  ROW_IT row_it;
  ROW *row;
  WERD_IT word_it;
  WERD *word;
  WERD *duplicate_word;
  INT16 block_idx = 0;
  INT16 row_idx;
  INT16 all_row_idx = 0;
  BOOL8 row_ok;
  BOOL8 rebalance_needed = FALSE;
                                 //No. of unique labelled samples
  INT16 labelled_char_counts[128];
  INT16 i;
  char ch;
  char prev_ch = '\0';
  BOOL8 at_dupe_of_prev_word;
  ROW *prev_row = NULL;
  INT16 left;
  INT16 prev_left = -1;

  for (i = 0; i < 128; i++)
    labelled_char_counts[i] = 0;

  ok_char_count = 0;
  ok_row_count = 0;
  unlabelled_words = 0;
  if ((applybox_debug > 4) && (block_it.length () != 1))

    tprintf ("APPLY_BOXES: More than one block??\n");

  for (block_it.mark_cycle_pt ();
  !block_it.cycled_list (); block_it.forward ()) {
    block_idx++;
    row_idx = 0;
    row_ok = FALSE;
    row_it.set_to_list (block_it.data ()->row_list ());
    for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
      row_idx++;
      all_row_idx++;
      row = row_it.data ();
      word_it.set_to_list (row->word_list ());
      word_it.sort (word_comparator);
      for (word_it.mark_cycle_pt ();
      !word_it.cycled_list (); word_it.forward ()) {
        word = word_it.data ();
        if (strlen (word->text ()) == 0) {
          unlabelled_words++;
          if (applybox_debug > 4) {
            tprintf
              ("APPLY_BOXES: Unlabelled word blk:%d row:%d allrows:%d\n",
              block_idx, row_idx, all_row_idx);
          }
        }
        else {
          if (word->gblob_list ()->length () != 1)
            tprintf
              ("APPLY_BOXES: FATALITY - MULTIBLOB Labelled word blk:%d row:%d allrows:%d\n",
              block_idx, row_idx, all_row_idx);

          ok_char_count++;
          labelled_char_counts[*word->text ()]++;
          row_ok = TRUE;
        }
      }
      if ((applybox_debug > 4) && (!row_ok)) {
        tprintf
          ("APPLY_BOXES: Row with no labelled words blk:%d row:%d allrows:%d\n",
          block_idx, row_idx, all_row_idx);
      }
      else
        ok_row_count++;
    }
  }

  min_samples = 9999;
  for (i = 0; i < 128; i++) {
    if (tgt_char_counts[i] > labelled_char_counts[i]) {
      if (labelled_char_counts[i] <= 1) {
        tprintf
          ("APPLY_BOXES: FATALITY - %d labelled samples of \"%c\" - target is %d\n",
//.........这里部分代码省略.........
开发者ID:jan-ruzicka,项目名称:tesseract-ocr-sf,代码行数:101,代码来源:applybox.cpp

示例5: find_row_of_box

/*************************************************************************
 * find_row_of_box()
 *   Searches block_list for the single row that owns outlines which
 *   major_overlap() the given box. Bounding boxes of block, row, word and
 *   blob are used as successive cheap filters before outlines are tested.
 *
 *   Returns the matching row, or NULL when no outline matches OR when
 *   matching outlines are found in more than one distinct row.
 *
 *   block_id:          counts blocks visited (1-based) and is left at the
 *                      value it had when the search stopped.
 *                      NOTE(review): on a normal return this is the last
 *                      block visited, not necessarily the block holding
 *                      the returned row - confirm callers expect that.
 *   row_id_to_process: set to the 1-based position of the matching row
 *                      within its block; left untouched if nothing matches.
 *************************************************************************/
ROW *find_row_of_box(                         //
                     BLOCK_LIST *block_list,  //real blocks
                     BOX box,                 //from boxfile
                     INT16 &block_id,
                     INT16 &row_id_to_process) {
  BLOCK_IT blocks(block_list);
  ROW_IT rows;
  WERD_IT words;
  PBLOB_IT blobs;
  OUTLINE_IT outlines;
  ROW *chosen_row = NULL;        // row found so far, if any

  block_id = 0;
  for (blocks.mark_cycle_pt (); !blocks.cycled_list (); blocks.forward ()) {
    block_id++;
    INT16 row_in_block = 0;      // row numbering restarts for each block
    BLOCK *const cur_block = blocks.data ();
    if (!cur_block->bounding_box ().overlap (box))
      continue;

    rows.set_to_list (cur_block->row_list ());
    for (rows.mark_cycle_pt (); !rows.cycled_list (); rows.forward ()) {
      row_in_block++;
      ROW *const cur_row = rows.data ();
      if (!cur_row->bounding_box ().overlap (box))
        continue;

      words.set_to_list (cur_row->word_list ());
      for (words.mark_cycle_pt (); !words.cycled_list (); words.forward ()) {
        WERD *const cur_word = words.data ();
        // The polygon flag selects how the g* helpers treat the blobs.
        const BOOL8 is_polygon = cur_word->flag (W_POLYGON);
        if (!cur_word->bounding_box ().overlap (box))
          continue;

        blobs.set_to_list (cur_word->gblob_list ());
        for (blobs.mark_cycle_pt (); !blobs.cycled_list (); blobs.forward ()) {
          PBLOB *const cur_blob = blobs.data ();
          if (!gblob_bounding_box (cur_blob, is_polygon).overlap (box))
            continue;

          outlines.set_to_list (gblob_out_list (cur_blob, is_polygon));
          for (outlines.mark_cycle_pt ();
               !outlines.cycled_list (); outlines.forward ()) {
            OUTLINE *const cur_outline = outlines.data ();
            if (!goutline_bounding_box (cur_outline, is_polygon).
                major_overlap (box))
              continue;

            // A second, different row also really overlaps the box: error.
            if ((chosen_row != NULL) && (chosen_row != cur_row))
              return NULL;

            chosen_row = cur_row;
            row_id_to_process = row_in_block;
          }
        }
      }
    }
  }
  return chosen_row;
}
开发者ID:jan-ruzicka,项目名称:tesseract-ocr-sf,代码行数:79,代码来源:applybox.cpp


注:本文中的WERD::gblob_list方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。