当前位置: 首页>>代码示例>>C++>>正文


C++ STRING::string方法代码示例

本文整理汇总了C++中STRING::string方法的典型用法代码示例。如果您正苦于以下问题:C++ STRING::string方法的具体用法?C++ STRING::string怎么用?C++ STRING::string使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在STRING的用法示例。


在下文中一共展示了STRING::string方法的13个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。

示例1: word_display


//.........这里部分代码省略.........
      }
      image_win->Pen(color);
      TBOX box = box_word->BlobBox(i);
      image_win->Rectangle(box.left(), box.bottom(), box.right(), box.top());
    }
    return true;
  }
  /*
    Note the double coercions of(COLOUR)((inT32)editor_image_word_bb_color)
    etc. are to keep the compiler happy.
  */
                                 // display bounding box
  if (word->display_flag(DF_BOX)) {
    word->bounding_box().plot(image_win,
     (ScrollView::Color)((inT32)
      editor_image_word_bb_color),
     (ScrollView::Color)((inT32)
      editor_image_word_bb_color));

    ScrollView::Color c = (ScrollView::Color)
       ((inT32) editor_image_blob_bb_color);
    image_win->Pen(c);
    c_it.set_to_list(word->cblob_list());
    for (c_it.mark_cycle_pt(); !c_it.cycled_list(); c_it.forward())
      c_it.data()->bounding_box().plot(image_win);
    displayed_something = TRUE;
  }

                                 // display edge steps
  if (word->display_flag(DF_EDGE_STEP)) {     // edgesteps available
    word->plot(image_win);      // rainbow colors
    displayed_something = TRUE;
  }

                                 // display poly approx
  if (word->display_flag(DF_POLYGONAL)) {
                                 // need to convert
    TWERD* tword = TWERD::PolygonalCopy(poly_allow_detailed_fx, word);
    tword->plot(image_win);
    delete tword;
    displayed_something = TRUE;
  }

  // Display correct text and blamer information.
  STRING text;
  STRING blame;
  if (word->display_flag(DF_TEXT) && word->text() != NULL) {
    text = word->text();
  }
  if (word->display_flag(DF_BLAMER) &&
      !(word_res->blamer_bundle != NULL &&
        word_res->blamer_bundle->incorrect_result_reason() == IRR_CORRECT)) {
    text = "";
    const BlamerBundle *blamer_bundle = word_res->blamer_bundle;
    if (blamer_bundle == NULL) {
      text += "NULL";
    } else {
      text = blamer_bundle->TruthString();
    }
    text += " -> ";
    STRING best_choice_str;
    if (word_res->best_choice == NULL) {
      best_choice_str = "NULL";
    } else {
      word_res->best_choice->string_and_lengths(&best_choice_str, NULL);
    }
    text += best_choice_str;
    IncorrectResultReason reason = (blamer_bundle == NULL) ?
        IRR_PAGE_LAYOUT : blamer_bundle->incorrect_result_reason();
    ASSERT_HOST(reason < IRR_NUM_REASONS)
    blame += " [";
    blame += BlamerBundle::IncorrectReasonName(reason);
    blame += "]";
  }
  if (text.length() > 0) {
    word_bb = word->bounding_box();
    image_win->Pen(ScrollView::RED);
    word_height = word_bb.height();
    int text_height = 0.50 * word_height;
    if (text_height > 20) text_height = 20;
    image_win->TextAttributes("Arial", text_height, false, false, false);
    shift = (word_height < word_bb.width()) ? 0.25 * word_height : 0.0f;
    image_win->Text(word_bb.left() + shift,
                    word_bb.bottom() + 0.25 * word_height, text.string());
    if (blame.length() > 0) {
      image_win->Text(word_bb.left() + shift,
                      word_bb.bottom() + 0.25 * word_height - text_height,
                      blame.string());
    }

    displayed_something = TRUE;
  }

  if (!displayed_something)      // display BBox anyway
    word->bounding_box().plot(image_win,
     (ScrollView::Color)((inT32) editor_image_word_bb_color),
     (ScrollView::Color)((inT32)
      editor_image_word_bb_color));
  return TRUE;
}
开发者ID:Annyjain29,项目名称:tess-two,代码行数:101,代码来源:pgedit.cpp

示例2: SegmentPage

/**
 * Segment the page according to the current value of tessedit_pageseg_mode.
 * pix_binary_ is used as the source image and should not be NULL.
 * On return the blocks list owns all the constructed page layout.
 */
int Tesseract::SegmentPage(const STRING* input_file, BLOCK_LIST* blocks,
                           Tesseract* osd_tess, OSResults* osr) {
  ASSERT_HOST(pix_binary_ != NULL);
  int width = pixGetWidth(pix_binary_);
  int height = pixGetHeight(pix_binary_);
  // Get page segmentation mode.
  PageSegMode pageseg_mode = static_cast<PageSegMode>(
      static_cast<int>(tessedit_pageseg_mode));
  // If a UNLV zone file can be found, use that instead of segmentation.
  if (!PSM_COL_FIND_ENABLED(pageseg_mode) &&
      input_file != NULL && input_file->length() > 0) {
    STRING name = *input_file;
    const char* lastdot = strrchr(name.string(), '.');
    if (lastdot != NULL)
      name[lastdot - name.string()] = '\0';
    read_unlv_file(name, width, height, blocks);
  }
  if (blocks->empty()) {
    // No UNLV file present. Work according to the PageSegMode.
    // First make a single block covering the whole image.
    BLOCK_IT block_it(blocks);
    BLOCK* block = new BLOCK("", TRUE, 0, 0, 0, 0, width, height);
    block->set_right_to_left(right_to_left());
    block_it.add_to_end(block);
  } else {
    // UNLV file present. Use PSM_SINGLE_BLOCK.
    pageseg_mode = PSM_SINGLE_BLOCK;
  }
  int auto_page_seg_ret_val = 0;
  TO_BLOCK_LIST to_blocks;
  if (PSM_OSD_ENABLED(pageseg_mode) || PSM_BLOCK_FIND_ENABLED(pageseg_mode) ||
      PSM_SPARSE(pageseg_mode)) {
    auto_page_seg_ret_val =
        AutoPageSeg(pageseg_mode, blocks, &to_blocks, osd_tess, osr);
    if (pageseg_mode == PSM_OSD_ONLY)
      return auto_page_seg_ret_val;
    // To create blobs from the image region bounds uncomment this line:
    //  to_blocks.clear();  // Uncomment to go back to the old mode.
  } else {
    deskew_ = FCOORD(1.0f, 0.0f);
    reskew_ = FCOORD(1.0f, 0.0f);
    if (pageseg_mode == PSM_CIRCLE_WORD) {
      Pix* pixcleaned = RemoveEnclosingCircle(pix_binary_);
      if (pixcleaned != NULL) {
        pixDestroy(&pix_binary_);
        pix_binary_ = pixcleaned;
      }
    }
  }

  if (auto_page_seg_ret_val < 0) {
    return -1;
  }

  if (blocks->empty()) {
    if (textord_debug_tabfind)
      tprintf("Empty page\n");
    return 0;  // AutoPageSeg found an empty page.
  }
  bool splitting =
      pageseg_devanagari_split_strategy != ShiroRekhaSplitter::NO_SPLIT;
  bool cjk_mode = textord_use_cjk_fp_model;

  textord_.TextordPage(pageseg_mode, reskew_, width, height, pix_binary_,
                       pix_thresholds_, pix_grey_, splitting || cjk_mode,
                       blocks, &to_blocks);
  return auto_page_seg_ret_val;
}
开发者ID:vkbrad,项目名称:AndroidOCR,代码行数:73,代码来源:pagesegmain.cpp

示例3:

static void CallWithUTF8(TessCallback1<const char *> *cb,
                         const WERD_CHOICE *wc) {
  STRING s;
  wc->string_and_lengths(&s, nullptr);
  cb->Run(s.string());
}
开发者ID:Shreeshrii,项目名称:tesseract,代码行数:6,代码来源:dawg.cpp

示例4: main

int main(int argc, char **argv) {
#ifdef USING_GETTEXT
  setlocale (LC_ALL, "");
  bindtextdomain (PACKAGE, LOCALEDIR);
  textdomain (PACKAGE);
#endif
  if ((argc == 2 && strcmp(argv[1], "-v") == 0) ||
      (argc == 2 && strcmp(argv[1], "--version") == 0)) {
    char *versionStrP;

    fprintf(stderr, "tesseract %s\n", tesseract::TessBaseAPI::Version());

    versionStrP = getLeptonicaVersion();
    fprintf(stderr, " %s\n", versionStrP);
    lept_free(versionStrP);

    versionStrP = getImagelibVersions();
    fprintf(stderr, "  %s\n", versionStrP);
    lept_free(versionStrP);

    exit(0);
  }

  // Make the order of args a bit more forgiving than it used to be.
  const char* lang = "eng";
  const char* image = NULL;
  const char* output = NULL;
  bool noocr = false;
  bool list_langs = false;
  bool print_parameters = false;

  tesseract::PageSegMode pagesegmode = tesseract::PSM_AUTO;
  int arg = 1;
  while (arg < argc && (output == NULL || argv[arg][0] == '-')) {
    if (strcmp(argv[arg], "-l") == 0 && arg + 1 < argc) {
      lang = argv[arg + 1];
      ++arg;
    } else if (strcmp(argv[arg], "-psm") == 0 && arg + 1 < argc) {
      pagesegmode = static_cast<tesseract::PageSegMode>(atoi(argv[arg + 1]));
      ++arg;
    } else if (strcmp(argv[arg], "--print-parameters") == 0) {
      noocr = true;
      print_parameters = true;
    } else if (strcmp(argv[arg], "-c") == 0 && arg + 1 < argc) {
      // handled properly after api init
      ++arg;
    } else if (image == NULL) {
      image = argv[arg];
    } else if (output == NULL) {
      output = argv[arg];
    }
    ++arg;
  }

  if (argc == 2 && strcmp(argv[1], "--list-langs") == 0) {
    list_langs = true;
    noocr = true;
  }

  if (output == NULL && noocr == false) {
    fprintf(stderr, _("Usage:%s imagename outputbase|stdout [-l lang] "
                      "[-psm pagesegmode] [-c configvar=value] "
                      "[configfile...]\n\n"), argv[0]);
    fprintf(stderr,
            _("pagesegmode values are:\n"
              "0 = Orientation and script detection (OSD) only.\n"
              "1 = Automatic page segmentation with OSD.\n"
              "2 = Automatic page segmentation, but no OSD, or OCR\n"
              "3 = Fully automatic page segmentation, but no OSD. (Default)\n"
              "4 = Assume a single column of text of variable sizes.\n"
              "5 = Assume a single uniform block of vertically aligned text.\n"
              "6 = Assume a single uniform block of text.\n"
              "7 = Treat the image as a single text line.\n"
              "8 = Treat the image as a single word.\n"
              "9 = Treat the image as a single word in a circle.\n"
              "10 = Treat the image as a single character.\n"));
    fprintf(stderr, _("multiple -c arguments are allowed.\n"));
    fprintf(stderr, _("-l lang, -psm pagesegmode and any -c options must occur"
                      "before any configfile.\n\n"));
    fprintf(stderr, _("Single options:\n"));
    fprintf(stderr, _("  -v --version: version info\n"));
    fprintf(stderr, _("  --list-langs: list available languages for tesseract "
                      "engine\n"));
    fprintf(stderr, _("  --print-parameters: print tesseract parameters to the "
                      "stdout\n"));
    exit(1);
  }

  tesseract::TessBaseAPI api;

  STRING tessdata_dir;
  truncate_path(argv[0], &tessdata_dir);
  api.SetOutputName(output);
  int rc = api.Init(tessdata_dir.string(), lang, tesseract::OEM_DEFAULT,
                &(argv[arg]), argc - arg, NULL, NULL, false);

  if (rc) {
    fprintf(stderr, _("Could not initialize tesseract.\n"));
    exit(1);
  }
//.........这里部分代码省略.........
开发者ID:ArunPandiyan,项目名称:textfairy,代码行数:101,代码来源:tesseractmain.cpp

示例5: SegmentPage

// Segment the page according to the current value of tessedit_pageseg_mode.
// If the pix_binary_ member is not NULL, it is used as the source image,
// and copied to image, otherwise it just uses image as the input.
// On return the blocks list owns all the constructed page layout.
int Tesseract::SegmentPage(const STRING* input_file,
                           IMAGE* image, BLOCK_LIST* blocks) {
  int width = image->get_xsize();
  int height = image->get_ysize();
  int resolution = image->get_res();
#ifdef HAVE_LIBLEPT
  if (pix_binary_ != NULL) {
    width = pixGetWidth(pix_binary_);
    height = pixGetHeight(pix_binary_);
    resolution = pixGetXRes(pix_binary_);
  }
#endif
  // Zero resolution messes up the algorithms, so make sure it is credible.
  if (resolution < kMinCredibleResolution)
    resolution = kDefaultResolution;
  // Get page segmentation mode.
  PageSegMode pageseg_mode = static_cast<PageSegMode>(
      static_cast<int>(tessedit_pageseg_mode));
  // If a UNLV zone file can be found, use that instead of segmentation.
  if (pageseg_mode != tesseract::PSM_AUTO &&
      input_file != NULL && input_file->length() > 0) {
    STRING name = *input_file;
    const char* lastdot = strrchr(name.string(), '.');
    if (lastdot != NULL)
      name[lastdot - name.string()] = '\0';
    read_unlv_file(name, width, height, blocks);
  }
  bool single_column = pageseg_mode > PSM_AUTO;
  if (blocks->empty()) {
    // No UNLV file present. Work according to the PageSegMode.
    // First make a single block covering the whole image.
    BLOCK_IT block_it(blocks);
    BLOCK* block = new BLOCK("", TRUE, 0, 0, 0, 0, width, height);
    block_it.add_to_end(block);
  } else {
    // UNLV file present. Use PSM_SINGLE_COLUMN.
    pageseg_mode = PSM_SINGLE_COLUMN;
  }

  TO_BLOCK_LIST land_blocks, port_blocks;
  TBOX page_box;
  if (pageseg_mode <= PSM_SINGLE_COLUMN) {
    if (AutoPageSeg(width, height, resolution, single_column,
                    image, blocks, &port_blocks) < 0) {
      return -1;
    }
    // To create blobs from the image region bounds uncomment this line:
    //  port_blocks.clear();  // Uncomment to go back to the old mode.
  } else {
#if HAVE_LIBLEPT
    image->FromPix(pix_binary_);
#endif
    deskew_ = FCOORD(1.0f, 0.0f);
    reskew_ = FCOORD(1.0f, 0.0f);
  }
  if (blocks->empty()) {
    tprintf("Empty page\n");
    return 0;  // AutoPageSeg found an empty page.
  }

  if (port_blocks.empty()) {
    // AutoPageSeg was not used, so we need to find_components first.
    find_components(blocks, &land_blocks, &port_blocks, &page_box);
  } else {
    // AutoPageSeg does not need to find_components as it did that already.
    page_box.set_left(0);
    page_box.set_bottom(0);
    page_box.set_right(width);
    page_box.set_top(height);
    // Filter_blobs sets up the TO_BLOCKs the same as find_components does.
    filter_blobs(page_box.topright(), &port_blocks, true);
  }

  TO_BLOCK_IT to_block_it(&port_blocks);
  ASSERT_HOST(!port_blocks.empty());
  TO_BLOCK* to_block = to_block_it.data();
  if (pageseg_mode <= PSM_SINGLE_BLOCK ||
      to_block->line_size < 2) {
    // For now, AUTO, SINGLE_COLUMN and SINGLE_BLOCK all map to the old
    // textord. The difference is the number of blocks and how the are made.
    textord_page(page_box.topright(), blocks, &land_blocks, &port_blocks,
                 this);
  } else {
    // SINGLE_LINE, SINGLE_WORD and SINGLE_CHAR all need a single row.
    float gradient = make_single_row(page_box.topright(),
                                     to_block, &port_blocks, this);
    if (pageseg_mode == PSM_SINGLE_LINE) {
      // SINGLE_LINE uses the old word maker on the single line.
      make_words(page_box.topright(), gradient, blocks,
                 &land_blocks, &port_blocks, this);
    } else {
      // SINGLE_WORD and SINGLE_CHAR cram all the blobs into a
      // single word, and in SINGLE_CHAR mode, all the outlines
      // go in a single blob.
      make_single_word(pageseg_mode == PSM_SINGLE_CHAR,
                       to_block->get_rows(), to_block->block->row_list());
//.........这里部分代码省略.........
开发者ID:mk219533,项目名称:tesseract-ocr,代码行数:101,代码来源:pagesegmain.cpp

示例6: main

// Main program to combine/extract/overwrite tessdata components
// in [lang].traineddata files.
//
// To combine all the individual tessdata components (unicharset, DAWGs,
// classifier templates, ambiguities, language configs) located at, say,
// /home/$USER/temp/eng.* run:
//
//   combine_tessdata /home/$USER/temp/eng.
//
// The result will be a combined tessdata file /home/$USER/temp/eng.traineddata
//
// Specify option -e if you would like to extract individual components
// from a combined traineddata file. For DC, to extract language config
// file and the unicharset from tessdata/eng.traineddata run:
//
//   combine_tessdata -e tessdata/eng.traineddata
//   /home/$USER/temp/eng.config /home/$USER/temp/eng.unicharset
//
// The desired config file and unicharset will be written to
// /home/$USER/temp/eng.config /home/$USER/temp/eng.unicharset
//
// Specify option -o to overwrite individual components of the given
// [lang].traineddata file. For DC, to overwrite language config
// and unichar ambiguities files in tessdata/eng.traineddata use:
//
//   combine_tessdata -o tessdata/eng.traineddata
//   /home/$USER/temp/eng.config /home/$USER/temp/eng.unicharambigs
//
// As a result, tessdata/eng.traineddata will contain the new language config
// and unichar ambigs, plus all the original DAWGs, classifier teamples, etc.
//
// Note: the file names of the files to extract to and to overwrite from should
// have the appropriate file suffixes (extensions) indicating their tessdata
// component type (.unicharset for the unicharset, .unicharambigs for unichar
// ambigs, etc). See k*FileSuffix variable in ccutil/tessdatamanager.h.
//
// Specify option -u to unpack all the components to the specified path:
//
// combine_tessdata -u tessdata/eng.traineddata /home/$USER/temp/eng.
//
// This will create  /home/$USER/temp/eng.* files with individual tessdata
// components from tessdata/eng.traineddata.
//
int main(int argc, char **argv) {
  int i;
  if (argc == 2) {
    printf("Combining tessdata files\n");
    STRING lang = argv[1];
    char* last = &argv[1][strlen(argv[1])-1];
    if (*last != '.')
      lang += '.';
    STRING output_file = lang;
    output_file += kTrainedDataSuffix;
    if (!tesseract::TessdataManager::CombineDataFiles(
        lang.string(), output_file.string())) {
      printf("Error combining tessdata files into %s\n",
             output_file.string());
    } else {
      printf("Output %s created sucessfully.\n", output_file.string());
    }
  } else if (argc >= 4 && (strcmp(argv[1], "-e") == 0 ||
                           strcmp(argv[1], "-u") == 0)) {
    // Initialize TessdataManager with the data in the given traineddata file.
    tesseract::TessdataManager tm;
    tm.Init(argv[2], 0);
    printf("Extracting tessdata components from %s\n", argv[2]);
    if (strcmp(argv[1], "-e") == 0) {
      for (i = 3; i < argc; ++i) {
        if (tm.ExtractToFile(argv[i])) {
          printf("Wrote %s\n", argv[i]);
        } else {
          printf("Not extracting %s, since this component"
                 " is not present\n", argv[i]);
        }
      }
    } else {  // extract all the components
      for (i = 0; i < tesseract::TESSDATA_NUM_ENTRIES; ++i) {
        STRING filename = argv[3];
        char* last = &argv[3][strlen(argv[3])-1];
        if (*last != '.')
          filename += '.';
        filename += tesseract::kTessdataFileSuffixes[i];
        if (tm.ExtractToFile(filename.string())) {
          printf("Wrote %s\n", filename.string());
        }
      }
    }
    tm.End();
  } else if (argc >= 4 && strcmp(argv[1], "-o") == 0) {
    // Rename the current traineddata file to a temporary name.
    const char *new_traineddata_filename = argv[2];
    STRING traineddata_filename = new_traineddata_filename;
    traineddata_filename += ".__tmp__";
    if (rename(new_traineddata_filename, traineddata_filename.string()) != 0) {
      tprintf("Failed to create a temporary file %s\n",
              traineddata_filename.string());
      exit(1);
    }

    // Initialize TessdataManager with the data in the given traineddata file.
//.........这里部分代码省略.........
开发者ID:Dhavalc2012,项目名称:Opticial-Character-Recognisation,代码行数:101,代码来源:combine_tessdata.cpp

示例7: main

// Main program to combine/extract/overwrite tessdata components
// in [lang].traineddata files.
//
// To combine all the individual tessdata components (unicharset, DAWGs,
// classifier templates, ambiguities, language configs) located at, say,
// /home/$USER/temp/eng.* run:
//
//   combine_tessdata /home/$USER/temp/eng.
//
// The result will be a combined tessdata file /home/$USER/temp/eng.traineddata
//
// Specify option -e if you would like to extract individual components
// from a combined traineddata file. For example, to extract language config
// file and the unicharset from tessdata/eng.traineddata run:
//
//   combine_tessdata -e tessdata/eng.traineddata
//   /home/$USER/temp/eng.config /home/$USER/temp/eng.unicharset
//
// The desired config file and unicharset will be written to
// /home/$USER/temp/eng.config /home/$USER/temp/eng.unicharset
//
// Specify option -o to overwrite individual components of the given
// [lang].traineddata file. For example, to overwrite language config
// and unichar ambiguities files in tessdata/eng.traineddata use:
//
//   combine_tessdata -o tessdata/eng.traineddata
//   /home/$USER/temp/eng.config /home/$USER/temp/eng.unicharambigs
//
// As a result, tessdata/eng.traineddata will contain the new language config
// and unichar ambigs, plus all the original DAWGs, classifier teamples, etc.
//
// Note: the file names of the files to extract to and to overwrite from should
// have the appropriate file suffixes (extensions) indicating their tessdata
// component type (.unicharset for the unicharset, .unicharambigs for unichar
// ambigs, etc). See k*FileSuffix variable in ccutil/tessdatamanager.h.
//
// Specify option -u to unpack all the components to the specified path:
//
// combine_tessdata -u tessdata/eng.traineddata /home/$USER/temp/eng.
//
// This will create  /home/$USER/temp/eng.* files with individual tessdata
// components from tessdata/eng.traineddata.
//
int main(int argc, char **argv) {
  tesseract::CheckSharedLibraryVersion();

  int i;
  tesseract::TessdataManager tm;
  if (argc > 1 && (!strcmp(argv[1], "-v") || !strcmp(argv[1], "--version"))) {
    printf("%s\n", tesseract::TessBaseAPI::Version());
    return EXIT_SUCCESS;
  } else if (argc == 2) {
    printf("Combining tessdata files\n");
    STRING lang = argv[1];
    char* last = &argv[1][strlen(argv[1])-1];
    if (*last != '.')
      lang += '.';
    STRING output_file = lang;
    output_file += kTrainedDataSuffix;
    if (!tm.CombineDataFiles(lang.string(), output_file.string())) {
      printf("Error combining tessdata files into %s\n",
             output_file.string());
    } else {
      printf("Output %s created successfully.\n", output_file.string());
    }
  } else if (argc >= 4 && (strcmp(argv[1], "-e") == 0 ||
                           strcmp(argv[1], "-u") == 0)) {
    // Initialize TessdataManager with the data in the given traineddata file.
    if (!tm.Init(argv[2])) {
      tprintf("Failed to read %s\n", argv[2]);
      return EXIT_FAILURE;
    }
    printf("Extracting tessdata components from %s\n", argv[2]);
    if (strcmp(argv[1], "-e") == 0) {
      for (i = 3; i < argc; ++i) {
        errno = 0;
        if (tm.ExtractToFile(argv[i])) {
          printf("Wrote %s\n", argv[i]);
        } else if (errno == 0) {
          printf("Not extracting %s, since this component"
                 " is not present\n", argv[i]);
          return EXIT_FAILURE;
        } else {
          printf("Error, could not extract %s: %s\n",
                 argv[i], strerror(errno));
          return EXIT_FAILURE;
        }
      }
    } else {  // extract all the components
      for (i = 0; i < tesseract::TESSDATA_NUM_ENTRIES; ++i) {
        STRING filename = argv[3];
        char* last = &argv[3][strlen(argv[3])-1];
        if (*last != '.')
          filename += '.';
        filename += tesseract::kTessdataFileSuffixes[i];
        errno = 0;
        if (tm.ExtractToFile(filename.string())) {
          printf("Wrote %s\n", filename.string());
        } else if (errno != 0) {
          printf("Error, could not extract %s: %s\n",
//.........这里部分代码省略.........
开发者ID:Shreeshrii,项目名称:tesseract,代码行数:101,代码来源:combine_tessdata.cpp

示例8: recog_training_segmented

// This function takes tif/box pair of files and runs recognition on the image,
// while making sure that the word bounds that tesseract identified roughly
// match to those specified by the input box file. For each word (ngram in a
// single bounding box from the input box file) it outputs the ocred result,
// the correct label, rating and certainty.
    void Tesseract::recog_training_segmented(const STRING &fname,
                                             PAGE_RES *page_res,
                                             volatile ETEXT_DESC *monitor,
                                             FILE *output_file) {
        STRING box_fname = fname;
        const char *lastdot = strrchr(box_fname.string(), '.');
        if (lastdot != NULL) box_fname[lastdot - box_fname.string()] = '\0';
        box_fname += ".box";
        // read_next_box() will close box_file
        FILE *box_file = open_file(box_fname.string(), "r");

        PAGE_RES_IT page_res_it;
        page_res_it.page_res = page_res;
        page_res_it.restart_page();
        STRING label;

        // Process all the words on this page.
        TBOX tbox;  // tesseract-identified box
        TBOX bbox;  // box from the box file
        bool keep_going;
        int line_number = 0;
        int examined_words = 0;
        do {
            keep_going = read_t(&page_res_it, &tbox);
            keep_going &= ReadNextBox(applybox_page, &line_number, box_file, &label,
                                      &bbox);
            // Align bottom left points of the TBOXes.
            while (keep_going &&
                   !NearlyEqual<int>(tbox.bottom(), bbox.bottom(), kMaxBoxEdgeDiff)) {
                if (bbox.bottom() < tbox.bottom()) {
                    page_res_it.forward();
                    keep_going = read_t(&page_res_it, &tbox);
                } else {
                    keep_going = ReadNextBox(applybox_page, &line_number, box_file, &label,
                                             &bbox);
                }
            }
            while (keep_going &&
                   !NearlyEqual<int>(tbox.left(), bbox.left(), kMaxBoxEdgeDiff)) {
                if (bbox.left() > tbox.left()) {
                    page_res_it.forward();
                    keep_going = read_t(&page_res_it, &tbox);
                } else {
                    keep_going = ReadNextBox(applybox_page, &line_number, box_file, &label,
                                             &bbox);
                }
            }
            // OCR the word if top right points of the TBOXes are similar.
            if (keep_going &&
                NearlyEqual<int>(tbox.right(), bbox.right(), kMaxBoxEdgeDiff) &&
                NearlyEqual<int>(tbox.top(), bbox.top(), kMaxBoxEdgeDiff)) {
                ambigs_classify_and_output(label.string(), &page_res_it, output_file);
                examined_words++;
            }
            page_res_it.forward();
        } while (keep_going);
        fclose(box_file);

        // Set up scripts on all of the words that did not get sent to
        // ambigs_classify_and_output.  They all should have, but if all the
        // werd_res's don't get uch_sets, tesseract will crash when you try
        // to iterate over them. :-(
        int total_words = 0;
        for (page_res_it.restart_page(); page_res_it.block() != NULL;
             page_res_it.forward()) {
            if (page_res_it.word()) {
                if (page_res_it.word()->uch_set == NULL)
                    page_res_it.word()->SetupFake(unicharset);
                total_words++;
            }
        }
        if (examined_words < 0.85 * total_words) {
            tprintf("TODO(antonova): clean up recog_training_segmented; "
                            " It examined only a small fraction of the ambigs image.\n");
        }
        tprintf("recog_training_segmented: examined %d / %d words.\n",
                examined_words, total_words);
    }
开发者ID:mehulsbhatt,项目名称:MyOCRTEST,代码行数:83,代码来源:recogtraining.cpp

示例9: ParseCommandLineFlags

void ParseCommandLineFlags(const char* usage,
                           int* argc, char*** argv,
                           const bool remove_flags) {
  if (*argc == 1) {
    tprintf("USAGE: %s\n", usage);
    PrintCommandLineFlags();
    exit(0);
  }

  unsigned int i = 1;
  for (i = 1; i < *argc; ++i) {
    const char* current_arg = (*argv)[i];
    // If argument does not start with a hyphen then break.
    if (current_arg[0] != '-') {
      break;
    }
    // Position current_arg after startings hyphens. We treat a sequence of
    // consecutive hyphens of any length identically.
    while (*current_arg == '-') {
      ++current_arg;
    }
    // If this is asking for usage, print the help message and abort.
    if (!strcmp(current_arg, "help") ||
        !strcmp(current_arg, "helpshort")) {
      tprintf("USAGE: %s\n", usage);
      PrintCommandLineFlags();
      exit(0);
    }
    // Find the starting position of the value if it was specified in this
    // string.
    const char* equals_position = strchr(current_arg, '=');
    const char* rhs = NULL;
    if (equals_position != NULL) {
      rhs = equals_position + 1;
    }
    // Extract the flag name.
    STRING lhs;
    if (equals_position == NULL) {
      lhs = current_arg;
    } else {
      lhs.assign(current_arg, equals_position - current_arg);
    }
    if (!lhs.length()) {
      tprintf("ERROR: Bad argument: %s\n", (*argv)[i]);
      exit(1);
    }

    // Find the flag name in the list of global flags.
    // inT32 flag
    inT32 int_val;
    if (IntFlagExists(lhs.string(), &int_val)) {
      if (rhs != NULL) {
        if (!strlen(rhs)) {
          // Bad input of the format --int_flag=
          tprintf("ERROR: Bad argument: %s\n", (*argv)[i]);
          exit(1);
        }
        if (!SafeAtoi(rhs, &int_val)) {
          tprintf("ERROR: Could not parse int from %s in flag %s\n",
                  rhs, (*argv)[i]);
          exit(1);
        }
      } else {
        // We need to parse the next argument
        if (i + 1 >= *argc) {
          tprintf("ERROR: Could not find value argument for flag %s\n",
                  lhs.string());
          exit(1);
        } else {
          ++i;
          if (!SafeAtoi((*argv)[i], &int_val)) {
            tprintf("ERROR: Could not parse inT32 from %s\n", (*argv)[i]);
            exit(1);
          }
        }
      }
      SetIntFlagValue(lhs.string(), int_val);
      continue;
    }

    // double flag
    double double_val;
    if (DoubleFlagExists(lhs.string(), &double_val)) {
      if (rhs != NULL) {
        if (!strlen(rhs)) {
          // Bad input of the format --double_flag=
          tprintf("ERROR: Bad argument: %s\n", (*argv)[i]);
          exit(1);
        }
        if (!SafeAtod(rhs, &double_val)) {
          tprintf("ERROR: Could not parse double from %s in flag %s\n",
                  rhs, (*argv)[i]);
          exit(1);
        }
      } else {
        // We need to parse the next argument
        if (i + 1 >= *argc) {
          tprintf("ERROR: Could not find value argument for flag %s\n",
                  lhs.string());
          exit(1);
//.........这里部分代码省略.........
开发者ID:0xkasun,项目名称:tesseract,代码行数:101,代码来源:commandlineflags.cpp

示例10: edges_and_textord

void edges_and_textord(                       //read .pb file
                       const char *filename,  //.pb file
                       BLOCK_LIST *blocks) {
  BLOCK *block;                  //current block
  char *lastdot;                 //of name
  STRING name = filename;        //truncated name
  ICOORD page_tr;
  BOX page_box;                  //bounding_box
  PDBLK_CLIST pd_blocks;         //copy of list
  BLOCK_IT block_it = blocks;    //iterator
  PDBLK_C_IT pd_it = &pd_blocks; //iterator
                                 //different orientations
  TO_BLOCK_LIST land_blocks, port_blocks;
  IMAGE thresh_image;            //thresholded

  lastdot = strrchr (name.string (), '.');
  if (lastdot != NULL)
    *lastdot = '\0';
  if (page_image.get_bpp () == 0) {
    name += tessedit_image_ext;
    if (page_image.read_header (name.string ()))
      CANTOPENFILE.error ("edges_and_textord", EXIT, name.string ());
    if (page_image.read (0))
      READFAILED.error ("edges_and_textord", EXIT, name.string ());
    name = filename;
    lastdot = strrchr (name.string (), '.');
    if (lastdot != NULL)
      *lastdot = '\0';
  }
  page_tr = ICOORD (page_image.get_xsize (), page_image.get_ysize ());
  read_pd_file (name, page_image.get_xsize (), page_image.get_ysize (),
    blocks);
  block_it.set_to_list (blocks);
  if (global_monitor != NULL)
    global_monitor->ocr_alive = TRUE;

  if (page_image.get_bpp () > 1) {
    set_global_loc_code(LOC_ADAPTIVE);
    for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
    block_it.forward ()) {
      block = block_it.data ();
      pd_it.add_after_then_move (block);
    }
    //              adaptive_threshold(&page_image,&pd_blocks,&thresh_image);
    set_global_loc_code(LOC_EDGE_PROG);
#ifndef EMBEDDED
    previous_cpu = clock ();
#endif
    for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
    block_it.forward ()) {
      block = block_it.data ();
      if (!polygon_tess_approximation)
        invert_image(&page_image);
#ifndef GRAPHICS_DISABLED
      extract_edges(NO_WINDOW, &page_image, &thresh_image, page_tr, block);
#else
      extract_edges(&page_image, &thresh_image, page_tr, block);
#endif
      page_box += block->bounding_box ();
    }
    page_image = thresh_image;   //everyone else gets it
  }
  else {
    set_global_loc_code(LOC_EDGE_PROG);
    if (!page_image.white_high ())
      invert_image(&page_image);

#ifndef EMBEDDED
    previous_cpu = clock ();
#endif

    for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
    block_it.forward ()) {
      block = block_it.data ();
#ifndef GRAPHICS_DISABLED
      extract_edges(NO_WINDOW, &page_image, &page_image, page_tr, block);
#else
      extract_edges(&page_image, &page_image, page_tr, block);
#endif
      page_box += block->bounding_box ();
    }
  }
  if (global_monitor != NULL) {
    global_monitor->ocr_alive = TRUE;
    global_monitor->progress = 10;
  }

  assign_blobs_to_blocks2(blocks, &land_blocks, &port_blocks);
  if (global_monitor != NULL)
    global_monitor->ocr_alive = TRUE;
  filter_blobs (page_box.topright (), &land_blocks, textord_test_landscape);
#ifndef EMBEDDED
  previous_cpu = clock ();
#endif
  filter_blobs (page_box.topright (), &port_blocks, !textord_test_landscape);
  if (global_monitor != NULL)
    global_monitor->ocr_alive = TRUE;
  textord_page (page_box.topright (), blocks, &land_blocks, &port_blocks);
}
开发者ID:jan-ruzicka,项目名称:tesseract-ocr-sf,代码行数:99,代码来源:tordmain.cpp

示例11: BuildListOfAllLeaves

// Find all editable parameters used within tesseract and create a
// SVMenuNode tree from it.
// TODO (wanke): This is actually sort of hackish.
SVMenuNode* ParamsEditor::BuildListOfAllLeaves(tesseract::Tesseract *tess) {
  SVMenuNode* mr = new SVMenuNode();
  ParamContent_LIST vclist;
  ParamContent_IT vc_it(&vclist);
  // Amount counts the number of entries for a specific char*.
  // TODO(rays) get rid of the use of std::map.
  std::map<const char*, int> amount;

  // Add all parameters to a list.
  int v, i;
  int num_iterations = (tess->params() == NULL) ? 1 : 2;
  for (v = 0; v < num_iterations; ++v) {
    tesseract::ParamsVectors *vec = (v == 0) ? GlobalParams() : tess->params();
    for (i = 0; i < vec->int_params.size(); ++i) {
      vc_it.add_after_then_move(new ParamContent(vec->int_params[i]));
    }
    for (i = 0; i < vec->bool_params.size(); ++i) {
      vc_it.add_after_then_move(new ParamContent(vec->bool_params[i]));
    }
    for (i = 0; i < vec->string_params.size(); ++i) {
      vc_it.add_after_then_move(new ParamContent(vec->string_params[i]));
    }
    for (i = 0; i < vec->double_params.size(); ++i) {
      vc_it.add_after_then_move(new ParamContent(vec->double_params[i]));
    }
  }

  // Count the # of entries starting with a specific prefix.
  for (vc_it.mark_cycle_pt(); !vc_it.cycled_list(); vc_it.forward()) {
    ParamContent* vc = vc_it.data();
    STRING tag;
    STRING tag2;
    STRING tag3;

    GetPrefixes(vc->GetName(), &tag, &tag2, &tag3);
    amount[tag.string()]++;
    amount[tag2.string()]++;
    amount[tag3.string()]++;
  }

  vclist.sort(ParamContent::Compare);  // Sort the list alphabetically.

  SVMenuNode* other = mr->AddChild("OTHER");

  // go through the list again and this time create the menu structure.
  vc_it.move_to_first();
  for (vc_it.mark_cycle_pt(); !vc_it.cycled_list(); vc_it.forward()) {
    ParamContent* vc = vc_it.data();
    STRING tag;
    STRING tag2;
    STRING tag3;
    GetPrefixes(vc->GetName(), &tag, &tag2, &tag3);

    if (amount[tag.string()] == 1) {
      other->AddChild(vc->GetName(), vc->GetId(), vc->GetValue().string(),
                      vc->GetDescription());
    } else {  // More than one would use this submenu -> create submenu.
      SVMenuNode* sv = mr->AddChild(tag.string());
      if ((amount[tag.string()] <= MAX_ITEMS_IN_SUBMENU) ||
          (amount[tag2.string()] <= 1)) {
        sv->AddChild(vc->GetName(), vc->GetId(),
                     vc->GetValue().string(), vc->GetDescription());
      } else {  // Make subsubmenus.
        SVMenuNode* sv2 = sv->AddChild(tag2.string());
        sv2->AddChild(vc->GetName(), vc->GetId(),
                      vc->GetValue().string(), vc->GetDescription());
      }
    }
  }
  return mr;
}
开发者ID:0xkasun,项目名称:tesseract,代码行数:74,代码来源:paramsd.cpp

示例12: main


//.........这里部分代码省略.........
  PERF_COUNT_START("Tesseract:main")
  tesseract::TessBaseAPI api;

  api.SetOutputName(output);
  int rc = api.Init(datapath, lang, tesseract::OEM_DEFAULT,
                &(argv[arg]), argc - arg, NULL, NULL, false);

  if (rc) {
    fprintf(stderr, "Could not initialize tesseract.\n");
    exit(1);
  }

  char opt1[255], opt2[255];
  for (arg = 0; arg < argc; arg++) {
    if (strcmp(argv[arg], "-c") == 0 && arg + 1 < argc) {
      strncpy(opt1, argv[arg + 1], 255);
      *(strchr(opt1, '=')) = 0;
      strncpy(opt2, strchr(argv[arg + 1], '=') + 1, 255);
      opt2[254] = 0;
      ++arg;

      if (!api.SetVariable(opt1, opt2)) {
        fprintf(stderr, "Could not set option: %s=%s\n", opt1, opt2);
      }
    }
  }

  if (list_langs) {
     GenericVector<STRING> languages;
     api.GetAvailableLanguagesAsVector(&languages);
     fprintf(stderr, "List of available languages (%d):\n",
             languages.size());
     for (int index = 0; index < languages.size(); ++index) {
       STRING& string = languages[index];
       fprintf(stderr, "%s\n", string.string());
     }
     api.End();
     exit(0);
  }

  if (print_parameters) {
     FILE* fout = stdout;
     fprintf(stdout, "Tesseract parameters:\n");
     api.PrintVariables(fout);
     api.End();
     exit(0);
  }

  // We have 2 possible sources of pagesegmode: a config file and
  // the command line. For backwards compatability reasons, the
  // default in tesseract is tesseract::PSM_SINGLE_BLOCK, but the
  // default for this program is tesseract::PSM_AUTO. We will let
  // the config file take priority, so the command-line default
  // can take priority over the tesseract default, so we use the
  // value from the command line only if the retrieved mode
  // is still tesseract::PSM_SINGLE_BLOCK, indicating no change
  // in any config file. Therefore the only way to force
  // tesseract::PSM_SINGLE_BLOCK is from the command line.
  // It would be simpler if we could set the value before Init,
  // but that doesn't work.
  if (api.GetPageSegMode() == tesseract::PSM_SINGLE_BLOCK)
     api.SetPageSegMode(pagesegmode);

  bool stdInput = !strcmp(image, "stdin") || !strcmp(image, "-");
  Pix* pixs = NULL;
  if (stdInput) {
开发者ID:BruceWoR,项目名称:tess-two-master,代码行数:67,代码来源:tesseractmain.cpp

示例13: save_summary

void Wordrec::save_summary(inT32 elapsed_time) {
  #ifndef SECURE_NAMES
  STRING outfilename;
  FILE *f;
  int x;
  int total;

  outfilename = imagefile + ".sta";
  f = open_file (outfilename.string(), "w");

  fprintf (f, INT32FORMAT " seconds elapsed\n", elapsed_time);
  fprintf (f, "\n");

  fprintf (f, "%d characters\n", character_count);
  fprintf (f, "%d words\n", word_count);
  fprintf (f, "\n");

  fprintf (f, "%d permutations performed\n", permutation_count);
  fprintf (f, "%d characters classified\n", chars_classified);
  fprintf (f, "%4.0f%% classification overhead\n",
    (float) chars_classified / character_count * 100.0 - 100.0);
  fprintf (f, "\n");

  fprintf (f, "%d words chopped (pass 1) ", words_chopped1);
  fprintf (f, " (%0.0f%%)\n", (float) words_chopped1 / word_count * 100);
  fprintf (f, "%d chops performed\n", chops_performed1);
  fprintf (f, "%d chops attempted\n", chops_attempted1);
  fprintf (f, "\n");

  fprintf (f, "%d words joined (pass 1)", words_segmented1);
  fprintf (f, " (%0.0f%%)\n", (float) words_segmented1 / word_count * 100);
  fprintf (f, "%d segmentation states\n", segmentation_states1);
  fprintf (f, "%d segmentations timed out\n", states_timed_out1);
  fprintf (f, "\n");

  fprintf (f, "%d words chopped (pass 2) ", words_chopped2);
  fprintf (f, " (%0.0f%%)\n", (float) words_chopped2 / word_count * 100);
  fprintf (f, "%d chops performed\n", chops_performed2);
  fprintf (f, "%d chops attempted\n", chops_attempted2);
  fprintf (f, "\n");

  fprintf (f, "%d words joined (pass 2)", words_segmented2);
  fprintf (f, " (%0.0f%%)\n", (float) words_segmented2 / word_count * 100);
  fprintf (f, "%d segmentation states\n", segmentation_states2);
  fprintf (f, "%d segmentations timed out\n", states_timed_out2);
  fprintf (f, "\n");

  total = 0;
  iterate_tally (states_before_best, x)
    total += (tally_entry (states_before_best, x) * x);
  fprintf (f, "segmentations (before best) = %d\n", total);
  if (total != 0.0)
    fprintf (f, "%4.0f%% segmentation overhead\n",
      (float) (segmentation_states1 + segmentation_states2) /
      total * 100.0 - 100.0);
  fprintf (f, "\n");

  print_tally (f, "segmentations (before best)", states_before_best);

  iterate_tally (best_certainties[0], x)
    cprintf ("best certainty of %8.4f = %4d %4d\n",
    x * CERTAINTY_BUCKET_SIZE,
    tally_entry (best_certainties[0], x),
    tally_entry (best_certainties[1], x));

  PrintIntMatcherStats(f);
  dj_statistics(f);
  fclose(f);
  #endif
}
开发者ID:mk219533,项目名称:tesseract-ocr,代码行数:70,代码来源:metrics.cpp


注:本文中的STRING::string方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。