C++ UNICHARSET::load_from_file方法代码示例

本文整理汇总了C++中UNICHARSET::load_from_file方法的典型用法代码示例。如果您正苦于以下问题：C++ UNICHARSET::load_from_file方法的具体用法？C++ UNICHARSET::load_from_file怎么用？C++ UNICHARSET::load_from_file使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类UNICHARSET的用法示例。

在下文中一共展示了UNICHARSET::load_from_file方法的6个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的C++代码示例。

示例1: SetPropertiesForInputFile

// Helper to set the properties for an input unicharset file, writes to the
// output file. If an appropriate script unicharset can be found in the
// script_dir directory, then the tops and bottoms are expanded using the
// script unicharset.
// If non-empty, xheight data for the fonts are written to the xheights_file.
//
// Parameters:
//   script_dir:             passed to SetScriptProperties and GetXheightString.
//   input_unicharset_file:  path of the unicharset to load.
//   output_unicharset_file: path the updated unicharset is saved to.
//   output_xheights_file:   path for the xheight string; skipped when empty.
//
// If the input unicharset cannot be loaded, an error is printed and the
// function returns without writing any output file. A failure to save the
// output unicharset is reported via tprintf.
void SetPropertiesForInputFile(const std::string& script_dir,
                               const std::string& input_unicharset_file,
                               const std::string& output_unicharset_file,
                               const std::string& output_xheights_file) {
  UNICHARSET unicharset;

  // Load the input unicharset. Stop here on failure: continuing would report
  // a successful load and write an output derived from an unloaded set.
  if (!unicharset.load_from_file(input_unicharset_file.c_str())) {
    tprintf("Failed to load unicharset from file %s\n",
            input_unicharset_file.c_str());
    return;
  }
  tprintf("Loaded unicharset of size %d from file %s\n", unicharset.size(),
          input_unicharset_file.c_str());

  // Set unichar properties
  tprintf("Setting unichar properties\n");
  SetupBasicProperties(true, false, &unicharset);
  tprintf("Setting script properties\n");
  SetScriptProperties(script_dir, &unicharset);
  if (!output_xheights_file.empty()) {
    std::string xheights_str = GetXheightString(script_dir, unicharset);
    File::WriteStringToFileOrDie(xheights_str, output_xheights_file);
  }

  // Write the output unicharset
  tprintf("Writing unicharset to file %s\n", output_unicharset_file.c_str());
  if (!unicharset.save_to_file(output_unicharset_file.c_str())) {
    tprintf("Failed to write unicharset to file %s\n",
            output_unicharset_file.c_str());
  }
}

开发者ID:jan-ruzicka，项目名称:tesseract，代码行数:30，代码来源:unicharset_training_utils.cpp

示例2: SetPropertiesForInputFile

// Helper to set the properties for an input unicharset file, writes to the
// output file. If an appropriate script unicharset can be found in the
// script_dir directory, then the tops and bottoms are expanded using the
// script unicharset.
// If non-empty, xheight data for the fonts are written to the xheights_file.
void SetPropertiesForInputFile(const string& script_dir,
                               const string& input_unicharset_file,
                               const string& output_unicharset_file,
                               const string& output_xheights_file) {
  UNICHARSET unicharset;

  // Load the input unicharset
  unicharset.load_from_file(input_unicharset_file.c_str());
  tprintf("Loaded unicharset of size %d from file %s\n", unicharset.size(),
          input_unicharset_file.c_str());

  // Set unichar properties
  tprintf("Setting unichar properties\n");
  SetupBasicProperties(true, false, &unicharset);
  string xheights_str;
  for (int s = 0; s < unicharset.get_script_table_size(); ++s) {
    // Load the unicharset for the script if available.
    string filename = script_dir + "/" +
        unicharset.get_script_from_script_id(s) + ".unicharset";
    UNICHARSET script_set;
    if (script_set.load_from_file(filename.c_str())) {
      unicharset.SetPropertiesFromOther(script_set);
    }
    // Load the xheights for the script if available.
    filename = script_dir + "/" + unicharset.get_script_from_script_id(s) +
        ".xheights";
    string script_heights;
    if (File::ReadFileToString(filename, &script_heights))
      xheights_str += script_heights;
  }
  if (!output_xheights_file.empty())
    File::WriteStringToFileOrDie(xheights_str, output_xheights_file);
  for (int c = SPECIAL_UNICHAR_CODES_COUNT; c < unicharset.size(); ++c) {
    if (unicharset.PropertiesIncomplete(c)) {
      tprintf("Warning: properties incomplete for index %d = %s\n",
              c, unicharset.id_to_unichar(c));
    }
  }

  // Write the output unicharset
  tprintf("Writing unicharset to file %s\n", output_unicharset_file.c_str());
  unicharset.save_to_file(output_unicharset_file.c_str());
}

开发者ID:vnvizitiu，项目名称:tesseract，代码行数:48，代码来源:unicharset_training_utils.cpp

示例3: main

int main(int argc, char** argv) {
  // Sets properties on the input unicharset file, and writes:
  //   rootdir/lang/lang.charset_size=ddd.txt
  //   rootdir/lang/lang.traineddata
  //   rootdir/lang/lang.unicharset
  // If the 3 word lists are provided, the dawgs are also added
  // to the traineddata file.
  // The output unicharset and charset_size files are just for
  // human readability.
  tesseract::CheckSharedLibraryVersion();
  tesseract::ParseCommandLineFlags(argv[0], &argc, &argv, true);

  GenericVector<STRING> words, puncs, numbers;
  // If these reads fail, we get a warning message and an empty list of words.
  tesseract::ReadFile(FLAGS_words.c_str(), nullptr).split('\n', &words);
  tesseract::ReadFile(FLAGS_puncs.c_str(), nullptr).split('\n', &puncs);
  tesseract::ReadFile(FLAGS_numbers.c_str(), nullptr).split('\n', &numbers);
  // Load the input unicharset
  UNICHARSET unicharset;
  if (!unicharset.load_from_file(FLAGS_input_unicharset.c_str(), false)) {
    tprintf("Failed to load unicharset from %s\n",
            FLAGS_input_unicharset.c_str());
    return 1;
  }
  tprintf("Loaded unicharset of size %d from file %s\n", unicharset.size(),
          FLAGS_input_unicharset.c_str());

  // Set unichar properties
  tprintf("Setting unichar properties\n");
  tesseract::SetupBasicProperties(/*report_errors*/ true,
                                  /*decompose (NFD)*/ false, &unicharset);
  tprintf("Setting script properties\n");
  tesseract::SetScriptProperties(FLAGS_script_dir.c_str(), &unicharset);
  // Combine everything into a traineddata file.
  return tesseract::CombineLangModel(
      unicharset, FLAGS_script_dir.c_str(), FLAGS_version_str.c_str(),
      FLAGS_output_dir.c_str(), FLAGS_lang.c_str(), FLAGS_pass_through_recoder,
      words, puncs, numbers, FLAGS_lang_is_rtl, /*reader*/ nullptr,
      /*writer*/ nullptr);
}

开发者ID:Shreeshrii，项目名称:tesseract，代码行数:40，代码来源:combine_lang_model.cpp

示例4: main

int main(int argc, char *argv[]) {
  if (argc != 4) {
    tprintf("Print all the words in a given dawg.\n");
    tprintf("Usage: %s <unicharset> <dawgfile> <wordlistfile>\n",
            argv[0]);
    return 1;
  }
  const char *unicharset_file = argv[1];
  const char *dawg_file = argv[2];
  const char *wordlist_file = argv[3];
  UNICHARSET unicharset;
  if (!unicharset.load_from_file(unicharset_file)) {
    tprintf("Error loading unicharset from %s.\n", unicharset_file);
    return 1;
  }
  tesseract::Dawg *dict = LoadSquishedDawg(unicharset, dawg_file);
  if (dict == NULL) {
    tprintf("Error loading dictionary from %s.\n", dawg_file);
    return 1;
  }
  int retval = WriteDawgAsWordlist(unicharset, dict, wordlist_file);
  delete dict;
  return retval;
}

开发者ID:0ximDigital，项目名称:appsScanner，代码行数:24，代码来源:dawg2wordlist.cpp

示例5: Main


//.........这里部分代码省略.........
    // unichars.
    render.set_vertical_text(true);
    render.set_gravity_hint_strong(true);
    render.set_render_fullwidth_latin(true);
  } else {
    tprintf("Invalid writing mode: %s\n", FLAGS_writing_mode.c_str());
    exit(1);
  }

  std::string src_utf8;
  // This c_str is NOT redundant!
  if (!File::ReadFileToString(FLAGS_text.c_str(), &src_utf8)) {
    tprintf("Failed to read file: %s\n", FLAGS_text.c_str());
    exit(1);
  }

  // Remove the unicode mark if present.
  if (strncmp(src_utf8.c_str(), "\xef\xbb\xbf", 3) == 0) {
    src_utf8.erase(0, 3);
  }
  tlog(1, "Render string of size %d\n", src_utf8.length());

  if (FLAGS_render_ngrams || FLAGS_only_extract_font_properties) {
    // Try to preserve behavior of old text2image by expanding inter-word
    // spaces by a factor of 4.
    const std::string kSeparator = FLAGS_render_ngrams ? "    " : " ";
    // Also restrict the number of characters per line to try and avoid
    // line-breaking in the middle of words like "-A", "R$" etc. which are
    // otherwise allowed by the standard unicode line-breaking rules.
    const unsigned int kCharsPerLine = (FLAGS_ptsize > 20) ? 50 : 100;
    std::string rand_utf8;
    UNICHARSET unicharset;
    if (FLAGS_render_ngrams && !FLAGS_unicharset_file.empty() &&
        !unicharset.load_from_file(FLAGS_unicharset_file.c_str())) {
      tprintf("Failed to load unicharset from file %s\n",
              FLAGS_unicharset_file.c_str());
      exit(1);
    }

    // If we are rendering ngrams that will be OCRed later, shuffle them so that
    // tesseract does not have difficulties finding correct baseline, word
    // spaces, etc.
    const char *str8 = src_utf8.c_str();
    int len = src_utf8.length();
    int step;
    std::vector<std::pair<int, int> > offsets;
    int offset = SpanUTF8Whitespace(str8);
    while (offset < len) {
      step = SpanUTF8NotWhitespace(str8 + offset);
      offsets.push_back(std::make_pair(offset, step));
      offset += step;
      offset += SpanUTF8Whitespace(str8 + offset);
    }
    if (FLAGS_render_ngrams)
      std::random_shuffle(offsets.begin(), offsets.end());

    for (size_t i = 0, line = 1; i < offsets.size(); ++i) {
      const char *curr_pos = str8 + offsets[i].first;
      int ngram_len = offsets[i].second;
      // Skip words that contain characters not in found in unicharset.
      std::string cleaned = UNICHARSET::CleanupString(curr_pos, ngram_len);
      if (!FLAGS_unicharset_file.empty() &&
          !unicharset.encodable_string(cleaned.c_str(), nullptr)) {
        continue;
      }
      rand_utf8.append(curr_pos, ngram_len);

开发者ID:jan-ruzicka，项目名称:tesseract，代码行数:67，代码来源:text2image.cpp

示例6: main

// Apart from command-line flags, input is a collection of lstmf files, that
// were previously created using tesseract with the lstm.train config file.
// The program iterates over the inputs, feeding the data to the network,
// until the error rate reaches a specified target or max_iterations is reached.
int main(int argc, char **argv) {
  ParseArguments(&argc, &argv);
  // Purify the model name in case it is based on the network string.
  if (FLAGS_model_output.empty()) {
    tprintf("Must provide a --model_output!\n");
    return 1;
  }
  STRING model_output = FLAGS_model_output.c_str();
  for (int i = 0; i < model_output.length(); ++i) {
    if (model_output[i] == '[' || model_output[i] == ']')
      model_output[i] = '-';
    if (model_output[i] == '(' || model_output[i] == ')')
      model_output[i] = '_';
  }
  // Setup the trainer.
  STRING checkpoint_file = FLAGS_model_output.c_str();
  checkpoint_file += "_checkpoint";
  STRING checkpoint_bak = checkpoint_file + ".bak";
  tesseract::LSTMTrainer trainer(
      NULL, NULL, NULL, NULL, FLAGS_model_output.c_str(),
      checkpoint_file.c_str(), FLAGS_debug_interval,
      static_cast<inT64>(FLAGS_max_image_MB) * 1048576);

  // Reading something from an existing model doesn't require many flags,
  // so do it now and exit.
  if (FLAGS_stop_training || FLAGS_debug_network) {
    if (!trainer.TryLoadingCheckpoint(FLAGS_continue_from.c_str())) {
      tprintf("Failed to read continue from: %s\n",
              FLAGS_continue_from.c_str());
      return 1;
    }
    if (FLAGS_debug_network) {
      trainer.DebugNetwork();
    } else {
      if (FLAGS_train_mode & tesseract::TF_INT_MODE)
        trainer.ConvertToInt();
      GenericVector<char> recognizer_data;
      trainer.SaveRecognitionDump(&recognizer_data);
      if (!tesseract::SaveDataToFile(recognizer_data,
                                     FLAGS_model_output.c_str())) {
        tprintf("Failed to write recognition model : %s\n",
                FLAGS_model_output.c_str());
      }
    }
    return 0;
  }

  // Get the list of files to process.
  if (FLAGS_train_listfile.empty()) {
    tprintf("Must supply a list of training filenames! --train_listfile\n");
    return 1;
  }
  GenericVector<STRING> filenames;
  if (!tesseract::LoadFileLinesToStrings(FLAGS_train_listfile.c_str(),
                                         &filenames)) {
    tprintf("Failed to load list of training filenames from %s\n",
            FLAGS_train_listfile.c_str());
    return 1;
  }

  UNICHARSET unicharset;
  // Checkpoints always take priority if they are available.
  if (trainer.TryLoadingCheckpoint(checkpoint_file.string()) ||
      trainer.TryLoadingCheckpoint(checkpoint_bak.string())) {
    tprintf("Successfully restored trainer from %s\n",
            checkpoint_file.string());
  } else {
    if (!FLAGS_continue_from.empty()) {
      // Load a past model file to improve upon.
      if (!trainer.TryLoadingCheckpoint(FLAGS_continue_from.c_str())) {
        tprintf("Failed to continue from: %s\n", FLAGS_continue_from.c_str());
        return 1;
      }
      tprintf("Continuing from %s\n", FLAGS_continue_from.c_str());
      trainer.InitIterations();
    }
    if (FLAGS_continue_from.empty() || FLAGS_append_index >= 0) {
      // We need a unicharset to start from scratch or append.
      string unicharset_str;
      // Character coding to be used by the classifier.
      if (!unicharset.load_from_file(FLAGS_U.c_str())) {
        tprintf("Error: must provide a -U unicharset!\n");
        return 1;
      }
      tesseract::SetupBasicProperties(true, &unicharset);
      if (FLAGS_append_index >= 0) {
        tprintf("Appending a new network to an old one!!");
        if (FLAGS_continue_from.empty()) {
          tprintf("Must set --continue_from for appending!\n");
          return 1;
        }
      }
      // We are initializing from scratch.
      trainer.InitCharSet(unicharset, FLAGS_script_dir.c_str(),
                          FLAGS_train_mode);
      if (!trainer.InitNetwork(FLAGS_net_spec.c_str(), FLAGS_append_index,
//.........这里部分代码省略.........

开发者ID:jbarlow83，项目名称:tesseract，代码行数:101，代码来源:lstmtraining.cpp

注：本文中的UNICHARSET::load_from_file方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。