C++ UNICHARSET类代码示例

本文整理汇总了C++中UNICHARSET类的典型用法代码示例。如果您正苦于以下问题：C++ UNICHARSET类的具体用法？C++ UNICHARSET怎么用？C++ UNICHARSET使用的例子？那么, 这里精选的类代码示例或许可以为您提供帮助。

在下文中一共展示了UNICHARSET类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的C++代码示例。

示例1: StrLen

 char_32 *CubeUtils::ToUpper(const char_32 *str32, CharSet *char_set) {
     if (!char_set) {
         return NULL;
     }
     UNICHARSET *unicharset = char_set->InternalUnicharset();
     int len = StrLen(str32);
     char_32 *upper = new char_32[len + 1];
     if (!upper)
         return NULL;
     for (int i = 0; i < len; ++i) {
         char_32 ch = str32[i];
         if (ch == INVALID_UNICHAR_ID) {
             delete[] upper;
             return NULL;
         }
         // convert lower-case characters to upper-case
         if (unicharset->get_islower(char_set->ClassID(ch))) {
             UNICHAR_ID uid_upper = unicharset->get_other_case(char_set->ClassID(ch));
             const char_32 *str32_upper = char_set->ClassString(uid_upper);
             // expect upper-case version of character to be a single character
             if (!str32_upper || StrLen(str32_upper) != 1) {
                 delete[] upper;
                 return NULL;
             }
             upper[i] = str32_upper[0];
         } else {
             upper[i] = ch;
         }
     }
     upper[len] = 0;
     return upper;
 }

开发者ID:mehulsbhatt，项目名称:MyOCRTEST，代码行数:32，代码来源:cube_utils.cpp

示例2: SetPropertiesForInputFile

// Helper to set the properties for an input unicharset file, writes to the
// output file. If an appropriate script unicharset can be found in the
// script_dir directory, then the tops and bottoms are expanded using the
// script unicharset.
// If non-empty, xheight data for the fonts are written to the xheights_file.
void SetPropertiesForInputFile(const std::string& script_dir,
                               const std::string& input_unicharset_file,
                               const std::string& output_unicharset_file,
                               const std::string& output_xheights_file) {
  UNICHARSET unicharset;

  // Load the input unicharset
  unicharset.load_from_file(input_unicharset_file.c_str());
  tprintf("Loaded unicharset of size %d from file %s\n", unicharset.size(),
          input_unicharset_file.c_str());

  // Set unichar properties
  tprintf("Setting unichar properties\n");
  SetupBasicProperties(true, false, &unicharset);
  tprintf("Setting script properties\n");
  SetScriptProperties(script_dir, &unicharset);
  if (!output_xheights_file.empty()) {
    std::string xheights_str = GetXheightString(script_dir, unicharset);
    File::WriteStringToFileOrDie(xheights_str, output_xheights_file);
  }

  // Write the output unicharset
  tprintf("Writing unicharset to file %s\n", output_unicharset_file.c_str());
  unicharset.save_to_file(output_unicharset_file.c_str());
}

开发者ID:jan-ruzicka，项目名称:tesseract，代码行数:30，代码来源:unicharset_training_utils.cpp

示例3: PartialSetPropertiesFromOther

// Sets all the properties for this unicharset given a src unicharset with
// everything set. The unicharsets don't have to be the same, and graphemes
// are correctly accounted for.
void UNICHARSET::PartialSetPropertiesFromOther(int start_index,
                                               const UNICHARSET& src) {
  for (int ch = start_index; ch < size_used; ++ch) {
    const char* utf8 = id_to_unichar(ch);
    UNICHAR_PROPERTIES properties;
    if (src.GetStrProperties(utf8, &properties)) {
      // Setup the script_id, other_case, and mirror properly.
      const char* script = src.get_script_from_script_id(properties.script_id);
      properties.script_id = add_script(script);
      const char* other_case = src.id_to_unichar(properties.other_case);
      if (contains_unichar(other_case)) {
        properties.other_case = unichar_to_id(other_case);
      } else {
        properties.other_case = ch;
      }
      const char* mirror_str = src.id_to_unichar(properties.mirror);
      if (contains_unichar(mirror_str)) {
        properties.mirror = unichar_to_id(mirror_str);
      } else {
        properties.mirror = ch;
      }
      unichars[ch].properties.CopyFrom(properties);
      set_normed_ids(ch);
    } else {
      tprintf("Failed to get properties for index %d = %s\n", ch, utf8);
    }
  }
}

开发者ID:0ximDigital，项目名称:appsScanner，代码行数:31，代码来源:unicharset.cpp

示例4: main

int main(int argc, char** argv) {
  int option;
  const char* output_directory = ".";
  STRING unicharset_file_name;
  // Special characters are now included by default.
  UNICHARSET unicharset;

  setlocale(LC_ALL, "");

  // Print usage
  if (argc <= 1) {
    printf("Usage: %s [-D DIRECTORY] FILE...\n", argv[0]);
    exit(1);

  }

  // Parse arguments
  while ((option = tessopt(argc, argv, "D" )) != EOF) {
    switch (option) {
      case 'D':
        output_directory = tessoptarg;
        ++tessoptind;
        break;
    }
  }

  // Save file name
  unicharset_file_name = output_directory;
  unicharset_file_name += "/";
  unicharset_file_name += kUnicharsetFileName;

  // Load box files
  for (; tessoptind < argc; ++tessoptind) {
    printf("Extracting unicharset from %s\n", argv[tessoptind]);

    FILE* box_file = fopen(argv[tessoptind], "rb");
    if (box_file == NULL) {
      printf("Cannot open box file %s\n", argv[tessoptind]);
      return -1;
    }

    TBOX box;
    STRING unichar_string;
    int line_number = 0;
    while (ReadNextBox(&line_number, box_file, &unichar_string, &box)) {
      unicharset.unichar_insert(unichar_string.string());
      set_properties(&unicharset, unichar_string.string());
    }
  }

  // Write unicharset file
  if (unicharset.save_to_file(unicharset_file_name.string())) {
    printf("Wrote unicharset file %s.\n", unicharset_file_name.string());
  }
  else {
    printf("Cannot save unicharset file %s.\n", unicharset_file_name.string());
    return -1;
  }
  return 0;
}

开发者ID:ArnoldWu，项目名称:tess-two，代码行数:60，代码来源:unicharset_extractor.cpp

示例5: print_ratings_info

/**
 * print_ratings_info
 *
 * Send all the ratings out to the logfile.
 *
 * @param fp file to use
 * @param ratings list of results
 * @param current_unicharset unicharset that can be used
 * for id-to-unichar conversion
 */
void print_ratings_info(FILE *fp,
                        BLOB_CHOICE_LIST *ratings,
                        const UNICHARSET &current_unicharset) {
  inT32 index;                    // to list
  inT32 best_index;               // to list
  FLOAT32 best_rat;               // rating
  FLOAT32 best_cert;              // certainty
  const char* first_char = NULL;  // character
  FLOAT32 first_rat;              // rating
  FLOAT32 first_cert;             // certainty
  const char* sec_char = NULL;    // character
  FLOAT32 sec_rat = 0.0f;         // rating
  FLOAT32 sec_cert = 0.0f;        // certainty
  BLOB_CHOICE_IT c_it = ratings;  // iterator

  index = ratings->length();
  if (index > 0) {
    first_char = current_unicharset.id_to_unichar(c_it.data()->unichar_id());
    first_rat = c_it.data()->rating();
    first_cert = -c_it.data()->certainty();
    if (index > 1) {
      sec_char = current_unicharset.id_to_unichar(
          c_it.data_relative(1)->unichar_id());
      sec_rat = c_it.data_relative(1)->rating();
      sec_cert = -c_it.data_relative(1)->certainty();
    } else {
      sec_char = NULL;
      sec_rat = -1;
      sec_cert = -1;
    }
  } else {
    first_char = NULL;
    first_rat = -1;
    first_cert = -1;
  }
  best_index = -1;
  best_rat = -1;
  best_cert = -1;
  for (index = 0, c_it.mark_cycle_pt(); !c_it.cycled_list();
       c_it.forward(), index++) {
    if (strcmp(current_unicharset.id_to_unichar(c_it.data()->unichar_id()),
               blob_answer) == 0) {
      best_index = index;
      best_rat = c_it.data()->rating();
      best_cert = -c_it.data()->certainty();
    }
  }
  if (first_char != NULL && (*first_char == '\0' || *first_char == ' '))
    first_char = NULL;
  if (sec_char != NULL && (*sec_char == '\0' || *sec_char == ' '))
    sec_char = NULL;
  fprintf(matcher_fp,
          " " INT32FORMAT " " INT32FORMAT " %g %g %s %g %g %s %g %g\n",
          ratings->length(), best_index, best_rat, best_cert,
          first_char != NULL ? first_char : "~",
          first_rat, first_cert, sec_char != NULL ? sec_char : "~",
          sec_rat, sec_cert);
}

开发者ID:AngusHardie，项目名称:TesseractOCR-For-Mac，代码行数:68，代码来源:ratngs.cpp

示例6:

// Constructor is private. Only anticipated use of ErrorCounter is via
// the static ComputeErrorRate.
ErrorCounter::ErrorCounter(const UNICHARSET& unicharset, int fontsize)
  : scaled_error_(0.0), rating_epsilon_(kRatingEpsilon),
    unichar_counts_(unicharset.size(), unicharset.size(), 0),
    ok_score_hist_(0, 101), bad_score_hist_(0, 101),
    unicharset_(unicharset) {
  Counts empty_counts;
  font_counts_.init_to_size(fontsize, empty_counts);
  multi_unichar_counts_.init_to_size(unicharset.size(), 0);
}

开发者ID:xmarston，项目名称:BillRecognizer，代码行数:11，代码来源:errorcounter.cpp

示例7: AddAllScriptsConverted

// Helper adds all the scripts from sid_set converted to ids from osd_set to
// allowed_ids.
static void AddAllScriptsConverted(const UNICHARSET& sid_set,
                                   const UNICHARSET& osd_set,
                                   GenericVector<int>* allowed_ids) {
  for (int i = 0; i < sid_set.get_script_table_size(); ++i) {
    if (i != sid_set.null_sid()) {
      const char* script = sid_set.get_script_from_script_id(i);
      allowed_ids->push_back(osd_set.get_script_id_from_name(script));
    }
  }
}

开发者ID:Kailigithub，项目名称:tesseract，代码行数:12，代码来源:pagesegmain.cpp

示例8: absolute_garbage

bool Dict::absolute_garbage(const WERD_CHOICE &word,
                            const UNICHARSET &unicharset) {
  if (word.length() < kMinAbsoluteGarbageWordLength) return false;
  int num_alphanum = 0;
  for (int x = 0; x < word.length(); ++x) {
    num_alphanum += (unicharset.get_isalpha(word.unichar_id(x)) ||
                     unicharset.get_isdigit(word.unichar_id(x)));
  }
  return (static_cast<float>(num_alphanum) /
          static_cast<float>(word.length()) < kMinAbsoluteGarbageAlphanumFrac);
}

开发者ID:0ximDigital，项目名称:appsScanner，代码行数:11，代码来源:context.cpp

示例9: GetXheightString

// Helper gets the combined x-heights string.
std::string GetXheightString(const std::string& script_dir,
                        const UNICHARSET& unicharset) {
  std::string xheights_str;
  for (int s = 0; s < unicharset.get_script_table_size(); ++s) {
    // Load the xheights for the script if available.
    std::string filename = script_dir + "/" +
                      unicharset.get_script_from_script_id(s) + ".xheights";
    std::string script_heights;
    if (File::ReadFileToString(filename, &script_heights))
      xheights_str += script_heights;
  }
  return xheights_str;
}

开发者ID:jan-ruzicka，项目名称:tesseract，代码行数:14，代码来源:unicharset_training_utils.cpp

示例10: print

// Print the best guesses out of the match rating matrix.
void MATRIX::print(const UNICHARSET &unicharset) const {
  tprintf("Ratings Matrix (top 3 choices)\n");
  int dim = dimension();
  int band_width = bandwidth();
  int row, col;
  for (col = 0; col < dim; ++col) {
    for (row = col; row < dim && row < col + band_width; ++row) {
      BLOB_CHOICE_LIST *rating = this->get(col, row);
      if (rating == NOT_CLASSIFIED) continue;
      BLOB_CHOICE_IT b_it(rating);
      tprintf("col=%d row=%d ", col, row);
      for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
        tprintf("%s rat=%g cert=%g " ,
                unicharset.id_to_unichar(b_it.data()->unichar_id()),
                b_it.data()->rating(), b_it.data()->certainty());
      }
      tprintf("\n");
    }
    tprintf("\n");
  }
  tprintf("\n");
  for (col = 0; col < dim; ++col) tprintf("\t%d", col);
  tprintf("\n");
  for (row = 0; row < dim; ++row) {
    for (col = 0; col <= row; ++col) {
      if (col == 0) tprintf("%d\t", row);
      if (row >= col + band_width) {
        tprintf(" \t");
        continue;
      }
      BLOB_CHOICE_LIST *rating = this->get(col, row);
      if (rating != NOT_CLASSIFIED) {
        BLOB_CHOICE_IT b_it(rating);
        int counter = 0;
        for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
          tprintf("%s ",
                  unicharset.id_to_unichar(b_it.data()->unichar_id()));
          ++counter;
          if (counter == 3) break;
        }
        tprintf("\t");
      } else {
        tprintf(" \t");
      }
    }
    tprintf("\n");
  }
}

开发者ID:Kailigithub，项目名称:tesseract，代码行数:49，代码来源:matrix.cpp

示例11: strlen

/**
 * WERD_CHOICE::WERD_CHOICE
 *
 * Constructor to build a WERD_CHOICE from the given string.
 * The function assumes that src_string is not NULL.
 */
WERD_CHOICE::WERD_CHOICE(const char *src_string,
                         const UNICHARSET &unicharset) {
  STRING src_lengths;
  int len = strlen(src_string);
  const char *ptr = src_string;
  int step = unicharset.step(ptr);
  for (; ptr < src_string + len && step > 0;
       step = unicharset.step(ptr), src_lengths += step, ptr += step);
  if (step != 0 && ptr == src_string + len) {
    this->init(src_string, src_lengths.string(),
               0.0, 0.0, NO_PERM, unicharset);
  } else {  // there must have been an invalid unichar in the string
    this->init(8);
    this->make_bad();
  }
}

开发者ID:AngusHardie，项目名称:TesseractOCR-For-Mac，代码行数:22，代码来源:ratngs.cpp

示例12: init

/**
 * WERD_CHOICE::init
 *
 * Helper function to build a WERD_CHOICE from the given string,
 * fragment lengths, rating, certainty and permuter.
 *
 * The function assumes that src_string is not NULL.
 * src_lengths argument could be NULL, in which case the unichars
 * in src_string are assumed to all be of length 1.
 */
void WERD_CHOICE::init(const char *src_string,
                       const char *src_lengths,
                       float src_rating,
                       float src_certainty,
                       uinT8 src_permuter,
                       const UNICHARSET &unicharset) {
  int src_string_len = strlen(src_string);
  if (src_string_len == 0) {
    this->init(8);
  } else {
    this->init(src_lengths ? strlen(src_lengths): src_string_len);
    length_ = reserved_;
    int offset = 0;
    for (int i = 0; i < length_; ++i) {
      int unichar_length = src_lengths ? src_lengths[i] : 1;
      unichar_ids_[i] =
          unicharset.unichar_to_id(src_string+offset, unichar_length);
      fragment_lengths_[i] = 1;
      offset += unichar_length;
    }
  }
  rating_ = src_rating;
  certainty_ = src_certainty;
  permuter_ = src_permuter;
}

开发者ID:AngusHardie，项目名称:TesseractOCR-For-Mac，代码行数:35，代码来源:ratngs.cpp

示例13: AppendOtherUnicharset

// For each id in src, if it does not occur in this, add it, as in
// SetPropertiesFromOther, otherwise expand the ranges, as in
// ExpandRangesFromOther.
void UNICHARSET::AppendOtherUnicharset(const UNICHARSET& src) {
  int initial_used = size_used;
  for (int ch = 0; ch < src.size_used; ++ch) {
    const UNICHAR_PROPERTIES& src_props = src.unichars[ch].properties;
    const char* utf8 = src.id_to_unichar(ch);
    if (strcmp(utf8, " ") != 0 && src_props.AnyRangeEmpty()) {
      // Only use fully valid entries.
      tprintf("Bad properties for index %d, char %s: "
              "%d,%d %d,%d %d,%d %d,%d %d,%d\n",
              ch, utf8, src_props.min_bottom, src_props.max_bottom,
              src_props.min_top, src_props.max_top,
              src_props.min_width, src_props.max_width,
              src_props.min_bearing, src_props.max_bearing,
              src_props.min_advance, src_props.max_advance);
      continue;
    }
    int id = size_used;
    if (contains_unichar(utf8)) {
      id = unichar_to_id(utf8);
      // Just expand current ranges.
      unichars[id].properties.ExpandRangesFrom(src_props);
    } else {
      unichar_insert(utf8);
      unichars[id].properties.SetRangesEmpty();
    }
  }
  // Set properties, including mirror and other_case, WITHOUT reordering
  // the unicharset.
  PartialSetPropertiesFromOther(initial_used, src);
}

开发者ID:0ximDigital，项目名称:appsScanner，代码行数:33，代码来源:unicharset.cpp

示例14: print

// Print the best guesses out of the match rating matrix.
void MATRIX::print(const UNICHARSET &unicharset) {
  tprintf("Ratings Matrix (top choices)\n");
  int row, col;
  for (col = 0; col < this->dimension(); ++col) tprintf("\t%d", col);
  tprintf("\n");
  for (row = 0; row < this->dimension(); ++row) {
    for (col = 0; col <= row; ++col) {
      if (col == 0) tprintf("%d\t", row);
      BLOB_CHOICE_LIST *rating = this->get(col, row);
      if (rating != NOT_CLASSIFIED) {
        BLOB_CHOICE_IT b_it(rating);
        int counter = 0;
        for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
          tprintf("%s ", unicharset.id_to_unichar(b_it.data()->unichar_id()));
          ++counter;
          if (counter == 3) break;
        }
        tprintf("\t");
      } else {
        tprintf(" \t");
      }
    }
    tprintf("\n");
  }
}

开发者ID:0359xiaodong，项目名称:tess-two，代码行数:26，代码来源:matrix.cpp

示例15: print_ratings_list

/**********************************************************************
 * print_ratings_list
 *
 * Send all the ratings out to the logfile.
 **********************************************************************/
void print_ratings_list(
    const char *msg,                      // intro message
    BLOB_CHOICE_LIST *ratings,            // list of results
    const UNICHARSET &current_unicharset  // unicharset that can be used
                                          // for id-to-unichar conversion
    ) {
  if (ratings->length() == 0) {
    tprintf("%s:<none>\n", msg);
    return;
  }
  if (*msg != '\0') {
    tprintf("%s\n", msg);
  }
  BLOB_CHOICE_IT c_it;
  c_it.set_to_list(ratings);
  for (c_it.mark_cycle_pt(); !c_it.cycled_list(); c_it.forward()) {
    tprintf("r%.2f c%.2f : %d %s",
            c_it.data()->rating(), c_it.data()->certainty(),
            c_it.data()->unichar_id(),
            current_unicharset.debug_str(c_it.data()->unichar_id()).string());
    if (!c_it.at_last()) {
      tprintf("\n");
    }
  }
  tprintf("\n");
  fflush(stdout);
}

开发者ID:mk219533，项目名称:tesseract-ocr，代码行数:32，代码来源:ratngs.cpp

注：本文中的UNICHARSET类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。