当前位置: 首页>>代码示例>>C++>>正文


C++ UNICHARSET::contains_unichar方法代码示例

本文整理汇总了C++中UNICHARSET::contains_unichar方法的典型用法代码示例。如果您正苦于以下问题:C++ UNICHARSET::contains_unichar方法的具体用法?C++ UNICHARSET::contains_unichar怎么用?C++ UNICHARSET::contains_unichar使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在UNICHARSET的用法示例。


在下文中一共展示了UNICHARSET::contains_unichar方法的2个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。

示例1: ParseAmbiguityLine

bool UnicharAmbigs::ParseAmbiguityLine(
    int line_num, int version, int debug_level, const UNICHARSET &unicharset,
    char *buffer, int *test_ambig_part_size, UNICHAR_ID *test_unichar_ids,
    int *replacement_ambig_part_size, char *replacement_string, int *type) {
  if (version > 1) {
    // Simpler format is just wrong-string correct-string type\n.
    STRING input(buffer);
    GenericVector<STRING> fields;
    input.split(' ', &fields);
    if (fields.size() != 3) {
      if (debug_level) tprintf(kIllegalMsg, line_num);
      return false;
    }
    // Encode wrong-string.
    GenericVector<UNICHAR_ID> unichars;
    if (!unicharset.encode_string(fields[0].string(), true, &unichars, NULL,
                                  NULL)) {
      return false;
    }
    *test_ambig_part_size = unichars.size();
    if (*test_ambig_part_size > MAX_AMBIG_SIZE) {
      if (debug_level)
        tprintf("Too many unichars in ambiguity on line %d\n", line_num);
      return false;
    }
    // Copy encoded string to output.
    for (int i = 0; i < unichars.size(); ++i)
      test_unichar_ids[i] = unichars[i];
    test_unichar_ids[unichars.size()] = INVALID_UNICHAR_ID;
    // Encode replacement-string to check validity.
    if (!unicharset.encode_string(fields[1].string(), true, &unichars, NULL,
                                  NULL)) {
      return false;
    }
    *replacement_ambig_part_size = unichars.size();
    if (*replacement_ambig_part_size > MAX_AMBIG_SIZE) {
      if (debug_level)
        tprintf("Too many unichars in ambiguity on line %d\n", line_num);
      return false;
    }
    if (sscanf(fields[2].string(), "%d", type) != 1) {
      if (debug_level) tprintf(kIllegalMsg, line_num);
      return false;
    }
    snprintf(replacement_string, kMaxAmbigStringSize, "%s", fields[1].string());
    return true;
  }
  int i;
  char *token;
  char *next_token;
  if (!(token = strtok_r(buffer, kAmbigDelimiters, &next_token)) ||
      !sscanf(token, "%d", test_ambig_part_size) ||
      *test_ambig_part_size <= 0) {
    if (debug_level) tprintf(kIllegalMsg, line_num);
    return false;
  }
  if (*test_ambig_part_size > MAX_AMBIG_SIZE) {
    if (debug_level)
      tprintf("Too many unichars in ambiguity on line %d\n", line_num);
    return false;
  }
  for (i = 0; i < *test_ambig_part_size; ++i) {
    if (!(token = strtok_r(NULL, kAmbigDelimiters, &next_token))) break;
    if (!unicharset.contains_unichar(token)) {
      if (debug_level) tprintf(kIllegalUnicharMsg, token);
      break;
    }
    test_unichar_ids[i] = unicharset.unichar_to_id(token);
  }
  test_unichar_ids[i] = INVALID_UNICHAR_ID;

  if (i != *test_ambig_part_size ||
      !(token = strtok_r(NULL, kAmbigDelimiters, &next_token)) ||
      !sscanf(token, "%d", replacement_ambig_part_size) ||
        *replacement_ambig_part_size <= 0) {
    if (debug_level) tprintf(kIllegalMsg, line_num);
    return false;
  }
  if (*replacement_ambig_part_size > MAX_AMBIG_SIZE) {
    if (debug_level)
      tprintf("Too many unichars in ambiguity on line %d\n", line_num);
    return false;
  }
  replacement_string[0] = '\0';
  for (i = 0; i < *replacement_ambig_part_size; ++i) {
    if (!(token = strtok_r(NULL, kAmbigDelimiters, &next_token))) break;
    strcat(replacement_string, token);
    if (!unicharset.contains_unichar(token)) {
      if (debug_level) tprintf(kIllegalUnicharMsg, token);
      break;
    }
  }
  if (i != *replacement_ambig_part_size) {
    if (debug_level) tprintf(kIllegalMsg, line_num);
    return false;
  }
  if (version > 0) {
    // The next field being true indicates that the abiguity should
    // always be substituted (e.g. '' should always be changed to ").
    // For such "certain" n -> m ambigs tesseract will insert character
//.........这里部分代码省略.........
开发者ID:Kailigithub,项目名称:tesseract,代码行数:101,代码来源:ambigs.cpp

示例2: ParseAmbiguityLine

bool UnicharAmbigs::ParseAmbiguityLine(
    int line_num, int version, const UNICHARSET &unicharset,
    char *buffer, int *TestAmbigPartSize, UNICHAR_ID *TestUnicharIds,
    int *ReplacementAmbigPartSize, char *ReplacementString, int *type) {
  int i;
  char *token;
  char *next_token;
  if (!(token = strtok_r(buffer, kAmbigDelimiters, &next_token)) ||
      !sscanf(token, "%d", TestAmbigPartSize) || TestAmbigPartSize <= 0) {
    if (global_ambigs_debug_level) tprintf(kIllegalMsg, line_num);
    return false;
  }
  if (*TestAmbigPartSize > MAX_AMBIG_SIZE) {
    tprintf("Too many unichars in ambiguity on line %d\n");
    return false;
  }
  for (i = 0; i < *TestAmbigPartSize; ++i) {
    if (!(token = strtok_r(NULL, kAmbigDelimiters, &next_token))) break;
    if (!unicharset.contains_unichar(token)) {
      if (global_ambigs_debug_level) tprintf(kIllegalUnicharMsg, token);
      break;
    }
    TestUnicharIds[i] = unicharset.unichar_to_id(token);
  }
  TestUnicharIds[i] = INVALID_UNICHAR_ID;

  if (i != *TestAmbigPartSize ||
      !(token = strtok_r(NULL, kAmbigDelimiters, &next_token)) ||
      !sscanf(token, "%d", ReplacementAmbigPartSize) ||
        *ReplacementAmbigPartSize <= 0) {
    if (global_ambigs_debug_level) tprintf(kIllegalMsg, line_num);
    return false;
  }
  if (*ReplacementAmbigPartSize > MAX_AMBIG_SIZE) {
    tprintf("Too many unichars in ambiguity on line %d\n");
    return false;
  }
  ReplacementString[0] = '\0';
  for (i = 0; i < *ReplacementAmbigPartSize; ++i) {
    if (!(token = strtok_r(NULL, kAmbigDelimiters, &next_token))) break;
    strcat(ReplacementString, token);
    if (!unicharset.contains_unichar(token)) {
      if (global_ambigs_debug_level) tprintf(kIllegalUnicharMsg, token);
      break;
    }
  }
  if (i != *ReplacementAmbigPartSize) {
    if (global_ambigs_debug_level) tprintf(kIllegalMsg, line_num);
    return false;
  }
  if (version > 0) {
    // The next field being true indicates that the abiguity should
    // always be substituted (e.g. '' should always be changed to ").
    // For such "certain" n -> m ambigs tesseract will insert character
    // fragments for the n pieces in the unicharset. AmbigsFound()
    // will then replace the incorrect ngram with the character
    // fragments of the correct character (or ngram if m > 1).
    // Note that if m > 1, an ngram will be inserted into the
    // modified word, not the individual unigrams. Tesseract
    // has limited support for ngram unichar (e.g. dawg permuter).
    if (!(token = strtok_r(NULL, kAmbigDelimiters, &next_token)) ||
        !sscanf(token, "%d", type)) {
      if (global_ambigs_debug_level) tprintf(kIllegalMsg, line_num);
      return false;
    }
  }
  return true;
}
开发者ID:Appiah,项目名称:tesseractstuff,代码行数:68,代码来源:ambigs.cpp


注:本文中的UNICHARSET::contains_unichar方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。