本文整理汇总了 C++ 中 UNICHARSET::contains_unichar 方法的典型用法代码示例。如果您正苦于以下问题:C++ UNICHARSET::contains_unichar 方法的具体用法?C++ UNICHARSET::contains_unichar 怎么用?C++ UNICHARSET::contains_unichar 使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 UNICHARSET 的用法示例。
在下文中一共展示了UNICHARSET::contains_unichar方法的2个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: ParseAmbiguityLine
bool UnicharAmbigs::ParseAmbiguityLine(
int line_num, int version, int debug_level, const UNICHARSET &unicharset,
char *buffer, int *test_ambig_part_size, UNICHAR_ID *test_unichar_ids,
int *replacement_ambig_part_size, char *replacement_string, int *type) {
if (version > 1) {
// Simpler format is just wrong-string correct-string type\n.
STRING input(buffer);
GenericVector<STRING> fields;
input.split(' ', &fields);
if (fields.size() != 3) {
if (debug_level) tprintf(kIllegalMsg, line_num);
return false;
}
// Encode wrong-string.
GenericVector<UNICHAR_ID> unichars;
if (!unicharset.encode_string(fields[0].string(), true, &unichars, NULL,
NULL)) {
return false;
}
*test_ambig_part_size = unichars.size();
if (*test_ambig_part_size > MAX_AMBIG_SIZE) {
if (debug_level)
tprintf("Too many unichars in ambiguity on line %d\n", line_num);
return false;
}
// Copy encoded string to output.
for (int i = 0; i < unichars.size(); ++i)
test_unichar_ids[i] = unichars[i];
test_unichar_ids[unichars.size()] = INVALID_UNICHAR_ID;
// Encode replacement-string to check validity.
if (!unicharset.encode_string(fields[1].string(), true, &unichars, NULL,
NULL)) {
return false;
}
*replacement_ambig_part_size = unichars.size();
if (*replacement_ambig_part_size > MAX_AMBIG_SIZE) {
if (debug_level)
tprintf("Too many unichars in ambiguity on line %d\n", line_num);
return false;
}
if (sscanf(fields[2].string(), "%d", type) != 1) {
if (debug_level) tprintf(kIllegalMsg, line_num);
return false;
}
snprintf(replacement_string, kMaxAmbigStringSize, "%s", fields[1].string());
return true;
}
int i;
char *token;
char *next_token;
if (!(token = strtok_r(buffer, kAmbigDelimiters, &next_token)) ||
!sscanf(token, "%d", test_ambig_part_size) ||
*test_ambig_part_size <= 0) {
if (debug_level) tprintf(kIllegalMsg, line_num);
return false;
}
if (*test_ambig_part_size > MAX_AMBIG_SIZE) {
if (debug_level)
tprintf("Too many unichars in ambiguity on line %d\n", line_num);
return false;
}
for (i = 0; i < *test_ambig_part_size; ++i) {
if (!(token = strtok_r(NULL, kAmbigDelimiters, &next_token))) break;
if (!unicharset.contains_unichar(token)) {
if (debug_level) tprintf(kIllegalUnicharMsg, token);
break;
}
test_unichar_ids[i] = unicharset.unichar_to_id(token);
}
test_unichar_ids[i] = INVALID_UNICHAR_ID;
if (i != *test_ambig_part_size ||
!(token = strtok_r(NULL, kAmbigDelimiters, &next_token)) ||
!sscanf(token, "%d", replacement_ambig_part_size) ||
*replacement_ambig_part_size <= 0) {
if (debug_level) tprintf(kIllegalMsg, line_num);
return false;
}
if (*replacement_ambig_part_size > MAX_AMBIG_SIZE) {
if (debug_level)
tprintf("Too many unichars in ambiguity on line %d\n", line_num);
return false;
}
replacement_string[0] = '\0';
for (i = 0; i < *replacement_ambig_part_size; ++i) {
if (!(token = strtok_r(NULL, kAmbigDelimiters, &next_token))) break;
strcat(replacement_string, token);
if (!unicharset.contains_unichar(token)) {
if (debug_level) tprintf(kIllegalUnicharMsg, token);
break;
}
}
if (i != *replacement_ambig_part_size) {
if (debug_level) tprintf(kIllegalMsg, line_num);
return false;
}
if (version > 0) {
// The next field being true indicates that the abiguity should
// always be substituted (e.g. '' should always be changed to ").
// For such "certain" n -> m ambigs tesseract will insert character
//.........这里部分代码省略.........
示例2: ParseAmbiguityLine
// Parses one line of an ambiguity file that has been read into buffer.
//
// The line is tokenized in place with strtok_r() using kAmbigDelimiters, so
// buffer is modified. The expected token layout is:
//   <n> <wrong_1> ... <wrong_n> <m> <right_1> ... <right_m> [<type>]
// The trailing <type> token is only read when version > 0.
//
// Parameters:
//   line_num    - line number, used only in diagnostic messages.
//   version     - file format version; > 0 means a type field is present.
//   unicharset  - used to validate every token and to map the "wrong" tokens
//                 to unichar ids.
//   buffer      - NUL-terminated line to parse (modified by tokenization).
//   TestAmbigPartSize        - receives n.
//   TestUnicharIds           - receives the n ids followed by
//                              INVALID_UNICHAR_ID, so it needs n + 1 slots.
//   ReplacementAmbigPartSize - receives m.
//   ReplacementString        - receives the concatenation of the m
//                              replacement tokens.
//   type        - receives the integer type field; left untouched when
//                 version <= 0.
//
// Returns true if the whole line parsed and every unichar is contained in
// unicharset. Returns false otherwise; the outputs may then be partially
// written.
//
// NOTE(review): ReplacementString is filled with strcat() and no capacity is
// passed in, so the caller must supply a buffer large enough for
// MAX_AMBIG_SIZE tokens plus the terminator - confirm against the caller.
bool UnicharAmbigs::ParseAmbiguityLine(
    int line_num, int version, const UNICHARSET &unicharset,
    char *buffer, int *TestAmbigPartSize, UNICHAR_ID *TestUnicharIds,
    int *ReplacementAmbigPartSize, char *ReplacementString, int *type) {
  int i;
  char *token;
  char *next_token;
  // Leading count of unichars in the "wrong" part. sscanf() may return EOF
  // (-1) as well as 0 on failure, so require exactly one conversion, and
  // check the parsed value rather than the pointer.
  if (!(token = strtok_r(buffer, kAmbigDelimiters, &next_token)) ||
      sscanf(token, "%d", TestAmbigPartSize) != 1 ||
      *TestAmbigPartSize <= 0) {
    if (global_ambigs_debug_level) tprintf(kIllegalMsg, line_num);
    return false;
  }
  if (*TestAmbigPartSize > MAX_AMBIG_SIZE) {
    tprintf("Too many unichars in ambiguity on line %d\n", line_num);
    return false;
  }
  // Convert each "wrong" token to its unichar id; stop early on a missing
  // token or on a token that is not in the unicharset.
  for (i = 0; i < *TestAmbigPartSize; ++i) {
    if (!(token = strtok_r(NULL, kAmbigDelimiters, &next_token))) break;
    if (!unicharset.contains_unichar(token)) {
      if (global_ambigs_debug_level) tprintf(kIllegalUnicharMsg, token);
      break;
    }
    TestUnicharIds[i] = unicharset.unichar_to_id(token);
  }
  // Terminate the id list after the last id that was stored.
  TestUnicharIds[i] = INVALID_UNICHAR_ID;
  // i falls short of the declared count when the loop above bailed out.
  if (i != *TestAmbigPartSize ||
      !(token = strtok_r(NULL, kAmbigDelimiters, &next_token)) ||
      sscanf(token, "%d", ReplacementAmbigPartSize) != 1 ||
      *ReplacementAmbigPartSize <= 0) {
    if (global_ambigs_debug_level) tprintf(kIllegalMsg, line_num);
    return false;
  }
  if (*ReplacementAmbigPartSize > MAX_AMBIG_SIZE) {
    tprintf("Too many unichars in ambiguity on line %d\n", line_num);
    return false;
  }
  // Build the replacement string by concatenating the replacement tokens,
  // validating each one against the unicharset.
  ReplacementString[0] = '\0';
  for (i = 0; i < *ReplacementAmbigPartSize; ++i) {
    if (!(token = strtok_r(NULL, kAmbigDelimiters, &next_token))) break;
    strcat(ReplacementString, token);
    if (!unicharset.contains_unichar(token)) {
      if (global_ambigs_debug_level) tprintf(kIllegalUnicharMsg, token);
      break;
    }
  }
  if (i != *ReplacementAmbigPartSize) {
    if (global_ambigs_debug_level) tprintf(kIllegalMsg, line_num);
    return false;
  }
  if (version > 0) {
    // The next field being true indicates that the ambiguity should
    // always be substituted (e.g. '' should always be changed to ").
    // For such "certain" n -> m ambigs tesseract will insert character
    // fragments for the n pieces in the unicharset. AmbigsFound()
    // will then replace the incorrect ngram with the character
    // fragments of the correct character (or ngram if m > 1).
    // Note that if m > 1, an ngram will be inserted into the
    // modified word, not the individual unigrams. Tesseract
    // has limited support for ngram unichar (e.g. dawg permuter).
    if (!(token = strtok_r(NULL, kAmbigDelimiters, &next_token)) ||
        sscanf(token, "%d", type) != 1) {
      if (global_ambigs_debug_level) tprintf(kIllegalMsg, line_num);
      return false;
    }
  }
  return true;
}