本文整理汇总了C++中UNICHARSET::encode_string方法的典型用法代码示例。如果您正苦于以下问题:C++ UNICHARSET::encode_string方法的具体用法?C++ UNICHARSET::encode_string怎么用?C++ UNICHARSET::encode_string使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类UNICHARSET的用法示例。
在下文中一共展示了UNICHARSET::encode_string方法的2个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: WERD_CHOICE
/**
 * WERD_CHOICE::WERD_CHOICE
 *
 * Builds a WERD_CHOICE from a plain C string by encoding it against the
 * supplied unicharset.
 *
 * @param src_string  Text to encode; the caller must pass a non-NULL pointer.
 * @param unicharset  Character set used for the encoding; its address is
 *                    stored in unicharset_.
 *
 * If the string cannot be encoded, the choice is set up with init(8) and
 * then flagged via make_bad().
 */
WERD_CHOICE::WERD_CHOICE(const char *src_string,
                         const UNICHARSET &unicharset)
    : unicharset_(&unicharset) {
  GenericVector<UNICHAR_ID> unichar_ids;
  GenericVector<char> step_lengths;
  const bool encoded_ok = unicharset.encode_string(
      src_string, true, &unichar_ids, &step_lengths, NULL);

  if (!encoded_ok) {
    // There must have been an invalid unichar in the string.
    this->init(8);
    this->make_bad();
    return;
  }

  // Terminate the lengths buffer so it can be read as a C string; only the
  // lengths (not the encoded ids) are handed on to init().
  step_lengths.push_back('\0');
  STRING lengths_as_string = &step_lengths[0];
  this->init(src_string, lengths_as_string.string(), 0.0, 0.0, NO_PERM);
}
示例2: ParseAmbiguityLine
bool UnicharAmbigs::ParseAmbiguityLine(
int line_num, int version, int debug_level, const UNICHARSET &unicharset,
char *buffer, int *test_ambig_part_size, UNICHAR_ID *test_unichar_ids,
int *replacement_ambig_part_size, char *replacement_string, int *type) {
if (version > 1) {
// Simpler format is just wrong-string correct-string type\n.
STRING input(buffer);
GenericVector<STRING> fields;
input.split(' ', &fields);
if (fields.size() != 3) {
if (debug_level) tprintf(kIllegalMsg, line_num);
return false;
}
// Encode wrong-string.
GenericVector<UNICHAR_ID> unichars;
if (!unicharset.encode_string(fields[0].string(), true, &unichars, NULL,
NULL)) {
return false;
}
*test_ambig_part_size = unichars.size();
if (*test_ambig_part_size > MAX_AMBIG_SIZE) {
if (debug_level)
tprintf("Too many unichars in ambiguity on line %d\n", line_num);
return false;
}
// Copy encoded string to output.
for (int i = 0; i < unichars.size(); ++i)
test_unichar_ids[i] = unichars[i];
test_unichar_ids[unichars.size()] = INVALID_UNICHAR_ID;
// Encode replacement-string to check validity.
if (!unicharset.encode_string(fields[1].string(), true, &unichars, NULL,
NULL)) {
return false;
}
*replacement_ambig_part_size = unichars.size();
if (*replacement_ambig_part_size > MAX_AMBIG_SIZE) {
if (debug_level)
tprintf("Too many unichars in ambiguity on line %d\n", line_num);
return false;
}
if (sscanf(fields[2].string(), "%d", type) != 1) {
if (debug_level) tprintf(kIllegalMsg, line_num);
return false;
}
snprintf(replacement_string, kMaxAmbigStringSize, "%s", fields[1].string());
return true;
}
int i;
char *token;
char *next_token;
if (!(token = strtok_r(buffer, kAmbigDelimiters, &next_token)) ||
!sscanf(token, "%d", test_ambig_part_size) ||
*test_ambig_part_size <= 0) {
if (debug_level) tprintf(kIllegalMsg, line_num);
return false;
}
if (*test_ambig_part_size > MAX_AMBIG_SIZE) {
if (debug_level)
tprintf("Too many unichars in ambiguity on line %d\n", line_num);
return false;
}
for (i = 0; i < *test_ambig_part_size; ++i) {
if (!(token = strtok_r(NULL, kAmbigDelimiters, &next_token))) break;
if (!unicharset.contains_unichar(token)) {
if (debug_level) tprintf(kIllegalUnicharMsg, token);
break;
}
test_unichar_ids[i] = unicharset.unichar_to_id(token);
}
test_unichar_ids[i] = INVALID_UNICHAR_ID;
if (i != *test_ambig_part_size ||
!(token = strtok_r(NULL, kAmbigDelimiters, &next_token)) ||
!sscanf(token, "%d", replacement_ambig_part_size) ||
*replacement_ambig_part_size <= 0) {
if (debug_level) tprintf(kIllegalMsg, line_num);
return false;
}
if (*replacement_ambig_part_size > MAX_AMBIG_SIZE) {
if (debug_level)
tprintf("Too many unichars in ambiguity on line %d\n", line_num);
return false;
}
replacement_string[0] = '\0';
for (i = 0; i < *replacement_ambig_part_size; ++i) {
if (!(token = strtok_r(NULL, kAmbigDelimiters, &next_token))) break;
strcat(replacement_string, token);
if (!unicharset.contains_unichar(token)) {
if (debug_level) tprintf(kIllegalUnicharMsg, token);
break;
}
}
if (i != *replacement_ambig_part_size) {
if (debug_level) tprintf(kIllegalMsg, line_num);
return false;
}
if (version > 0) {
// The next field being true indicates that the ambiguity should
// always be substituted (e.g. '' should always be changed to ").
// For such "certain" n -> m ambigs tesseract will insert character
//.........这里部分代码省略.........