本文整理汇总了C++中UNICHARSET::unichar_to_id方法的典型用法代码示例。如果您正苦于以下问题：C++ UNICHARSET::unichar_to_id方法的具体用法？C++ UNICHARSET::unichar_to_id怎么用？C++ UNICHARSET::unichar_to_id使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类UNICHARSET的用法示例。
在下文中一共展示了UNICHARSET::unichar_to_id方法的6个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: wc_to_unichar_id
// Looks up the id that `unicharset` assigns to the UTF-8 string produced
// from `wc` (via the UNICHAR constructor and utf8_str()).
UNICHAR_ID wc_to_unichar_id(const UNICHARSET &unicharset, int wc) {
  UNICHAR wide_char(wc);
  // The buffer returned by utf8_str() is released with delete[] below.
  char *utf8 = wide_char.utf8_str();
  const UNICHAR_ID id = unicharset.unichar_to_id(utf8);
  delete[] utf8;
  return id;
}
示例2: init
/**
 * WERD_CHOICE::init
 *
 * Fills this WERD_CHOICE from a string, optional per-unichar byte lengths,
 * a rating, a certainty and a permuter.
 *
 * src_string must not be NULL. When src_lengths is NULL every unichar in
 * src_string is taken to be exactly one char long; otherwise src_lengths[i]
 * gives the length of the i-th unichar and strlen(src_lengths) gives the
 * number of unichars.
 *
 * An empty src_string only calls init(8) and stores no unichar ids.
 */
void WERD_CHOICE::init(const char *src_string,
                       const char *src_lengths,
                       float src_rating,
                       float src_certainty,
                       uinT8 src_permuter,
                       const UNICHARSET &unicharset) {
  const int src_string_len = strlen(src_string);
  if (src_string_len != 0) {
    this->init(src_lengths ? strlen(src_lengths): src_string_len);
    length_ = reserved_;
    // Walk the string one unichar at a time, converting each to its id.
    const char *cursor = src_string;
    for (int i = 0; i < length_; ++i) {
      int step = 1;
      if (src_lengths) step = src_lengths[i];
      unichar_ids_[i] = unicharset.unichar_to_id(cursor, step);
      fragment_lengths_[i] = 1;
      cursor += step;
    }
  } else {
    this->init(8);
  }
  rating_ = src_rating;
  certainty_ = src_certainty;
  permuter_ = src_permuter;
}
示例3: check_for_words
int Dawg::check_for_words(const char *filename,
const UNICHARSET &unicharset,
bool enable_wildcard) const {
if (filename == nullptr) return 0;
FILE *word_file;
char string [CHARS_PER_LINE];
int misses = 0;
UNICHAR_ID wildcard = unicharset.unichar_to_id(kWildcard);
word_file = fopen(filename, "r");
if (word_file == nullptr) {
tprintf("Error: Could not open file %s\n", filename);
ASSERT_HOST(word_file);
}
while (fgets (string, CHARS_PER_LINE, word_file) != nullptr) {
chomp_string(string); // remove newline
WERD_CHOICE word(string, unicharset);
if (word.length() > 0 &&
!word.contains_unichar_id(INVALID_UNICHAR_ID)) {
if (!match_words(&word, 0, 0,
enable_wildcard ? wildcard : INVALID_UNICHAR_ID)) {
tprintf("Missing word: %s\n", string);
++misses;
}
} else {
tprintf("Failed to create a valid word from %s\n", string);
}
}
fclose (word_file);
// Make sure the user sees this with fprintf instead of tprintf.
if (debug_level_) tprintf("Number of lost words=%d\n", misses);
return misses;
}
示例4: ParseAmbiguityLine
bool UnicharAmbigs::ParseAmbiguityLine(
int line_num, int version, int debug_level, const UNICHARSET &unicharset,
char *buffer, int *test_ambig_part_size, UNICHAR_ID *test_unichar_ids,
int *replacement_ambig_part_size, char *replacement_string, int *type) {
if (version > 1) {
// Simpler format is just wrong-string correct-string type\n.
STRING input(buffer);
GenericVector<STRING> fields;
input.split(' ', &fields);
if (fields.size() != 3) {
if (debug_level) tprintf(kIllegalMsg, line_num);
return false;
}
// Encode wrong-string.
GenericVector<UNICHAR_ID> unichars;
if (!unicharset.encode_string(fields[0].string(), true, &unichars, NULL,
NULL)) {
return false;
}
*test_ambig_part_size = unichars.size();
if (*test_ambig_part_size > MAX_AMBIG_SIZE) {
if (debug_level)
tprintf("Too many unichars in ambiguity on line %d\n", line_num);
return false;
}
// Copy encoded string to output.
for (int i = 0; i < unichars.size(); ++i)
test_unichar_ids[i] = unichars[i];
test_unichar_ids[unichars.size()] = INVALID_UNICHAR_ID;
// Encode replacement-string to check validity.
if (!unicharset.encode_string(fields[1].string(), true, &unichars, NULL,
NULL)) {
return false;
}
*replacement_ambig_part_size = unichars.size();
if (*replacement_ambig_part_size > MAX_AMBIG_SIZE) {
if (debug_level)
tprintf("Too many unichars in ambiguity on line %d\n", line_num);
return false;
}
if (sscanf(fields[2].string(), "%d", type) != 1) {
if (debug_level) tprintf(kIllegalMsg, line_num);
return false;
}
snprintf(replacement_string, kMaxAmbigStringSize, "%s", fields[1].string());
return true;
}
int i;
char *token;
char *next_token;
if (!(token = strtok_r(buffer, kAmbigDelimiters, &next_token)) ||
!sscanf(token, "%d", test_ambig_part_size) ||
*test_ambig_part_size <= 0) {
if (debug_level) tprintf(kIllegalMsg, line_num);
return false;
}
if (*test_ambig_part_size > MAX_AMBIG_SIZE) {
if (debug_level)
tprintf("Too many unichars in ambiguity on line %d\n", line_num);
return false;
}
for (i = 0; i < *test_ambig_part_size; ++i) {
if (!(token = strtok_r(NULL, kAmbigDelimiters, &next_token))) break;
if (!unicharset.contains_unichar(token)) {
if (debug_level) tprintf(kIllegalUnicharMsg, token);
break;
}
test_unichar_ids[i] = unicharset.unichar_to_id(token);
}
test_unichar_ids[i] = INVALID_UNICHAR_ID;
if (i != *test_ambig_part_size ||
!(token = strtok_r(NULL, kAmbigDelimiters, &next_token)) ||
!sscanf(token, "%d", replacement_ambig_part_size) ||
*replacement_ambig_part_size <= 0) {
if (debug_level) tprintf(kIllegalMsg, line_num);
return false;
}
if (*replacement_ambig_part_size > MAX_AMBIG_SIZE) {
if (debug_level)
tprintf("Too many unichars in ambiguity on line %d\n", line_num);
return false;
}
replacement_string[0] = '\0';
for (i = 0; i < *replacement_ambig_part_size; ++i) {
if (!(token = strtok_r(NULL, kAmbigDelimiters, &next_token))) break;
strcat(replacement_string, token);
if (!unicharset.contains_unichar(token)) {
if (debug_level) tprintf(kIllegalUnicharMsg, token);
break;
}
}
if (i != *replacement_ambig_part_size) {
if (debug_level) tprintf(kIllegalMsg, line_num);
return false;
}
if (version > 0) {
// The next field being true indicates that the abiguity should
// always be substituted (e.g. '' should always be changed to ").
// For such "certain" n -> m ambigs tesseract will insert character
//.........这里部分代码省略.........
示例5: ParseAmbiguityLine
/**
 * UnicharAmbigs::ParseAmbiguityLine
 *
 * Parses one line of an ambiguity file. Fields are separated by any of
 * kAmbigDelimiters and are expected in this order:
 *
 *   <n> <unichar_1> ... <unichar_n> <m> <unichar_1> ... <unichar_m> [<type>]
 *
 * The trailing <type> field is read only when version > 0.
 *
 * @param line_num   Line number, used only in diagnostic messages.
 * @param version    File format version; values > 0 require the type field.
 * @param unicharset Used to validate every unichar token and to map the
 *                   test-side tokens to ids.
 * @param buffer     The line to parse; modified in place by strtok_r.
 * @param TestAmbigPartSize        Out: n, must be in [1, MAX_AMBIG_SIZE].
 * @param TestUnicharIds           Out: the n ids followed by
 *                                 INVALID_UNICHAR_ID, so the array needs room
 *                                 for MAX_AMBIG_SIZE + 1 entries.
 * @param ReplacementAmbigPartSize Out: m, must be in [1, MAX_AMBIG_SIZE].
 * @param ReplacementString        Out: concatenation of the m replacement
 *                                 tokens, NUL-terminated.
 * @param type                     Out: the integer type field (version > 0).
 * @return true when the whole line parsed; false otherwise, in which case
 *         the out-parameters may have been partially written.
 */
bool UnicharAmbigs::ParseAmbiguityLine(
    int line_num, int version, const UNICHARSET &unicharset,
    char *buffer, int *TestAmbigPartSize, UNICHAR_ID *TestUnicharIds,
    int *ReplacementAmbigPartSize, char *ReplacementString, int *type) {
  int i;
  char *token;
  char *next_token;
  // sscanf must convert exactly one item; EOF (-1) is a failure as well.
  // The count itself (not the pointer) has to be positive.
  if (!(token = strtok_r(buffer, kAmbigDelimiters, &next_token)) ||
      sscanf(token, "%d", TestAmbigPartSize) != 1 ||
      *TestAmbigPartSize <= 0) {
    if (global_ambigs_debug_level) tprintf(kIllegalMsg, line_num);
    return false;
  }
  if (*TestAmbigPartSize > MAX_AMBIG_SIZE) {
    tprintf("Too many unichars in ambiguity on line %d\n", line_num);
    return false;
  }
  // Read the n unichars of the test (wrong) side and convert them to ids.
  for (i = 0; i < *TestAmbigPartSize; ++i) {
    if (!(token = strtok_r(NULL, kAmbigDelimiters, &next_token))) break;
    if (!unicharset.contains_unichar(token)) {
      if (global_ambigs_debug_level) tprintf(kIllegalUnicharMsg, token);
      break;
    }
    TestUnicharIds[i] = unicharset.unichar_to_id(token);
  }
  TestUnicharIds[i] = INVALID_UNICHAR_ID;  // terminator, also on early exit

  // i != n means the loop above stopped early on a missing or unknown token.
  if (i != *TestAmbigPartSize ||
      !(token = strtok_r(NULL, kAmbigDelimiters, &next_token)) ||
      sscanf(token, "%d", ReplacementAmbigPartSize) != 1 ||
      *ReplacementAmbigPartSize <= 0) {
    if (global_ambigs_debug_level) tprintf(kIllegalMsg, line_num);
    return false;
  }
  if (*ReplacementAmbigPartSize > MAX_AMBIG_SIZE) {
    tprintf("Too many unichars in ambiguity on line %d\n", line_num);
    return false;
  }
  // Read the m unichars of the replacement side and concatenate them.
  // NOTE(review): strcat is unbounded here; the caller is presumably expected
  // to supply a buffer at least as long as the input line - confirm.
  ReplacementString[0] = '\0';
  for (i = 0; i < *ReplacementAmbigPartSize; ++i) {
    if (!(token = strtok_r(NULL, kAmbigDelimiters, &next_token))) break;
    strcat(ReplacementString, token);
    if (!unicharset.contains_unichar(token)) {
      if (global_ambigs_debug_level) tprintf(kIllegalUnicharMsg, token);
      break;
    }
  }
  if (i != *ReplacementAmbigPartSize) {
    if (global_ambigs_debug_level) tprintf(kIllegalMsg, line_num);
    return false;
  }
  if (version > 0) {
    // The next field being true indicates that the ambiguity should
    // always be substituted (e.g. '' should always be changed to ").
    // For such "certain" n -> m ambigs tesseract will insert character
    // fragments for the n pieces in the unicharset. AmbigsFound()
    // will then replace the incorrect ngram with the character
    // fragments of the correct character (or ngram if m > 1).
    // Note that if m > 1, an ngram will be inserted into the
    // modified word, not the individual unigrams. Tesseract
    // has limited support for ngram unichar (e.g. dawg permuter).
    if (!(token = strtok_r(NULL, kAmbigDelimiters, &next_token)) ||
        sscanf(token, "%d", type) != 1) {
      if (global_ambigs_debug_level) tprintf(kIllegalMsg, line_num);
      return false;
    }
  }
  return true;
}
示例6: SetUpForFloat2Int
/** SetUpForFloat2Int **************************************************/
// For every merged class in LabeledClassList, fills the TrainingData entry
// selected by the class label (looked up through unicharset_training) with
// freshly allocated copies of the merged class's prototypes and
// configurations. The font_set is moved, not copied, from the merged class
// into that entry.
void SetUpForFloat2Int(
    LIST LabeledClassList)
{
  // printf("Float2Int ...\n");
  iterate(LabeledClassList)
  {
    UnicityTableEqEq<int> font_set;
    MERGE_CLASS Merged = (MERGE_CLASS) first_node (LabeledClassList);
    CLASS_TYPE Target = &TrainingData[unicharset_training.unichar_to_id(
        Merged->Label)];
    int ProtoCount = Merged->Class->NumProtos;
    int ConfigCount = Merged->Class->NumConfigs;
    font_set.move(&Merged->Class->font_set);

    // Prototypes: copy X/Y/Length/Angle as they are; A/B/C receive whatever
    // Normalize() leaves in the (X, Y, Angle) triple.
    Target->NumProtos = ProtoCount;
    Target->MaxNumProtos = ProtoCount;
    Target->Prototypes = (PROTO) Emalloc (sizeof(PROTO_STRUCT) * ProtoCount);
    for (int p = 0; p < ProtoCount; p++)
    {
      PROTO Dst = ProtoIn(Target, p);
      PROTO Src = ProtoIn(Merged->Class, p);
      float Coeffs[3];
      Coeffs[0] = Src->X;
      Coeffs[1] = Src->Y;
      Coeffs[2] = Src->Angle;
      Normalize(Coeffs);
      Dst->X = Src->X;
      Dst->Y = Src->Y;
      Dst->Length = Src->Length;
      Dst->Angle = Src->Angle;
      Dst->A = Coeffs[0];
      Dst->B = Coeffs[1];
      Dst->C = Coeffs[2];
    }

    // Configurations: one new bit vector per config, copied word by word.
    Target->NumConfigs = ConfigCount;
    Target->MaxNumConfigs = ConfigCount;
    Target->font_set.move(&font_set);
    Target->Configurations = (BIT_VECTOR*) Emalloc (sizeof(BIT_VECTOR) * ConfigCount);
    int WordCount = WordsInVectorOfSize(ProtoCount);
    for (int c = 0; c < ConfigCount; c++)
    {
      BIT_VECTOR Fresh = NewBitVector(ProtoCount);
      BIT_VECTOR Old = Merged->Class->Configurations[c];
      for (int w = 0; w < WordCount; w++)
        Fresh[w] = Old[w];
      Target->Configurations[c] = Fresh;
    }
  }
} // SetUpForFloat2Int