本文整理汇总了C++中WERD::CleanNoise方法的典型用法代码示例。如果您正苦于以下问题:C++ WERD::CleanNoise方法的具体用法?C++ WERD::CleanNoise怎么用?C++ WERD::CleanNoise使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类WERD
的用法示例。
在下文中一共展示了WERD::CleanNoise方法的1个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: clean_noise_from_words
//.........这里部分代码省略.........
int32_t trans_threshold; //noise tolerance
int32_t dot_count; //small objects
int32_t norm_count; //normal objects
int32_t dud_words; //number discarded
int32_t ok_words; //number remaining
int32_t word_index; //current word
//words of row
WERD_IT word_it = row->word_list ();
C_BLOB_IT blob_it; //blob iterator
C_OUTLINE_IT out_it; //outline iterator
ok_words = word_it.length ();
if (ok_words == 0 || textord_no_rejects)
return;
// was it chucked
std::vector<int8_t> word_dud(ok_words);
dud_words = 0;
ok_words = 0;
word_index = 0;
for (word_it.mark_cycle_pt (); !word_it.cycled_list (); word_it.forward ()) {
word = word_it.data (); //current word
dot_count = 0;
norm_count = 0;
//blobs in word
blob_it.set_to_list (word->cblob_list ());
for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
blob_it.forward ()) {
blob = blob_it.data ();
if (!word->flag (W_DONT_CHOP)) {
//get outlines
out_it.set_to_list (blob->out_list ());
for (out_it.mark_cycle_pt (); !out_it.cycled_list ();
out_it.forward ()) {
outline = out_it.data ();
blob_box = outline->bounding_box ();
blob_size =
blob_box.width () >
blob_box.height ()? blob_box.width () : blob_box.
height();
if (blob_size < textord_noise_sizelimit * row->x_height ())
dot_count++; //count smal outlines
if (!outline->child ()->empty ()
&& blob_box.height () <
(1 + textord_noise_syfract) * row->x_height ()
&& blob_box.height () >
(1 - textord_noise_syfract) * row->x_height ()
&& blob_box.width () <
(1 + textord_noise_sxfract) * row->x_height ()
&& blob_box.width () >
(1 - textord_noise_sxfract) * row->x_height ())
norm_count++; //count smal outlines
}
}
else
norm_count++;
blob_box = blob->bounding_box ();
blob_size =
blob_box.width () >
blob_box.height ()? blob_box.width () : blob_box.height ();
if (blob_size >= textord_noise_sizelimit * row->x_height ()
&& blob_size < row->x_height () * 2) {
trans_threshold = blob_size / textord_noise_sizefraction;
trans_count = blob->count_transitions (trans_threshold);
if (trans_count < textord_noise_translimit)
norm_count++;
}
else if (blob_box.height () > row->x_height () * 2
&& (!word_it.at_first () || !blob_it.at_first ()))
dot_count += 2;
}
if (dot_count > 2 && !word->flag(W_REP_CHAR)) {
if (dot_count > norm_count * textord_noise_normratio * 2)
word_dud[word_index] = 2;
else if (dot_count > norm_count * textord_noise_normratio)
word_dud[word_index] = 1;
else
word_dud[word_index] = 0;
} else {
word_dud[word_index] = 0;
}
if (word_dud[word_index] == 2)
dud_words++;
else
ok_words++;
word_index++;
}
word_index = 0;
for (word_it.mark_cycle_pt (); !word_it.cycled_list (); word_it.forward ()) {
if (word_dud[word_index] == 2
|| (word_dud[word_index] == 1 && dud_words > ok_words)) {
word = word_it.data(); // Current word.
// Previously we threw away the entire word.
// Now just aggressively throw all small blobs into the reject list, where
// the classifier can decide whether they are actually needed.
word->CleanNoise(textord_noise_sizelimit * row->x_height());
}
word_index++;
}
}