本文整理匯總了C++中ASSERT_HOST函數的典型用法代碼示例。如果您正苦於以下問題:C++ ASSERT_HOST函數的具體用法?C++ ASSERT_HOST怎麽用?C++ ASSERT_HOST使用的例子?那麽, 這裏精選的函數代碼示例或許可以為您提供幫助。
在下文中一共展示了ASSERT_HOST函數的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於係統推薦出更棒的C++代碼示例。
示例1: ASSERT_HOST
// Returns a random number in [-range, range].
double Network::Random(double range) {
ASSERT_HOST(randomizer_ != NULL);
return randomizer_->SignedRand(range);
}
示例2: ASSERT_HOST
void Dict::ReplaceAmbig(int wrong_ngram_begin_index, int wrong_ngram_size,
UNICHAR_ID correct_ngram_id, WERD_CHOICE *werd_choice,
MATRIX *ratings) {
int num_blobs_to_replace = 0;
int begin_blob_index = 0;
int i;
// Rating and certainty for the new BLOB_CHOICE are derived from the
// replaced choices.
float new_rating = 0.0f;
float new_certainty = 0.0f;
BLOB_CHOICE* old_choice = nullptr;
for (i = 0; i < wrong_ngram_begin_index + wrong_ngram_size; ++i) {
if (i >= wrong_ngram_begin_index) {
int num_blobs = werd_choice->state(i);
int col = begin_blob_index + num_blobs_to_replace;
int row = col + num_blobs - 1;
BLOB_CHOICE_LIST* choices = ratings->get(col, row);
ASSERT_HOST(choices != nullptr);
old_choice = FindMatchingChoice(werd_choice->unichar_id(i), choices);
ASSERT_HOST(old_choice != nullptr);
new_rating += old_choice->rating();
new_certainty += old_choice->certainty();
num_blobs_to_replace += num_blobs;
} else {
begin_blob_index += werd_choice->state(i);
}
}
new_certainty /= wrong_ngram_size;
// If there is no entry in the ratings matrix, add it.
MATRIX_COORD coord(begin_blob_index,
begin_blob_index + num_blobs_to_replace - 1);
if (!coord.Valid(*ratings)) {
ratings->IncreaseBandSize(coord.row - coord.col + 1);
}
if (ratings->get(coord.col, coord.row) == nullptr)
ratings->put(coord.col, coord.row, new BLOB_CHOICE_LIST);
BLOB_CHOICE_LIST* new_choices = ratings->get(coord.col, coord.row);
BLOB_CHOICE* choice = FindMatchingChoice(correct_ngram_id, new_choices);
if (choice != nullptr) {
// Already there. Upgrade if new rating better.
if (new_rating < choice->rating())
choice->set_rating(new_rating);
if (new_certainty < choice->certainty())
choice->set_certainty(new_certainty);
// DO NOT SORT!! It will mess up the iterator in LanguageModel::UpdateState.
} else {
// Need a new choice with the correct_ngram_id.
choice = new BLOB_CHOICE(*old_choice);
choice->set_unichar_id(correct_ngram_id);
choice->set_rating(new_rating);
choice->set_certainty(new_certainty);
choice->set_classifier(BCC_AMBIG);
choice->set_matrix_cell(coord.col, coord.row);
BLOB_CHOICE_IT it (new_choices);
it.add_to_end(choice);
}
// Remove current unichar from werd_choice. On the last iteration
// set the correct replacement unichar instead of removing a unichar.
for (int replaced_count = 0; replaced_count < wrong_ngram_size;
++replaced_count) {
if (replaced_count + 1 == wrong_ngram_size) {
werd_choice->set_blob_choice(wrong_ngram_begin_index,
num_blobs_to_replace, choice);
} else {
werd_choice->remove_unichar_id(wrong_ngram_begin_index + 1);
}
}
if (stopper_debug_level >= 1) {
werd_choice->print("ReplaceAmbig() ");
tprintf("Modified blob_choices: ");
print_ratings_list("\n", new_choices, getUnicharset());
}
}
示例3: worst_noise_blob
inT16 worst_noise_blob(WERD_RES *word_res, float *worst_noise_score) {
PBLOB_IT blob_it;
inT16 blob_count;
float noise_score[512];
int i;
int min_noise_blob; //1st contender
int max_noise_blob; //last contender
int non_noise_count;
int worst_noise_blob; //Worst blob
float small_limit = bln_x_height * fixsp_small_outlines_size;
float non_noise_limit = bln_x_height * 0.8;
blob_it.set_to_list (word_res->outword->blob_list ());
//normalised
blob_count = blob_it.length ();
ASSERT_HOST (blob_count <= 512);
if (blob_count < 5)
return -1; //too short to split
/* Get the noise scores for all blobs */
#ifndef SECURE_NAMES
if (debug_fix_space_level > 5)
tprintf ("FP fixspace Noise metrics for \"%s\": ",
word_res->best_choice->string ().string ());
#endif
for (i = 0; i < blob_count; i++, blob_it.forward ()) {
if (word_res->reject_map[i].accepted ())
noise_score[i] = non_noise_limit;
else
noise_score[i] = blob_noise_score (blob_it.data ());
if (debug_fix_space_level > 5)
tprintf ("%1.1f ", noise_score[i]);
}
if (debug_fix_space_level > 5)
tprintf ("\n");
/* Now find the worst one which is far enough away from the end of the word */
non_noise_count = 0;
for (i = 0;
(i < blob_count) && (non_noise_count < fixsp_non_noise_limit); i++) {
if (noise_score[i] >= non_noise_limit)
non_noise_count++;
}
if (non_noise_count < fixsp_non_noise_limit)
return -1;
min_noise_blob = i;
non_noise_count = 0;
for (i = blob_count - 1;
(i >= 0) && (non_noise_count < fixsp_non_noise_limit); i--) {
if (noise_score[i] >= non_noise_limit)
non_noise_count++;
}
if (non_noise_count < fixsp_non_noise_limit)
return -1;
max_noise_blob = i;
if (min_noise_blob > max_noise_blob)
return -1;
*worst_noise_score = small_limit;
worst_noise_blob = -1;
for (i = min_noise_blob; i <= max_noise_blob; i++) {
if (noise_score[i] < *worst_noise_score) {
worst_noise_blob = i;
*worst_noise_score = noise_score[i];
}
}
return worst_noise_blob;
}
示例4: ASSERT_HOST
// Top-level method to perform splitting based on current settings.
// Returns true if a split was actually performed.
// split_for_pageseg should be true if the splitting is being done prior to
// page segmentation. This mode uses the flag
// pageseg_devanagari_split_strategy to determine the splitting strategy.
bool ShiroRekhaSplitter::Split(bool split_for_pageseg) {
SplitStrategy split_strategy = split_for_pageseg ? pageseg_split_strategy_ :
ocr_split_strategy_;
if (split_strategy == NO_SPLIT) {
return false; // Nothing to do.
}
ASSERT_HOST(split_strategy == MINIMAL_SPLIT ||
split_strategy == MAXIMAL_SPLIT);
ASSERT_HOST(orig_pix_);
if (devanagari_split_debuglevel > 0) {
tprintf("Splitting shiro-rekha ...\n");
tprintf("Split strategy = %s\n",
split_strategy == MINIMAL_SPLIT ? "Minimal" : "Maximal");
tprintf("Initial pageseg available = %s\n",
segmentation_block_list_ ? "yes" : "no");
}
// Create a copy of original image to store the splitting output.
pixDestroy(&splitted_image_);
splitted_image_ = pixCopy(NULL, orig_pix_);
// Initialize debug image if required.
if (devanagari_split_debugimage) {
pixDestroy(&debug_image_);
debug_image_ = pixConvertTo32(orig_pix_);
}
// Determine all connected components in the input image. A close operation
// may be required prior to this, depending on the current settings.
Pix* pix_for_ccs = pixClone(orig_pix_);
if (perform_close_ && global_xheight_ != kUnspecifiedXheight &&
!segmentation_block_list_) {
if (devanagari_split_debuglevel > 0) {
tprintf("Performing a global close operation..\n");
}
// A global measure is available for xheight, but no local information
// exists.
pixDestroy(&pix_for_ccs);
pix_for_ccs = pixCopy(NULL, orig_pix_);
PerformClose(pix_for_ccs, global_xheight_);
}
Pixa* ccs;
Boxa* tmp_boxa = pixConnComp(pix_for_ccs, &ccs, 8);
boxaDestroy(&tmp_boxa);
pixDestroy(&pix_for_ccs);
// Iterate over all connected components. Get their bounding boxes and clip
// out the image regions corresponding to these boxes from the original image.
// Conditionally run splitting on each of them.
Boxa* regions_to_clear = boxaCreate(0);
for (int i = 0; i < pixaGetCount(ccs); ++i) {
Box* box = ccs->boxa->box[i];
Pix* word_pix = pixClipRectangle(orig_pix_, box, NULL);
ASSERT_HOST(word_pix);
int xheight = GetXheightForCC(box);
if (xheight == kUnspecifiedXheight && segmentation_block_list_ &&
devanagari_split_debugimage) {
pixRenderBoxArb(debug_image_, box, 1, 255, 0, 0);
}
// If some xheight measure is available, attempt to pre-eliminate small
// blobs from the shiro-rekha process. This is primarily to save the CCs
// corresponding to punctuation marks/small dots etc which are part of
// larger graphemes.
if (xheight == kUnspecifiedXheight ||
(box->w > xheight / 3 && box->h > xheight / 2)) {
SplitWordShiroRekha(split_strategy, word_pix, xheight,
box->x, box->y, regions_to_clear);
} else if (devanagari_split_debuglevel > 0) {
tprintf("CC dropped from splitting: %d,%d (%d, %d)\n",
box->x, box->y, box->w, box->h);
}
pixDestroy(&word_pix);
}
// Actually clear the boxes now.
for (int i = 0; i < boxaGetCount(regions_to_clear); ++i) {
Box* box = boxaGetBox(regions_to_clear, i, L_CLONE);
pixClearInRect(splitted_image_, box);
boxDestroy(&box);
}
boxaDestroy(®ions_to_clear);
pixaDestroy(&ccs);
if (devanagari_split_debugimage) {
DumpDebugImage(split_for_pageseg ? "pageseg_split_debug.png" :
"ocr_split_debug.png");
}
return true;
}
示例5: part_it
// Attempt to improve this by adding partitions or expanding partitions.
void ColPartitionSet::ImproveColumnCandidate(WidthCallback* cb,
PartSetVector* src_sets) {
int set_size = src_sets->size();
// Iterate over the provided column sets, as each one may have something
// to improve this.
for (int i = 0; i < set_size; ++i) {
ColPartitionSet* column_set = src_sets->get(i);
if (column_set == NULL)
continue;
// Iterate over the parts in this and column_set, adding bigger or
// new parts in column_set to this.
ColPartition_IT part_it(&parts_);
ASSERT_HOST(!part_it.empty());
int prev_right = MIN_INT32;
part_it.mark_cycle_pt();
ColPartition_IT col_it(&column_set->parts_);
for (col_it.mark_cycle_pt(); !col_it.cycled_list(); col_it.forward()) {
ColPartition* col_part = col_it.data();
if (col_part->blob_type() < BRT_UNKNOWN)
continue; // Ignore image partitions.
int col_left = col_part->left_key();
int col_right = col_part->right_key();
// Sync-up part_it (in this) so it matches the col_part in column_set.
ColPartition* part = part_it.data();
while (!part_it.at_last() && part->right_key() < col_left) {
prev_right = part->right_key();
part_it.forward();
part = part_it.data();
}
int part_left = part->left_key();
int part_right = part->right_key();
if (part_right < col_left || col_right < part_left) {
// There is no overlap so this is a new partition.
AddPartition(col_part->ShallowCopy(), &part_it);
continue;
}
// Check the edges of col_part to see if they can improve part.
bool part_width_ok = cb->Run(part->KeyWidth(part_left, part_right));
if (col_left < part_left && col_left > prev_right) {
// The left edge of the column is better and it doesn't overlap,
// so we can potentially expand it.
int col_box_left = col_part->BoxLeftKey();
bool tab_width_ok = cb->Run(part->KeyWidth(col_left, part_right));
bool box_width_ok = cb->Run(part->KeyWidth(col_box_left, part_right));
if (tab_width_ok || (!part_width_ok )) {
// The tab is leaving the good column metric at least as good as
// it was before, so use the tab.
part->CopyLeftTab(*col_part, false);
part->SetColumnGoodness(cb);
} else if (col_box_left < part_left &&
(box_width_ok || !part_width_ok)) {
// The box is leaving the good column metric at least as good as
// it was before, so use the box.
part->CopyLeftTab(*col_part, true);
part->SetColumnGoodness(cb);
}
part_left = part->left_key();
}
if (col_right > part_right &&
(part_it.at_last() ||
part_it.data_relative(1)->left_key() > col_right)) {
// The right edge is better, so we can possibly expand it.
int col_box_right = col_part->BoxRightKey();
bool tab_width_ok = cb->Run(part->KeyWidth(part_left, col_right));
bool box_width_ok = cb->Run(part->KeyWidth(part_left, col_box_right));
if (tab_width_ok || (!part_width_ok )) {
// The tab is leaving the good column metric at least as good as
// it was before, so use the tab.
part->CopyRightTab(*col_part, false);
part->SetColumnGoodness(cb);
} else if (col_box_right > part_right &&
(box_width_ok || !part_width_ok)) {
// The box is leaving the good column metric at least as good as
// it was before, so use the box.
part->CopyRightTab(*col_part, true);
part->SetColumnGoodness(cb);
}
}
}
}
ComputeCoverage();
}
示例6: bleft
/**
* Sets up auto page segmentation, determines the orientation, and corrects it.
* Somewhat arbitrary chunk of functionality, factored out of AutoPageSeg to
* facilitate testing.
* photo_mask_pix is a pointer to a NULL pointer that will be filled on return
* with the leptonica photo mask, which must be pixDestroyed by the caller.
* to_blocks is an empty list that will be filled with (usually a single)
* block that is used during layout analysis. This ugly API is required
* because of the possibility of a unlv zone file.
* TODO(rays) clean this up.
* See AutoPageSeg for other arguments.
* The returned ColumnFinder must be deleted after use.
*/
ColumnFinder* Tesseract::SetupPageSegAndDetectOrientation(
PageSegMode pageseg_mode, BLOCK_LIST* blocks, Tesseract* osd_tess,
OSResults* osr, TO_BLOCK_LIST* to_blocks, Pix** photo_mask_pix,
Pix** music_mask_pix) {
int vertical_x = 0;
int vertical_y = 1;
TabVector_LIST v_lines;
TabVector_LIST h_lines;
ICOORD bleft(0, 0);
ASSERT_HOST(pix_binary_ != NULL);
if (tessedit_dump_pageseg_images) {
pixa_debug_.AddPix(pix_binary_, "PageSegInput");
}
// Leptonica is used to find the rule/separator lines in the input.
LineFinder::FindAndRemoveLines(source_resolution_,
textord_tabfind_show_vlines, pix_binary_,
&vertical_x, &vertical_y, music_mask_pix,
&v_lines, &h_lines);
if (tessedit_dump_pageseg_images) {
pixa_debug_.AddPix(pix_binary_, "NoLines");
}
// Leptonica is used to find a mask of the photo regions in the input.
*photo_mask_pix = ImageFind::FindImages(pix_binary_, &pixa_debug_);
if (tessedit_dump_pageseg_images) {
pixa_debug_.AddPix(pix_binary_, "NoImages");
}
if (!PSM_COL_FIND_ENABLED(pageseg_mode)) v_lines.clear();
// The rest of the algorithm uses the usual connected components.
textord_.find_components(pix_binary_, blocks, to_blocks);
TO_BLOCK_IT to_block_it(to_blocks);
// There must be exactly one input block.
// TODO(rays) handle new textline finding with a UNLV zone file.
ASSERT_HOST(to_blocks->singleton());
TO_BLOCK* to_block = to_block_it.data();
TBOX blkbox = to_block->block->bounding_box();
ColumnFinder* finder = NULL;
int estimated_resolution = source_resolution_;
if (source_resolution_ == kMinCredibleResolution) {
// Try to estimate resolution from typical body text size.
int res = IntCastRounded(to_block->line_size * kResolutionEstimationFactor);
if (res > estimated_resolution && res < kMaxCredibleResolution) {
estimated_resolution = res;
tprintf("Estimating resolution as %d\n", estimated_resolution);
}
}
if (to_block->line_size >= 2) {
finder = new ColumnFinder(static_cast<int>(to_block->line_size),
blkbox.botleft(), blkbox.topright(),
estimated_resolution, textord_use_cjk_fp_model,
textord_tabfind_aligned_gap_fraction, &v_lines,
&h_lines, vertical_x, vertical_y);
finder->SetupAndFilterNoise(pageseg_mode, *photo_mask_pix, to_block);
if (equ_detect_) {
equ_detect_->LabelSpecialText(to_block);
}
BLOBNBOX_CLIST osd_blobs;
// osd_orientation is the number of 90 degree rotations to make the
// characters upright. (See osdetect.h for precise definition.)
// We want the text lines horizontal, (vertical text indicates vertical
// textlines) which may conflict (eg vertically written CJK).
int osd_orientation = 0;
bool vertical_text = textord_tabfind_force_vertical_text ||
pageseg_mode == PSM_SINGLE_BLOCK_VERT_TEXT;
if (!vertical_text && textord_tabfind_vertical_text &&
PSM_ORIENTATION_ENABLED(pageseg_mode)) {
vertical_text =
finder->IsVerticallyAlignedText(textord_tabfind_vertical_text_ratio,
to_block, &osd_blobs);
}
if (PSM_OSD_ENABLED(pageseg_mode) && osd_tess != NULL && osr != NULL) {
GenericVector<int> osd_scripts;
if (osd_tess != this) {
// We are running osd as part of layout analysis, so constrain the
// scripts to those allowed by *this.
AddAllScriptsConverted(unicharset, osd_tess->unicharset, &osd_scripts);
for (int s = 0; s < sub_langs_.size(); ++s) {
AddAllScriptsConverted(sub_langs_[s]->unicharset,
osd_tess->unicharset, &osd_scripts);
}
}
//.........這裏部分代碼省略.........
示例7: Emalloc
//.........這裏部分代碼省略.........
DoError (ILLEGALSAMPLECOUNT, "Illegal sample count");
Proto->NumSamples = SampleCount;
Proto->Mean = ReadNFloats (File, N, NULL);
if (Proto->Mean == NULL)
DoError (ILLEGALMEANSPEC, "Illegal prototype mean");
switch (Proto->Style) {
case spherical:
if (ReadNFloats (File, 1, &(Proto->Variance.Spherical)) == NULL)
DoError (ILLEGALVARIANCESPEC, "Illegal prototype variance");
Proto->Magnitude.Spherical =
1.0 / sqrt ((double) (2.0 * PI * Proto->Variance.Spherical));
Proto->TotalMagnitude =
pow (Proto->Magnitude.Spherical, (float) N);
Proto->LogMagnitude = log ((double) Proto->TotalMagnitude);
Proto->Weight.Spherical = 1.0 / Proto->Variance.Spherical;
Proto->Distrib = NULL;
break;
case elliptical:
Proto->Variance.Elliptical = ReadNFloats (File, N, NULL);
if (Proto->Variance.Elliptical == NULL)
DoError (ILLEGALVARIANCESPEC, "Illegal prototype variance");
Proto->Magnitude.Elliptical =
(FLOAT32 *) Emalloc (N * sizeof (FLOAT32));
Proto->Weight.Elliptical =
(FLOAT32 *) Emalloc (N * sizeof (FLOAT32));
Proto->TotalMagnitude = 1.0;
for (i = 0; i < N; i++) {
Proto->Magnitude.Elliptical[i] =
1.0 /
sqrt ((double) (2.0 * PI * Proto->Variance.Elliptical[i]));
Proto->Weight.Elliptical[i] =
1.0 / Proto->Variance.Elliptical[i];
Proto->TotalMagnitude *= Proto->Magnitude.Elliptical[i];
}
Proto->LogMagnitude = log ((double) Proto->TotalMagnitude);
Proto->Distrib = NULL;
break;
case mixed:
Proto->Distrib =
(DISTRIBUTION *) Emalloc (N * sizeof (DISTRIBUTION));
for (i = 0; i < N; i++) {
if (tfscanf(File, "%s", Token) != 1)
DoError (ILLEGALDISTRIBUTION,
"Illegal prototype distribution");
switch (Token[0]) {
case 'n':
Proto->Distrib[i] = normal;
break;
case 'u':
Proto->Distrib[i] = uniform;
break;
case 'r':
Proto->Distrib[i] = D_random;
break;
default:
DoError (ILLEGALDISTRIBUTION,
"Illegal prototype distribution");
}
}
Proto->Variance.Elliptical = ReadNFloats (File, N, NULL);
if (Proto->Variance.Elliptical == NULL)
DoError (ILLEGALVARIANCESPEC, "Illegal prototype variance");
Proto->Magnitude.Elliptical =
(FLOAT32 *) Emalloc (N * sizeof (FLOAT32));
Proto->Weight.Elliptical =
(FLOAT32 *) Emalloc (N * sizeof (FLOAT32));
Proto->TotalMagnitude = 1.0;
for (i = 0; i < N; i++) {
switch (Proto->Distrib[i]) {
case normal:
Proto->Magnitude.Elliptical[i] = 1.0 /
sqrt ((double)
(2.0 * PI * Proto->Variance.Elliptical[i]));
Proto->Weight.Elliptical[i] =
1.0 / Proto->Variance.Elliptical[i];
break;
case uniform:
case D_random:
Proto->Magnitude.Elliptical[i] = 1.0 /
(2.0 * Proto->Variance.Elliptical[i]);
break;
case DISTRIBUTION_COUNT:
ASSERT_HOST(!"Distribution count not allowed!");
}
Proto->TotalMagnitude *= Proto->Magnitude.Elliptical[i];
}
Proto->LogMagnitude = log ((double) Proto->TotalMagnitude);
break;
}
return (Proto);
}
else if (Status == EOF)
return (NULL);
else {
DoError (ILLEGALSIGNIFICANCESPEC, "Illegal significance specification");
return (NULL);
}
}
示例8: bleft
/**
* Sets up auto page segmentation, determines the orientation, and corrects it.
* Somewhat arbitrary chunk of functionality, factored out of AutoPageSeg to
* facilitate testing.
* photo_mask_pix is a pointer to a NULL pointer that will be filled on return
* with the leptonica photo mask, which must be pixDestroyed by the caller.
* to_blocks is an empty list that will be filled with (usually a single)
* block that is used during layout analysis. This ugly API is required
* because of the possibility of a unlv zone file.
* TODO(rays) clean this up.
* See AutoPageSeg for other arguments.
* The returned ColumnFinder must be deleted after use.
*/
ColumnFinder* Tesseract::SetupPageSegAndDetectOrientation(
bool single_column, bool osd, bool only_osd,
BLOCK_LIST* blocks, Tesseract* osd_tess, OSResults* osr,
TO_BLOCK_LIST* to_blocks, Pix** photo_mask_pix, Pix** music_mask_pix) {
int vertical_x = 0;
int vertical_y = 1;
TabVector_LIST v_lines;
TabVector_LIST h_lines;
ICOORD bleft(0, 0);
ASSERT_HOST(pix_binary_ != NULL);
if (tessedit_dump_pageseg_images) {
pixWrite("tessinput.png", pix_binary_, IFF_PNG);
}
// Leptonica is used to find the rule/separator lines in the input.
LineFinder::FindAndRemoveLines(source_resolution_,
textord_tabfind_show_vlines, pix_binary_,
&vertical_x, &vertical_y, music_mask_pix,
&v_lines, &h_lines);
if (tessedit_dump_pageseg_images)
pixWrite("tessnolines.png", pix_binary_, IFF_PNG);
// Leptonica is used to find a mask of the photo regions in the input.
*photo_mask_pix = ImageFind::FindImages(pix_binary_);
if (tessedit_dump_pageseg_images)
pixWrite("tessnoimages.png", pix_binary_, IFF_PNG);
if (single_column)
v_lines.clear();
// The rest of the algorithm uses the usual connected components.
textord_.find_components(pix_binary_, blocks, to_blocks);
TO_BLOCK_IT to_block_it(to_blocks);
// There must be exactly one input block.
// TODO(rays) handle new textline finding with a UNLV zone file.
ASSERT_HOST(to_blocks->singleton());
TO_BLOCK* to_block = to_block_it.data();
TBOX blkbox = to_block->block->bounding_box();
ColumnFinder* finder = NULL;
if (to_block->line_size >= 2) {
finder = new ColumnFinder(static_cast<int>(to_block->line_size),
blkbox.botleft(), blkbox.topright(),
source_resolution_,
&v_lines, &h_lines, vertical_x, vertical_y);
finder->SetupAndFilterNoise(*photo_mask_pix, to_block);
if (equ_detect_) {
equ_detect_->LabelSpecialText(to_block);
}
BLOBNBOX_CLIST osd_blobs;
// osd_orientation is the number of 90 degree rotations to make the
// characters upright. (See osdetect.h for precise definition.)
// We want the text lines horizontal, (vertical text indicates vertical
// textlines) which may conflict (eg vertically written CJK).
int osd_orientation = 0;
bool vertical_text = finder->IsVerticallyAlignedText(to_block, &osd_blobs);
if (osd && osd_tess != NULL && osr != NULL) {
os_detect_blobs(&osd_blobs, osr, osd_tess);
if (only_osd) {
delete finder;
return NULL;
}
osd_orientation = osr->best_result.orientation_id;
double osd_score = osr->orientations[osd_orientation];
double osd_margin = min_orientation_margin * 2;
for (int i = 0; i < 4; ++i) {
if (i != osd_orientation &&
osd_score - osr->orientations[i] < osd_margin) {
osd_margin = osd_score - osr->orientations[i];
}
}
if (osd_margin < min_orientation_margin) {
// The margin is weak.
int best_script_id = osr->best_result.script_id;
bool cjk = (best_script_id == osd_tess->unicharset.han_sid()) ||
(best_script_id == osd_tess->unicharset.hiragana_sid()) ||
(best_script_id == osd_tess->unicharset.katakana_sid());
if (!cjk && !vertical_text && osd_orientation == 2) {
// upside down latin text is improbable with such a weak margin.
tprintf("OSD: Weak margin (%.2f), horiz textlines, not CJK: "
"Don't rotate.\n", osd_margin);
osd_orientation = 0;
} else {
tprintf("OSD: Weak margin (%.2f) for %d blob text block, "
//.........這裏部分代碼省略.........
示例9: res_it
// Returns the mean confidence of the current object at the given level.
// The number should be interpreted as a percent probability. (0.0f-100.0f)
float LTRResultIterator::Confidence(PageIteratorLevel level) const {
if (it_->word() == NULL) return 0.0f; // Already at the end!
float mean_certainty = 0.0f;
int certainty_count = 0;
PAGE_RES_IT res_it(*it_);
WERD_CHOICE* best_choice = res_it.word()->best_choice;
ASSERT_HOST(best_choice != NULL);
switch (level) {
case RIL_BLOCK:
do {
best_choice = res_it.word()->best_choice;
ASSERT_HOST(best_choice != NULL);
mean_certainty += best_choice->certainty();
++certainty_count;
res_it.forward();
} while (res_it.block() == res_it.prev_block());
break;
case RIL_PARA:
do {
best_choice = res_it.word()->best_choice;
ASSERT_HOST(best_choice != NULL);
mean_certainty += best_choice->certainty();
++certainty_count;
res_it.forward();
} while (res_it.block() == res_it.prev_block() &&
res_it.row()->row->para() == res_it.prev_row()->row->para());
break;
case RIL_TEXTLINE:
do {
best_choice = res_it.word()->best_choice;
ASSERT_HOST(best_choice != NULL);
mean_certainty += best_choice->certainty();
++certainty_count;
res_it.forward();
} while (res_it.row() == res_it.prev_row());
break;
case RIL_WORD:
mean_certainty += best_choice->certainty();
++certainty_count;
break;
case RIL_SYMBOL:
BLOB_CHOICE_LIST_CLIST* choices = best_choice->blob_choices();
if (choices != NULL) {
BLOB_CHOICE_LIST_C_IT blob_choices_it(choices);
for (int blob = 0; blob < blob_index_; ++blob)
blob_choices_it.forward();
BLOB_CHOICE_IT choice_it(blob_choices_it.data());
for (choice_it.mark_cycle_pt();
!choice_it.cycled_list();
choice_it.forward()) {
if (choice_it.data()->unichar_id() ==
best_choice->unichar_id(blob_index_))
break;
}
mean_certainty += choice_it.data()->certainty();
} else {
mean_certainty += best_choice->certainty();
}
++certainty_count;
}
if (certainty_count > 0) {
mean_certainty /= certainty_count;
float confidence = 100 + 5 * mean_certainty;
if (confidence < 0.0f) confidence = 0.0f;
if (confidence > 100.0f) confidence = 100.0f;
return confidence;
}
return 0.0f;
}
示例10: while
/**
* Split input into space-separated tokens, strip trailing punctuation
* from each, determine case properties, call UTF-8 flavor of cost
* function on each word, and aggregate all into single mean word
* cost.
*/
int WordUnigrams::Cost(const char_32 *key_str32,
LangModel *lang_mod,
CharSet *char_set) const {
if (!key_str32)
return 0;
// convert string to UTF8 to split into space-separated words
string key_str;
CubeUtils::UTF32ToUTF8(key_str32, &key_str);
vector<string> words;
CubeUtils::SplitStringUsing(key_str, " \t", &words);
// no words => no cost
if (words.size() <= 0) {
return 0;
}
// aggregate the costs of all the words
int cost = 0;
for (int word_idx = 0; word_idx < words.size(); word_idx++) {
// convert each word back to UTF32 for analyzing case and punctuation
string_32 str32;
CubeUtils::UTF8ToUTF32(words[word_idx].c_str(), &str32);
int len = CubeUtils::StrLen(str32.c_str());
// strip all trailing punctuation
string clean_str;
int clean_len = len;
bool trunc = false;
while (clean_len > 0 &&
lang_mod->IsTrailingPunc(str32.c_str()[clean_len - 1])) {
--clean_len;
trunc = true;
}
// If either the original string was not truncated (no trailing
// punctuation) or the entire string was removed (all characters
// are trailing punctuation), evaluate original word as is;
// otherwise, copy all but the trailing punctuation characters
char_32 *clean_str32 = NULL;
if (clean_len == 0 || !trunc) {
clean_str32 = CubeUtils::StrDup(str32.c_str());
} else {
clean_str32 = new char_32[clean_len + 1];
for (int i = 0; i < clean_len; ++i) {
clean_str32[i] = str32[i];
}
clean_str32[clean_len] = '\0';
}
ASSERT_HOST(clean_str32 != NULL);
string str8;
CubeUtils::UTF32ToUTF8(clean_str32, &str8);
int word_cost = CostInternal(str8.c_str());
// if case invariant, get costs of all-upper-case and all-lower-case
// versions and return the min cost
if (clean_len >= kMinLengthNumOrCaseInvariant &&
CubeUtils::IsCaseInvariant(clean_str32, char_set)) {
char_32 *lower_32 = CubeUtils::ToLower(clean_str32, char_set);
if (lower_32) {
string lower_8;
CubeUtils::UTF32ToUTF8(lower_32, &lower_8);
word_cost = MIN(word_cost, CostInternal(lower_8.c_str()));
delete [] lower_32;
}
char_32 *upper_32 = CubeUtils::ToUpper(clean_str32, char_set);
if (upper_32) {
string upper_8;
CubeUtils::UTF32ToUTF8(upper_32, &upper_8);
word_cost = MIN(word_cost, CostInternal(upper_8.c_str()));
delete [] upper_32;
}
}
if (clean_len >= kMinLengthNumOrCaseInvariant) {
// if characters are all numeric, incur 0 word cost
bool is_numeric = true;
for (int i = 0; i < clean_len; ++i) {
if (!lang_mod->IsDigit(clean_str32[i]))
is_numeric = false;
}
if (is_numeric)
word_cost = 0;
}
delete [] clean_str32;
cost += word_cost;
} // word_idx
// return the mean cost
return static_cast<int>(cost / static_cast<double>(words.size()));
}
示例11: ASSERT_HOST
/**
* Segment the page according to the current value of tessedit_pageseg_mode.
* pix_binary_ is used as the source image and should not be NULL.
* On return the blocks list owns all the constructed page layout.
*/
int Tesseract::SegmentPage(const STRING* input_file, BLOCK_LIST* blocks,
Tesseract* osd_tess, OSResults* osr) {
ASSERT_HOST(pix_binary_ != NULL);
int width = pixGetWidth(pix_binary_);
int height = pixGetHeight(pix_binary_);
// Get page segmentation mode.
PageSegMode pageseg_mode = static_cast<PageSegMode>(
static_cast<int>(tessedit_pageseg_mode));
// If a UNLV zone file can be found, use that instead of segmentation.
if (!PSM_COL_FIND_ENABLED(pageseg_mode) &&
input_file != NULL && input_file->length() > 0) {
STRING name = *input_file;
const char* lastdot = strrchr(name.string(), '.');
if (lastdot != NULL)
name[lastdot - name.string()] = '\0';
read_unlv_file(name, width, height, blocks);
}
if (blocks->empty()) {
// No UNLV file present. Work according to the PageSegMode.
// First make a single block covering the whole image.
BLOCK_IT block_it(blocks);
BLOCK* block = new BLOCK("", TRUE, 0, 0, 0, 0, width, height);
block->set_right_to_left(right_to_left());
block_it.add_to_end(block);
} else {
// UNLV file present. Use PSM_SINGLE_BLOCK.
pageseg_mode = PSM_SINGLE_BLOCK;
}
int auto_page_seg_ret_val = 0;
TO_BLOCK_LIST to_blocks;
if (PSM_OSD_ENABLED(pageseg_mode) || PSM_BLOCK_FIND_ENABLED(pageseg_mode) ||
PSM_SPARSE(pageseg_mode)) {
auto_page_seg_ret_val =
AutoPageSeg(pageseg_mode, blocks, &to_blocks, osd_tess, osr);
if (pageseg_mode == PSM_OSD_ONLY)
return auto_page_seg_ret_val;
// To create blobs from the image region bounds uncomment this line:
// to_blocks.clear(); // Uncomment to go back to the old mode.
} else {
deskew_ = FCOORD(1.0f, 0.0f);
reskew_ = FCOORD(1.0f, 0.0f);
if (pageseg_mode == PSM_CIRCLE_WORD) {
Pix* pixcleaned = RemoveEnclosingCircle(pix_binary_);
if (pixcleaned != NULL) {
pixDestroy(&pix_binary_);
pix_binary_ = pixcleaned;
}
}
}
if (auto_page_seg_ret_val < 0) {
return -1;
}
if (blocks->empty()) {
if (textord_debug_tabfind)
tprintf("Empty page\n");
return 0; // AutoPageSeg found an empty page.
}
textord_.TextordPage(pageseg_mode, width, height, pix_binary_,
blocks, &to_blocks);
return auto_page_seg_ret_val;
}
示例12: STRING
//.........這裏部分代碼省略.........
}
stats_.tilde_crunch_written = true;
stats_.last_char_was_newline = false;
stats_.write_results_empty_block = false;
}
if ((word->word->flag (W_EOL) && !stats_.last_char_was_newline) || force_eol) {
/* Add a new line output */
txt_chs[txt_index] = '\n';
map_chs[txt_index++] = '\n';
//end line
ep_chars[ep_chars_index++] = newline_type;
//Cos of the real newline
stats_.tilde_crunch_written = false;
stats_.last_char_was_newline = true;
stats_.last_char_was_tilde = false;
}
txt_chs[txt_index] = '\0';
map_chs[txt_index] = '\0';
ep_chars[ep_chars_index] = '\0'; // terminate string
word->ep_choice = new WERD_CHOICE(ep_chars, uchset);
if (force_eol)
stats_.write_results_empty_block = true;
return;
}
/* NORMAL PROCESSING of non tilde crunched words */
stats_.tilde_crunch_written = false;
if (newline_type)
stats_.last_char_was_newline = true;
else
stats_.last_char_was_newline = false;
stats_.write_results_empty_block = force_eol; // about to write a real word
if (unlv_tilde_crunching &&
stats_.last_char_was_tilde &&
(word->word->space() == 0) &&
!(word->word->flag(W_REP_CHAR) && tessedit_write_rep_codes) &&
(word->best_choice->unichar_id(0) == space)) {
/* Prevent adjacent tilde across words - we know that adjacent tildes within
words have been removed */
word->best_choice->remove_unichar_id(0);
if (word->best_choice->blob_choices() != NULL) {
BLOB_CHOICE_LIST_C_IT blob_choices_it(word->best_choice->blob_choices());
if (!blob_choices_it.empty()) delete blob_choices_it.extract();
}
word->reject_map.remove_pos (0);
word->box_word->DeleteBox(0);
}
if (newline_type ||
(word->word->flag (W_REP_CHAR) && tessedit_write_rep_codes))
stats_.last_char_was_tilde = false;
else {
if (word->reject_map.length () > 0) {
if (word->best_choice->unichar_id(word->reject_map.length() - 1) == space)
stats_.last_char_was_tilde = true;
else
stats_.last_char_was_tilde = false;
}
else if (word->word->space () > 0)
stats_.last_char_was_tilde = false;
/* else it is unchanged as there are no output chars */
}
ASSERT_HOST (word->best_choice->length() == word->reject_map.length());
set_unlv_suspects(word);
check_debug_pt (word, 120);
if (tessedit_rejection_debug) {
tprintf ("Dict word: \"%s\": %d\n",
word->best_choice->debug_string().string(),
dict_word(*(word->best_choice)));
}
if (word->word->flag (W_REP_CHAR) && tessedit_write_rep_codes) {
repetition_code = "|^~R";
wordstr_lengths = "\001\001\001\001";
repetition_code += uchset.id_to_unichar(get_rep_char(word));
wordstr_lengths += strlen(uchset.id_to_unichar(get_rep_char(word)));
wordstr = &repetition_code;
} else {
if (tessedit_zero_rejection) {
/* OVERRIDE ALL REJECTION MECHANISMS - ONLY REJECT TESS FAILURES */
for (i = 0; i < word->best_choice->length(); ++i) {
if (word->reject_map[i].rejected())
word->reject_map[i].setrej_minimal_rej_accept();
}
}
if (tessedit_minimal_rejection) {
/* OVERRIDE ALL REJECTION MECHANISMS - ONLY REJECT TESS FAILURES */
for (i = 0; i < word->best_choice->length(); ++i) {
if ((word->best_choice->unichar_id(i) != space) &&
word->reject_map[i].rejected())
word->reject_map[i].setrej_minimal_rej_accept();
}
}
}
}
示例13: pixGetWidth
// Segment the page according to the current value of tessedit_pageseg_mode.
// If the pix_binary_ member is not NULL, it is used as the source image,
// and copied to image, otherwise it just uses image as the input.
// On return the blocks list owns all the constructed page layout.
int Tesseract::SegmentPage(const STRING* input_file,
IMAGE* image, BLOCK_LIST* blocks) {
int width = image->get_xsize();
int height = image->get_ysize();
int resolution = image->get_res();
#ifdef HAVE_LIBLEPT
if (pix_binary_ != NULL) {
width = pixGetWidth(pix_binary_);
height = pixGetHeight(pix_binary_);
resolution = pixGetXRes(pix_binary_);
}
#endif
// Zero resolution messes up the algorithms, so make sure it is credible.
if (resolution < kMinCredibleResolution)
resolution = kDefaultResolution;
// Get page segmentation mode.
PageSegMode pageseg_mode = static_cast<PageSegMode>(
static_cast<int>(tessedit_pageseg_mode));
// If a UNLV zone file can be found, use that instead of segmentation.
if (pageseg_mode != tesseract::PSM_AUTO &&
input_file != NULL && input_file->length() > 0) {
STRING name = *input_file;
const char* lastdot = strrchr(name.string(), '.');
if (lastdot != NULL)
name[lastdot - name.string()] = '\0';
read_unlv_file(name, width, height, blocks);
}
bool single_column = pageseg_mode > PSM_AUTO;
if (blocks->empty()) {
// No UNLV file present. Work according to the PageSegMode.
// First make a single block covering the whole image.
BLOCK_IT block_it(blocks);
BLOCK* block = new BLOCK("", TRUE, 0, 0, 0, 0, width, height);
block_it.add_to_end(block);
} else {
// UNLV file present. Use PSM_SINGLE_COLUMN.
pageseg_mode = PSM_SINGLE_COLUMN;
}
TO_BLOCK_LIST land_blocks, port_blocks;
TBOX page_box;
if (pageseg_mode <= PSM_SINGLE_COLUMN) {
if (AutoPageSeg(width, height, resolution, single_column,
image, blocks, &port_blocks) < 0) {
return -1;
}
// To create blobs from the image region bounds uncomment this line:
// port_blocks.clear(); // Uncomment to go back to the old mode.
} else {
#if HAVE_LIBLEPT
image->FromPix(pix_binary_);
#endif
deskew_ = FCOORD(1.0f, 0.0f);
reskew_ = FCOORD(1.0f, 0.0f);
}
if (blocks->empty()) {
tprintf("Empty page\n");
return 0; // AutoPageSeg found an empty page.
}
if (port_blocks.empty()) {
// AutoPageSeg was not used, so we need to find_components first.
find_components(blocks, &land_blocks, &port_blocks, &page_box);
} else {
// AutoPageSeg does not need to find_components as it did that already.
page_box.set_left(0);
page_box.set_bottom(0);
page_box.set_right(width);
page_box.set_top(height);
// Filter_blobs sets up the TO_BLOCKs the same as find_components does.
filter_blobs(page_box.topright(), &port_blocks, true);
}
TO_BLOCK_IT to_block_it(&port_blocks);
ASSERT_HOST(!port_blocks.empty());
TO_BLOCK* to_block = to_block_it.data();
if (pageseg_mode <= PSM_SINGLE_BLOCK ||
to_block->line_size < 2) {
// For now, AUTO, SINGLE_COLUMN and SINGLE_BLOCK all map to the old
// textord. The difference is the number of blocks and how the are made.
textord_page(page_box.topright(), blocks, &land_blocks, &port_blocks,
this);
} else {
// SINGLE_LINE, SINGLE_WORD and SINGLE_CHAR all need a single row.
float gradient = make_single_row(page_box.topright(),
to_block, &port_blocks, this);
if (pageseg_mode == PSM_SINGLE_LINE) {
// SINGLE_LINE uses the old word maker on the single line.
make_words(page_box.topright(), gradient, blocks,
&land_blocks, &port_blocks, this);
} else {
// SINGLE_WORD and SINGLE_CHAR cram all the blobs into a
// single word, and in SINGLE_CHAR mode, all the outlines
// go in a single blob.
make_single_word(pageseg_mode == PSM_SINGLE_CHAR,
to_block->get_rows(), to_block->block->row_list());
//.........這裏部分代碼省略.........
示例14: bleft
// Auto page segmentation. Divide the page image into blocks of uniform
// text linespacing and images.
// Width, height and resolution are derived from the input image.
// If the pix is non-NULL, then it is assumed to be the input, and it is
// copied to the image, otherwise the image is used directly.
// The output goes in the blocks list with corresponding TO_BLOCKs in the
// to_blocks list.
// If single_column is true, then no attempt is made to divide the image
// into columns, but multiple blocks are still made if the text is of
// non-uniform linespacing.
int Tesseract::AutoPageSeg(int width, int height, int resolution,
bool single_column, IMAGE* image,
BLOCK_LIST* blocks, TO_BLOCK_LIST* to_blocks) {
int vertical_x = 0;
int vertical_y = 1;
TabVector_LIST v_lines;
TabVector_LIST h_lines;
ICOORD bleft(0, 0);
Boxa* boxa = NULL;
Pixa* pixa = NULL;
// The blocks made by the ColumnFinder. Moved to blocks before return.
BLOCK_LIST found_blocks;
#ifdef HAVE_LIBLEPT
if (pix_binary_ != NULL) {
if (textord_debug_images) {
Pix* grey_pix = pixCreate(width, height, 8);
// Printable images are light grey on white, but for screen display
// they are black on dark grey so the other colors show up well.
if (textord_debug_printable) {
pixSetAll(grey_pix);
pixSetMasked(grey_pix, pix_binary_, 192);
} else {
pixSetAllArbitrary(grey_pix, 64);
pixSetMasked(grey_pix, pix_binary_, 0);
}
AlignedBlob::IncrementDebugPix();
pixWrite(AlignedBlob::textord_debug_pix().string(), grey_pix, IFF_PNG);
pixDestroy(&grey_pix);
}
if (tessedit_dump_pageseg_images)
pixWrite("tessinput.png", pix_binary_, IFF_PNG);
// Leptonica is used to find the lines and image regions in the input.
LineFinder::FindVerticalLines(resolution, pix_binary_,
&vertical_x, &vertical_y, &v_lines);
LineFinder::FindHorizontalLines(resolution, pix_binary_, &h_lines);
if (tessedit_dump_pageseg_images)
pixWrite("tessnolines.png", pix_binary_, IFF_PNG);
ImageFinder::FindImages(pix_binary_, &boxa, &pixa);
if (tessedit_dump_pageseg_images)
pixWrite("tessnoimages.png", pix_binary_, IFF_PNG);
// Copy the Pix to the IMAGE.
image->FromPix(pix_binary_);
if (single_column)
v_lines.clear();
}
#endif
TO_BLOCK_LIST land_blocks, port_blocks;
TBOX page_box;
// The rest of the algorithm uses the usual connected components.
find_components(blocks, &land_blocks, &port_blocks, &page_box);
TO_BLOCK_IT to_block_it(&port_blocks);
ASSERT_HOST(!to_block_it.empty());
for (to_block_it.mark_cycle_pt(); !to_block_it.cycled_list();
to_block_it.forward()) {
TO_BLOCK* to_block = to_block_it.data();
TBOX blkbox = to_block->block->bounding_box();
if (to_block->line_size >= 2) {
// Note: if there are multiple blocks, then v_lines, boxa, and pixa
// are empty on the next iteration, but in this case, we assume
// that there aren't any interesting line separators or images, since
// it means that we have a pre-defined unlv zone file.
ColumnFinder finder(static_cast<int>(to_block->line_size),
blkbox.botleft(), blkbox.topright(),
&v_lines, &h_lines, vertical_x, vertical_y);
if (finder.FindBlocks(height, resolution, single_column,
to_block, boxa, pixa, &found_blocks, to_blocks) < 0)
return -1;
finder.ComputeDeskewVectors(&deskew_, &reskew_);
boxa = NULL;
pixa = NULL;
}
}
#ifdef HAVE_LIBLEPT
boxaDestroy(&boxa);
pixaDestroy(&pixa);
#endif
blocks->clear();
BLOCK_IT block_it(blocks);
// Move the found blocks to the input/output blocks.
block_it.add_list_after(&found_blocks);
if (textord_debug_images) {
// The debug image is no longer needed so delete it.
unlink(AlignedBlob::textord_debug_pix().string());
}
return 0;
}
示例15: find_components
// Make the textlines and words inside each block.
void Textord::TextordPage(PageSegMode pageseg_mode, const FCOORD &reskew,
int width, int height, Pix *binary_pix,
Pix *thresholds_pix, Pix *grey_pix,
bool use_box_bottoms,
BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks) {
page_tr_.set_x(width);
page_tr_.set_y(height);
if (to_blocks->empty()) {
// AutoPageSeg was not used, so we need to find_components first.
find_components(binary_pix, blocks, to_blocks);
TO_BLOCK_IT it(to_blocks);
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
TO_BLOCK *to_block = it.data();
// Compute the edge offsets whether or not there is a grey_pix.
// We have by-passed auto page seg, so we have to run it here.
// By page segmentation mode there is no non-text to avoid running on.
to_block->ComputeEdgeOffsets(thresholds_pix, grey_pix);
}
} else if (!PSM_SPARSE(pageseg_mode)) {
// AutoPageSeg does not need to find_components as it did that already.
// Filter_blobs sets up the TO_BLOCKs the same as find_components does.
filter_blobs(page_tr_, to_blocks, true);
}
ASSERT_HOST(!to_blocks->empty());
if (pageseg_mode == PSM_SINGLE_BLOCK_VERT_TEXT) {
const FCOORD anticlockwise90(0.0f, 1.0f);
const FCOORD clockwise90(0.0f, -1.0f);
TO_BLOCK_IT it(to_blocks);
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
TO_BLOCK *to_block = it.data();
BLOCK *block = to_block->block;
// Create a fake poly_block in block from its bounding box.
block->set_poly_block(new POLY_BLOCK(block->bounding_box(),
PT_VERTICAL_TEXT));
// Rotate the to_block along with its contained block and blobnbox lists.
to_block->rotate(anticlockwise90);
// Set the block's rotation values to obey the convention followed in
// layout analysis for vertical text.
block->set_re_rotation(clockwise90);
block->set_classify_rotation(clockwise90);
}
}
TO_BLOCK_IT to_block_it(to_blocks);
TO_BLOCK *to_block = to_block_it.data();
// Make the rows in the block.
float gradient = 0;
// Do it the old fashioned way.
if (PSM_LINE_FIND_ENABLED(pageseg_mode)) {
gradient = make_rows(page_tr_, to_blocks);
} else if (!PSM_SPARSE(pageseg_mode)) {
// RAW_LINE, SINGLE_LINE, SINGLE_WORD and SINGLE_CHAR all need a single row.
gradient = make_single_row(page_tr_, pageseg_mode != PSM_RAW_LINE,
to_block, to_blocks);
}
BaselineDetect baseline_detector(textord_baseline_debug,
reskew, to_blocks);
baseline_detector.ComputeStraightBaselines(use_box_bottoms);
baseline_detector.ComputeBaselineSplinesAndXheights(page_tr_, true,
textord_heavy_nr,
textord_show_final_rows,
this);
// Now make the words in the lines.
if (PSM_WORD_FIND_ENABLED(pageseg_mode)) {
// SINGLE_LINE uses the old word maker on the single line.
make_words(this, page_tr_, gradient, blocks, to_blocks);
} else {
// SINGLE_WORD and SINGLE_CHAR cram all the blobs into a
// single word, and in SINGLE_CHAR mode, all the outlines
// go in a single blob.
TO_BLOCK *to_block = to_block_it.data();
make_single_word(pageseg_mode == PSM_SINGLE_CHAR,
to_block->get_rows(), to_block->block->row_list());
}
cleanup_blocks(PSM_WORD_FIND_ENABLED(pageseg_mode), blocks);
// Remove empties.
// Compute the margins for each row in the block, to be used later for
// paragraph detection.
BLOCK_IT b_it(blocks);
for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
b_it.data()->compute_row_margins();
}
#ifndef GRAPHICS_DISABLED
close_to_win();
#endif
}