本文整理汇总了C++中STRING::string方法的典型用法代码示例。如果您正苦于以下问题：C++ STRING::string方法的具体用法？C++ STRING::string怎么用？C++ STRING::string使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类STRING的用法示例。
在下文中一共展示了STRING::string方法的13个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: word_display
//.........这里部分代码省略.........
}
image_win->Pen(color);
TBOX box = box_word->BlobBox(i);
image_win->Rectangle(box.left(), box.bottom(), box.right(), box.top());
}
return true;
}
/*
Note the double coercions of(COLOUR)((inT32)editor_image_word_bb_color)
etc. are to keep the compiler happy.
*/
// display bounding box
if (word->display_flag(DF_BOX)) {
word->bounding_box().plot(image_win,
(ScrollView::Color)((inT32)
editor_image_word_bb_color),
(ScrollView::Color)((inT32)
editor_image_word_bb_color));
ScrollView::Color c = (ScrollView::Color)
((inT32) editor_image_blob_bb_color);
image_win->Pen(c);
c_it.set_to_list(word->cblob_list());
for (c_it.mark_cycle_pt(); !c_it.cycled_list(); c_it.forward())
c_it.data()->bounding_box().plot(image_win);
displayed_something = TRUE;
}
// display edge steps
if (word->display_flag(DF_EDGE_STEP)) { // edgesteps available
word->plot(image_win); // rainbow colors
displayed_something = TRUE;
}
// display poly approx
if (word->display_flag(DF_POLYGONAL)) {
// need to convert
TWERD* tword = TWERD::PolygonalCopy(poly_allow_detailed_fx, word);
tword->plot(image_win);
delete tword;
displayed_something = TRUE;
}
// Display correct text and blamer information.
STRING text;
STRING blame;
if (word->display_flag(DF_TEXT) && word->text() != NULL) {
text = word->text();
}
if (word->display_flag(DF_BLAMER) &&
!(word_res->blamer_bundle != NULL &&
word_res->blamer_bundle->incorrect_result_reason() == IRR_CORRECT)) {
text = "";
const BlamerBundle *blamer_bundle = word_res->blamer_bundle;
if (blamer_bundle == NULL) {
text += "NULL";
} else {
text = blamer_bundle->TruthString();
}
text += " -> ";
STRING best_choice_str;
if (word_res->best_choice == NULL) {
best_choice_str = "NULL";
} else {
word_res->best_choice->string_and_lengths(&best_choice_str, NULL);
}
text += best_choice_str;
IncorrectResultReason reason = (blamer_bundle == NULL) ?
IRR_PAGE_LAYOUT : blamer_bundle->incorrect_result_reason();
ASSERT_HOST(reason < IRR_NUM_REASONS)
blame += " [";
blame += BlamerBundle::IncorrectReasonName(reason);
blame += "]";
}
if (text.length() > 0) {
word_bb = word->bounding_box();
image_win->Pen(ScrollView::RED);
word_height = word_bb.height();
int text_height = 0.50 * word_height;
if (text_height > 20) text_height = 20;
image_win->TextAttributes("Arial", text_height, false, false, false);
shift = (word_height < word_bb.width()) ? 0.25 * word_height : 0.0f;
image_win->Text(word_bb.left() + shift,
word_bb.bottom() + 0.25 * word_height, text.string());
if (blame.length() > 0) {
image_win->Text(word_bb.left() + shift,
word_bb.bottom() + 0.25 * word_height - text_height,
blame.string());
}
displayed_something = TRUE;
}
if (!displayed_something) // display BBox anyway
word->bounding_box().plot(image_win,
(ScrollView::Color)((inT32) editor_image_word_bb_color),
(ScrollView::Color)((inT32)
editor_image_word_bb_color));
return TRUE;
}
示例2: SegmentPage
/**
* Segment the page according to the current value of tessedit_pageseg_mode.
* pix_binary_ is used as the source image and should not be NULL.
* On return the blocks list owns all the constructed page layout.
*/
int Tesseract::SegmentPage(const STRING* input_file, BLOCK_LIST* blocks,
Tesseract* osd_tess, OSResults* osr) {
ASSERT_HOST(pix_binary_ != NULL);
int width = pixGetWidth(pix_binary_);
int height = pixGetHeight(pix_binary_);
// Get page segmentation mode.
PageSegMode pageseg_mode = static_cast<PageSegMode>(
static_cast<int>(tessedit_pageseg_mode));
// If a UNLV zone file can be found, use that instead of segmentation.
if (!PSM_COL_FIND_ENABLED(pageseg_mode) &&
input_file != NULL && input_file->length() > 0) {
STRING name = *input_file;
const char* lastdot = strrchr(name.string(), '.');
if (lastdot != NULL)
name[lastdot - name.string()] = '\0';
read_unlv_file(name, width, height, blocks);
}
if (blocks->empty()) {
// No UNLV file present. Work according to the PageSegMode.
// First make a single block covering the whole image.
BLOCK_IT block_it(blocks);
BLOCK* block = new BLOCK("", TRUE, 0, 0, 0, 0, width, height);
block->set_right_to_left(right_to_left());
block_it.add_to_end(block);
} else {
// UNLV file present. Use PSM_SINGLE_BLOCK.
pageseg_mode = PSM_SINGLE_BLOCK;
}
int auto_page_seg_ret_val = 0;
TO_BLOCK_LIST to_blocks;
if (PSM_OSD_ENABLED(pageseg_mode) || PSM_BLOCK_FIND_ENABLED(pageseg_mode) ||
PSM_SPARSE(pageseg_mode)) {
auto_page_seg_ret_val =
AutoPageSeg(pageseg_mode, blocks, &to_blocks, osd_tess, osr);
if (pageseg_mode == PSM_OSD_ONLY)
return auto_page_seg_ret_val;
// To create blobs from the image region bounds uncomment this line:
// to_blocks.clear(); // Uncomment to go back to the old mode.
} else {
deskew_ = FCOORD(1.0f, 0.0f);
reskew_ = FCOORD(1.0f, 0.0f);
if (pageseg_mode == PSM_CIRCLE_WORD) {
Pix* pixcleaned = RemoveEnclosingCircle(pix_binary_);
if (pixcleaned != NULL) {
pixDestroy(&pix_binary_);
pix_binary_ = pixcleaned;
}
}
}
if (auto_page_seg_ret_val < 0) {
return -1;
}
if (blocks->empty()) {
if (textord_debug_tabfind)
tprintf("Empty page\n");
return 0; // AutoPageSeg found an empty page.
}
bool splitting =
pageseg_devanagari_split_strategy != ShiroRekhaSplitter::NO_SPLIT;
bool cjk_mode = textord_use_cjk_fp_model;
textord_.TextordPage(pageseg_mode, reskew_, width, height, pix_binary_,
pix_thresholds_, pix_grey_, splitting || cjk_mode,
blocks, &to_blocks);
return auto_page_seg_ret_val;
}
示例3:
// Obtains the text of wc via string_and_lengths (lengths are not requested)
// and passes the resulting C string to the callback's Run method.
// NOTE(review): the name implies the text is UTF-8; confirm against
// WERD_CHOICE::string_and_lengths.
static void CallWithUTF8(TessCallback1<const char *> *cb,
                         const WERD_CHOICE *wc) {
  STRING choice_text;
  wc->string_and_lengths(&choice_text, nullptr);
  const char *raw_text = choice_text.string();
  cb->Run(raw_text);
}
示例4: main
int main(int argc, char **argv) {
#ifdef USING_GETTEXT
setlocale (LC_ALL, "");
bindtextdomain (PACKAGE, LOCALEDIR);
textdomain (PACKAGE);
#endif
if ((argc == 2 && strcmp(argv[1], "-v") == 0) ||
(argc == 2 && strcmp(argv[1], "--version") == 0)) {
char *versionStrP;
fprintf(stderr, "tesseract %s\n", tesseract::TessBaseAPI::Version());
versionStrP = getLeptonicaVersion();
fprintf(stderr, " %s\n", versionStrP);
lept_free(versionStrP);
versionStrP = getImagelibVersions();
fprintf(stderr, " %s\n", versionStrP);
lept_free(versionStrP);
exit(0);
}
// Make the order of args a bit more forgiving than it used to be.
const char* lang = "eng";
const char* image = NULL;
const char* output = NULL;
bool noocr = false;
bool list_langs = false;
bool print_parameters = false;
tesseract::PageSegMode pagesegmode = tesseract::PSM_AUTO;
int arg = 1;
while (arg < argc && (output == NULL || argv[arg][0] == '-')) {
if (strcmp(argv[arg], "-l") == 0 && arg + 1 < argc) {
lang = argv[arg + 1];
++arg;
} else if (strcmp(argv[arg], "-psm") == 0 && arg + 1 < argc) {
pagesegmode = static_cast<tesseract::PageSegMode>(atoi(argv[arg + 1]));
++arg;
} else if (strcmp(argv[arg], "--print-parameters") == 0) {
noocr = true;
print_parameters = true;
} else if (strcmp(argv[arg], "-c") == 0 && arg + 1 < argc) {
// handled properly after api init
++arg;
} else if (image == NULL) {
image = argv[arg];
} else if (output == NULL) {
output = argv[arg];
}
++arg;
}
if (argc == 2 && strcmp(argv[1], "--list-langs") == 0) {
list_langs = true;
noocr = true;
}
if (output == NULL && noocr == false) {
fprintf(stderr, _("Usage:%s imagename outputbase|stdout [-l lang] "
"[-psm pagesegmode] [-c configvar=value] "
"[configfile...]\n\n"), argv[0]);
fprintf(stderr,
_("pagesegmode values are:\n"
"0 = Orientation and script detection (OSD) only.\n"
"1 = Automatic page segmentation with OSD.\n"
"2 = Automatic page segmentation, but no OSD, or OCR\n"
"3 = Fully automatic page segmentation, but no OSD. (Default)\n"
"4 = Assume a single column of text of variable sizes.\n"
"5 = Assume a single uniform block of vertically aligned text.\n"
"6 = Assume a single uniform block of text.\n"
"7 = Treat the image as a single text line.\n"
"8 = Treat the image as a single word.\n"
"9 = Treat the image as a single word in a circle.\n"
"10 = Treat the image as a single character.\n"));
fprintf(stderr, _("multiple -c arguments are allowed.\n"));
fprintf(stderr, _("-l lang, -psm pagesegmode and any -c options must occur"
"before any configfile.\n\n"));
fprintf(stderr, _("Single options:\n"));
fprintf(stderr, _(" -v --version: version info\n"));
fprintf(stderr, _(" --list-langs: list available languages for tesseract "
"engine\n"));
fprintf(stderr, _(" --print-parameters: print tesseract parameters to the "
"stdout\n"));
exit(1);
}
tesseract::TessBaseAPI api;
STRING tessdata_dir;
truncate_path(argv[0], &tessdata_dir);
api.SetOutputName(output);
int rc = api.Init(tessdata_dir.string(), lang, tesseract::OEM_DEFAULT,
&(argv[arg]), argc - arg, NULL, NULL, false);
if (rc) {
fprintf(stderr, _("Could not initialize tesseract.\n"));
exit(1);
}
//.........这里部分代码省略.........
示例5: SegmentPage
// Segment the page according to the current value of tessedit_pageseg_mode.
// If the pix_binary_ member is not NULL, it is used as the source image,
// and copied to image, otherwise it just uses image as the input.
// On return the blocks list owns all the constructed page layout.
int Tesseract::SegmentPage(const STRING* input_file,
IMAGE* image, BLOCK_LIST* blocks) {
int width = image->get_xsize();
int height = image->get_ysize();
int resolution = image->get_res();
#ifdef HAVE_LIBLEPT
if (pix_binary_ != NULL) {
width = pixGetWidth(pix_binary_);
height = pixGetHeight(pix_binary_);
resolution = pixGetXRes(pix_binary_);
}
#endif
// Zero resolution messes up the algorithms, so make sure it is credible.
if (resolution < kMinCredibleResolution)
resolution = kDefaultResolution;
// Get page segmentation mode.
PageSegMode pageseg_mode = static_cast<PageSegMode>(
static_cast<int>(tessedit_pageseg_mode));
// If a UNLV zone file can be found, use that instead of segmentation.
if (pageseg_mode != tesseract::PSM_AUTO &&
input_file != NULL && input_file->length() > 0) {
STRING name = *input_file;
const char* lastdot = strrchr(name.string(), '.');
if (lastdot != NULL)
name[lastdot - name.string()] = '\0';
read_unlv_file(name, width, height, blocks);
}
bool single_column = pageseg_mode > PSM_AUTO;
if (blocks->empty()) {
// No UNLV file present. Work according to the PageSegMode.
// First make a single block covering the whole image.
BLOCK_IT block_it(blocks);
BLOCK* block = new BLOCK("", TRUE, 0, 0, 0, 0, width, height);
block_it.add_to_end(block);
} else {
// UNLV file present. Use PSM_SINGLE_COLUMN.
pageseg_mode = PSM_SINGLE_COLUMN;
}
TO_BLOCK_LIST land_blocks, port_blocks;
TBOX page_box;
if (pageseg_mode <= PSM_SINGLE_COLUMN) {
if (AutoPageSeg(width, height, resolution, single_column,
image, blocks, &port_blocks) < 0) {
return -1;
}
// To create blobs from the image region bounds uncomment this line:
// port_blocks.clear(); // Uncomment to go back to the old mode.
} else {
#if HAVE_LIBLEPT
image->FromPix(pix_binary_);
#endif
deskew_ = FCOORD(1.0f, 0.0f);
reskew_ = FCOORD(1.0f, 0.0f);
}
if (blocks->empty()) {
tprintf("Empty page\n");
return 0; // AutoPageSeg found an empty page.
}
if (port_blocks.empty()) {
// AutoPageSeg was not used, so we need to find_components first.
find_components(blocks, &land_blocks, &port_blocks, &page_box);
} else {
// AutoPageSeg does not need to find_components as it did that already.
page_box.set_left(0);
page_box.set_bottom(0);
page_box.set_right(width);
page_box.set_top(height);
// Filter_blobs sets up the TO_BLOCKs the same as find_components does.
filter_blobs(page_box.topright(), &port_blocks, true);
}
TO_BLOCK_IT to_block_it(&port_blocks);
ASSERT_HOST(!port_blocks.empty());
TO_BLOCK* to_block = to_block_it.data();
if (pageseg_mode <= PSM_SINGLE_BLOCK ||
to_block->line_size < 2) {
// For now, AUTO, SINGLE_COLUMN and SINGLE_BLOCK all map to the old
// textord. The difference is the number of blocks and how the are made.
textord_page(page_box.topright(), blocks, &land_blocks, &port_blocks,
this);
} else {
// SINGLE_LINE, SINGLE_WORD and SINGLE_CHAR all need a single row.
float gradient = make_single_row(page_box.topright(),
to_block, &port_blocks, this);
if (pageseg_mode == PSM_SINGLE_LINE) {
// SINGLE_LINE uses the old word maker on the single line.
make_words(page_box.topright(), gradient, blocks,
&land_blocks, &port_blocks, this);
} else {
// SINGLE_WORD and SINGLE_CHAR cram all the blobs into a
// single word, and in SINGLE_CHAR mode, all the outlines
// go in a single blob.
make_single_word(pageseg_mode == PSM_SINGLE_CHAR,
to_block->get_rows(), to_block->block->row_list());
//.........这里部分代码省略.........
示例6: main
// Main program to combine/extract/overwrite tessdata components
// in [lang].traineddata files.
//
// To combine all the individual tessdata components (unicharset, DAWGs,
// classifier templates, ambiguities, language configs) located at, say,
// /home/$USER/temp/eng.* run:
//
// combine_tessdata /home/$USER/temp/eng.
//
// The result will be a combined tessdata file /home/$USER/temp/eng.traineddata
//
// Specify option -e if you would like to extract individual components
// from a combined traineddata file. For example, to extract language config
// file and the unicharset from tessdata/eng.traineddata run:
//
// combine_tessdata -e tessdata/eng.traineddata
// /home/$USER/temp/eng.config /home/$USER/temp/eng.unicharset
//
// The desired config file and unicharset will be written to
// /home/$USER/temp/eng.config /home/$USER/temp/eng.unicharset
//
// Specify option -o to overwrite individual components of the given
// [lang].traineddata file. For example, to overwrite language config
// and unichar ambiguities files in tessdata/eng.traineddata use:
//
// combine_tessdata -o tessdata/eng.traineddata
// /home/$USER/temp/eng.config /home/$USER/temp/eng.unicharambigs
//
// As a result, tessdata/eng.traineddata will contain the new language config
// and unichar ambigs, plus all the original DAWGs, classifier templates, etc.
//
// Note: the file names of the files to extract to and to overwrite from should
// have the appropriate file suffixes (extensions) indicating their tessdata
// component type (.unicharset for the unicharset, .unicharambigs for unichar
// ambigs, etc). See k*FileSuffix variable in ccutil/tessdatamanager.h.
//
// Specify option -u to unpack all the components to the specified path:
//
// combine_tessdata -u tessdata/eng.traineddata /home/$USER/temp/eng.
//
// This will create /home/$USER/temp/eng.* files with individual tessdata
// components from tessdata/eng.traineddata.
//
int main(int argc, char **argv) {
int i;
if (argc == 2) {
printf("Combining tessdata files\n");
STRING lang = argv[1];
char* last = &argv[1][strlen(argv[1])-1];
if (*last != '.')
lang += '.';
STRING output_file = lang;
output_file += kTrainedDataSuffix;
if (!tesseract::TessdataManager::CombineDataFiles(
lang.string(), output_file.string())) {
printf("Error combining tessdata files into %s\n",
output_file.string());
} else {
printf("Output %s created sucessfully.\n", output_file.string());
}
} else if (argc >= 4 && (strcmp(argv[1], "-e") == 0 ||
strcmp(argv[1], "-u") == 0)) {
// Initialize TessdataManager with the data in the given traineddata file.
tesseract::TessdataManager tm;
tm.Init(argv[2], 0);
printf("Extracting tessdata components from %s\n", argv[2]);
if (strcmp(argv[1], "-e") == 0) {
for (i = 3; i < argc; ++i) {
if (tm.ExtractToFile(argv[i])) {
printf("Wrote %s\n", argv[i]);
} else {
printf("Not extracting %s, since this component"
" is not present\n", argv[i]);
}
}
} else { // extract all the components
for (i = 0; i < tesseract::TESSDATA_NUM_ENTRIES; ++i) {
STRING filename = argv[3];
char* last = &argv[3][strlen(argv[3])-1];
if (*last != '.')
filename += '.';
filename += tesseract::kTessdataFileSuffixes[i];
if (tm.ExtractToFile(filename.string())) {
printf("Wrote %s\n", filename.string());
}
}
}
tm.End();
} else if (argc >= 4 && strcmp(argv[1], "-o") == 0) {
// Rename the current traineddata file to a temporary name.
const char *new_traineddata_filename = argv[2];
STRING traineddata_filename = new_traineddata_filename;
traineddata_filename += ".__tmp__";
if (rename(new_traineddata_filename, traineddata_filename.string()) != 0) {
tprintf("Failed to create a temporary file %s\n",
traineddata_filename.string());
exit(1);
}
// Initialize TessdataManager with the data in the given traineddata file.
//.........这里部分代码省略.........
示例7: main
// Main program to combine/extract/overwrite tessdata components
// in [lang].traineddata files.
//
// To combine all the individual tessdata components (unicharset, DAWGs,
// classifier templates, ambiguities, language configs) located at, say,
// /home/$USER/temp/eng.* run:
//
// combine_tessdata /home/$USER/temp/eng.
//
// The result will be a combined tessdata file /home/$USER/temp/eng.traineddata
//
// Specify option -e if you would like to extract individual components
// from a combined traineddata file. For example, to extract language config
// file and the unicharset from tessdata/eng.traineddata run:
//
// combine_tessdata -e tessdata/eng.traineddata
// /home/$USER/temp/eng.config /home/$USER/temp/eng.unicharset
//
// The desired config file and unicharset will be written to
// /home/$USER/temp/eng.config /home/$USER/temp/eng.unicharset
//
// Specify option -o to overwrite individual components of the given
// [lang].traineddata file. For example, to overwrite language config
// and unichar ambiguities files in tessdata/eng.traineddata use:
//
// combine_tessdata -o tessdata/eng.traineddata
// /home/$USER/temp/eng.config /home/$USER/temp/eng.unicharambigs
//
// As a result, tessdata/eng.traineddata will contain the new language config
// and unichar ambigs, plus all the original DAWGs, classifier templates, etc.
//
// Note: the file names of the files to extract to and to overwrite from should
// have the appropriate file suffixes (extensions) indicating their tessdata
// component type (.unicharset for the unicharset, .unicharambigs for unichar
// ambigs, etc). See k*FileSuffix variable in ccutil/tessdatamanager.h.
//
// Specify option -u to unpack all the components to the specified path:
//
// combine_tessdata -u tessdata/eng.traineddata /home/$USER/temp/eng.
//
// This will create /home/$USER/temp/eng.* files with individual tessdata
// components from tessdata/eng.traineddata.
//
int main(int argc, char **argv) {
tesseract::CheckSharedLibraryVersion();
int i;
tesseract::TessdataManager tm;
if (argc > 1 && (!strcmp(argv[1], "-v") || !strcmp(argv[1], "--version"))) {
printf("%s\n", tesseract::TessBaseAPI::Version());
return EXIT_SUCCESS;
} else if (argc == 2) {
printf("Combining tessdata files\n");
STRING lang = argv[1];
char* last = &argv[1][strlen(argv[1])-1];
if (*last != '.')
lang += '.';
STRING output_file = lang;
output_file += kTrainedDataSuffix;
if (!tm.CombineDataFiles(lang.string(), output_file.string())) {
printf("Error combining tessdata files into %s\n",
output_file.string());
} else {
printf("Output %s created successfully.\n", output_file.string());
}
} else if (argc >= 4 && (strcmp(argv[1], "-e") == 0 ||
strcmp(argv[1], "-u") == 0)) {
// Initialize TessdataManager with the data in the given traineddata file.
if (!tm.Init(argv[2])) {
tprintf("Failed to read %s\n", argv[2]);
return EXIT_FAILURE;
}
printf("Extracting tessdata components from %s\n", argv[2]);
if (strcmp(argv[1], "-e") == 0) {
for (i = 3; i < argc; ++i) {
errno = 0;
if (tm.ExtractToFile(argv[i])) {
printf("Wrote %s\n", argv[i]);
} else if (errno == 0) {
printf("Not extracting %s, since this component"
" is not present\n", argv[i]);
return EXIT_FAILURE;
} else {
printf("Error, could not extract %s: %s\n",
argv[i], strerror(errno));
return EXIT_FAILURE;
}
}
} else { // extract all the components
for (i = 0; i < tesseract::TESSDATA_NUM_ENTRIES; ++i) {
STRING filename = argv[3];
char* last = &argv[3][strlen(argv[3])-1];
if (*last != '.')
filename += '.';
filename += tesseract::kTessdataFileSuffixes[i];
errno = 0;
if (tm.ExtractToFile(filename.string())) {
printf("Wrote %s\n", filename.string());
} else if (errno != 0) {
printf("Error, could not extract %s: %s\n",
//.........这里部分代码省略.........
示例8: recog_training_segmented
// This function takes tif/box pair of files and runs recognition on the image,
// while making sure that the word bounds that tesseract identified roughly
// match to those specified by the input box file. For each word (ngram in a
// single bounding box from the input box file) it outputs the ocred result,
// the correct label, rating and certainty.
void Tesseract::recog_training_segmented(const STRING &fname,
PAGE_RES *page_res,
volatile ETEXT_DESC *monitor,
FILE *output_file) {
STRING box_fname = fname;
const char *lastdot = strrchr(box_fname.string(), '.');
if (lastdot != NULL) box_fname[lastdot - box_fname.string()] = '\0';
box_fname += ".box";
// read_next_box() will close box_file
FILE *box_file = open_file(box_fname.string(), "r");
PAGE_RES_IT page_res_it;
page_res_it.page_res = page_res;
page_res_it.restart_page();
STRING label;
// Process all the words on this page.
TBOX tbox; // tesseract-identified box
TBOX bbox; // box from the box file
bool keep_going;
int line_number = 0;
int examined_words = 0;
do {
keep_going = read_t(&page_res_it, &tbox);
keep_going &= ReadNextBox(applybox_page, &line_number, box_file, &label,
&bbox);
// Align bottom left points of the TBOXes.
while (keep_going &&
!NearlyEqual<int>(tbox.bottom(), bbox.bottom(), kMaxBoxEdgeDiff)) {
if (bbox.bottom() < tbox.bottom()) {
page_res_it.forward();
keep_going = read_t(&page_res_it, &tbox);
} else {
keep_going = ReadNextBox(applybox_page, &line_number, box_file, &label,
&bbox);
}
}
while (keep_going &&
!NearlyEqual<int>(tbox.left(), bbox.left(), kMaxBoxEdgeDiff)) {
if (bbox.left() > tbox.left()) {
page_res_it.forward();
keep_going = read_t(&page_res_it, &tbox);
} else {
keep_going = ReadNextBox(applybox_page, &line_number, box_file, &label,
&bbox);
}
}
// OCR the word if top right points of the TBOXes are similar.
if (keep_going &&
NearlyEqual<int>(tbox.right(), bbox.right(), kMaxBoxEdgeDiff) &&
NearlyEqual<int>(tbox.top(), bbox.top(), kMaxBoxEdgeDiff)) {
ambigs_classify_and_output(label.string(), &page_res_it, output_file);
examined_words++;
}
page_res_it.forward();
} while (keep_going);
fclose(box_file);
// Set up scripts on all of the words that did not get sent to
// ambigs_classify_and_output. They all should have, but if all the
// werd_res's don't get uch_sets, tesseract will crash when you try
// to iterate over them. :-(
int total_words = 0;
for (page_res_it.restart_page(); page_res_it.block() != NULL;
page_res_it.forward()) {
if (page_res_it.word()) {
if (page_res_it.word()->uch_set == NULL)
page_res_it.word()->SetupFake(unicharset);
total_words++;
}
}
if (examined_words < 0.85 * total_words) {
tprintf("TODO(antonova): clean up recog_training_segmented; "
" It examined only a small fraction of the ambigs image.\n");
}
tprintf("recog_training_segmented: examined %d / %d words.\n",
examined_words, total_words);
}
示例9: ParseCommandLineFlags
void ParseCommandLineFlags(const char* usage,
int* argc, char*** argv,
const bool remove_flags) {
if (*argc == 1) {
tprintf("USAGE: %s\n", usage);
PrintCommandLineFlags();
exit(0);
}
unsigned int i = 1;
for (i = 1; i < *argc; ++i) {
const char* current_arg = (*argv)[i];
// If argument does not start with a hyphen then break.
if (current_arg[0] != '-') {
break;
}
// Position current_arg after startings hyphens. We treat a sequence of
// consecutive hyphens of any length identically.
while (*current_arg == '-') {
++current_arg;
}
// If this is asking for usage, print the help message and abort.
if (!strcmp(current_arg, "help") ||
!strcmp(current_arg, "helpshort")) {
tprintf("USAGE: %s\n", usage);
PrintCommandLineFlags();
exit(0);
}
// Find the starting position of the value if it was specified in this
// string.
const char* equals_position = strchr(current_arg, '=');
const char* rhs = NULL;
if (equals_position != NULL) {
rhs = equals_position + 1;
}
// Extract the flag name.
STRING lhs;
if (equals_position == NULL) {
lhs = current_arg;
} else {
lhs.assign(current_arg, equals_position - current_arg);
}
if (!lhs.length()) {
tprintf("ERROR: Bad argument: %s\n", (*argv)[i]);
exit(1);
}
// Find the flag name in the list of global flags.
// inT32 flag
inT32 int_val;
if (IntFlagExists(lhs.string(), &int_val)) {
if (rhs != NULL) {
if (!strlen(rhs)) {
// Bad input of the format --int_flag=
tprintf("ERROR: Bad argument: %s\n", (*argv)[i]);
exit(1);
}
if (!SafeAtoi(rhs, &int_val)) {
tprintf("ERROR: Could not parse int from %s in flag %s\n",
rhs, (*argv)[i]);
exit(1);
}
} else {
// We need to parse the next argument
if (i + 1 >= *argc) {
tprintf("ERROR: Could not find value argument for flag %s\n",
lhs.string());
exit(1);
} else {
++i;
if (!SafeAtoi((*argv)[i], &int_val)) {
tprintf("ERROR: Could not parse inT32 from %s\n", (*argv)[i]);
exit(1);
}
}
}
SetIntFlagValue(lhs.string(), int_val);
continue;
}
// double flag
double double_val;
if (DoubleFlagExists(lhs.string(), &double_val)) {
if (rhs != NULL) {
if (!strlen(rhs)) {
// Bad input of the format --double_flag=
tprintf("ERROR: Bad argument: %s\n", (*argv)[i]);
exit(1);
}
if (!SafeAtod(rhs, &double_val)) {
tprintf("ERROR: Could not parse double from %s in flag %s\n",
rhs, (*argv)[i]);
exit(1);
}
} else {
// We need to parse the next argument
if (i + 1 >= *argc) {
tprintf("ERROR: Could not find value argument for flag %s\n",
lhs.string());
exit(1);
//.........这里部分代码省略.........
示例10: edges_and_textord
void edges_and_textord( //read .pb file
const char *filename, //.pb file
BLOCK_LIST *blocks) {
BLOCK *block; //current block
char *lastdot; //of name
STRING name = filename; //truncated name
ICOORD page_tr;
BOX page_box; //bounding_box
PDBLK_CLIST pd_blocks; //copy of list
BLOCK_IT block_it = blocks; //iterator
PDBLK_C_IT pd_it = &pd_blocks; //iterator
//different orientations
TO_BLOCK_LIST land_blocks, port_blocks;
IMAGE thresh_image; //thresholded
lastdot = strrchr (name.string (), '.');
if (lastdot != NULL)
*lastdot = '\0';
if (page_image.get_bpp () == 0) {
name += tessedit_image_ext;
if (page_image.read_header (name.string ()))
CANTOPENFILE.error ("edges_and_textord", EXIT, name.string ());
if (page_image.read (0))
READFAILED.error ("edges_and_textord", EXIT, name.string ());
name = filename;
lastdot = strrchr (name.string (), '.');
if (lastdot != NULL)
*lastdot = '\0';
}
page_tr = ICOORD (page_image.get_xsize (), page_image.get_ysize ());
read_pd_file (name, page_image.get_xsize (), page_image.get_ysize (),
blocks);
block_it.set_to_list (blocks);
if (global_monitor != NULL)
global_monitor->ocr_alive = TRUE;
if (page_image.get_bpp () > 1) {
set_global_loc_code(LOC_ADAPTIVE);
for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
block_it.forward ()) {
block = block_it.data ();
pd_it.add_after_then_move (block);
}
// adaptive_threshold(&page_image,&pd_blocks,&thresh_image);
set_global_loc_code(LOC_EDGE_PROG);
#ifndef EMBEDDED
previous_cpu = clock ();
#endif
for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
block_it.forward ()) {
block = block_it.data ();
if (!polygon_tess_approximation)
invert_image(&page_image);
#ifndef GRAPHICS_DISABLED
extract_edges(NO_WINDOW, &page_image, &thresh_image, page_tr, block);
#else
extract_edges(&page_image, &thresh_image, page_tr, block);
#endif
page_box += block->bounding_box ();
}
page_image = thresh_image; //everyone else gets it
}
else {
set_global_loc_code(LOC_EDGE_PROG);
if (!page_image.white_high ())
invert_image(&page_image);
#ifndef EMBEDDED
previous_cpu = clock ();
#endif
for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
block_it.forward ()) {
block = block_it.data ();
#ifndef GRAPHICS_DISABLED
extract_edges(NO_WINDOW, &page_image, &page_image, page_tr, block);
#else
extract_edges(&page_image, &page_image, page_tr, block);
#endif
page_box += block->bounding_box ();
}
}
if (global_monitor != NULL) {
global_monitor->ocr_alive = TRUE;
global_monitor->progress = 10;
}
assign_blobs_to_blocks2(blocks, &land_blocks, &port_blocks);
if (global_monitor != NULL)
global_monitor->ocr_alive = TRUE;
filter_blobs (page_box.topright (), &land_blocks, textord_test_landscape);
#ifndef EMBEDDED
previous_cpu = clock ();
#endif
filter_blobs (page_box.topright (), &port_blocks, !textord_test_landscape);
if (global_monitor != NULL)
global_monitor->ocr_alive = TRUE;
textord_page (page_box.topright (), blocks, &land_blocks, &port_blocks);
}
示例11: BuildListOfAllLeaves
// Find all editable parameters used within tesseract and create a
// SVMenuNode tree from it.
//
// Parameters are collected from GlobalParams() and, when tess->params() is
// not NULL, from tess->params() as well. Each parameter is filed by the
// prefixes that GetPrefixes derives from its name:
//   - first prefix used by exactly one parameter -> child of "OTHER";
//   - first prefix used by at most MAX_ITEMS_IN_SUBMENU parameters, or
//     second prefix used by at most one -> child of the first-prefix
//     submenu;
//   - otherwise -> child of a second-prefix sub-submenu.
// Returns the root node, which is allocated here with new.
// TODO (wanke): This is actually sort of hackish.
SVMenuNode* ParamsEditor::BuildListOfAllLeaves(tesseract::Tesseract *tess) {
  SVMenuNode* mr = new SVMenuNode();
  ParamContent_LIST vclist;
  ParamContent_IT vc_it(&vclist);
  // Amount counts the number of entries for a specific prefix.
  // The key is a std::string (needs <string>) so that prefixes are compared
  // by content. Keying on the const char* from STRING::string() compared
  // buffer addresses of loop-local STRINGs, which are gone by the time the
  // counts are read back.
  // TODO(rays) get rid of the use of std::map.
  std::map<std::string, int> amount;

  // Add all parameters to a list.
  int v, i;
  int num_iterations = (tess->params() == NULL) ? 1 : 2;
  for (v = 0; v < num_iterations; ++v) {
    // Pass 0 reads the global parameters, pass 1 the per-instance ones.
    tesseract::ParamsVectors *vec = (v == 0) ? GlobalParams() : tess->params();
    for (i = 0; i < vec->int_params.size(); ++i) {
      vc_it.add_after_then_move(new ParamContent(vec->int_params[i]));
    }
    for (i = 0; i < vec->bool_params.size(); ++i) {
      vc_it.add_after_then_move(new ParamContent(vec->bool_params[i]));
    }
    for (i = 0; i < vec->string_params.size(); ++i) {
      vc_it.add_after_then_move(new ParamContent(vec->string_params[i]));
    }
    for (i = 0; i < vec->double_params.size(); ++i) {
      vc_it.add_after_then_move(new ParamContent(vec->double_params[i]));
    }
  }

  // Count the # of entries starting with a specific prefix.
  for (vc_it.mark_cycle_pt(); !vc_it.cycled_list(); vc_it.forward()) {
    ParamContent* vc = vc_it.data();
    STRING tag;
    STRING tag2;
    STRING tag3;

    GetPrefixes(vc->GetName(), &tag, &tag2, &tag3);
    amount[tag.string()]++;
    amount[tag2.string()]++;
    amount[tag3.string()]++;
  }

  vclist.sort(ParamContent::Compare);  // Sort the list alphabetically.

  SVMenuNode* other = mr->AddChild("OTHER");

  // go through the list again and this time create the menu structure.
  vc_it.move_to_first();
  for (vc_it.mark_cycle_pt(); !vc_it.cycled_list(); vc_it.forward()) {
    ParamContent* vc = vc_it.data();
    STRING tag;
    STRING tag2;
    STRING tag3;
    GetPrefixes(vc->GetName(), &tag, &tag2, &tag3);

    if (amount[tag.string()] == 1) {
      // The prefix is unique to this parameter: file it under "OTHER".
      other->AddChild(vc->GetName(), vc->GetId(), vc->GetValue().string(),
                      vc->GetDescription());
    } else {  // More than one would use this submenu -> create submenu.
      SVMenuNode* sv = mr->AddChild(tag.string());
      if ((amount[tag.string()] <= MAX_ITEMS_IN_SUBMENU) ||
          (amount[tag2.string()] <= 1)) {
        sv->AddChild(vc->GetName(), vc->GetId(),
                     vc->GetValue().string(), vc->GetDescription());
      } else {  // Make subsubmenus.
        SVMenuNode* sv2 = sv->AddChild(tag2.string());
        sv2->AddChild(vc->GetName(), vc->GetId(),
                      vc->GetValue().string(), vc->GetDescription());
      }
    }
  }
  return mr;
}
示例12: main
//.........这里部分代码省略.........
PERF_COUNT_START("Tesseract:main")
tesseract::TessBaseAPI api;
api.SetOutputName(output);
int rc = api.Init(datapath, lang, tesseract::OEM_DEFAULT,
&(argv[arg]), argc - arg, NULL, NULL, false);
if (rc) {
fprintf(stderr, "Could not initialize tesseract.\n");
exit(1);
}
char opt1[255], opt2[255];
for (arg = 0; arg < argc; arg++) {
if (strcmp(argv[arg], "-c") == 0 && arg + 1 < argc) {
strncpy(opt1, argv[arg + 1], 255);
*(strchr(opt1, '=')) = 0;
strncpy(opt2, strchr(argv[arg + 1], '=') + 1, 255);
opt2[254] = 0;
++arg;
if (!api.SetVariable(opt1, opt2)) {
fprintf(stderr, "Could not set option: %s=%s\n", opt1, opt2);
}
}
}
if (list_langs) {
GenericVector<STRING> languages;
api.GetAvailableLanguagesAsVector(&languages);
fprintf(stderr, "List of available languages (%d):\n",
languages.size());
for (int index = 0; index < languages.size(); ++index) {
STRING& string = languages[index];
fprintf(stderr, "%s\n", string.string());
}
api.End();
exit(0);
}
if (print_parameters) {
FILE* fout = stdout;
fprintf(stdout, "Tesseract parameters:\n");
api.PrintVariables(fout);
api.End();
exit(0);
}
// We have 2 possible sources of pagesegmode: a config file and
// the command line. For backwards compatability reasons, the
// default in tesseract is tesseract::PSM_SINGLE_BLOCK, but the
// default for this program is tesseract::PSM_AUTO. We will let
// the config file take priority, so the command-line default
// can take priority over the tesseract default, so we use the
// value from the command line only if the retrieved mode
// is still tesseract::PSM_SINGLE_BLOCK, indicating no change
// in any config file. Therefore the only way to force
// tesseract::PSM_SINGLE_BLOCK is from the command line.
// It would be simpler if we could set the value before Init,
// but that doesn't work.
if (api.GetPageSegMode() == tesseract::PSM_SINGLE_BLOCK)
api.SetPageSegMode(pagesegmode);
bool stdInput = !strcmp(image, "stdin") || !strcmp(image, "-");
Pix* pixs = NULL;
if (stdInput) {
示例13: save_summary
// Writes a run summary (counts, chop/join statistics and segmentation
// tallies) to the file named imagefile + ".sta".
// elapsed_time is printed on the first line as the number of seconds elapsed.
// The whole body is compiled out when SECURE_NAMES is defined.
void Wordrec::save_summary(inT32 elapsed_time) {
#ifndef SECURE_NAMES
STRING outfilename;
FILE *f;
int x;
int total;
outfilename = imagefile + ".sta";
// NOTE(review): f is used below without a NULL check; presumably open_file
// handles a failed open itself - confirm.
f = open_file (outfilename.string(), "w");
fprintf (f, INT32FORMAT " seconds elapsed\n", elapsed_time);
fprintf (f, "\n");
fprintf (f, "%d characters\n", character_count);
fprintf (f, "%d words\n", word_count);
fprintf (f, "\n");
fprintf (f, "%d permutations performed\n", permutation_count);
fprintf (f, "%d characters classified\n", chars_classified);
// NOTE(review): the percentages below divide by character_count and
// word_count without a zero guard (floating-point division) - confirm the
// counts are non-zero whenever this is called.
fprintf (f, "%4.0f%% classification overhead\n",
(float) chars_classified / character_count * 100.0 - 100.0);
fprintf (f, "\n");
// Pass 1 chopping and joining statistics.
fprintf (f, "%d words chopped (pass 1) ", words_chopped1);
fprintf (f, " (%0.0f%%)\n", (float) words_chopped1 / word_count * 100);
fprintf (f, "%d chops performed\n", chops_performed1);
fprintf (f, "%d chops attempted\n", chops_attempted1);
fprintf (f, "\n");
fprintf (f, "%d words joined (pass 1)", words_segmented1);
fprintf (f, " (%0.0f%%)\n", (float) words_segmented1 / word_count * 100);
fprintf (f, "%d segmentation states\n", segmentation_states1);
fprintf (f, "%d segmentations timed out\n", states_timed_out1);
fprintf (f, "\n");
// Pass 2 chopping and joining statistics.
fprintf (f, "%d words chopped (pass 2) ", words_chopped2);
fprintf (f, " (%0.0f%%)\n", (float) words_chopped2 / word_count * 100);
fprintf (f, "%d chops performed\n", chops_performed2);
fprintf (f, "%d chops attempted\n", chops_attempted2);
fprintf (f, "\n");
fprintf (f, "%d words joined (pass 2)", words_segmented2);
fprintf (f, " (%0.0f%%)\n", (float) words_segmented2 / word_count * 100);
fprintf (f, "%d segmentation states\n", segmentation_states2);
fprintf (f, "%d segmentations timed out\n", states_timed_out2);
fprintf (f, "\n");
// Accumulate entry * x over states_before_best.
// NOTE(review): iterate_tally / tally_entry are macros defined elsewhere;
// presumably iterate_tally loops x over the tally's buckets - confirm.
total = 0;
iterate_tally (states_before_best, x)
total += (tally_entry (states_before_best, x) * x);
fprintf (f, "segmentations (before best) = %d\n", total);
// Only report the overhead when total is non-zero, since it is the divisor.
if (total != 0.0)
fprintf (f, "%4.0f%% segmentation overhead\n",
(float) (segmentation_states1 + segmentation_states2) /
total * 100.0 - 100.0);
fprintf (f, "\n");
print_tally (f, "segmentations (before best)", states_before_best);
// The certainty lines go through cprintf rather than to f.
iterate_tally (best_certainties[0], x)
cprintf ("best certainty of %8.4f = %4d %4d\n",
x * CERTAINTY_BUCKET_SIZE,
tally_entry (best_certainties[0], x),
tally_entry (best_certainties[1], x));
PrintIntMatcherStats(f);
dj_statistics(f);
fclose(f);
#endif
}