本文整理汇总了C++中Pattern::n方法的典型用法代码示例。如果您正苦于以下问题：C++ Pattern::n方法的具体用法？C++ Pattern::n怎么用？C++ Pattern::n使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类Pattern的用法示例。
在下文中一共展示了Pattern::n方法的2个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: LanguageModel::LanguageModel
/**
 * Build a language model by reading an ARPA-style n-gram text file.
 *
 * The reader recognises the following line types:
 *   - the "\data\" header, followed by "ngram N=count" lines that fill `total`;
 *   - "\N-grams:" section headers, which set `order` to N;
 *   - entry lines of the form "log10prob <TAB> ngram [<TAB> log10backoff]";
 *   - the "\end\" terminator, which is skipped.
 * Log-probabilities are converted from base 10 to base e before being stored
 * in `ngrams` / `backoff`. N-grams that the encoder maps to UNKPATTERN are
 * dropped; only the literal "<unk>" entry is stored under UNKPATTERN.
 *
 * @param filename      path of the language model file to read
 * @param encoder       class encoder used to turn n-gram text into a Pattern
 * @param classdecoder  stored in the instance; not used while loading
 * @param debug         when true, every stored entry is echoed to stderr
 *
 * Terminates the process with exit code 3 when the file cannot be opened or
 * when the model contains no "<unk>" entry.
 */
LanguageModel::LanguageModel(const std::string & filename, ClassEncoder & encoder, ClassDecoder * classdecoder, bool debug) {
    this->DEBUG = debug;
    this->classdecoder = classdecoder;
    order = 0;
    bool hasunk = false;

    ifstream f;
    f.open(filename.c_str(), ios::in);
    if ((!f) || (!f.good())) {
        cerr << "File does not exist: " << filename << endl;
        exit(3);
    }

    const double ln10 = log(10.0); // multiplying by ln(10) converts log10 values to natural log
    const string sectionsuffix = "-grams:";

    string line;
    // Loop on the result of getline rather than on eof(): a stream that fails
    // without reaching EOF would otherwise never terminate the loop.
    while (getline(f, line)) {
        // Detect a "\N-grams:" section header for any positive N (not just 1..9).
        int sectionorder = 0;
        if ((line.size() > sectionsuffix.size() + 1) && (line[0] == '\\')) {
            const string::size_type suffixpos = line.size() - sectionsuffix.size();
            if ((line.compare(suffixpos, sectionsuffix.size(), sectionsuffix) == 0)
                    && (line.find_first_not_of("0123456789", 1) == suffixpos)) {
                sectionorder = atoi(line.substr(1, suffixpos - 1).c_str());
            }
        }

        if (line == "\\data\\") {
            order = 0;
        } else if (sectionorder > 0) {
            order = sectionorder;
        } else if (line == "\\end\\") {
            // End-of-model marker: nothing to store, and not worth a warning.
            continue;
        } else if (!line.empty()) {
            if (order == 0) {
                // Header area: only "ngram N=count" lines carry information.
                if (line.compare(0, 5, "ngram") == 0) {
                    // Split around '=' instead of using fixed offsets, so short
                    // lines cannot throw and multi-digit orders parse correctly.
                    const string::size_type eq = line.find('=', 6);
                    if ((eq != string::npos) && (eq > 6)) {
                        const int n = atoi(line.substr(6, eq - 6).c_str());
                        const int v = atoi(line.substr(eq + 1).c_str());
                        // NOTE(review): assumes `total` can be indexed by any parsed
                        // order (e.g. a map keyed by int) - confirm against the class.
                        total[n] = v;
                    } else {
                        cerr << "WARNING: Ignoring malformed ngram count line: " << line << endl;
                    }
                }
            } else if (order > 0) {
                string logprob_s = "";
                string backofflogprob_s = "";
                string ngramcontent = "";

                // Split the line into up to three tab-separated fields; any
                // further fields are counted but ignored.
                int fields = 0;
                string::size_type begin = 0;
                while (begin <= line.size()) {
                    string::size_type end = line.find_first_of("\t\n", begin);
                    if (end == string::npos) end = line.size();
                    if (fields == 0) {
                        logprob_s = line.substr(begin, end - begin);
                    } else if (fields == 1) {
                        ngramcontent = line.substr(begin, end - begin);
                    } else if (fields == 2) {
                        backofflogprob_s = line.substr(begin, end - begin);
                    }
                    begin = end + 1;
                    fields++;
                }

                if ((!logprob_s.empty()) && (!ngramcontent.empty())) {
                    if (ngramcontent == "<unk>") {
                        ngrams[UNKPATTERN] = atof(logprob_s.c_str()) * ln10;
                        hasunk = true;
                        if (DEBUG) {
                            cerr << " Adding UNKNOWN to LM: " << (int) UNKPATTERN.n() << "\t" << ngramcontent << "\t" << ngrams[UNKPATTERN] << endl;
                        }
                    } else {
                        Pattern ngram = encoder.buildpattern(ngramcontent);
                        // Anything the encoder could not represent comes back as
                        // UNKPATTERN and is left out of the model.
                        if (ngram != UNKPATTERN) {
                            ngrams[ngram] = atof(logprob_s.c_str()) * ln10;
                            if (!backofflogprob_s.empty()) {
                                backoff[ngram] = atof(backofflogprob_s.c_str()) * ln10;
                                if (DEBUG) cerr << " Adding to LM: " << (int) ngram.n() << "\t" << ngramcontent << "\t" << ngrams[ngram] << "\t" << backoff[ngram] << endl;
                            } else {
                                if (DEBUG) cerr << " Adding to LM: " << (int) ngram.n() << "\t" << ngramcontent << "\t" << ngrams[ngram] << endl;
                            }
                        }
                    }
                } else {
                    cerr << "WARNING: Ignoring line: " << line << endl;
                }
            } else {
                cerr << "WARNING: Don't know what to do with line: " << line << endl;
            }
        }
    }
    f.close();

    if (!hasunk) {
        cerr << "ERROR: Language Model has no value <unk>, make sure to generate SRILM model with -unk parameter" << endl;
        exit(3);
    }
}
示例2: loadmosesphrasetable
void loadmosesphrasetable(PatternAlignmentModel<double> & model, const std::string & filename, ClassEncoder & sourceencoder, ClassEncoder & targetencoder, PatternSetModel * constrainsourcemodel = NULL, PatternSetModel * constraintargetmodel = NULL, int max_sourcen =0, const double pts=0, const double pst=0, const double joinedthreshold=0, const double divergencefrombestthreshold=0.0, const std::string delimiter = "|||", const int score_column=3, const int pstfield = 0, const int ptsfield=2, const int maxscores = 10)
{
unsigned int added = 0;
unsigned int skipped = 0;
unsigned int constrained = 0;
unsigned int count = 0;
PatternSetModel firstwords;
if (constrainsourcemodel != NULL) {
cerr << "(Inferring extra contraints from source model, for faster discarding of patterns)" << endl;
for (PatternSetModel::iterator iter = constrainsourcemodel->begin(); iter != constrainsourcemodel->end(); iter++) {
const Pattern pattern = *iter;
const Pattern firstword = pattern[0];
firstwords.insert(firstword);
}
cerr << "(added " << firstwords.size() << " unigrams)" << endl;
}
//load from moses-style phrasetable file
istream * f = NULL;
if (filename.substr(filename.size()-3) == ".gz") {
cerr << "(Reading from gzip)" << endl;
f = new igzstream(filename.c_str(), ios::in | ios::binary);
} else {
f = new ifstream(filename.c_str(), ios::in | ios::binary);
}
if ((f == NULL) || (!f->good())) {
cerr << "File does not exist: " << filename << endl;
exit(2);
}
vector<BufferItem> buffer;
string firstword;
bool skipsamesource = false;
string prevsource;
string skipfirstword;
string source = "";
string target = "";
string scores_s;
bool abort = false;
int mode = 0;
int begin = 0;
bool updated = false;
string line;
vector<double> scores;
while (!f->eof()) {
line.clear();
getline(*f, line);
count++;
if (count % 100000 == 0) {
cerr << endl;
cerr << "Loading and encoding phrase-table: @" << count << " total added: " << added << ", skipped because of threshold: " << skipped << ", skipped because of constraints: " << constrained;
}
if (count % 1000 == 0) {
if (updated) {
cerr << ":";
} else {
cerr << ".";
}
updated = false;
}
mode = 0;
abort = false;
begin = 0;
source.clear();
target.clear();
scores_s.clear();
const int linesize = line.size();
if (linesize == 0) continue;
for (unsigned int i = 0; i < linesize; i++) {
if (line.substr(i,5) == " ||| ") {
if (mode == 0) {
source = line.substr(begin, i - begin);
int j = 0;
firstword = source;
for (auto c : source) {
if (c == ' ') {
firstword = source.substr(0,j);
break;
}
j++;
}
if (firstword == skipfirstword) {
abort = true;
break;
}
} else if (mode == 1) {
target = line.substr(begin, i - begin);
} else if (mode == 2) {
scores_s = line.substr(begin, i - begin);
}
begin = i+5;
mode++;
}
}
if (mode == 2) {
//.........这里部分代码省略.........