本文整理汇总了 C++ 中 PhraseAlignment::match 方法的典型用法代码示例。如果您正苦于以下问题：C++ PhraseAlignment::match 方法的具体用法？C++ PhraseAlignment::match 怎么用？C++ PhraseAlignment::match 使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 PhraseAlignment 的用法示例。
在下文中一共展示了 PhraseAlignment::match 方法的 2 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 C++ 代码示例。
示例1: computeCountOfCounts
/**
 * Builds the counts-of-counts histogram from an extract file and derives the
 * discount factors from it.
 *
 * The file is read line by line. Consecutive lines that are textually
 * identical, or whose phrase pairs match() each other, are merged into one
 * phrase pair by accumulating their counts. Every distinct phrase pair is then
 * tallied once into the global countOfCounts[] (indices 1..GT_MAX), using its
 * count rounded up to an integer. Finally discountFactor[0..GT_MAX-1] is
 * filled in, with discountFactor[0] acting as a fixed floor.
 *
 * Progress and the resulting factors are written to cerr.
 *
 * @param fileNameExtract  path of the extract file to read; the process exits
 *                         with status 1 if it cannot be opened.
 */
void computeCountOfCounts( char* fileNameExtract )
{
  cerr << "computing counts of counts";
  for(int i=1; i<=GT_MAX; i++) countOfCounts[i] = 0;

  ifstream extractFile;
  extractFile.open( fileNameExtract );
  if (extractFile.fail()) {
    cerr << "ERROR: could not open extract file " << fileNameExtract << endl;
    exit(1);
  }
  istream &extractFileP = extractFile;

  // loop through all extracted phrase translations
  int i=0;
  char line[LINE_MAX_LENGTH],lastLine[LINE_MAX_LENGTH];
  lastLine[0] = '\0';
  PhraseAlignment *lastPhrasePair = NULL;
  while(true) {
    if (extractFileP.eof()) break;
    if (++i % 100000 == 0) cerr << "." << flush;
    SAFE_GETLINE((extractFileP), line, LINE_MAX_LENGTH, '\n', __FILE__);
    if (extractFileP.eof()) break;

    // identical to last line? just add count
    // (the NULL check matters for an empty first line, which compares equal
    // to the initially empty lastLine before any phrase pair exists)
    if (lastPhrasePair != NULL && strcmp(line,lastLine) == 0) {
      lastPhrasePair->addToCount( line );
      continue;
    }
    strcpy( lastLine, line );

    // create new phrase pair
    PhraseAlignment *phrasePair = new PhraseAlignment();
    phrasePair->create( line, i );

    // the very first phrase pair has nothing to be compared against yet
    if (lastPhrasePair == NULL) {
      lastPhrasePair = phrasePair;
      continue;
    }

    // only differs in count? just add count
    if (lastPhrasePair->match( *phrasePair )) {
      lastPhrasePair->count += phrasePair->count;
      phrasePair->clear();
      delete(phrasePair);
      continue;
    }

    // periodically house cleaning
    if (phrasePair->GetSource() != lastPhrasePair->GetSource()) {
      phraseTableT.clear(); // these would get too big
      phraseTableS.clear(); // these would get too big
      // process line again, since phrase tables flushed
      phrasePair->clear();
      phrasePair->create( line, i );
    }

    // the previous phrase pair is now complete: tally its (rounded-up) count
    int count = static_cast<int>(lastPhrasePair->count + 0.99999);
    if(count <= GT_MAX)
      countOfCounts[ count ]++;
    lastPhrasePair->clear();
    delete( lastPhrasePair );
    lastPhrasePair = phrasePair;
  }

  // A pair is only tallied inside the loop once a different successor shows
  // up, so the final pair of the file still has to be tallied here before it
  // is released (same clear-then-delete sequence as inside the loop).
  if (lastPhrasePair != NULL) {
    int count = static_cast<int>(lastPhrasePair->count + 0.99999);
    if(count <= GT_MAX)
      countOfCounts[ count ]++;
    lastPhrasePair->clear();
    delete lastPhrasePair;
    lastPhrasePair = NULL;
  }

  discountFactor[0] = 0.01; // floor
  cerr << "\n";
  for(int i=1; i<GT_MAX; i++) {
    discountFactor[i] = ((float)i+1)/(float)i*(((float)countOfCounts[i+1]+0.1) / ((float)countOfCounts[i]+0.1));
    // the raw (unsmoothed) factor is what gets reported first
    cerr << "count " << i << ": " << countOfCounts[ i ] << ", discount factor: " << discountFactor[i];
    // some smoothing: cap at 1 and never drop below the previous factor
    if (discountFactor[i]>1)
      discountFactor[i] = 1;
    if (discountFactor[i]<discountFactor[i-1])
      discountFactor[i] = discountFactor[i-1];
    cerr << " -> " << discountFactor[i]*i << endl;
  }
}
示例2: computeCountOfCounts
/**
 * Builds the counts-of-counts histogram from an extract file and derives the
 * discount factors from it.
 *
 * Lines are pulled from a Bz2LineReader until it returns an empty string.
 * Consecutive lines that are textually identical, or whose phrase pairs
 * match() each other, are merged into one phrase pair by accumulating their
 * counts. Every distinct phrase pair is then tallied once into the global
 * countOfCounts[] (indices 1..GT_MAX), using its count rounded up to an
 * integer. Finally discountFactor[0..GT_MAX-1] is filled in, with
 * discountFactor[0] acting as a fixed floor.
 *
 * Progress and the resulting factors are written to cerr.
 *
 * @param fileNameExtract  name of the extract file; "-" (piped input) is
 *                         rejected and terminates the process with status 9.
 */
void computeCountOfCounts(const string& fileNameExtract) {
  if (fileNameExtract == "-") {
    cerr << "The ‘GoodTuring Discounting’ option may not be used with piped input!" << endl;
    exit(9);
  }
  cerr << "computing counts of counts";
  for (size_t i=1; i<=GT_MAX; ++i) countOfCounts[i] = 0;

  Bz2LineReader extractFile(fileNameExtract);

  // loop through all extracted phrase translations
  int i=0;
  string lastLine;
  PhraseAlignment *lastPhrasePair = NULL;
  // an empty string from readLine() ends the loop
  for (string line = extractFile.readLine(); !line.empty(); line = extractFile.readLine()) {
    if ((++i)%10000000 == 0) cerr << "[" << i << "]" << endl;
    else if (i % 100000 == 0) cerr << "," << flush;

    // identical to last line? just add count
    if (lastPhrasePair != NULL && line == lastLine) {
      lastPhrasePair->addToCount(line);
      continue;
    }
    lastLine = line;

    // create new phrase pair
    PhraseAlignment *phrasePair = new PhraseAlignment();
    vector<string> lineVector = tokenize(line.c_str());
    phrasePair->create(lineVector, i);

    // the very first phrase pair has nothing to be compared against yet
    if (lastPhrasePair == NULL) {
      lastPhrasePair = phrasePair;
      continue;
    }

    // only differs in count? just add count
    if (lastPhrasePair->match( *phrasePair )) {
      lastPhrasePair->count += phrasePair->count;
      phrasePair->clear();
      delete(phrasePair);
      continue;
    }

    // periodically house cleaning
    if (phrasePair->source != lastPhrasePair->source) {
      phraseTableT.clear(); // these would get too big
      phraseTableS.clear(); // these would get too big
      // process line again, since phrase tables flushed
      phrasePair->clear();
      phrasePair->create(lineVector, i);
    }

    // the previous phrase pair is now complete: tally its (rounded-up) count
    int count = static_cast<int>(lastPhrasePair->count + 0.99999);
    if(count <= GT_MAX)
      ++countOfCounts[ count ];
    lastPhrasePair->clear();
    delete( lastPhrasePair );
    lastPhrasePair = phrasePair;
  }

  // A pair is only tallied inside the loop once a different successor shows
  // up, so the final pair of the file still has to be tallied here. It also
  // has to be released, using the same clear-then-delete sequence as the loop.
  if (lastPhrasePair != NULL) {
    int count = static_cast<int>(lastPhrasePair->count + 0.99999);
    if(count <= GT_MAX)
      ++countOfCounts[ count ];
    lastPhrasePair->clear();
    delete lastPhrasePair;
    lastPhrasePair = NULL;
  }

  discountFactor[0] = 0.01; // floor
  cerr << "\n";
  for(int i=1; i<GT_MAX; ++i) {
    discountFactor[i] = ((float)i+1)/(float)i*(((float)countOfCounts[i+1]+0.1) / ((float)countOfCounts[i]+0.1));
    // the raw (unsmoothed) factor is what gets reported first
    cerr << "count " << i << ": " << countOfCounts[ i ] << ", discount factor: " << discountFactor[i];
    // some smoothing: cap at 1 and never drop below the previous factor
    if (discountFactor[i]>1)
      discountFactor[i] = 1;
    if (discountFactor[i]<discountFactor[i-1])
      discountFactor[i] = discountFactor[i-1];
    cerr << " -> " << discountFactor[i]*i << endl;
  }
}