本文整理汇总了C++中PhraseAlignment类的典型用法代码示例。如果您正苦于以下问题：C++ PhraseAlignment类的具体用法？C++ PhraseAlignment怎么用？C++ PhraseAlignment使用的例子？那么，这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了PhraseAlignment类的13个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: Compare
/**
 * Three-way comparison between two phrase alignments.
 *
 * Keys, in order of priority: the target phrase, the source phrase and,
 * only when hierarchicalFlag is set, the alignment set of every target
 * non-terminal.
 *
 * @param other alignment to compare against
 * @return 0 when both compare equal, -1 when this object orders first,
 *         +1 otherwise
 */
int PhraseAlignment::Compare(const PhraseAlignment &other) const
{
  if (this == &other) // comparing with itself
    return 0;

  if (GetTarget() != other.GetTarget())
    return ( GetTarget() < other.GetTarget() ) ? -1 : +1;

  if (GetSource() != other.GetSource())
    return ( GetSource() < other.GetSource() ) ? -1 : +1;

  if (!hierarchicalFlag)
    return 0;

  // Visit every entry of phraseT except the last one. Written as
  // "i + 1 < size" so an empty phraseT cannot underflow the unsigned bound.
  for (size_t i = 0; i + 1 < phraseT.size(); i++) {
    if (!isNonTerminal( vcbT.getWord( phraseT[i] ) ))
      continue;

    // Compare the alignment sets as a whole. The earlier code dereferenced
    // begin() before checking that the set was non-empty (undefined
    // behaviour for an unaligned non-terminal) and returned +1 for two
    // identical non-singleton sets, which made the ordering inconsistent.
    // For the usual singleton sets this is the same comparison as before.
    if (alignedToT[i] != other.alignedToT[i])
      return ( alignedToT[i] < other.alignedToT[i] ) ? -1 : +1;
  }
  return 0;
}
示例2: equals
// Returns true when both objects describe the same phrase pair and carry
// identical word alignments in both directions (alignedToT and alignedToS).
bool PhraseAlignment::equals( const PhraseAlignment& other )
{
  // an object always equals itself
  if (this == &other)
    return true;

  // evaluated left to right, stopping at the first difference found
  const bool differs = other.GetTarget() != GetTarget()
                       || other.GetSource() != GetSource()
                       || other.alignedToT != alignedToT
                       || other.alignedToS != alignedToS;
  return !differs;
}
示例3: Compare
// Three-way comparison: the target side is compared first, then the source
// side. Returns -1, 0 or +1.
int PhraseAlignment::Compare(const PhraseAlignment &other) const
{
  int verdict = 0; // stays 0 for self-comparison and for equal phrase pairs

  if (this != &other) {
    if (GetTarget() != other.GetTarget()) {
      // the target side decides first
      verdict = ( GetTarget() < other.GetTarget() ) ? -1 : +1;
    } else if (GetSource() != other.GetSource()) {
      // same target: fall back to the source side
      verdict = ( GetSource() < other.GetSource() ) ? -1 : +1;
    }
  }
  return verdict;
}
示例4: equals
// Returns true when both objects share the same rule id and have identical
// word alignments in both directions (alignedToT and alignedToS).
bool PhraseAlignment::equals( const PhraseAlignment& other )
{
  bool same = true;

  if (this != &other) {
    // checks run in this order; the first mismatch settles the answer
    if (other.GetRuleId() != this->GetRuleId() ) {
      same = false;
    } else if (other.alignedToT != alignedToT) {
      same = false;
    } else if (other.alignedToS != alignedToS) {
      same = false;
    }
  }
  return same;
}
示例5: match
// Decides whether two word alignments of a phrase pair "match", i.e. they
// cover the same target and source phrases and do not differ in the
// alignment of any non-terminal. Without hierarchicalFlag only the phrases
// themselves are compared.
bool PhraseAlignment::match( const PhraseAlignment& other )
{
  if (this == &other)
    return true;

  if (other.GetTarget() != GetTarget() || other.GetSource() != GetSource())
    return false;

  if (!hierarchicalFlag)
    return true;

  assert(phraseT.size() == alignedToT.size() + 1);
  assert(alignedToT.size() == other.alignedToT.size());

  // walk every entry of phraseT except the last one
  for (size_t pos = 0; pos < phraseT.size()-1; ++pos) {
    if (!isNonTerminal( vcbT.getWord( phraseT[pos] ) ))
      continue; // terminals do not influence the match

    // a non-terminal must be aligned to exactly one point on both sides ...
    const bool bothSingletons = alignedToT[pos].size() == 1
                                && other.alignedToT[pos].size() == 1;
    if (!bothSingletons)
      return false;

    // ... and that point has to be the same one
    if (*(alignedToT[pos].begin()) != *(other.alignedToT[pos].begin()))
      return false;
  }
  return true;
}
示例6: main
/**
 * Entry point of the phrase statistics tool.
 *
 * Usage: statistics extract lex phrase-table [inverse]
 *
 * Loads the lexical table, then reads the extract file line by line.
 * Consecutive phrase pairs that share the same `foreign` id are collected
 * and handed to processPhrasePairs() as one batch; the phrase table file is
 * opened here and closed once the last batch has been processed.
 *
 * Exits with status 1 on a usage error or when a file cannot be opened.
 */
int main(int argc, char* argv[])
{
  cerr << "PhraseStatistics v1.1 written by Nicola Bertoldi\n"
       << "modifying PhraseScore v1.4 written by Philipp Koehn\n"
       << "It computes statistics for extracted phrase pairs\n"
       << "if (direct):\n"
       << "src_phrase ||| trg_phrase || freq(src_phrase, trg_phrase) freq(src_phrase) length(src_phrase) length(trg_phrase)\n"
       << "if (inverse)\n"
       << "src_phrase ||| trg_phrase || freq(src_phrase, trg_phrase) freq(trg_phrase) length(src_phrase) length(trg_phrase)\n";

  if (argc != 4 && argc != 5) {
    cerr << "syntax: statistics extract lex phrase-table [inverse]\n";
    exit(1);
  }
  char* &fileNameExtract = argv[1];
  char* &fileNameLex = argv[2];
  char* &fileNamePhraseTable = argv[3];

  // any fifth argument switches to inverse mode
  inverseFlag = false;
  if (argc > 4) {
    inverseFlag = true;
    cerr << "using inverse mode\n";
  }

  // lexical translation table
  lexTable.load( fileNameLex );

  // sorted phrase extraction file
  Moses::InputFileStream extractFile(fileNameExtract);
  if (extractFile.fail()) {
    cerr << "ERROR: could not open extract file " << fileNameExtract << endl;
    exit(1);
  }
  istream &extractFileP = extractFile;

  // output file: phrase translation table
  phraseTableFile.open(fileNamePhraseTable);
  if (phraseTableFile.fail()) {
    cerr << "ERROR: could not open file phrase table file "
         << fileNamePhraseTable << endl;
    exit(1);
  }

  // loop through all extracted phrase translations
  int lastForeign = -1;
  vector< PhraseAlignment > phrasePairsWithSameF;
  int i=0;
  string line;
  // A successful getline() always delivers a line, even when it also hit
  // end-of-file (last line without trailing newline). The former extra
  // eof() check inside the loop silently discarded that final record.
  while(getline(extractFileP, line)) {
    if (++i % 100000 == 0) cerr << "." << flush;

    PhraseAlignment phrasePair;
    bool isPhrasePair = phrasePair.create( line.c_str(), i );

    // the foreign side changed: flush the batch collected so far
    if (lastForeign >= 0 && lastForeign != phrasePair.foreign) {
      processPhrasePairs( phrasePairsWithSameF );
      for(size_t j=0; j<phrasePairsWithSameF.size(); j++)
        phrasePairsWithSameF[j].clear();
      phrasePairsWithSameF.clear();
      phraseTableE.clear();
      phraseTableF.clear();
      phrasePair.clear(); // process line again, since phrase tables flushed
      phrasePair.create( line.c_str(), i );
      phrasePairBase = 0;
    }
    lastForeign = phrasePair.foreign;

    if (isPhrasePair)
      phrasePairsWithSameF.push_back( phrasePair );
    else
      phrasePairBase++;
  }

  // the last batch is still pending when the input ends
  processPhrasePairs( phrasePairsWithSameF );
  phraseTableFile.close();
}
示例7: main
//.........这里部分代码省略.........
// NOTE: this is the tail of a main() whose beginning was omitted by the
// page this snippet comes from; the flags and file names used below are
// set up in the part that is not shown.
// lexical translation table
if (lexFlag)
lexTable.load( fileNameLex );
// function word list
if (unalignedFWFlag)
loadFunctionWords( fileNameFunctionWords );
// compute count of counts for Good Turing discounting
// (only resets slots 1..COC_MAX here; the counting itself is not visible in this fragment)
if (goodTuringFlag || kneserNeyFlag) {
for(int i=1; i<=COC_MAX; i++) countOfCounts[i] = 0;
}
// sorted phrase extraction file
Moses::InputFileStream extractFile(fileNameExtract);
if (extractFile.fail()) {
cerr << "ERROR: could not open extract file " << fileNameExtract << endl;
exit(1);
}
istream &extractFileP = extractFile;
// output file: phrase translation table
// "-" selects stdout, anything else is opened as a heap-allocated ofstream
// that is closed and deleted at the end of this function
ostream *phraseTableFile;
if (strcmp(fileNamePhraseTable, "-") == 0) {
phraseTableFile = &cout;
}
else {
ofstream *outputFile = new ofstream();
outputFile->open(fileNamePhraseTable);
if (outputFile->fail()) {
cerr << "ERROR: could not open file phrase table file "
<< fileNamePhraseTable << endl;
exit(1);
}
phraseTableFile = outputFile;
}
// loop through all extracted phrase translations
// lastCount / lastPcfgSum remember the values parsed from the most recent
// distinct line so that exact duplicates can be added without re-parsing
float lastCount = 0.0f;
float lastPcfgSum = 0.0f;
vector< PhraseAlignment > phrasePairsWithSameF;
int i=0;
char line[LINE_MAX_LENGTH],lastLine[LINE_MAX_LENGTH];
lastLine[0] = '\0';
// points at the last element of phrasePairsWithSameF, or NULL when the
// current batch is empty
PhraseAlignment *lastPhrasePair = NULL;
while(true) {
if (extractFileP.eof()) break;
if (++i % 100000 == 0) cerr << "." << flush;
SAFE_GETLINE((extractFileP), line, LINE_MAX_LENGTH, '\n', __FILE__);
if (extractFileP.eof()) break;
// identical to last line? just add count
// NOTE(review): lastLine starts out empty and lastPhrasePair starts out
// NULL, so an empty first input line would dereference NULL here -- confirm
// whether SAFE_GETLINE can deliver an empty line.
if (strcmp(line,lastLine) == 0) {
lastPhrasePair->count += lastCount;
lastPhrasePair->pcfgSum += lastPcfgSum;
continue;
}
strcpy( lastLine, line );
// create new phrase pair
PhraseAlignment phrasePair;
phrasePair.create( line, i );
lastCount = phrasePair.count;
lastPcfgSum = phrasePair.pcfgSum;
// only differs in count? just add count
if (lastPhrasePair != NULL && lastPhrasePair->equals( phrasePair )) {
lastPhrasePair->count += phrasePair.count;
lastPhrasePair->pcfgSum += phrasePair.pcfgSum;
continue;
}
// if new source phrase, process last batch
if (lastPhrasePair != NULL &&
lastPhrasePair->GetSource() != phrasePair.GetSource()) {
processPhrasePairs( phrasePairsWithSameF, *phraseTableFile );
phrasePairsWithSameF.clear();
lastPhrasePair = NULL;
}
// add phrase pairs to list, it's now the last one
// the pointer is refreshed after every push_back, so a reallocation of the
// vector cannot leave it dangling
phrasePairsWithSameF.push_back( phrasePair );
lastPhrasePair = &phrasePairsWithSameF.back();
}
// the final batch is still pending once the input is exhausted
processPhrasePairs( phrasePairsWithSameF, *phraseTableFile );
phraseTableFile->flush();
// only the ofstream created above is closed and freed; cout is left alone
if (phraseTableFile != &cout) {
(dynamic_cast<ofstream*>(phraseTableFile))->close();
delete phraseTableFile;
}
// output count of count statistics
if (goodTuringFlag || kneserNeyFlag) {
writeCountOfCounts( fileNameCountOfCounts );
}
}
示例8: match
// Two phrase alignments "match" when they are the same object or when they
// carry the same rule id.
bool PhraseAlignment::match( const PhraseAlignment& other )
{
  const bool sameObject = (this == &other);
  // the rule ids are only consulted for two distinct objects
  return sameObject || !(other.GetRuleId() != this->GetRuleId());
}
示例9: main
// Entry point of the rule scoring tool ("score extract lex phrase-table
// [options]"). Parses the command line into global flags, loads the lexical
// table, optionally computes count-of-counts, opens the extract and phrase
// table files and starts the main loop over the extract file.
// NOTE: the remainder of the loop was omitted by the page this snippet
// comes from, so this block is incomplete.
int main(int argc, char* argv[])
{
cerr << "Score v2.0 written by Philipp Koehn\n"
<< "scoring methods for extracted rules\n";
if (argc < 4) {
cerr << "syntax: score extract lex phrase-table [--Inverse] [--Hierarchical] [--OnlyDirect] [--LogProb] [--NegLogProb] [--NoLex] [--GoodTuring] [--WordAlignment file]\n";
exit(1);
}
// the three positional arguments
char* fileNameExtract = argv[1];
char* fileNameLex = argv[2];
char* fileNamePhraseTable = argv[3];
// optional switches; an unrecognised one aborts the program
for(int i=4;i<argc;i++) {
if (strcmp(argv[i],"inverse") == 0 || strcmp(argv[i],"--Inverse") == 0) {
inverseFlag = true;
cerr << "using inverse mode\n";
}
else if (strcmp(argv[i],"--Hierarchical") == 0) {
hierarchicalFlag = true;
cerr << "processing hierarchical rules\n";
}
else if (strcmp(argv[i],"--OnlyDirect") == 0) {
onlyDirectFlag = true;
cerr << "outputing in correct phrase table format (no merging with inverse)\n";
}
// NOTE(review): the usage text advertises "--WordAlignment file", but no
// file argument is consumed here; a following file name would be rejected
// as an unknown option -- confirm which of the two is intended.
else if (strcmp(argv[i],"--WordAlignment") == 0) {
wordAlignmentFlag = true;
cerr << "outputing word alignment" << endl;
}
else if (strcmp(argv[i],"--NoLex") == 0) {
lexFlag = false;
cerr << "not computing lexical translation score\n";
}
else if (strcmp(argv[i],"--GoodTuring") == 0) {
goodTuringFlag = true;
cerr << "using Good Turing discounting\n";
}
else if (strcmp(argv[i],"--LogProb") == 0) {
logProbFlag = true;
cerr << "using log-probabilities\n";
}
// negative log-probabilities: log mode plus a sign flip via negLogProb
else if (strcmp(argv[i],"--NegLogProb") == 0) {
logProbFlag = true;
negLogProb = -1;
cerr << "using negative log-probabilities\n";
}
else {
cerr << "ERROR: unknown option " << argv[i] << endl;
exit(1);
}
}
// lexical translation table
if (lexFlag)
lexTable.load( fileNameLex );
// compute count of counts for Good Turing discounting
if (goodTuringFlag)
computeCountOfCounts( fileNameExtract );
// sorted phrase extraction file
ifstream extractFile;
extractFile.open(fileNameExtract);
if (extractFile.fail()) {
cerr << "ERROR: could not open extract file " << fileNameExtract << endl;
exit(1);
}
istream &extractFileP = extractFile;
// output file: phrase translation table
phraseTableFile.open(fileNamePhraseTable);
if (phraseTableFile.fail())
{
cerr << "ERROR: could not open file phrase table file "
<< fileNamePhraseTable << endl;
exit(1);
}
// loop through all extracted phrase translations
int lastSource = -1;
vector< PhraseAlignment > phrasePairsWithSameF;
int i=0;
char line[LINE_MAX_LENGTH],lastLine[LINE_MAX_LENGTH];
lastLine[0] = '\0';
PhraseAlignment *lastPhrasePair = NULL;
while(true) {
if (extractFileP.eof()) break;
if (++i % 100000 == 0) cerr << "." << flush;
SAFE_GETLINE((extractFileP), line, LINE_MAX_LENGTH, '\n', __FILE__);
if (extractFileP.eof()) break;
// identical to last line? just add count
// NOTE(review): the guard is "lastSource > 0" although lastSource starts
// at -1; whether 0 is a valid source id, and where lastSource and
// lastPhrasePair get updated, is in the omitted part -- confirm there.
if (lastSource > 0 && strcmp(line,lastLine) == 0)
{
lastPhrasePair->addToCount( line );
continue;
}
strcpy( lastLine, line );
//.........这里部分代码省略.........
示例10: computeCountOfCounts
/**
 * Scans the extract file once and fills the global countOfCounts[] and
 * discountFactor[] tables used for Good Turing discounting.
 *
 * Consecutive lines that are identical, or whose phrase pairs match(), are
 * merged into one phrase pair; each merged pair then increments
 * countOfCounts[c], where c is its count rounded up (count + 0.99999
 * truncated to int), provided c <= GT_MAX.
 *
 * @param fileNameExtract path of the sorted phrase extraction file; the
 *        program exits with status 1 when it cannot be opened
 */
void computeCountOfCounts( char* fileNameExtract )
{
  cerr << "computing counts of counts";
  for(int i=1; i<=GT_MAX; i++) countOfCounts[i] = 0;

  ifstream extractFile;
  extractFile.open( fileNameExtract );
  if (extractFile.fail()) {
    cerr << "ERROR: could not open extract file " << fileNameExtract << endl;
    exit(1);
  }
  istream &extractFileP = extractFile;

  // loop through all extracted phrase translations
  int i=0;
  char line[LINE_MAX_LENGTH],lastLine[LINE_MAX_LENGTH];
  lastLine[0] = '\0';
  PhraseAlignment *lastPhrasePair = NULL; // owned; pair currently being accumulated
  while(true) {
    if (extractFileP.eof()) break;
    if (++i % 100000 == 0) cerr << "." << flush;
    SAFE_GETLINE((extractFileP), line, LINE_MAX_LENGTH, '\n', __FILE__);
    if (extractFileP.eof()) break;

    // identical to last line? just add count
    // (the NULL test protects against an empty first line, which compares
    // equal to the initially empty lastLine before any pair exists)
    if (lastPhrasePair != NULL && strcmp(line,lastLine) == 0) {
      lastPhrasePair->addToCount( line );
      continue;
    }
    strcpy( lastLine, line );

    // create new phrase pair
    PhraseAlignment *phrasePair = new PhraseAlignment();
    phrasePair->create( line, i );

    // very first pair: nothing to compare against yet
    if (lastPhrasePair == NULL) {
      lastPhrasePair = phrasePair;
      continue;
    }

    // only differs in count? just add count
    if (lastPhrasePair->match( *phrasePair )) {
      lastPhrasePair->count += phrasePair->count;
      phrasePair->clear();
      delete(phrasePair);
      continue;
    }

    // periodically house cleaning
    if (phrasePair->GetSource() != lastPhrasePair->GetSource()) {
      phraseTableT.clear(); // these would get too big
      phraseTableS.clear(); // these would get too big
      // process line again, since phrase tables flushed
      phrasePair->clear();
      phrasePair->create( line, i );
    }

    // the previous pair is complete: tally its rounded-up count
    int count = lastPhrasePair->count + 0.99999;
    if(count <= GT_MAX)
      countOfCounts[ count ]++;
    lastPhrasePair->clear();
    delete( lastPhrasePair );
    lastPhrasePair = phrasePair;
  }

  // The pair still pending at end of input was previously deleted without
  // being counted; tally it like every other pair before releasing it.
  if (lastPhrasePair != NULL) {
    int count = lastPhrasePair->count + 0.99999;
    if(count <= GT_MAX)
      countOfCounts[ count ]++;
    lastPhrasePair->clear();
    delete lastPhrasePair;
  }

  discountFactor[0] = 0.01; // floor
  cerr << "\n";
  for(int i=1; i<GT_MAX; i++) {
    discountFactor[i] = ((float)i+1)/(float)i*(((float)countOfCounts[i+1]+0.1) / ((float)countOfCounts[i]+0.1));
    cerr << "count " << i << ": " << countOfCounts[ i ] << ", discount factor: " << discountFactor[i];
    // some smoothing...
    // cap at 1 and keep the factors from decreasing as the count grows
    if (discountFactor[i]>1)
      discountFactor[i] = 1;
    if (discountFactor[i]<discountFactor[i-1])
      discountFactor[i] = discountFactor[i-1];
    cerr << " -> " << discountFactor[i]*i << endl;
  }
}
示例11: main
//.........这里部分代码省略.........
// NOTE: this is the middle part of a main() that the page this snippet
// comes from truncated at both ends; flags, file names and `domain` are
// set up in the part that is not shown.
// reset the count-of-counts slots 1..COC_MAX when a discounting method is active
if (goodTuringFlag || kneserNeyFlag) {
for(int i=1; i<=COC_MAX; i++) countOfCounts[i] = 0;
}
// sorted phrase extraction file
Moses::InputFileStream extractFile(fileNameExtract);
if (extractFile.fail()) {
cerr << "ERROR: could not open extract file " << fileNameExtract << endl;
exit(1);
}
istream &extractFileP = extractFile;
// output file: phrase translation table
ostream *phraseTableFile;
// NOTE(review): this comparison is only a string comparison if
// fileNamePhraseTable is a std::string; for a char* it would compare
// pointers. The declaration is outside this fragment -- confirm.
if (fileNamePhraseTable == "-") {
phraseTableFile = &cout;
}
else {
Moses::OutputFileStream *outputFile = new Moses::OutputFileStream();
bool success = outputFile->Open(fileNamePhraseTable);
if (!success) {
cerr << "ERROR: could not open file phrase table file "
<< fileNamePhraseTable << endl;
exit(1);
}
phraseTableFile = outputFile;
}
// loop through all extracted phrase translations
// lastCount / lastPcfgSum remember the values parsed from the most recent
// distinct line so that exact duplicates can be added without re-parsing
float lastCount = 0.0f;
float lastPcfgSum = 0.0f;
vector< PhraseAlignment > phrasePairsWithSameF;
bool isSingleton = true;
int i=0;
char line[LINE_MAX_LENGTH],lastLine[LINE_MAX_LENGTH];
lastLine[0] = '\0';
PhraseAlignment *lastPhrasePair = NULL;
while(true) {
if (extractFileP.eof()) break;
if (++i % 100000 == 0) cerr << "." << flush;
SAFE_GETLINE((extractFileP), line, LINE_MAX_LENGTH, '\n', __FILE__);
if (extractFileP.eof()) break;
// identical to last line? just add count
// NOTE(review): lastLine starts out empty and lastPhrasePair starts out
// NULL, so an empty first input line would dereference NULL here -- confirm
// whether SAFE_GETLINE can deliver an empty line.
if (strcmp(line,lastLine) == 0) {
lastPhrasePair->count += lastCount;
lastPhrasePair->pcfgSum += lastPcfgSum;
continue;
}
strcpy( lastLine, line );
// create new phrase pair
PhraseAlignment phrasePair;
phrasePair.create( line, i, includeSentenceIdFlag );
lastCount = phrasePair.count;
lastPcfgSum = phrasePair.pcfgSum;
// only differs in count? just add count
// (with domainFlag set, both pairs must additionally come from sentences
// of the same domain)
if (lastPhrasePair != NULL
&& lastPhrasePair->equals( phrasePair )
&& (!domainFlag
|| domain->getDomainOfSentence( lastPhrasePair->sentenceId )
== domain->getDomainOfSentence( phrasePair.sentenceId ) )) {
lastPhrasePair->count += phrasePair.count;
示例12: main
// Entry point of the rule scoring tool ("score extract lex phrase-table
// [options]"). Parses the command line into global flags, loads the lexical
// table, optionally computes count-of-counts, opens the input/output files
// and starts the main loop over the extract file.
// NOTE: the remainder of the loop was omitted by the page this snippet
// comes from, so this block is incomplete.
int main(int argc, char* argv[]) {
cerr << "Score v2.5 written by Philipp Koehn" << endl
<< "Modified by Ventsislav Zhechev, Autodesk Development Sàrl" << endl
<< "scoring methods for extracted rules" << endl
;
if (argc < 4) {
cerr << "syntax: score extract lex phrase-table [--Inverse] [--Hierarchical] [--OnlyDirect] [--LogProb] [--NegLogProb] [--NoLex] [--GoodTuring] [--WordAlignment file]\n";
exit(1);
}
// the three positional arguments
char* fileNameExtract = argv[1];
char* fileNameLex = argv[2];
char* fileNamePhraseTable = argv[3];
// NOTE(review): left uninitialised unless --WordAlignment is given; it is
// read below only when wordAlignmentFlag is set.
char* fileNameWordAlignment;
// optional switches; an unrecognised one aborts the program
for(int i=4; i<argc; ++i) {
if (strcmp(argv[i],"inverse") == 0 || strcmp(argv[i],"--Inverse") == 0) {
inverseFlag = true;
cerr << "using inverse mode\n";
}
else if (strcmp(argv[i],"--Hierarchical") == 0) {
hierarchicalFlag = true;
cerr << "processing hierarchical rules\n";
}
else if (strcmp(argv[i],"--OnlyDirect") == 0) {
onlyDirectFlag = true;
cerr << "outputing in correct phrase table format (no merging with inverse)\n";
}
// consumes the following argument as the word alignment file name
// NOTE(review): there is no check that another argument exists; when
// --WordAlignment is the last argument, argv[++i] is the terminating null
// pointer, which is then streamed to cerr.
else if (strcmp(argv[i],"--WordAlignment") == 0) {
wordAlignmentFlag = true;
fileNameWordAlignment = argv[++i];
cerr << "outputing word alignment in file " << fileNameWordAlignment << endl;
}
else if (strcmp(argv[i],"--NoLex") == 0) {
lexFlag = false;
cerr << "not computing lexical translation score\n";
}
else if (strcmp(argv[i],"--GoodTuring") == 0) {
goodTuringFlag = true;
cerr << "using Good Turing discounting\n";
}
else if (strcmp(argv[i],"--LogProb") == 0) {
logProbFlag = true;
cerr << "using log-probabilities\n";
}
// negative log-probabilities: log mode plus a sign flip via negLogProb
else if (strcmp(argv[i],"--NegLogProb") == 0) {
logProbFlag = true;
negLogProb = -1;
cerr << "using negative log-probabilities\n";
}
else {
cerr << "ERROR: unknown option " << argv[i] << endl;
exit(1);
}
}
// lexical translation table
if (lexFlag)
lexTable.load(fileNameLex);
// compute count of counts for Good Turing discounting
if (goodTuringFlag)
computeCountOfCounts(fileNameExtract);
// sorted phrase extraction file
Bz2LineReader extractFile(fileNameExtract);
// output file: phrase translation table
Bz2LineWriter phraseTableFile(fileNamePhraseTable);
// output word alignment file
// (opened only in direct mode)
if (!inverseFlag && wordAlignmentFlag) {
wordAlignmentFile.open(fileNameWordAlignment);
if (wordAlignmentFile.fail()) {
cerr << "ERROR: could not open word alignment file "
<< fileNameWordAlignment << endl;
exit(1);
}
}
// loop through all extracted phrase translations
int lastSource = -1;
vector< PhraseAlignment > phrasePairsWithSameF;
int i=0;
string lastLine = "";
PhraseAlignment *lastPhrasePair = NULL;
// the loop ends at the first empty string returned by readLine()
// (presumably how Bz2LineReader signals end of input -- TODO confirm)
for (string line = extractFile.readLine(); !line.empty(); line = extractFile.readLine()) {
// redundant with the loop condition above
if (line.empty()) break;
// progress output: a counter every 10,000,000 lines, a dot every 100,000
if ((++i)%10000000 == 0) cerr << "[p. score:" << i << "]" << flush;
else if (i % 100000 == 0) cerr << "." << flush;
// identical to last line? just add count
if (lastSource >= 0 && line == lastLine) {
lastPhrasePair->addToCount(line);
continue;
}
lastLine = line;
// create new phrase pair
PhraseAlignment phrasePair;
//.........这里部分代码省略.........
示例13: computeCountOfCounts
/**
 * Scans the extract file once and fills the global countOfCounts[] and
 * discountFactor[] tables used for Good Turing discounting.
 *
 * Consecutive lines that are identical, or whose phrase pairs match(), are
 * merged into one phrase pair; each merged pair then increments
 * countOfCounts[c], where c is its count rounded up (count + 0.99999
 * truncated to int), provided c <= GT_MAX.
 *
 * @param fileNameExtract path of the sorted phrase extraction file; "-"
 *        (piped input) is rejected with exit status 9 because the file is
 *        read again by the caller
 */
void computeCountOfCounts(const string& fileNameExtract) {
  if (fileNameExtract == "-") {
    cerr << "The ‘GoodTuring Discounting’ option may not be used with piped input!" << endl;
    exit(9);
  }

  cerr << "computing counts of counts";
  for (size_t i = 1; i <= GT_MAX; ++i)
    countOfCounts[i] = 0;

  Bz2LineReader extractFile(fileNameExtract);

  // loop through all extracted phrase translations
  // (the loop ends at the first empty string returned by readLine())
  int i=0;
  string lastLine;
  PhraseAlignment *lastPhrasePair = NULL; // owned; pair currently being accumulated
  for (string line = extractFile.readLine(); !line.empty(); line = extractFile.readLine()) {
    // progress output: a counter every 10,000,000 lines, a comma every 100,000
    if ((++i)%10000000 == 0) cerr << "[" << i << "]" << endl;
    else if (i % 100000 == 0) cerr << "," << flush;

    // identical to last line? just add count
    if (lastPhrasePair != NULL && line == lastLine) {
      lastPhrasePair->addToCount(line);
      continue;
    }
    lastLine = line;

    // create new phrase pair
    PhraseAlignment *phrasePair = new PhraseAlignment();
    vector<string> lineVector = tokenize(line.c_str());
    phrasePair->create(lineVector, i);

    // very first pair: nothing to compare against yet
    if (lastPhrasePair == NULL) {
      lastPhrasePair = phrasePair;
      continue;
    }

    // only differs in count? just add count
    if (lastPhrasePair->match( *phrasePair )) {
      lastPhrasePair->count += phrasePair->count;
      phrasePair->clear();
      delete(phrasePair);
      continue;
    }

    // periodically house cleaning
    if (phrasePair->source != lastPhrasePair->source) {
      phraseTableT.clear(); // these would get too big
      phraseTableS.clear(); // these would get too big
      // process line again, since phrase tables flushed
      phrasePair->clear();
      phrasePair->create(lineVector, i);
    }

    // the previous pair is complete: tally its rounded-up count
    int count = lastPhrasePair->count + 0.99999;
    if(count <= GT_MAX)
      ++countOfCounts[ count ];
    lastPhrasePair->clear();
    delete( lastPhrasePair );
    lastPhrasePair = phrasePair;
  }

  // The pair still pending at end of input used to be neither counted nor
  // freed; tally it like every other pair and release it.
  if (lastPhrasePair != NULL) {
    int count = lastPhrasePair->count + 0.99999;
    if(count <= GT_MAX)
      ++countOfCounts[ count ];
    lastPhrasePair->clear();
    delete lastPhrasePair;
  }

  discountFactor[0] = 0.01; // floor
  cerr << "\n";
  for(int i=1; i<GT_MAX; ++i) {
    discountFactor[i] = ((float)i+1)/(float)i*(((float)countOfCounts[i+1]+0.1) / ((float)countOfCounts[i]+0.1));
    cerr << "count " << i << ": " << countOfCounts[ i ] << ", discount factor: " << discountFactor[i];
    // some smoothing...
    // cap at 1 and keep the factors from decreasing as the count grows
    if (discountFactor[i]>1)
      discountFactor[i] = 1;
    if (discountFactor[i]<discountFactor[i-1])
      discountFactor[i] = discountFactor[i-1];
    cerr << " -> " << discountFactor[i]*i << endl;
  }
}