本文整理汇总了C++中Vocabulary::getWord方法的典型用法代码示例。如果您正苦于以下问题：C++ Vocabulary::getWord方法的具体用法？C++ Vocabulary::getWord怎么用？C++ Vocabulary::getWord使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类Vocabulary的用法示例。
在下文中一共展示了Vocabulary::getWord方法的9个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: MatchesAlignment
// Check for equal non-terminal alignment in case of SCFG rules.
//
// otherTargetToSourceAlignment: target-to-source alignment to compare with
//   the one stored for this phrase pair.
// Precondition: otherTargetToSourceAlignment has the same size as
//   m_targetToSourceAlignments.begin()->first
// Returns true if hierarchicalFlag is off, or if every inspected target
// non-terminal is aligned to exactly one source position in both alignments
// and that position is identical; false otherwise.
bool ExtractionPhrasePair::MatchesAlignment( ALIGNMENT *otherTargetToSourceAlignment ) const
{
  if (!hierarchicalFlag) return true;

  // all or none of the phrasePair's word alignment matrices match, so just pick one
  const ALIGNMENT *thisTargetToSourceAlignment = m_targetToSourceAlignments.begin()->first;

  assert(m_phraseTarget->size() == thisTargetToSourceAlignment->size() + 1);
  assert(thisTargetToSourceAlignment->size() == otherTargetToSourceAlignment->size());

  // loop over all symbols but the left hand side of the rule
  // The bound is written as "i+1 < size" rather than "i < size-1" so that an
  // empty alignment does not wrap the unsigned subtraction around.
  // NOTE(review): per the asserts above the alignment has one entry fewer
  // than m_phraseTarget, so this bound leaves the last alignment entry
  // unchecked -- confirm whether that is intended.
  for (size_t i=0; i+1<thisTargetToSourceAlignment->size(); ++i) {
    if (!isNonTerminal( vcbT.getWord( m_phraseTarget->at(i) ) )) {
      continue;
    }
    // A non-terminal must have exactly one alignment point on both sides.
    // Check the sizes first: dereferencing begin() of an empty set is
    // undefined behaviour.
    if (thisTargetToSourceAlignment->at(i).size() != 1 ||
        otherTargetToSourceAlignment->at(i).size() != 1) {
      return false;
    }
    const size_t thisAlign = *(thisTargetToSourceAlignment->at(i).begin());
    const size_t otherAlign = *(otherTargetToSourceAlignment->at(i).begin());
    if (thisAlign != otherAlign) {
      return false;
    }
  }

  return true;
}
示例2: computeUnalignedFWPenalty
double computeUnalignedFWPenalty( const PHRASE &phraseS, const PHRASE &phraseT, PhraseAlignment *alignment )
{
// unaligned word counter
double unaligned = 1.0;
// only checking target words - source words are caught when computing inverse
for(int ti=0; ti<alignment->alignedToT.size(); ti++) {
const set< size_t > & srcIndices = alignment->alignedToT[ ti ];
if (srcIndices.empty() && functionWordList.find( vcbT.getWord( phraseT[ ti ] ) ) != functionWordList.end()) {
unaligned *= 2.718;
}
}
return unaligned;
}
示例3: MapBackToStr
// Maps a sequence of word ids back to their string form.
//
// wid:      word ids to convert.
// tok:      output; resized to wid.size(). tok[i] receives
//           vocab.getWord(wid[i]) unless ShouldIgnore(wid[i], vocab) holds,
//           in which case tok[i] is left as resize() produced it (any value
//           already stored at that position is kept).
// vocab:    vocabulary used for the lookup.
// NT_index: output; cleared, then filled with every position i for which
//           IsNT(wid[i], vocab) holds, in increasing order.
inline void MapBackToStr(const vector<WORD_ID>& wid, vector<WORD>& tok, Vocabulary& vocab, vector<size_t>& NT_index){
  tok.resize(wid.size());
  NT_index.clear();
  // size_t index: matches wid.size() and the element type of NT_index
  for (size_t i = 0; i < wid.size(); ++i) {
    if (!ShouldIgnore(wid[i], vocab)) {
      tok[i] = vocab.getWord(wid[i]);
    }
    if (IsNT(wid[i], vocab)) {
      NT_index.push_back(i);
    }
  }
}
示例4: printTargetPhrase
void printTargetPhrase(const PHRASE &phraseS, const PHRASE &phraseT,
const PhraseAlignment &bestAlignment, ostream &out)
{
// output target symbols, except root, in rule table format
for (int i = 0; i < phraseT.size()-1; ++i) {
const std::string &word = vcbT.getWord(phraseT[i]);
if (!stringToTreeFlag || !isNonTerminal(word)) {
out << word << " ";
continue;
}
// get corresponding source non-terminal and output pair
std::set<size_t> alignmentPoints = bestAlignment.alignedToT[i];
assert(alignmentPoints.size() == 1);
int j = *(alignmentPoints.begin());
if (inverseFlag) {
out << word << vcbS.getWord(phraseS[j]) << " ";
} else {
out << vcbS.getWord(phraseS[j]) << word << " ";
}
}
// output target root symbol
out << vcbT.getWord(phraseT.back());
}
示例5: match
// check if two word alignments between a phrase pairs "match"
// i.e. they do not differ in the alignment of non-termimals
bool PhraseAlignment::match( const PhraseAlignment& other )
{
if (other.target != target || other.source != source) return false;
if (!hierarchicalFlag) return true;
PHRASE phraseT = phraseTableT.getPhrase( target );
assert(phraseT.size() == alignedToT.size() + 1);
assert(alignedToT.size() == other.alignedToT.size());
// loop over all words (note: 0 = left hand side of rule)
for(size_t i=0;i<phraseT.size()-1;++i)
if (isNonTerminal( vcbT.getWord( phraseT[i] ) )) {
if (alignedToT[i].size() != 1 ||
other.alignedToT[i].size() != 1 ||
*(alignedToT[i].begin()) != *(other.alignedToT[i].begin()))
return false;
}
return true;
}
示例6: processPhrasePairs
void processPhrasePairs( vector< PhraseAlignment > &phrasePair )
{
if (phrasePair.size() == 0) return;
map<int, int> countE;
map<int, int> alignmentE;
int totalCount = 0;
int currentCount = 0;
int maxSameCount = 0;
int maxSame = -1;
int old = -1;
for(size_t i=0; i<phrasePair.size(); i++) {
if (i>0) {
if (phrasePair[old].english == phrasePair[i].english) {
if (! phrasePair[i].equals( phrasePair[old] )) {
if (currentCount > maxSameCount) {
maxSameCount = currentCount;
maxSame = i-1;
}
currentCount = 0;
}
} else {
// wrap up old E
if (currentCount > maxSameCount) {
maxSameCount = currentCount;
maxSame = i-1;
}
alignmentE[ phrasePair[old].english ] = maxSame;
// if (maxSameCount != totalCount)
// cout << "max count is " << maxSameCount << "/" << totalCount << endl;
// get ready for new E
totalCount = 0;
currentCount = 0;
maxSameCount = 0;
maxSame = -1;
}
}
countE[ phrasePair[i].english ]++;
old = i;
currentCount++;
totalCount++;
}
// wrap up old E
if (currentCount > maxSameCount) {
maxSameCount = currentCount;
maxSame = phrasePair.size()-1;
}
alignmentE[ phrasePair[old].english ] = maxSame;
// if (maxSameCount != totalCount)
// cout << "max count is " << maxSameCount << "/" << totalCount << endl;
// output table
typedef map< int, int >::iterator II;
PHRASE phraseF = phraseTableF.getPhrase( phrasePair[0].foreign );
size_t index = 0;
for(II i = countE.begin(); i != countE.end(); i++) {
//cout << "\tp( " << i->first << " | " << phrasePair[0].foreign << " ; " << phraseF.size() << " ) = ...\n";
//cerr << index << endl;
// foreign phrase (unless inverse)
if (! inverseFlag) {
for(size_t j=0; j<phraseF.size(); j++) {
phraseTableFile << vcbF.getWord( phraseF[j] );
phraseTableFile << " ";
}
phraseTableFile << "||| ";
}
// english phrase
PHRASE phraseE = phraseTableE.getPhrase( i->first );
for(size_t j=0; j<phraseE.size(); j++) {
phraseTableFile << vcbE.getWord( phraseE[j] );
phraseTableFile << " ";
}
phraseTableFile << "||| ";
// foreign phrase (if inverse)
if (inverseFlag) {
for(size_t j=0; j<phraseF.size(); j++) {
phraseTableFile << vcbF.getWord( phraseF[j] );
phraseTableFile << " ";
}
phraseTableFile << "||| ";
}
// phrase pair frequency
phraseTableFile << i->second;
//source phrase pair frequency
phraseTableFile << " " << phrasePair.size();
// source phrase length
phraseTableFile << " " << phraseF.size();
// target phrase length
phraseTableFile << " " << phraseE.size();
phraseTableFile << endl;
//.........这里部分代码省略.........
示例7: outputPhrasePair
void outputPhrasePair(const PhraseAlignmentCollection &phrasePair, float totalCount, int distinctCount, ostream &phraseTableFile )
{
if (phrasePair.size() == 0) return;
PhraseAlignment *bestAlignment = findBestAlignment( phrasePair );
// compute count
float count = 0;
for(size_t i=0; i<phrasePair.size(); i++) {
count += phrasePair[i]->count;
}
// collect count of count statistics
if (goodTuringFlag || kneserNeyFlag) {
totalDistinct++;
int countInt = count + 0.99999;
if(countInt <= COC_MAX)
countOfCounts[ countInt ]++;
}
// compute PCFG score
float pcfgScore;
if (pcfgFlag && !inverseFlag) {
float pcfgSum = 0;
for(size_t i=0; i<phrasePair.size(); ++i) {
pcfgSum += phrasePair[i]->pcfgSum;
}
pcfgScore = pcfgSum / count;
}
// output phrases
const PHRASE &phraseS = phrasePair[0]->GetSource();
const PHRASE &phraseT = phrasePair[0]->GetTarget();
// do not output if hierarchical and count below threshold
if (hierarchicalFlag && count < minCountHierarchical) {
for(int j=0; j<phraseS.size()-1; j++) {
if (isNonTerminal(vcbS.getWord( phraseS[j] )))
return;
}
}
// source phrase (unless inverse)
if (! inverseFlag) {
printSourcePhrase(phraseS, phraseT, *bestAlignment, phraseTableFile);
phraseTableFile << " ||| ";
}
// target phrase
printTargetPhrase(phraseS, phraseT, *bestAlignment, phraseTableFile);
phraseTableFile << " ||| ";
// source phrase (if inverse)
if (inverseFlag) {
printSourcePhrase(phraseS, phraseT, *bestAlignment, phraseTableFile);
phraseTableFile << " ||| ";
}
// lexical translation probability
if (lexFlag) {
double lexScore = computeLexicalTranslation( phraseS, phraseT, bestAlignment);
phraseTableFile << ( logProbFlag ? negLogProb*log(lexScore) : lexScore );
}
// unaligned word penalty
if (unalignedFlag) {
double penalty = computeUnalignedPenalty( phraseS, phraseT, bestAlignment);
phraseTableFile << " " << ( logProbFlag ? negLogProb*log(penalty) : penalty );
}
// unaligned function word penalty
if (unalignedFWFlag) {
double penalty = computeUnalignedFWPenalty( phraseS, phraseT, bestAlignment);
phraseTableFile << " " << ( logProbFlag ? negLogProb*log(penalty) : penalty );
}
if (pcfgFlag && !inverseFlag) {
// target-side PCFG score
phraseTableFile << " " << pcfgScore;
}
phraseTableFile << " ||| ";
// alignment info for non-terminals
if (! inverseFlag) {
if (hierarchicalFlag) {
// always output alignment if hiero style, but only for non-terms
assert(phraseT.size() == bestAlignment->alignedToT.size() + 1);
for(int j = 0; j < phraseT.size() - 1; j++) {
if (isNonTerminal(vcbT.getWord( phraseT[j] ))) {
if (bestAlignment->alignedToT[ j ].size() != 1) {
cerr << "Error: unequal numbers of non-terminals. Make sure the text does not contain words in square brackets (like [xxx])." << endl;
phraseTableFile.flush();
assert(bestAlignment->alignedToT[ j ].size() == 1);
}
int sourcePos = *(bestAlignment->alignedToT[ j ].begin());
phraseTableFile << sourcePos << "-" << j << " ";
}
}
} else if (wordAlignmentFlag) {
//.........这里部分代码省略.........
示例8: outputPhrasePair
// Writes one phrase table entry to the global phraseTableFile for a group of
// alignments that belong to the same phrase pair.
//
// phrasePair: the alignments of one phrase pair; nothing is written when it
//             is empty. The source and target phrase are taken from the
//             first element.
// totalCount: denominator of the phrase translation score; also written as
//             the last field of the entry.
//
// Entry layout, in output order:
//   source words (only if !inverseFlag), target words,
//   source words (only if inverseFlag), the scores,
//   alignment info (only if !inverseFlag), totalCount.
void outputPhrasePair( vector< PhraseAlignment* > &phrasePair, float totalCount )
{
  if (phrasePair.size() == 0) return;

  PhraseAlignment *bestAlignment = findBestAlignment( phrasePair );

  // compute count
  float count = 0;
  for (size_t i = 0; i < phrasePair.size(); i++) {
    count += phrasePair[i]->count;
  }

  PHRASE phraseS = phraseTableS.getPhrase( phrasePair[0]->GetSource() );
  PHRASE phraseT = phraseTableT.getPhrase( phrasePair[0]->GetTarget() );

  // labels (if hierarchical)

  // source phrase (unless inverse)
  if (! inverseFlag) {
    for (size_t j = 0; j < phraseS.size(); j++) {
      phraseTableFile << vcbS.getWord( phraseS[j] );
      phraseTableFile << " ";
    }
    phraseTableFile << "||| ";
  }

  // target phrase
  for (size_t j = 0; j < phraseT.size(); j++) {
    phraseTableFile << vcbT.getWord( phraseT[j] );
    phraseTableFile << " ";
  }
  phraseTableFile << "||| ";

  // source phrase (if inverse)
  if (inverseFlag) {
    for (size_t j = 0; j < phraseS.size(); j++) {
      phraseTableFile << vcbS.getWord( phraseS[j] );
      phraseTableFile << " ";
    }
    phraseTableFile << "||| ";
  }

  // phrase translation probability
  // (counts below GT_MAX are scaled by discountFactor when goodTuringFlag is set)
  if (goodTuringFlag && count<GT_MAX)
    count *= discountFactor[(int)(count+0.99999)];
  double condScore = count / totalCount;
  phraseTableFile << ( logProbFlag ? negLogProb*log(condScore) : condScore );

  // lexical translation probability
  if (lexFlag) {
    double lexScore = computeLexicalTranslation( phraseS, phraseT, bestAlignment);
    phraseTableFile << " " << ( logProbFlag ? negLogProb*log(lexScore) : lexScore );
  }

  phraseTableFile << " ||| ";

  // alignment info for non-terminals
  if (! inverseFlag) {
    if (hierarchicalFlag) {
      // always output alignment if hiero style, but only for non-terms
      assert(phraseT.size() == bestAlignment->alignedToT.size() + 1);
      // "j + 1 < size" skips the last target symbol without the unsigned
      // wrap-around that "size - 1" has on an empty phrase
      for (size_t j = 0; j + 1 < phraseT.size(); j++) {
        if (isNonTerminal(vcbT.getWord( phraseT[j] ))) {
          assert(bestAlignment->alignedToT[ j ].size() == 1);
          // kept as size_t so the position is not narrowed to int
          const size_t sourcePos = *(bestAlignment->alignedToT[ j ].begin());
          phraseTableFile << sourcePos << "-" << j << " ";
        }
      }
    } else if (wordAlignmentFlag) {
      // alignment info in pb model
      for (size_t j = 0; j < bestAlignment->alignedToT.size(); j++) {
        const set< size_t > &aligned = bestAlignment->alignedToT[j];
        for (set< size_t >::const_iterator p(aligned.begin()); p != aligned.end(); ++p) {
          phraseTableFile << *p << "-" << j << " ";
        }
      }
    }
  }

  phraseTableFile << " ||| " << totalCount;
  phraseTableFile << endl;
}
示例9: outputPhrasePair
// Writes one phrase table entry for a group of alignments of the same phrase
// pair through the given line writer and, if requested, one line to the
// global wordAlignmentFile.
//
// phrasePair:      the alignments of one phrase pair; nothing is written
//                  when it is empty. The phrases come from the first element.
// totalCount:      denominator of the phrase translation score; also written
//                  as the last field of the entry.
// phraseTableFile: writer that receives the entry piece by piece.
void outputPhrasePair(vector<PhraseAlignment*> &phrasePair, float totalCount, Bz2LineWriter& phraseTableFile) {
  if (phrasePair.size() == 0) {
    return;
  }

  PhraseAlignment *bestAlignment = findBestAlignment( phrasePair );

  // sum up the counts of all alignments
  float count = 0.;
  for (size_t idx = 0; idx < phrasePair.size(); ++idx) {
    count += phrasePair[idx]->count;
  }

  PHRASE phraseS = phraseTableS.getPhrase( phrasePair[0]->source );
  PHRASE phraseT = phraseTableT.getPhrase( phrasePair[0]->target );

  // source side first, unless the table is inverted
  if (!inverseFlag) {
    for (size_t s = 0; s < phraseS.size(); ++s) {
      phraseTableFile.writeLine(vcbS.getWord(phraseS[s]) + " ");
    }
    phraseTableFile.writeLine("||| ");
  }

  // target side
  for (size_t t = 0; t < phraseT.size(); ++t) {
    phraseTableFile.writeLine(vcbT.getWord(phraseT[t]) + " ");
  }
  phraseTableFile.writeLine("||| ");

  // source side after the target side when the table is inverted
  if (inverseFlag) {
    for (size_t s = 0; s < phraseS.size(); ++s) {
      phraseTableFile.writeLine(vcbS.getWord(phraseS[s]) + " ");
    }
    phraseTableFile.writeLine("||| ");
  }

  // alignment points of the non-terminals (hierarchical, non-inverted only)
  if (!inverseFlag && hierarchicalFlag) {
    assert(phraseT.size() == bestAlignment->alignedToT.size() + 1);
    // the final target symbol is not visited
    const size_t symbolCount = phraseT.size() - 1;
    for (size_t t = 0; t < symbolCount; ++t) {
      if (!isNonTerminal(vcbT.getWord( phraseT[t] ))) {
        continue;
      }
      assert(bestAlignment->alignedToT[ t ].size() == 1);
      stringstream point;
      point << *(bestAlignment->alignedToT[t].begin()) << "-" << t << " ";
      phraseTableFile.writeLine(point.str());
    }
    phraseTableFile.writeLine("||| ");
  }

  // phrase translation probability; counts below GT_MAX are scaled by
  // discountFactor first when goodTuringFlag is set
  if (goodTuringFlag && count<GT_MAX) {
    count *= discountFactor[(int)(count+0.99999)];
  }
  {
    stringstream score;
    score << (logProbFlag ? negLogProb*log(count / totalCount) : count / totalCount);
    phraseTableFile.writeLine(score.str());
  }

  // lexical translation probability
  if (lexFlag) {
    stringstream score;
    score << " " << (logProbFlag ?
                     negLogProb*log(computeLexicalTranslation(phraseS, phraseT, bestAlignment)) :
                     computeLexicalTranslation(phraseS, phraseT, bestAlignment));
    phraseTableFile.writeLine(score.str());
  }

  // closing field: the total count, then end of line
  {
    stringstream tail;
    tail << " ||| " << totalCount << endl;
    phraseTableFile.writeLine(tail.str());
  }

  // optional output of word alignments
  if (!inverseFlag && wordAlignmentFlag) {
    // source phrase
    for (size_t s = 0; s < phraseS.size(); ++s) {
      wordAlignmentFile << vcbS.getWord(phraseS[s]) << " ";
    }
    wordAlignmentFile << "||| ";

    // target phrase
    for (size_t t = 0; t < phraseT.size(); ++t) {
      wordAlignmentFile << vcbT.getWord(phraseT[t]) << " ";
    }
    wordAlignmentFile << "|||";

    // alignment points, written as " source-target"
    for (size_t t = 0; t < bestAlignment->alignedToT.size(); ++t) {
      const set< size_t > &aligned = bestAlignment->alignedToT[t];
      for (set< size_t >::const_iterator p(aligned.begin()); p != aligned.end(); ++p) {
        wordAlignmentFile << " " << *p << "-" << t;
      }
    }
    wordAlignmentFile << endl;
  }
}