本文整理汇总了C++中TargetPhrase::Evaluate方法的典型用法代码示例。如果您正苦于以下问题:C++ TargetPhrase::Evaluate方法的具体用法?C++ TargetPhrase::Evaluate怎么用?C++ TargetPhrase::Evaluate使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类TargetPhrase
的用法示例。
在下文中一共展示了TargetPhrase::Evaluate方法的9个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: outStream
std::vector<TargetPhrase*> PhraseDictionaryTransliteration::CreateTargetPhrases(const Phrase &sourcePhrase, const string &outDir) const
{
std::vector<TargetPhrase*> ret;
string outPath = outDir + "/out.txt";
ifstream outStream(outPath.c_str());
string line;
while (getline(outStream, line)) {
vector<string> toks;
Tokenize(toks, line, "\t");
UTIL_THROW_IF2(toks.size() != 2, "Error in transliteration output file. Expecting word\tscore");
TargetPhrase *tp = new TargetPhrase();
Word &word = tp->AddWord();
word.CreateFromString(Output, m_output, toks[0], false);
float score = Scan<float>(toks[1]);
tp->GetScoreBreakdown().PlusEquals(this, score);
// score of all other ff when this rule is being loaded
tp->Evaluate(sourcePhrase, GetFeaturesToApply());
ret.push_back(tp);
}
outStream.close();
return ret;
}
示例2: phrase
const TargetPhraseCollection*
PhraseDictionaryDynSuffixArray::
GetTargetPhraseCollectionLEGACY(const Phrase& src) const
{
typedef map<SAPhrase, vector<float> >::value_type pstat_entry;
map<SAPhrase, vector<float> > pstats; // phrase (pair) statistics
m_biSA->GatherCands(src,pstats);
TargetPhraseCollection *ret = new TargetPhraseCollection();
BOOST_FOREACH(pstat_entry & e, pstats) {
TargetPhrase* tp = m_biSA->GetMosesFactorIDs(e.first, src);
tp->GetScoreBreakdown().Assign(this,e.second);
tp->Evaluate(src);
ret->Add(tp);
}
示例3: CHECK
TargetPhrase *SkeletonPT::CreateTargetPhrase(const Phrase &sourcePhrase) const
{
// create a target phrase from the 1st word of the source, prefix with 'SkeletonPT:'
CHECK(sourcePhrase.GetSize());
CHECK(m_output.size() == 1);
string str = sourcePhrase.GetWord(0).GetFactor(0)->GetString().as_string();
str = "SkeletonPT:" + str;
TargetPhrase *tp = new TargetPhrase();
Word &word = tp->AddWord();
word.CreateFromString(Output, m_output, str, false);
// score for this phrase table
vector<float> scores(m_numScoreComponents, 1.3);
tp->GetScoreBreakdown().PlusEquals(this, scores);
// score of all other ff when this rule is being loaded
tp->Evaluate(sourcePhrase, GetFeaturesToApply());
return tp;
}
示例4: Load
//.........这里部分代码省略.........
vector<float> scoreVector;
StringPiece line;
std::string hiero_before, hiero_after;
double_conversion::StringToDoubleConverter converter(double_conversion::StringToDoubleConverter::NO_FLAGS, NAN, NAN, "inf", "nan");
while(true) {
try {
line = in.ReadLine();
} catch (const util::EndOfFileException &e) {
break;
}
if (format == HieroFormat) { // inefficiently reformat line
hiero_before.assign(line.data(), line.size());
ReformatHieroRule(hiero_before, hiero_after);
line = hiero_after;
}
util::TokenIter<util::MultiCharacter> pipes(line, "|||");
StringPiece sourcePhraseString(*pipes);
StringPiece targetPhraseString(*++pipes);
StringPiece scoreString(*++pipes);
StringPiece alignString;
if (++pipes) {
StringPiece temp(*pipes);
alignString = temp;
}
if (++pipes) {
StringPiece str(*pipes); //counts
}
bool isLHSEmpty = (sourcePhraseString.find_first_not_of(" \t", 0) == string::npos);
if (isLHSEmpty && !staticData.IsWordDeletionEnabled()) {
TRACE_ERR( ruleTable.GetFilePath() << ":" << count << ": pt entry contains empty target, skipping\n");
continue;
}
scoreVector.clear();
for (util::TokenIter<util::AnyCharacter, true> s(scoreString, " \t"); s; ++s) {
int processed;
float score = converter.StringToFloat(s->data(), s->length(), &processed);
UTIL_THROW_IF(isnan(score), util::Exception, "Bad score " << *s << " on line " << count);
scoreVector.push_back(FloorScore(TransformScore(score)));
}
const size_t numScoreComponents = ruleTable.GetNumScoreComponents();
if (scoreVector.size() != numScoreComponents) {
stringstream strme;
strme << "Size of scoreVector != number (" << scoreVector.size() << "!="
<< numScoreComponents << ") of score components on line " << count;
UserMessage::Add(strme.str());
abort();
}
// parse source & find pt node
// constituent labels
Word *sourceLHS;
Word *targetLHS;
// create target phrase obj
TargetPhrase *targetPhrase = new TargetPhrase();
targetPhrase->CreateFromString(Output, output, targetPhraseString, factorDelimiter, &targetLHS);
// source
Phrase sourcePhrase;
sourcePhrase.CreateFromString(Input, input, sourcePhraseString, factorDelimiter, &sourceLHS);
// rest of target phrase
targetPhrase->SetAlignmentInfo(alignString);
targetPhrase->SetTargetLHS(targetLHS);
//targetPhrase->SetDebugOutput(string("New Format pt ") + line);
if (++pipes) {
StringPiece sparseString(*pipes);
targetPhrase->SetSparseScore(&ruleTable, sparseString);
}
if (++pipes) {
StringPiece propertiesString(*pipes);
targetPhrase->SetProperties(propertiesString);
}
targetPhrase->GetScoreBreakdown().Assign(&ruleTable, scoreVector);
targetPhrase->Evaluate(sourcePhrase, ruleTable.GetFeaturesToApply());
TargetPhraseCollection &phraseColl = GetOrCreateTargetPhraseCollection(ruleTable, sourcePhrase, *targetPhrase, sourceLHS);
phraseColl.Add(targetPhrase);
count++;
}
// sort and prune each target phrase collection
SortAndPrune(ruleTable);
return true;
}
示例5: InitializeForInput
//.........这里部分代码省略.........
// copied from class LoaderStandard
PrintUserTime("Start loading fuzzy-match phrase model");
const StaticData &staticData = StaticData::Instance();
const std::string& factorDelimiter = staticData.GetFactorDelimiter();
string lineOrig;
size_t count = 0;
while(getline(inStream, lineOrig)) {
const string *line;
if (format == HieroFormat) { // reformat line
UTIL_THROW(util::Exception, "Cannot be Hiero format");
//line = ReformatHieroRule(lineOrig);
} else {
// do nothing to format of line
line = &lineOrig;
}
vector<string> tokens;
vector<float> scoreVector;
TokenizeMultiCharSeparator(tokens, *line , "|||" );
if (tokens.size() != 4 && tokens.size() != 5) {
stringstream strme;
strme << "Syntax error at " << ptFileName << ":" << count;
UserMessage::Add(strme.str());
abort();
}
const string &sourcePhraseString = tokens[0]
, &targetPhraseString = tokens[1]
, &scoreString = tokens[2]
, &alignString = tokens[3];
bool isLHSEmpty = (sourcePhraseString.find_first_not_of(" \t", 0) == string::npos);
if (isLHSEmpty && !staticData.IsWordDeletionEnabled()) {
TRACE_ERR( ptFileName << ":" << count << ": pt entry contains empty target, skipping\n");
continue;
}
Tokenize<float>(scoreVector, scoreString);
const size_t numScoreComponents = GetNumScoreComponents();
if (scoreVector.size() != numScoreComponents) {
stringstream strme;
strme << "Size of scoreVector != number (" << scoreVector.size() << "!="
<< numScoreComponents << ") of score components on line " << count;
UserMessage::Add(strme.str());
abort();
}
UTIL_THROW_IF2(scoreVector.size() != numScoreComponents,
"Number of scores incorrectly specified");
// parse source & find pt node
// constituent labels
Word *sourceLHS;
Word *targetLHS;
// source
Phrase sourcePhrase( 0);
sourcePhrase.CreateFromString(Input, m_input, sourcePhraseString, factorDelimiter, &sourceLHS);
// create target phrase obj
TargetPhrase *targetPhrase = new TargetPhrase();
targetPhrase->CreateFromString(Output, m_output, targetPhraseString, factorDelimiter, &targetLHS);
// rest of target phrase
targetPhrase->SetAlignmentInfo(alignString);
targetPhrase->SetTargetLHS(targetLHS);
//targetPhrase->SetDebugOutput(string("New Format pt ") + line);
// component score, for n-best output
std::transform(scoreVector.begin(),scoreVector.end(),scoreVector.begin(),TransformScore);
std::transform(scoreVector.begin(),scoreVector.end(),scoreVector.begin(),FloorScore);
targetPhrase->GetScoreBreakdown().Assign(this, scoreVector);
targetPhrase->Evaluate(sourcePhrase, GetFeaturesToApply());
TargetPhraseCollection &phraseColl = GetOrCreateTargetPhraseCollection(rootNode, sourcePhrase, *targetPhrase, sourceLHS);
phraseColl.Add(targetPhrase);
count++;
if (format == HieroFormat) { // reformat line
delete line;
} else {
// do nothing
}
}
// sort and prune each target phrase collection
SortAndPrune(rootNode);
//removedirectoryrecursively(dirName);
}
示例6: ProcessAndStripXMLTags
//.........这里部分代码省略.........
else endPos = atoi(ij[1].c_str()) + 1;
}
VERBOSE(3,"XML TAG " << tagName << " (" << tagContent << ") spanning " << startPos << " to " << (endPos-1) << " complete, commence processing" << endl);
// special tag: wall
if (tagName == "wall") {
size_t start = (startPos == 0) ? 0 : startPos-1;
for(size_t pos = start; pos < endPos; pos++)
walls.push_back( pos );
}
// special tag: zone
else if (tagName == "zone") {
if (startPos >= endPos) {
TRACE_ERR("ERROR: zone must span at least one word: " << line << endl);
return false;
}
reorderingConstraint.SetZone( startPos, endPos-1 );
}
// default: opening tag that specifies translation options
else {
if (startPos >= endPos) {
TRACE_ERR("ERROR: tag " << tagName << " must span at least one word: " << line << endl);
return false;
}
// specified translations -> vector of phrases
// multiple translations may be specified, separated by "||"
vector<string> altTexts = TokenizeMultiCharSeparator(ParseXmlTagAttribute(tagContent,"translation"), "||");
if( altTexts.size() == 1 && altTexts[0] == "" )
altTexts.pop_back(); // happens when nothing specified
// deal with legacy annotations: "translation" was called "english"
vector<string> moreAltTexts = TokenizeMultiCharSeparator(ParseXmlTagAttribute(tagContent,"english"), "||");
if (moreAltTexts.size()>1 || moreAltTexts[0] != "") {
for(vector<string>::iterator translation=moreAltTexts.begin();
translation != moreAltTexts.end();
translation++) {
string t = *translation;
altTexts.push_back( t );
}
}
// specified probabilities for the translations -> vector of probs
vector<string> altProbs = TokenizeMultiCharSeparator(ParseXmlTagAttribute(tagContent,"prob"), "||");
if( altProbs.size() == 1 && altProbs[0] == "" )
altProbs.pop_back(); // happens when nothing specified
// report what we have processed so far
VERBOSE(3,"XML TAG NAME IS: '" << tagName << "'" << endl);
VERBOSE(3,"XML TAG TRANSLATION IS: '" << altTexts[0] << "'" << endl);
VERBOSE(3,"XML TAG PROB IS: '" << altProbs[0] << "'" << endl);
VERBOSE(3,"XML TAG SPAN IS: " << startPos << "-" << (endPos-1) << endl);
if (altProbs.size() > 0 && altTexts.size() != altProbs.size()) {
TRACE_ERR("ERROR: Unequal number of probabilities and translation alternatives: " << line << endl);
return false;
}
// store translation options into members
if (StaticData::Instance().GetXmlInputType() != XmlIgnore) {
// only store options if we aren't ignoring them
for (size_t i=0; i<altTexts.size(); ++i) {
Phrase sourcePhrase; // TODO don't know what the source phrase is
// set default probability
float probValue = 1;
if (altProbs.size() > 0) probValue = Scan<float>(altProbs[i]);
// convert from prob to log-prob
float scoreValue = FloorScore(TransformScore(probValue));
WordsRange range(startPos,endPos-1); // span covered by phrase
TargetPhrase targetPhrase;
targetPhrase.CreateFromString(Output, outputFactorOrder,altTexts[i],factorDelimiter, NULL);
targetPhrase.SetXMLScore(scoreValue);
targetPhrase.Evaluate(sourcePhrase);
XmlOption *option = new XmlOption(range,targetPhrase);
CHECK(option);
res.push_back(option);
}
altTexts.clear();
altProbs.clear();
}
}
}
}
}
// we are done. check if there are tags that are still open
if (tagStack.size() > 0) {
TRACE_ERR("ERROR: some opened tags were never closed: " << line << endl);
return false;
}
// return de-xml'ed sentence in line
line = cleanLine;
return true;
}
示例7: Process
void ChartParserUnknown::Process(const Word &sourceWord, const WordsRange &range, ChartParserCallback &to)
{
// unknown word, add as trans opt
const StaticData &staticData = StaticData::Instance();
const UnknownWordPenaltyProducer &unknownWordPenaltyProducer = UnknownWordPenaltyProducer::Instance();
size_t isDigit = 0;
if (staticData.GetDropUnknown()) {
const Factor *f = sourceWord[0]; // TODO hack. shouldn't know which factor is surface
const StringPiece s = f->GetString();
isDigit = s.find_first_of("0123456789");
if (isDigit == string::npos)
isDigit = 0;
else
isDigit = 1;
// modify the starting bitmap
}
Phrase* unksrc = new Phrase(1);
unksrc->AddWord() = sourceWord;
Word &newWord = unksrc->GetWord(0);
newWord.SetIsOOV(true);
m_unksrcs.push_back(unksrc);
//TranslationOption *transOpt;
if (! staticData.GetDropUnknown() || isDigit) {
// loop
const UnknownLHSList &lhsList = staticData.GetUnknownLHS();
UnknownLHSList::const_iterator iterLHS;
for (iterLHS = lhsList.begin(); iterLHS != lhsList.end(); ++iterLHS) {
const string &targetLHSStr = iterLHS->first;
float prob = iterLHS->second;
// lhs
//const Word &sourceLHS = staticData.GetInputDefaultNonTerminal();
Word *targetLHS = new Word(true);
targetLHS->CreateFromString(Output, staticData.GetOutputFactorOrder(), targetLHSStr, true);
UTIL_THROW_IF2(targetLHS->GetFactor(0) == NULL, "Null factor for target LHS");
// add to dictionary
TargetPhrase *targetPhrase = new TargetPhrase();
Word &targetWord = targetPhrase->AddWord();
targetWord.CreateUnknownWord(sourceWord);
// scores
float unknownScore = FloorScore(TransformScore(prob));
targetPhrase->GetScoreBreakdown().Assign(&unknownWordPenaltyProducer, unknownScore);
targetPhrase->Evaluate(*unksrc);
targetPhrase->SetTargetLHS(targetLHS);
targetPhrase->SetAlignmentInfo("0-0");
if (staticData.IsDetailedTreeFragmentsTranslationReportingEnabled()) {
targetPhrase->SetProperty("Tree","[ " + (*targetLHS)[0]->GetString().as_string() + " "+sourceWord[0]->GetString().as_string()+" ]");
}
// chart rule
to.AddPhraseOOV(*targetPhrase, m_cacheTargetPhraseCollection, range);
} // for (iterLHS
} else {
// drop source word. create blank trans opt
float unknownScore = FloorScore(-numeric_limits<float>::infinity());
TargetPhrase *targetPhrase = new TargetPhrase();
// loop
const UnknownLHSList &lhsList = staticData.GetUnknownLHS();
UnknownLHSList::const_iterator iterLHS;
for (iterLHS = lhsList.begin(); iterLHS != lhsList.end(); ++iterLHS) {
const string &targetLHSStr = iterLHS->first;
//float prob = iterLHS->second;
Word *targetLHS = new Word(true);
targetLHS->CreateFromString(Output, staticData.GetOutputFactorOrder(), targetLHSStr, true);
UTIL_THROW_IF2(targetLHS->GetFactor(0) == NULL, "Null factor for target LHS");
targetPhrase->GetScoreBreakdown().Assign(&unknownWordPenaltyProducer, unknownScore);
targetPhrase->Evaluate(*unksrc);
targetPhrase->SetTargetLHS(targetLHS);
// chart rule
to.AddPhraseOOV(*targetPhrase, m_cacheTargetPhraseCollection, range);
}
}
}
示例8: ProcessAndStripXMLTags
//.........这里部分代码省略.........
}
// assemble remaining information about tag
size_t startPos = openedTag.second.first;
string tagContent = openedTag.second.second;
size_t endPos = wordPos;
// span attribute overwrites position
string span = ParseXmlTagAttribute(tagContent,"span");
if (! span.empty()) {
vector<string> ij = Tokenize(span, "-");
if (ij.size() != 1 && ij.size() != 2) {
TRACE_ERR("ERROR: span attribute must be of the form \"i-j\" or \"i\": " << line << endl);
return false;
}
startPos = atoi(ij[0].c_str());
if (ij.size() == 1) endPos = startPos + 1;
else endPos = atoi(ij[1].c_str()) + 1;
}
VERBOSE(3,"XML TAG " << tagName << " (" << tagContent << ") spanning " << startPos << " to " << (endPos-1) << " complete, commence processing" << endl);
if (startPos >= endPos) {
TRACE_ERR("ERROR: tag " << tagName << " must span at least one word: " << line << endl);
return false;
}
// may be either a input span label ("label"), or a specified output translation "translation"
string label = ParseXmlTagAttribute(tagContent,"label");
string translation = ParseXmlTagAttribute(tagContent,"translation");
// specified label
if (translation.length() == 0 && label.length() > 0) {
WordsRange range(startPos,endPos-1); // really?
XMLParseOutput item(label, range);
sourceLabels.push_back(item);
}
// specified translations -> vector of phrases, separated by "||"
if (translation.length() > 0 && StaticData::Instance().GetXmlInputType() != XmlIgnore) {
vector<string> altTexts = TokenizeMultiCharSeparator(translation, "||");
vector<string> altLabel = TokenizeMultiCharSeparator(label, "||");
vector<string> altProbs = TokenizeMultiCharSeparator(ParseXmlTagAttribute(tagContent,"prob"), "||");
//TRACE_ERR("number of translations: " << altTexts.size() << endl);
for (size_t i=0; i<altTexts.size(); ++i) {
// set target phrase
TargetPhrase targetPhrase;
targetPhrase.CreateFromString(Output, outputFactorOrder,altTexts[i],factorDelimiter, NULL);
// set constituent label
string targetLHSstr;
if (altLabel.size() > i && altLabel[i].size() > 0) {
targetLHSstr = altLabel[i];
} else {
const UnknownLHSList &lhsList = StaticData::Instance().GetUnknownLHS();
UnknownLHSList::const_iterator iterLHS = lhsList.begin();
targetLHSstr = iterLHS->first;
}
Word *targetLHS = new Word(true);
targetLHS->CreateFromString(Output, outputFactorOrder, targetLHSstr, true);
CHECK(targetLHS->GetFactor(0) != NULL);
targetPhrase.SetTargetLHS(targetLHS);
// not tested
Phrase sourcePhrase = this->GetSubString(WordsRange(startPos,endPos-1));
// get probability
float probValue = 1;
if (altProbs.size() > i && altProbs[i].size() > 0) {
probValue = Scan<float>(altProbs[i]);
}
// convert from prob to log-prob
float scoreValue = FloorScore(TransformScore(probValue));
targetPhrase.SetXMLScore(scoreValue);
targetPhrase.Evaluate(sourcePhrase);
// set span and create XmlOption
WordsRange range(startPos+1,endPos);
XmlOption *option = new XmlOption(range,targetPhrase);
CHECK(option);
xmlOptions.push_back(option);
VERBOSE(2,"xml translation = [" << range << "] " << targetLHSstr << " -> " << altTexts[i] << " prob: " << probValue << endl);
}
altTexts.clear();
altProbs.clear();
}
}
}
}
// we are done. check if there are tags that are still open
if (tagStack.size() > 0) {
TRACE_ERR("ERROR: some opened tags were never closed: " << line << endl);
return false;
}
// return de-xml'ed sentence in line
line = cleanLine;
return true;
}
示例9: LoadRuleSection
bool RuleTableLoaderCompact::LoadRuleSection(
LineReader &reader,
const std::vector<Word> &vocab,
const std::vector<Phrase> &sourcePhrases,
const std::vector<Phrase> &targetPhrases,
const std::vector<size_t> &targetLhsIds,
const std::vector<const AlignmentInfo *> &alignmentSets,
RuleTableTrie &ruleTable)
{
// Read rule count.
reader.ReadLine();
const size_t ruleCount = std::atoi(reader.m_line.c_str());
// Read rules and add to table.
const size_t numScoreComponents = ruleTable.GetNumScoreComponents();
std::vector<float> scoreVector(numScoreComponents);
std::vector<size_t> tokenPositions;
for (size_t i = 0; i < ruleCount; ++i) {
reader.ReadLine();
tokenPositions.clear();
FindTokens(tokenPositions, reader.m_line);
const char *charLine = reader.m_line.c_str();
// The first three tokens are IDs for the source phrase, target phrase,
// and alignment set.
const int sourcePhraseId = std::atoi(charLine+tokenPositions[0]);
const int targetPhraseId = std::atoi(charLine+tokenPositions[1]);
const int alignmentSetId = std::atoi(charLine+tokenPositions[2]);
const Phrase &sourcePhrase = sourcePhrases[sourcePhraseId];
const Phrase &targetPhrasePhrase = targetPhrases[targetPhraseId];
const Word *targetLhs = new Word(vocab[targetLhsIds[targetPhraseId]]);
Word sourceLHS("X"); // TODO not implemented for compact
const AlignmentInfo *alignNonTerm = alignmentSets[alignmentSetId];
// Then there should be one score for each score component.
for (size_t j = 0; j < numScoreComponents; ++j) {
float score = std::atof(charLine+tokenPositions[3+j]);
scoreVector[j] = FloorScore(TransformScore(score));
}
if (reader.m_line[tokenPositions[3+numScoreComponents]] != ':') {
std::stringstream msg;
msg << "Size of scoreVector != number ("
<< scoreVector.size() << "!=" << numScoreComponents
<< ") of score components on line " << reader.m_lineNum;
UserMessage::Add(msg.str());
return false;
}
// The remaining columns are currently ignored.
// Create and score target phrase.
TargetPhrase *targetPhrase = new TargetPhrase(targetPhrasePhrase);
targetPhrase->SetAlignNonTerm(alignNonTerm);
targetPhrase->SetTargetLHS(targetLhs);
targetPhrase->SetSourcePhrase(sourcePhrase);
targetPhrase->Evaluate(sourcePhrase, ruleTable.GetFeaturesToApply());
// Insert rule into table.
TargetPhraseCollection &coll = GetOrCreateTargetPhraseCollection(
ruleTable, sourcePhrase, *targetPhrase, &sourceLHS);
coll.Add(targetPhrase);
}
return true;
}