本文整理汇总了 C++ 中 TargetPhrase::CreateCountInfo 方法的典型用法代码示例。如果您正苦于以下问题：C++ TargetPhrase::CreateCountInfo 方法的具体用法？C++ TargetPhrase::CreateCountInfo 怎么用？C++ TargetPhrase::CreateCountInfo 使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 TargetPhrase 的用法示例。
在下文中一共展示了TargetPhrase::CreateCountInfo方法的1个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: Load
/**
 * Load a "new format" phrase table from a stream into this dictionary.
 *
 * Every input line is split on the "|||" separator. The fields are read as:
 *   [0] source phrase, [1] target phrase, [2] alignment, [3] scores,
 *   [4] optional field that this function does not read,
 *   [5] optional count info, handed to TargetPhrase::CreateCountInfo.
 *
 * For each accepted line a TargetPhrase is heap-allocated, filled in and
 * passed to AddEquivPhrase() together with the collection returned by
 * GetOrCreateTargetPhraseCollection(). After the whole stream has been read,
 * m_collection is sorted with m_tableLimit.
 *
 * The process is terminated with abort() when a line has fewer than 4 or
 * more than 6 fields, or when the number of scores on a line differs from
 * m_numScoreComponent.
 *
 * @param input          factor types used to parse the source phrase
 * @param output         factor types used to parse the target phrase
 * @param inStream       stream the phrase table is read from, line by line
 * @param weight         weights forwarded to TargetPhrase::SetScoreChart
 * @param tableLimit     not read by this function (m_tableLimit is used)
 * @param languageModels language models forwarded to SetScoreChart
 * @param weightWP       not read by this function
 * @return always true; malformed input aborts instead of returning false
 */
bool PhraseDictionaryNewFormat::Load(const std::vector<FactorType> &input
                                     , const std::vector<FactorType> &output
                                     , std::istream &inStream
                                     , const std::vector<float> &weight
                                     , size_t tableLimit
                                     , const LMList &languageModels
                                     , float weightWP)
{
  PrintUserTime("Start loading new format pt model");

  const StaticData &staticData = StaticData::Instance();
  const std::string& factorDelimiter = staticData.GetFactorDelimiter();

  VERBOSE(2,"PhraseDictionaryNewFormat: input=" << m_inputFactors << " output=" << m_outputFactors << std::endl);

  string line;
  // 1-based number of the line currently being processed. It is advanced at
  // the top of the loop so that skipped entries are counted too and every
  // diagnostic below points at the real line in the file.
  size_t count = 0;

  while(getline(inStream, line))
  {
    ++count;

    vector<string> tokens;
    vector<float> scoreVector;

    TokenizeMultiCharSeparator(tokens, line , "|||" );

    // Four fields are mandatory; a fifth and a sixth are optional. Six must
    // be accepted here, otherwise the count-info branch further down (which
    // reads tokens[5]) can never be reached.
    if (tokens.size() < 4 || tokens.size() > 6)
    {
      stringstream strme;
      strme << "Syntax error at " << m_filePath << ":" << count;
      UserMessage::Add(strme.str());
      abort();
    }

    const string &sourcePhraseString = tokens[0]
               , &targetPhraseString = tokens[1]
               , &alignString        = tokens[2]
               , &scoreString        = tokens[3];

    // NOTE(review): the test below inspects the SOURCE phrase string while
    // the message talks about an empty target - confirm which one is meant.
    bool isLHSEmpty = (sourcePhraseString.find_first_not_of(" \t", 0) == string::npos);
    if (isLHSEmpty && !staticData.IsWordDeletionEnabled()) {
      TRACE_ERR( m_filePath << ":" << count << ": pt entry contains empty target, skipping\n");
      continue;
    }

    Tokenize<float>(scoreVector, scoreString);
    if (scoreVector.size() != m_numScoreComponent)
    {
      stringstream strme;
      strme << "Size of scoreVector != number (" <<scoreVector.size() << "!=" <<m_numScoreComponent<<") of score components on line " << count;
      UserMessage::Add(strme.str());
      abort();
    }

    // parse source & find pt node

    // head words (left-hand sides) filled in while parsing the two phrases
    Word sourceLHS, targetLHS;

    // source
    Phrase sourcePhrase(Input);
    sourcePhrase.CreateFromStringNewFormat(Input, input, sourcePhraseString, factorDelimiter, sourceLHS);

    // create target phrase obj; the pointer is handed to AddEquivPhrase below
    TargetPhrase *targetPhrase = new TargetPhrase(Output);
    targetPhrase->CreateFromStringNewFormat(Output, output, targetPhraseString, factorDelimiter, targetLHS);

    // alignment
    list<pair<size_t,size_t> > alignmentInfo;
    CreateAlignmentInfo(alignmentInfo, alignString);

    // rest of target phrase
    targetPhrase->SetAlignmentInfo(alignmentInfo);
    targetPhrase->SetTargetLHS(targetLHS);
    //targetPhrase->SetDebugOutput(string("New Format pt ") + line);

    // component score, for n-best output: transform first, then floor
    std::transform(scoreVector.begin(),scoreVector.end(),scoreVector.begin(),TransformScore);
    std::transform(scoreVector.begin(),scoreVector.end(),scoreVector.begin(),FloorScore);

    targetPhrase->SetScoreChart(GetFeature(), scoreVector, weight, languageModels);

    // count info for backoff (only present on six-field lines)
    if (tokens.size() >= 6)
      targetPhrase->CreateCountInfo(tokens[5]);

    TargetPhraseCollection &phraseColl = GetOrCreateTargetPhraseCollection(sourcePhrase, *targetPhrase);
    AddEquivPhrase(phraseColl, targetPhrase);
  }

  // cleanup cache

  // sort each target phrase collection
  m_collection.Sort(m_tableLimit);

  return true;
}