本文整理汇总了C++中TargetPhrase::CreateFromStringNewFormat方法的典型用法代码示例。如果您正苦于以下问题：C++ TargetPhrase::CreateFromStringNewFormat方法的具体用法？C++ TargetPhrase::CreateFromStringNewFormat怎么用？C++ TargetPhrase::CreateFromStringNewFormat使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类TargetPhrase的用法示例。
在下文中一共展示了TargetPhrase::CreateFromStringNewFormat方法的4个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: Load
bool RuleTableLoaderStandard::Load(FormatType format
, const std::vector<FactorType> &input
, const std::vector<FactorType> &output
, const std::string &inFile
, const std::vector<float> &weight
, size_t /* tableLimit */
, const LMList &languageModels
, const WordPenaltyProducer* wpProducer
, RuleTableTrie &ruleTable)
{
PrintUserTime(string("Start loading text SCFG phrase table. ") + (format==MosesFormat?"Moses ":"Hiero ") + " format");
const StaticData &staticData = StaticData::Instance();
const std::string& factorDelimiter = staticData.GetFactorDelimiter();
string lineOrig;
size_t count = 0;
std::ostream *progress = NULL;
IFVERBOSE(1) progress = &std::cerr;
util::FilePiece in(inFile.c_str(), progress);
// reused variables
vector<float> scoreVector;
StringPiece line;
std::string hiero_before, hiero_after;
while(true) {
try {
line = in.ReadLine();
} catch (const util::EndOfFileException &e) { break; }
if (format == HieroFormat) { // inefficiently reformat line
hiero_before.assign(line.data(), line.size());
ReformatHieroRule(hiero_before, hiero_after);
line = hiero_after;
}
util::TokenIter<util::MultiCharacter> pipes(line, "|||");
StringPiece sourcePhraseString(*pipes);
StringPiece targetPhraseString(*++pipes);
StringPiece scoreString(*++pipes);
StringPiece alignString(*++pipes);
// TODO(bhaddow) efficiently handle default instead of parsing this string every time.
StringPiece ruleCountString = ++pipes ? *pipes : StringPiece("1 1");
if (++pipes) {
stringstream strme;
strme << "Syntax error at " << ruleTable.GetFilePath() << ":" << count;
UserMessage::Add(strme.str());
abort();
}
bool isLHSEmpty = (sourcePhraseString.find_first_not_of(" \t", 0) == string::npos);
if (isLHSEmpty && !staticData.IsWordDeletionEnabled()) {
TRACE_ERR( ruleTable.GetFilePath() << ":" << count << ": pt entry contains empty target, skipping\n");
continue;
}
scoreVector.clear();
for (util::TokenIter<util::AnyCharacter, true> s(scoreString, " \t"); s; ++s) {
char *err_ind;
scoreVector.push_back(strtod(s->data(), &err_ind));
UTIL_THROW_IF(err_ind == s->data(), util::Exception, "Bad score " << *s << " on line " << count);
}
const size_t numScoreComponents = ruleTable.GetFeature()->GetNumScoreComponents();
if (scoreVector.size() != numScoreComponents) {
stringstream strme;
strme << "Size of scoreVector != number (" << scoreVector.size() << "!="
<< numScoreComponents << ") of score components on line " << count;
UserMessage::Add(strme.str());
abort();
}
// parse source & find pt node
// constituent labels
Word sourceLHS, targetLHS;
// source
Phrase sourcePhrase( 0);
sourcePhrase.CreateFromStringNewFormat(Input, input, sourcePhraseString, factorDelimiter, sourceLHS);
// create target phrase obj
TargetPhrase *targetPhrase = new TargetPhrase(Output);
targetPhrase->CreateFromStringNewFormat(Output, output, targetPhraseString, factorDelimiter, targetLHS);
targetPhrase->SetSourcePhrase(sourcePhrase);
// rest of target phrase
targetPhrase->SetAlignmentInfo(alignString, sourcePhrase);
targetPhrase->SetTargetLHS(targetLHS);
targetPhrase->SetRuleCount(ruleCountString, scoreVector[0]);
//targetPhrase->SetDebugOutput(string("New Format pt ") + line);
// component score, for n-best output
std::transform(scoreVector.begin(),scoreVector.end(),scoreVector.begin(),TransformScore);
std::transform(scoreVector.begin(),scoreVector.end(),scoreVector.begin(),FloorScore);
targetPhrase->SetScoreChart(ruleTable.GetFeature(), scoreVector, weight, languageModels,wpProducer);
//.........这里部分代码省略.........
示例2: InitializeForInput
void PhraseDictionaryFuzzyMatch::InitializeForInput(InputType const& inputSentence)
{
char dirName[] = "/tmp/moses.XXXXXX";
char *temp = mkdtemp(dirName);
CHECK(temp);
string dirNameStr(dirName);
string inFileName(dirNameStr + "/in");
ofstream inFile(inFileName.c_str());
for (size_t i = 1; i < inputSentence.GetSize() - 1; ++i)
{
inFile << inputSentence.GetWord(i);
}
inFile << endl;
inFile.close();
long translationId = inputSentence.GetTranslationId();
string ptFileName = m_FuzzyMatchWrapper->Extract(translationId, dirNameStr);
// populate with rules for this sentence
PhraseDictionaryNodeSCFG &rootNode = m_collection[translationId];
FormatType format = MosesFormat;
// data from file
InputFileStream inStream(ptFileName);
// copied from class LoaderStandard
PrintUserTime("Start loading fuzzy-match phrase model");
const StaticData &staticData = StaticData::Instance();
const std::string& factorDelimiter = staticData.GetFactorDelimiter();
string lineOrig;
size_t count = 0;
while(getline(inStream, lineOrig)) { //mgjang std add
const string *line;
if (format == HieroFormat) { // reformat line
assert(false);
//line = ReformatHieroRule(lineOrig);
}
else
{ // do nothing to format of line
line = &lineOrig;
}
vector<string> tokens;
vector<float> scoreVector;
TokenizeMultiCharSeparator(tokens, *line , "|||" );
if (tokens.size() != 4 && tokens.size() != 5) {
stringstream strme;
strme << "Syntax error at " << ptFileName << ":" << count;
UserMessage::Add(strme.str());
LOGE("[mgjang] before abort\n");
abort();
}
const string &sourcePhraseString = tokens[0]
, &targetPhraseString = tokens[1]
, &scoreString = tokens[2]
, &alignString = tokens[3];
bool isLHSEmpty = (sourcePhraseString.find_first_not_of(" \t", 0) == string::npos);
if (isLHSEmpty && !staticData.IsWordDeletionEnabled()) {
TRACE_ERR( ptFileName << ":" << count << ": pt entry contains empty target, skipping\n");
continue;
}
Tokenize<float>(scoreVector, scoreString);
const size_t numScoreComponents = GetFeature()->GetNumScoreComponents();
if (scoreVector.size() != numScoreComponents) {
stringstream strme;
strme << "Size of scoreVector != number (" << scoreVector.size() << "!="
<< numScoreComponents << ") of score components on line " << count;
UserMessage::Add(strme.str());
LOGE("[mgjang] before abort\n");
abort();
}
CHECK(scoreVector.size() == numScoreComponents);
// parse source & find pt node
// constituent labels
Word sourceLHS, targetLHS;
// source
Phrase sourcePhrase( 0);
sourcePhrase.CreateFromStringNewFormat(Input, *m_input, sourcePhraseString, factorDelimiter, sourceLHS);
// create target phrase obj
TargetPhrase *targetPhrase = new TargetPhrase();
targetPhrase->CreateFromStringNewFormat(Output, *m_output, targetPhraseString, factorDelimiter, targetLHS);
// rest of target phrase
targetPhrase->SetAlignmentInfo(alignString);
//.........这里部分代码省略.........
示例3: Load
/**
 * Reads a text phrase table from @p inStream and adds every entry to this
 * dictionary.
 *
 * Each line is split on the multi-character separator "|||". The fields are
 * read positionally:
 *   [0] source phrase, [1] target phrase, [2] alignment, [3] scores,
 *   [4] optional, not read by this function,
 *   [5] optional, passed to TargetPhrase::CreateCountInfo.
 *
 * A line with fewer than 4 or more than 6 fields, or whose number of scores
 * differs from m_numScoreComponent, is reported through UserMessage::Add and
 * the process is aborted. A line whose source phrase is blank is skipped
 * unless word deletion is enabled in StaticData.
 *
 * @param input          factor types used to parse the source phrase
 * @param output         factor types used to parse the target phrase
 * @param inStream       stream the table is read from, line by line
 * @param weight         forwarded to TargetPhrase::SetScoreChart
 * @param tableLimit     not used by this function (see the note at the Sort call)
 * @param languageModels forwarded to TargetPhrase::SetScoreChart
 * @param weightWP       not used by this function
 * @return always true; malformed input aborts instead of returning false
 */
bool PhraseDictionaryNewFormat::Load(const std::vector<FactorType> &input
                                     , const std::vector<FactorType> &output
                                     , std::istream &inStream
                                     , const std::vector<float> &weight
                                     , size_t tableLimit
                                     , const LMList &languageModels
                                     , float weightWP)
{
  PrintUserTime("Start loading new format pt model");

  const StaticData &staticData = StaticData::Instance();
  const std::string& factorDelimiter = staticData.GetFactorDelimiter();

  VERBOSE(2,"PhraseDictionaryNewFormat: input=" << m_inputFactors << " output=" << m_outputFactors << std::endl);

  string line;
  // 1-based number of the line currently being processed. It is advanced at
  // the top of the loop so that skipped lines are counted as well and every
  // diagnostic below points at the real line in the file.
  size_t count = 0;

  while(getline(inStream, line))
  {
    ++count;

    vector<string> tokens;
    vector<float> scoreVector;

    TokenizeMultiCharSeparator(tokens, line , "|||" );

    // 4 mandatory fields plus up to 2 optional ones. The upper bound has to
    // admit 6 fields, otherwise the count-info field (tokens[5]) read further
    // down could never be present.
    if (tokens.size() < 4 || tokens.size() > 6)
    {
      stringstream strme;
      strme << "Syntax error at " << m_filePath << ":" << count;
      UserMessage::Add(strme.str());
      abort();
    }

    const string &sourcePhraseString = tokens[0]
               , &targetPhraseString = tokens[1]
               , &alignString        = tokens[2]
               , &scoreString        = tokens[3];

    // The emptiness test is on the source side of the entry.
    bool isLHSEmpty = (sourcePhraseString.find_first_not_of(" \t", 0) == string::npos);
    if (isLHSEmpty && !staticData.IsWordDeletionEnabled()) {
      TRACE_ERR( m_filePath << ":" << count << ": pt entry contains empty source, skipping\n");
      continue;
    }

    Tokenize<float>(scoreVector, scoreString);
    if (scoreVector.size() != m_numScoreComponent)
    {
      stringstream strme;
      strme << "Size of scoreVector != number (" <<scoreVector.size() << "!=" <<m_numScoreComponent<<") of score components on line " << count;
      UserMessage::Add(strme.str());
      abort();
    }

    // parse source & find pt node

    // head word
    Word sourceLHS, targetLHS;

    // source
    Phrase sourcePhrase(Input);
    sourcePhrase.CreateFromStringNewFormat(Input, input, sourcePhraseString, factorDelimiter, sourceLHS);

    // create target phrase obj; the pointer is handed to AddEquivPhrase below
    TargetPhrase *targetPhrase = new TargetPhrase(Output);
    targetPhrase->CreateFromStringNewFormat(Output, output, targetPhraseString, factorDelimiter, targetLHS);

    // alignment
    list<pair<size_t,size_t> > alignmentInfo;
    CreateAlignmentInfo(alignmentInfo, alignString);

    // rest of target phrase
    targetPhrase->SetAlignmentInfo(alignmentInfo);
    targetPhrase->SetTargetLHS(targetLHS);
    //targetPhrase->SetDebugOutput(string("New Format pt ") + line);

    // component score, for n-best output
    // TransformScore, then FloorScore, are applied to every score in place.
    std::transform(scoreVector.begin(),scoreVector.end(),scoreVector.begin(),TransformScore);
    std::transform(scoreVector.begin(),scoreVector.end(),scoreVector.begin(),FloorScore);

    targetPhrase->SetScoreChart(GetFeature(), scoreVector, weight, languageModels);

    // count info for backoff
    if (tokens.size() >= 6)
      targetPhrase->CreateCountInfo(tokens[5]);

    TargetPhraseCollection &phraseColl = GetOrCreateTargetPhraseCollection(sourcePhrase, *targetPhrase);
    AddEquivPhrase(phraseColl, targetPhrase);
  }

  // sort each target phrase collection
  // NOTE(review): this uses the member m_tableLimit, not the tableLimit
  // parameter -- confirm that callers keep the two in sync.
  m_collection.Sort(m_tableLimit);

  return true;
}
示例4: Load
bool RuleTableLoaderStandard::Load(FormatType format
, const std::vector<FactorType> &input
, const std::vector<FactorType> &output
, std::istream &inStream
, const std::vector<float> &weight
, size_t /* tableLimit */
, const LMList &languageModels
, const WordPenaltyProducer* wpProducer
, RuleTableTrie &ruleTable)
{
PrintUserTime(string("Start loading text SCFG phrase table. ") + (format==MosesFormat?"Moses ":"Hiero ") + " format");
const StaticData &staticData = StaticData::Instance();
const std::string& factorDelimiter = staticData.GetFactorDelimiter();
string lineOrig;
size_t count = 0;
while(getline(inStream, lineOrig)) {
const string *line;
if (format == HieroFormat) { // reformat line
line = ReformatHieroRule(lineOrig);
}
else
{ // do nothing to format of line
line = &lineOrig;
}
vector<string> tokens;
vector<float> scoreVector;
TokenizeMultiCharSeparator(tokens, *line , "|||" );
if (tokens.size() != 4 && tokens.size() != 5) {
stringstream strme;
strme << "Syntax error at " << ruleTable.GetFilePath() << ":" << count;
UserMessage::Add(strme.str());
abort();
}
const string &sourcePhraseString = tokens[0]
, &targetPhraseString = tokens[1]
, &scoreString = tokens[2]
, &alignString = tokens[3];
bool isLHSEmpty = (sourcePhraseString.find_first_not_of(" \t", 0) == string::npos);
if (isLHSEmpty && !staticData.IsWordDeletionEnabled()) {
TRACE_ERR( ruleTable.GetFilePath() << ":" << count << ": pt entry contains empty target, skipping\n");
continue;
}
Tokenize<float>(scoreVector, scoreString);
const size_t numScoreComponents = ruleTable.GetFeature()->GetNumScoreComponents();
if (scoreVector.size() != numScoreComponents) {
stringstream strme;
strme << "Size of scoreVector != number (" << scoreVector.size() << "!="
<< numScoreComponents << ") of score components on line " << count;
UserMessage::Add(strme.str());
abort();
}
CHECK(scoreVector.size() == numScoreComponents);
// parse source & find pt node
// constituent labels
Word sourceLHS, targetLHS;
// source
Phrase sourcePhrase( 0);
sourcePhrase.CreateFromStringNewFormat(Input, input, sourcePhraseString, factorDelimiter, sourceLHS);
// create target phrase obj
TargetPhrase *targetPhrase = new TargetPhrase(Output);
targetPhrase->CreateFromStringNewFormat(Output, output, targetPhraseString, factorDelimiter, targetLHS);
// rest of target phrase
targetPhrase->SetAlignmentInfo(alignString);
targetPhrase->SetTargetLHS(targetLHS);
//targetPhrase->SetDebugOutput(string("New Format pt ") + line);
// component score, for n-best output
std::transform(scoreVector.begin(),scoreVector.end(),scoreVector.begin(),TransformScore);
std::transform(scoreVector.begin(),scoreVector.end(),scoreVector.begin(),FloorScore);
targetPhrase->SetScoreChart(ruleTable.GetFeature(), scoreVector, weight, languageModels,wpProducer);
TargetPhraseCollection &phraseColl = GetOrCreateTargetPhraseCollection(ruleTable, sourcePhrase, *targetPhrase, sourceLHS);
phraseColl.Add(targetPhrase);
count++;
if (format == HieroFormat) { // reformat line
delete line;
}
else
{ // do nothing
}
}
//.........这里部分代码省略.........