本文整理汇总了C++中TargetPhrase::CreateFromString方法的典型用法代码示例。如果您正苦于以下问题：C++ TargetPhrase::CreateFromString方法的具体用法？C++ TargetPhrase::CreateFromString怎么用？C++ TargetPhrase::CreateFromString使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类TargetPhrase的用法示例。
在下文中一共展示了TargetPhrase::CreateFromString方法的6个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: Load
// Reads a text SCFG rule table from inFile, one rule per line, and builds a
// TargetPhrase for each rule. (Excerpt: the rest of the loop body and the end
// of the function, including its return statement, are not shown.)
//
// Each line is split on "|||" into: source phrase, target phrase, scores,
// then optionally alignment, counts and sparse scores.
//
//   format    - when HieroFormat, each line is first rewritten by
//               ReformatHieroRule; otherwise the line is used as read
//   input     - factor types passed to the source-side CreateFromString
//   output    - factor types passed to the target-side CreateFromString
//   inFile    - path of the rule table file
//   (unnamed) - the tableLimit parameter is commented out and unused
//   ruleTable - supplies the expected number of scores and the file path used
//               in messages; also passed to SetSparseScore
bool RuleTableLoaderStandard::Load(FormatType format
, const std::vector<FactorType> &input
, const std::vector<FactorType> &output
, const std::string &inFile
, size_t /* tableLimit */
, RuleTableTrie &ruleTable)
{
PrintUserTime(string("Start loading text SCFG phrase table. ") + (format==MosesFormat?"Moses ":"Hiero ") + " format");
const StaticData &staticData = StaticData::Instance();
const std::string& factorDelimiter = staticData.GetFactorDelimiter();
// NOTE(review): lineOrig is not used anywhere in the visible excerpt.
string lineOrig;
// count is used as the line number in messages below; no increment is visible
// in this excerpt.
size_t count = 0;
// progress is handed to FilePiece; it stays NULL unless the IFVERBOSE(1)
// assignment runs.
std::ostream *progress = NULL;
IFVERBOSE(1) progress = &std::cerr;
util::FilePiece in(inFile.c_str(), progress);
// reused variables
vector<float> scoreVector;
StringPiece line;
std::string hiero_before, hiero_after;
// Per double-conversion's constructor argument order, NAN is used as both the
// empty-string value and the junk-string value, with "inf"/"nan" as the
// accepted symbols; the isnan() test in the score loop relies on this.
double_conversion::StringToDoubleConverter converter(double_conversion::StringToDoubleConverter::NO_FLAGS, NAN, NAN, "inf", "nan");
while(true) {
// ReadLine signals end of input by throwing EndOfFileException, which ends
// the loop.
try {
line = in.ReadLine();
} catch (const util::EndOfFileException &e) {
break;
}
if (format == HieroFormat) { // inefficiently reformat line
hiero_before.assign(line.data(), line.size());
ReformatHieroRule(hiero_before, hiero_after);
// From here on, line refers to the reformatted text in hiero_after.
line = hiero_after;
}
// Split the rule on "|||". The first three fields are read unconditionally.
// NOTE(review): there is no check that the line actually has three fields
// before *++pipes is dereferenced — confirm TokenIter's behavior at end.
util::TokenIter<util::MultiCharacter> pipes(line, "|||");
StringPiece sourcePhraseString(*pipes);
StringPiece targetPhraseString(*++pipes);
StringPiece scoreString(*++pipes);
// Optional 4th field: alignment (left empty when absent).
StringPiece alignString;
if (++pipes) {
StringPiece temp(*pipes);
alignString = temp;
}
// Optional 5th field: counts. It is read into a local that is never used.
if (++pipes) {
StringPiece str(*pipes); //counts
}
// True when the source field is empty or contains only spaces/tabs.
bool isLHSEmpty = (sourcePhraseString.find_first_not_of(" \t", 0) == string::npos);
if (isLHSEmpty && !staticData.IsWordDeletionEnabled()) {
// NOTE(review): the message says "empty target" but the test above is on the
// source phrase string — confirm the intended wording.
TRACE_ERR( ruleTable.GetFilePath() << ":" << count << ": pt entry contains empty target, skipping\n");
continue;
}
// Parse the whitespace-separated scores; each is passed through
// TransformScore and then FloorScore before being stored.
scoreVector.clear();
for (util::TokenIter<util::AnyCharacter, true> s(scoreString, " \t"); s; ++s) {
int processed;
float score = converter.StringToFloat(s->data(), s->length(), &processed);
UTIL_THROW_IF(isnan(score), util::Exception, "Bad score " << *s << " on line " << count);
scoreVector.push_back(FloorScore(TransformScore(score)));
}
// The number of parsed scores must match what the rule table expects;
// on mismatch the message is recorded and the process aborts.
const size_t numScoreComponents = ruleTable.GetNumScoreComponents();
if (scoreVector.size() != numScoreComponents) {
stringstream strme;
strme << "Size of scoreVector != number (" << scoreVector.size() << "!="
<< numScoreComponents << ") of score components on line " << count;
UserMessage::Add(strme.str());
abort();
}
// parse source & find pt node
// constituent labels
// NOTE(review): both pointers are uninitialized here; presumably
// CreateFromString sets them through the out-parameter — confirm.
Word *sourceLHS;
Word *targetLHS;
// create target phrase obj
// Allocated with raw new; where ownership is transferred is not visible in
// this excerpt.
TargetPhrase *targetPhrase = new TargetPhrase();
targetPhrase->CreateFromString(Output, output, targetPhraseString, factorDelimiter, &targetLHS);
// source
Phrase sourcePhrase;
sourcePhrase.CreateFromString(Input, input, sourcePhraseString, factorDelimiter, &sourceLHS);
// rest of target phrase
targetPhrase->SetAlignmentInfo(alignString);
targetPhrase->SetTargetLHS(targetLHS);
//targetPhrase->SetDebugOutput(string("New Format pt ") + line);
// Optional 6th field: sparse scores.
if (++pipes) {
StringPiece sparseString(*pipes);
targetPhrase->SetSparseScore(&ruleTable, sparseString);
}
//.........这里部分代码省略.........
示例2: Load
// Reads rules from inFile, one per line, and adds a TargetPhrase for each to
// the HyperTree `trie`. (Excerpt: the code after the read loop, including the
// return statement, is not shown.)
//
// Each line is split on "|||" into: source fragment, target phrase, scores,
// then optionally alignment, counts, sparse scores and properties.
//
//   opts          - not referenced in the visible excerpt
//   input         - factor types used to build the dummy source phrase
//   output        - factor types passed to the target-side CreateFromString
//   inFile        - path of the rule file
//   ff            - supplies the expected number of scores and the features to
//                   apply; also the owner passed to the TargetPhrase
//                   constructor, SetSparseScore and Assign
//   trie          - receives the loaded rules
//   sourceTermSet - cleared on entry, then filled by
//                   ExtractSourceTerminalSetFromHyperPath for each rule
bool HyperTreeLoader::Load(AllOptions const& opts,
const std::vector<FactorType> &input,
const std::vector<FactorType> &output,
const std::string &inFile,
const RuleTableFF &ff,
HyperTree &trie,
boost::unordered_set<std::size_t> &sourceTermSet)
{
PrintUserTime(std::string("Start loading HyperTree"));
sourceTermSet.clear();
// Number of rules added so far; also used as the line number in messages.
std::size_t count = 0;
// progress is handed to FilePiece; it stays NULL unless the IFVERBOSE(1)
// assignment runs.
std::ostream *progress = NULL;
IFVERBOSE(1) progress = &std::cerr;
util::FilePiece in(inFile.c_str(), progress);
// reused variables
std::vector<float> scoreVector;
StringPiece line;
// Per double-conversion's constructor argument order, NAN is used as both the
// empty-string value and the junk-string value, with "inf"/"nan" as the
// accepted symbols; the isnan() test in the score loop relies on this.
double_conversion::StringToDoubleConverter converter(double_conversion::StringToDoubleConverter::NO_FLAGS, NAN, NAN, "inf", "nan");
HyperPathLoader hyperPathLoader;
// A placeholder source phrase built from the literal "hello"; it is the
// source argument to EvaluateInIsolation for every rule below. The LHS word
// pointer filled in by CreateFromString is deleted right away.
Phrase dummySourcePhrase;
{
Word *lhs = NULL;
dummySourcePhrase.CreateFromString(Input, input, "hello", &lhs);
delete lhs;
}
while(true) {
// ReadLine signals end of input by throwing EndOfFileException, which ends
// the loop.
try {
line = in.ReadLine();
} catch (const util::EndOfFileException &e) {
break;
}
// Split the rule on "|||". The first three fields are read unconditionally.
// NOTE(review): there is no check that the line actually has three fields
// before *++pipes is dereferenced — confirm TokenIter's behavior at end.
util::TokenIter<util::MultiCharacter> pipes(line, "|||");
StringPiece sourceString(*pipes);
StringPiece targetString(*++pipes);
StringPiece scoreString(*++pipes);
// Optional 4th field: alignment (left empty when absent).
StringPiece alignString;
if (++pipes) {
StringPiece temp(*pipes);
alignString = temp;
}
// NOTE(review): this advance is unconditional, so it also runs when the
// `++pipes` test just above found no further field — confirm that
// incrementing an exhausted TokenIter is safe.
++pipes; // counts
// Parse the whitespace-separated scores; each is passed through
// TransformScore and then FloorScore before being stored.
scoreVector.clear();
for (util::TokenIter<util::AnyCharacter, true> s(scoreString, " \t"); s; ++s) {
int processed;
float score = converter.StringToFloat(s->data(), s->length(), &processed);
UTIL_THROW_IF2(std::isnan(score), "Bad score " << *s << " on line " << count);
scoreVector.push_back(FloorScore(TransformScore(score)));
}
// The number of parsed scores must match what ff expects; otherwise throw.
const std::size_t numScoreComponents = ff.GetNumScoreComponents();
if (scoreVector.size() != numScoreComponents) {
UTIL_THROW2("Size of scoreVector != number (" << scoreVector.size() << "!="
<< numScoreComponents << ") of score components on line " << count);
}
// Source-side
HyperPath sourceFragment;
hyperPathLoader.Load(sourceString, sourceFragment);
ExtractSourceTerminalSetFromHyperPath(sourceFragment, sourceTermSet);
// Target-side
// Allocated with raw new and handed to phraseColl->Add() below.
TargetPhrase *targetPhrase = new TargetPhrase(&ff);
Word *targetLHS = NULL;
targetPhrase->CreateFromString(Output, output, targetString, &targetLHS);
targetPhrase->SetTargetLHS(targetLHS);
targetPhrase->SetAlignmentInfo(alignString);
// Optional next field: sparse scores.
if (++pipes) {
StringPiece sparseString(*pipes);
targetPhrase->SetSparseScore(&ff, sparseString);
}
// Optional next field: properties.
if (++pipes) {
StringPiece propertiesString(*pipes);
targetPhrase->SetProperties(propertiesString);
}
targetPhrase->GetScoreBreakdown().Assign(&ff, scoreVector);
targetPhrase->EvaluateInIsolation(dummySourcePhrase,
ff.GetFeaturesToApply());
// Add rule to trie.
TargetPhraseCollection::shared_ptr phraseColl
= GetOrCreateTargetPhraseCollection(trie, sourceFragment);
phraseColl->Add(targetPhrase);
count++;
}
//.........这里部分代码省略.........
示例3: InitializeForInput
// Per-sentence setup: writes the input sentence to a file in a fresh
// temporary directory, asks m_FuzzyMatchWrapper to extract a phrase-table
// file for it, and then parses that file line by line. (Excerpt: the rest of
// the loop body and the end of the function are not shown.)
//
//   inputSentence - the sentence being processed; its translation id selects
//                   the entry of m_collection that is referenced as rootNode
void PhraseDictionaryFuzzyMatch::InitializeForInput(InputType const& inputSentence)
{
// mkdtemp replaces the trailing XXXXXX in dirName in place and returns NULL
// on failure. No removal of the directory is visible in this excerpt.
char dirName[] = "/tmp/moses.XXXXXX";
char *temp = mkdtemp(dirName);
UTIL_THROW_IF2(temp == NULL,
"Couldn't create temporary directory " << dirName);
string dirNameStr(dirName);
string inFileName(dirNameStr + "/in");
ofstream inFile(inFileName.c_str());
// Writes words 1 .. GetSize()-2, i.e. skips the first and last positions.
// No separator is written here explicitly.
// NOTE(review): if GetSize() returns an unsigned 0, GetSize() - 1 wraps
// around — confirm the sentence is never empty at this point.
for (size_t i = 1; i < inputSentence.GetSize() - 1; ++i) {
inFile << inputSentence.GetWord(i);
}
inFile << endl;
inFile.close();
long translationId = inputSentence.GetTranslationId();
string ptFileName = m_FuzzyMatchWrapper->Extract(translationId, dirNameStr);
// populate with rules for this sentence
PhraseDictionaryNodeMemory &rootNode = m_collection[translationId];
FormatType format = MosesFormat;
// data from file
InputFileStream inStream(ptFileName);
// copied from class LoaderStandard
PrintUserTime("Start loading fuzzy-match phrase model");
const StaticData &staticData = StaticData::Instance();
const std::string& factorDelimiter = staticData.GetFactorDelimiter();
string lineOrig;
// count is used as the line number in messages below; no increment is visible
// in this excerpt.
size_t count = 0;
while(getline(inStream, lineOrig)) {
const string *line;
// format is set to MosesFormat above and not changed in the visible code,
// so the Hiero branch is not taken here.
if (format == HieroFormat) { // reformat line
UTIL_THROW(util::Exception, "Cannot be Hiero format");
//line = ReformatHieroRule(lineOrig);
} else {
// do nothing to format of line
line = &lineOrig;
}
vector<string> tokens;
vector<float> scoreVector;
// Split on "|||"; exactly 4 or 5 fields are accepted, otherwise the message
// is recorded and the process aborts.
TokenizeMultiCharSeparator(tokens, *line , "|||" );
if (tokens.size() != 4 && tokens.size() != 5) {
stringstream strme;
strme << "Syntax error at " << ptFileName << ":" << count;
UserMessage::Add(strme.str());
abort();
}
const string &sourcePhraseString = tokens[0]
, &targetPhraseString = tokens[1]
, &scoreString = tokens[2]
, &alignString = tokens[3];
// True when the source field is empty or contains only spaces/tabs.
bool isLHSEmpty = (sourcePhraseString.find_first_not_of(" \t", 0) == string::npos);
if (isLHSEmpty && !staticData.IsWordDeletionEnabled()) {
// NOTE(review): the message says "empty target" but the test above is on the
// source phrase string — confirm the intended wording.
TRACE_ERR( ptFileName << ":" << count << ": pt entry contains empty target, skipping\n");
continue;
}
Tokenize<float>(scoreVector, scoreString);
// The number of parsed scores must match GetNumScoreComponents(); on mismatch
// the message is recorded and the process aborts.
const size_t numScoreComponents = GetNumScoreComponents();
if (scoreVector.size() != numScoreComponents) {
stringstream strme;
strme << "Size of scoreVector != number (" << scoreVector.size() << "!="
<< numScoreComponents << ") of score components on line " << count;
UserMessage::Add(strme.str());
abort();
}
// NOTE(review): this repeats the condition handled by the abort() branch
// just above, so it cannot fire if abort() does not return.
UTIL_THROW_IF2(scoreVector.size() != numScoreComponents,
"Number of scores incorrectly specified");
// parse source & find pt node
// constituent labels
// NOTE(review): both pointers are uninitialized here; presumably
// CreateFromString sets them through the out-parameter — confirm.
Word *sourceLHS;
Word *targetLHS;
// source
Phrase sourcePhrase( 0);
sourcePhrase.CreateFromString(Input, m_input, sourcePhraseString, factorDelimiter, &sourceLHS);
// create target phrase obj
// Allocated with raw new; where ownership is transferred is not visible in
// this excerpt.
TargetPhrase *targetPhrase = new TargetPhrase();
targetPhrase->CreateFromString(Output, m_output, targetPhraseString, factorDelimiter, &targetLHS);
// rest of target phrase
//.........这里部分代码省略.........
示例4: Load
// Reads a text rule table from inFile, one rule per line, builds a
// TargetPhrase for each rule and looks up (or creates) the collection it
// belongs to in `trie`. (Excerpt: the rest of the loop body and the end of
// the function, including its return statement, are not shown.)
//
// Each line is split on "|||" into: source phrase, target phrase, scores,
// then optionally alignment, counts, sparse scores and properties.
//
//   input  - factor types passed to the source-side CreateFromString
//   output - factor types passed to the target-side CreateFromString
//   inFile - path of the rule table file
//   ff     - supplies the expected number of scores, the file path used in
//            messages and the features to apply; also the owner passed to the
//            TargetPhrase constructor, SetSparseScore and Assign
//   trie   - the rule trie passed to GetOrCreateTargetPhraseCollection
bool RuleTrieLoader::Load(const std::vector<FactorType> &input,
const std::vector<FactorType> &output,
const std::string &inFile,
const RuleTableFF &ff,
RuleTrie &trie)
{
PrintUserTime(std::string("Start loading text phrase table. Moses format"));
const StaticData &staticData = StaticData::Instance();
// const std::string &factorDelimiter = staticData.GetFactorDelimiter();
// count is used as the line number in messages below; no increment is visible
// in this excerpt.
std::size_t count = 0;
// progress is handed to FilePiece; it stays NULL unless the IFVERBOSE(1)
// assignment runs.
std::ostream *progress = NULL;
IFVERBOSE(1) progress = &std::cerr;
util::FilePiece in(inFile.c_str(), progress);
// reused variables
std::vector<float> scoreVector;
StringPiece line;
// Per double-conversion's constructor argument order, NAN is used as both the
// empty-string value and the junk-string value, with "inf"/"nan" as the
// accepted symbols; the isnan() test in the score loop relies on this.
double_conversion::StringToDoubleConverter converter(double_conversion::StringToDoubleConverter::NO_FLAGS, NAN, NAN, "inf", "nan");
while(true) {
// ReadLine signals end of input by throwing EndOfFileException, which ends
// the loop.
try {
line = in.ReadLine();
} catch (const util::EndOfFileException &e) {
break;
}
// Split the rule on "|||". The first three fields are read unconditionally.
// NOTE(review): there is no check that the line actually has three fields
// before *++pipes is dereferenced — confirm TokenIter's behavior at end.
util::TokenIter<util::MultiCharacter> pipes(line, "|||");
StringPiece sourcePhraseString(*pipes);
StringPiece targetPhraseString(*++pipes);
StringPiece scoreString(*++pipes);
// Optional 4th field: alignment (left empty when absent).
StringPiece alignString;
if (++pipes) {
StringPiece temp(*pipes);
alignString = temp;
}
// Optional 5th field: counts. It is read into a local that is never used.
if (++pipes) {
StringPiece str(*pipes); //counts
}
// True when the source field is empty or contains only spaces/tabs.
bool isLHSEmpty = (sourcePhraseString.find_first_not_of(" \t", 0) == std::string::npos);
if (isLHSEmpty && !staticData.IsWordDeletionEnabled()) {
// NOTE(review): the message says "empty target" but the test above is on the
// source phrase string — confirm the intended wording.
TRACE_ERR( ff.GetFilePath() << ":" << count << ": pt entry contains empty target, skipping\n");
continue;
}
// Parse the whitespace-separated scores; each is passed through
// TransformScore and then FloorScore before being stored.
scoreVector.clear();
for (util::TokenIter<util::AnyCharacter, true> s(scoreString, " \t"); s; ++s) {
int processed;
float score = converter.StringToFloat(s->data(), s->length(), &processed);
UTIL_THROW_IF2(std::isnan(score), "Bad score " << *s << " on line " << count);
scoreVector.push_back(FloorScore(TransformScore(score)));
}
// The number of parsed scores must match what ff expects; otherwise throw.
const std::size_t numScoreComponents = ff.GetNumScoreComponents();
if (scoreVector.size() != numScoreComponents) {
UTIL_THROW2("Size of scoreVector != number (" << scoreVector.size() << "!="
<< numScoreComponents << ") of score components on line " << count);
}
// parse source & find pt node
// constituent labels
// NOTE(review): targetLHS is uninitialized, and sourceLHS (NULL here) is
// dereferenced further down; both presumably get set by CreateFromString
// through the out-parameter — confirm.
Word *sourceLHS = NULL;
Word *targetLHS;
// create target phrase obj
// Allocated with raw new; where ownership is transferred is not visible in
// this excerpt.
TargetPhrase *targetPhrase = new TargetPhrase(&ff);
// targetPhrase->CreateFromString(Output, output, targetPhraseString, factorDelimiter, &targetLHS);
targetPhrase->CreateFromString(Output, output, targetPhraseString, &targetLHS);
// source
Phrase sourcePhrase;
// sourcePhrase.CreateFromString(Input, input, sourcePhraseString, factorDelimiter, &sourceLHS);
sourcePhrase.CreateFromString(Input, input, sourcePhraseString, &sourceLHS);
// rest of target phrase
targetPhrase->SetAlignmentInfo(alignString);
targetPhrase->SetTargetLHS(targetLHS);
//targetPhrase->SetDebugOutput(string("New Format pt ") + line);
// Optional next field: sparse scores.
if (++pipes) {
StringPiece sparseString(*pipes);
targetPhrase->SetSparseScore(&ff, sparseString);
}
// Optional next field: properties.
if (++pipes) {
StringPiece propertiesString(*pipes);
targetPhrase->SetProperties(propertiesString);
}
targetPhrase->GetScoreBreakdown().Assign(&ff, scoreVector);
targetPhrase->EvaluateInIsolation(sourcePhrase, ff.GetFeaturesToApply());
// Dereferences sourceLHS (see the review note at its declaration).
TargetPhraseCollection &phraseColl = GetOrCreateTargetPhraseCollection(
trie, *sourceLHS, sourcePhrase);
//.........这里部分代码省略.........
示例5: ProcessAndStripXMLTags
//.........这里部分代码省略.........
// Excerpt of ProcessAndStripXMLTags. It starts mid-function, so the
// declarations of ij, tagName, tagContent, startPos, endPos, walls,
// reorderingConstraint, res, tagStack, cleanLine, etc. are not visible here.
// The visible part handles one completed XML tag ("wall", "zone", or a tag
// carrying translation options) and then the function's final checks.
else endPos = atoi(ij[1].c_str()) + 1;
}
VERBOSE(3,"XML TAG " << tagName << " (" << tagContent << ") spanning " << startPos << " to " << (endPos-1) << " complete, commence processing" << endl);
// special tag: wall
// Records every position from start to endPos-1 in walls, where start is
// startPos-1 (or 0 when startPos is 0).
if (tagName == "wall") {
size_t start = (startPos == 0) ? 0 : startPos-1;
for(size_t pos = start; pos < endPos; pos++)
walls.push_back( pos );
}
// special tag: zone
// Requires a non-empty span and registers [startPos, endPos-1] as a zone.
else if (tagName == "zone") {
if (startPos >= endPos) {
TRACE_ERR("ERROR: zone must span at least one word: " << line << endl);
return false;
}
reorderingConstraint.SetZone( startPos, endPos-1 );
}
// default: opening tag that specifies translation options
else {
if (startPos >= endPos) {
TRACE_ERR("ERROR: tag " << tagName << " must span at least one word: " << line << endl);
return false;
}
// specified translations -> vector of phrases
// multiple translations may be specified, separated by "||"
vector<string> altTexts = TokenizeMultiCharSeparator(ParseXmlTagAttribute(tagContent,"translation"), "||");
if( altTexts.size() == 1 && altTexts[0] == "" )
altTexts.pop_back(); // happens when nothing specified
// deal with legacy annotations: "translation" was called "english"
// NOTE(review): moreAltTexts[0] is read whenever size() <= 1, which assumes
// the tokenizer always returns at least one element — confirm.
vector<string> moreAltTexts = TokenizeMultiCharSeparator(ParseXmlTagAttribute(tagContent,"english"), "||");
if (moreAltTexts.size()>1 || moreAltTexts[0] != "") {
for(vector<string>::iterator translation=moreAltTexts.begin();
translation != moreAltTexts.end();
translation++) {
string t = *translation;
altTexts.push_back( t );
}
}
// specified probabilities for the translations -> vector of probs
vector<string> altProbs = TokenizeMultiCharSeparator(ParseXmlTagAttribute(tagContent,"prob"), "||");
if( altProbs.size() == 1 && altProbs[0] == "" )
altProbs.pop_back(); // happens when nothing specified
// report what we have processed so far
// NOTE(review): altTexts and altProbs can be empty at this point (see the
// pop_back() calls above), yet element [0] of each is referenced in the
// VERBOSE arguments below — confirm whether these are evaluated when empty.
VERBOSE(3,"XML TAG NAME IS: '" << tagName << "'" << endl);
VERBOSE(3,"XML TAG TRANSLATION IS: '" << altTexts[0] << "'" << endl);
VERBOSE(3,"XML TAG PROB IS: '" << altProbs[0] << "'" << endl);
VERBOSE(3,"XML TAG SPAN IS: " << startPos << "-" << (endPos-1) << endl);
// Probabilities are optional, but if any are given there must be exactly one
// per translation alternative.
if (altProbs.size() > 0 && altTexts.size() != altProbs.size()) {
TRACE_ERR("ERROR: Unequal number of probabilities and translation alternatives: " << line << endl);
return false;
}
// store translation options into members
if (StaticData::Instance().GetXmlInputType() != XmlIgnore) {
// only store options if we aren't ignoring them
for (size_t i=0; i<altTexts.size(); ++i) {
Phrase sourcePhrase; // TODO don't know what the source phrase is
// set default probability
float probValue = 1;
if (altProbs.size() > 0) probValue = Scan<float>(altProbs[i]);
// convert from prob to log-prob
float scoreValue = FloorScore(TransformScore(probValue));
WordsRange range(startPos,endPos-1); // span covered by phrase
// The LHS out-parameter is passed as NULL here.
TargetPhrase targetPhrase;
targetPhrase.CreateFromString(Output, outputFactorOrder,altTexts[i],factorDelimiter, NULL);
targetPhrase.SetXMLScore(scoreValue);
targetPhrase.Evaluate(sourcePhrase);
// Allocated with raw new and appended to res; who frees it is not visible in
// this excerpt.
XmlOption *option = new XmlOption(range,targetPhrase);
CHECK(option);
res.push_back(option);
}
altTexts.clear();
altProbs.clear();
}
}
}
}
}
// we are done. check if there are tags that are still open
if (tagStack.size() > 0) {
TRACE_ERR("ERROR: some opened tags were never closed: " << line << endl);
return false;
}
// return de-xml'ed sentence in line
line = cleanLine;
return true;
}
示例6: ProcessAndStripXMLTags
//.........这里部分代码省略.........
// Excerpt of ProcessAndStripXMLTags. It starts mid-function, so the
// declarations of openedTag, wordPos, tagName, sourceLabels, xmlOptions,
// tagStack, cleanLine, etc. are not visible here. The visible part handles
// one completed XML tag (a source label and/or translation options) and then
// the function's final checks.
}
// assemble remaining information about tag
size_t startPos = openedTag.second.first;
string tagContent = openedTag.second.second;
size_t endPos = wordPos;
// span attribute overwrites position
// Accepts "i" (endPos becomes i+1) or "i-j" (endPos becomes j+1).
// NOTE(review): atoi does no validation and its int result is stored in a
// size_t, so malformed or negative values are not rejected here.
string span = ParseXmlTagAttribute(tagContent,"span");
if (! span.empty()) {
vector<string> ij = Tokenize(span, "-");
if (ij.size() != 1 && ij.size() != 2) {
TRACE_ERR("ERROR: span attribute must be of the form \"i-j\" or \"i\": " << line << endl);
return false;
}
startPos = atoi(ij[0].c_str());
if (ij.size() == 1) endPos = startPos + 1;
else endPos = atoi(ij[1].c_str()) + 1;
}
VERBOSE(3,"XML TAG " << tagName << " (" << tagContent << ") spanning " << startPos << " to " << (endPos-1) << " complete, commence processing" << endl);
if (startPos >= endPos) {
TRACE_ERR("ERROR: tag " << tagName << " must span at least one word: " << line << endl);
return false;
}
// may be either a input span label ("label"), or a specified output translation "translation"
string label = ParseXmlTagAttribute(tagContent,"label");
string translation = ParseXmlTagAttribute(tagContent,"translation");
// specified label
// A label without a translation is recorded as a source label over
// [startPos, endPos-1].
if (translation.length() == 0 && label.length() > 0) {
WordsRange range(startPos,endPos-1); // really?
XMLParseOutput item(label, range);
sourceLabels.push_back(item);
}
// specified translations -> vector of phrases, separated by "||"
if (translation.length() > 0 && StaticData::Instance().GetXmlInputType() != XmlIgnore) {
vector<string> altTexts = TokenizeMultiCharSeparator(translation, "||");
vector<string> altLabel = TokenizeMultiCharSeparator(label, "||");
vector<string> altProbs = TokenizeMultiCharSeparator(ParseXmlTagAttribute(tagContent,"prob"), "||");
//TRACE_ERR("number of translations: " << altTexts.size() << endl);
for (size_t i=0; i<altTexts.size(); ++i) {
// set target phrase
// The LHS out-parameter is passed as NULL; the LHS is set explicitly below.
TargetPhrase targetPhrase;
targetPhrase.CreateFromString(Output, outputFactorOrder,altTexts[i],factorDelimiter, NULL);
// set constituent label
// Uses the i-th label when one is given and non-empty; otherwise falls back
// to the first entry of the unknown-LHS list.
// NOTE(review): lhsList.begin() is dereferenced without checking that the
// list is non-empty — confirm it always has at least one entry.
string targetLHSstr;
if (altLabel.size() > i && altLabel[i].size() > 0) {
targetLHSstr = altLabel[i];
} else {
const UnknownLHSList &lhsList = StaticData::Instance().GetUnknownLHS();
UnknownLHSList::const_iterator iterLHS = lhsList.begin();
targetLHSstr = iterLHS->first;
}
// Allocated with raw new and handed to SetTargetLHS; whether targetPhrase
// takes ownership is not visible in this excerpt.
Word *targetLHS = new Word(true);
targetLHS->CreateFromString(Output, outputFactorOrder, targetLHSstr, true);
CHECK(targetLHS->GetFactor(0) != NULL);
targetPhrase.SetTargetLHS(targetLHS);
// not tested
Phrase sourcePhrase = this->GetSubString(WordsRange(startPos,endPos-1));
// get probability
// Defaults to 1 when no probability is given for this alternative.
float probValue = 1;
if (altProbs.size() > i && altProbs[i].size() > 0) {
probValue = Scan<float>(altProbs[i]);
}
// convert from prob to log-prob
float scoreValue = FloorScore(TransformScore(probValue));
targetPhrase.SetXMLScore(scoreValue);
targetPhrase.Evaluate(sourcePhrase);
// set span and create XmlOption
// NOTE(review): this range is (startPos+1, endPos), whereas the label range
// and the source substring above use (startPos, endPos-1) — confirm the
// one-position shift is intended.
WordsRange range(startPos+1,endPos);
// Allocated with raw new and appended to xmlOptions; who frees it is not
// visible in this excerpt.
XmlOption *option = new XmlOption(range,targetPhrase);
CHECK(option);
xmlOptions.push_back(option);
VERBOSE(2,"xml translation = [" << range << "] " << targetLHSstr << " -> " << altTexts[i] << " prob: " << probValue << endl);
}
altTexts.clear();
altProbs.clear();
}
}
}
}
// we are done. check if there are tags that are still open
if (tagStack.size() > 0) {
TRACE_ERR("ERROR: some opened tags were never closed: " << line << endl);
return false;
}
// return de-xml'ed sentence in line
line = cleanLine;
return true;
}