本文整理汇总了C++中Phrase::CreateFromString方法的典型用法代码示例。如果您正苦于以下问题：C++ Phrase::CreateFromString方法的具体用法？C++ Phrase::CreateFromString怎么用？C++ Phrase::CreateFromString使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 Phrase 的用法示例。
在下文中一共展示了Phrase::CreateFromString方法的7个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: outputTopN
//Find top n translations of source, and send them to output
static void outputTopN(const StringPiece& sourcePhraseString, PhraseDictionary* phraseTable, const std::vector<FactorType> &input, ostream& out) {
//get list of target phrases
Phrase sourcePhrase;
sourcePhrase.CreateFromString(Input,input,sourcePhraseString,NULL);
InputPath inputPath(sourcePhrase, NonTerminalSet(), WordsRange(0,sourcePhrase.GetSize()-1),NULL,NULL);
InputPathList inputPaths;
inputPaths.push_back(&inputPath);
phraseTable->GetTargetPhraseCollectionBatch(inputPaths);
const TargetPhraseCollection* targetPhrases = inputPath.GetTargetPhrases(*phraseTable);
//print phrases
const std::vector<FactorType>& output = StaticData::Instance().GetOutputFactorOrder();
if (targetPhrases) {
//if (targetPhrases->GetSize() > 10) cerr << "src " << sourcePhrase << " tgt count " << targetPhrases->GetSize() << endl;
for (TargetPhraseCollection::const_iterator i = targetPhrases->begin(); i != targetPhrases->end(); ++i) {
const TargetPhrase* targetPhrase = *i;
out << sourcePhrase.GetStringRep(input);
out << " ||| ";
out << targetPhrase->GetStringRep(output);
out << " ||| ";
const ScoreComponentCollection scores = targetPhrase->GetScoreBreakdown();
vector<float> phraseScores = scores.GetScoresForProducer(phraseTable);
for (size_t j = 0; j < phraseScores.size(); ++j) {
out << exp(phraseScores[j]) << " ";
}
out << "||| ";
const AlignmentInfo& align = targetPhrase->GetAlignTerm();
for (AlignmentInfo::const_iterator j = align.begin(); j != align.end(); ++j) {
out << j->first << "-" << j->second << " ";
}
out << endl;
}
}
}
示例2: Load
bool RuleTableLoaderStandard::Load(FormatType format
, const std::vector<FactorType> &input
, const std::vector<FactorType> &output
, const std::string &inFile
, size_t /* tableLimit */
, RuleTableTrie &ruleTable)
{
PrintUserTime(string("Start loading text SCFG phrase table. ") + (format==MosesFormat?"Moses ":"Hiero ") + " format");
const StaticData &staticData = StaticData::Instance();
const std::string& factorDelimiter = staticData.GetFactorDelimiter();
string lineOrig;
size_t count = 0;
std::ostream *progress = NULL;
IFVERBOSE(1) progress = &std::cerr;
util::FilePiece in(inFile.c_str(), progress);
// reused variables
vector<float> scoreVector;
StringPiece line;
std::string hiero_before, hiero_after;
double_conversion::StringToDoubleConverter converter(double_conversion::StringToDoubleConverter::NO_FLAGS, NAN, NAN, "inf", "nan");
while(true) {
try {
line = in.ReadLine();
} catch (const util::EndOfFileException &e) {
break;
}
if (format == HieroFormat) { // inefficiently reformat line
hiero_before.assign(line.data(), line.size());
ReformatHieroRule(hiero_before, hiero_after);
line = hiero_after;
}
util::TokenIter<util::MultiCharacter> pipes(line, "|||");
StringPiece sourcePhraseString(*pipes);
StringPiece targetPhraseString(*++pipes);
StringPiece scoreString(*++pipes);
StringPiece alignString;
if (++pipes) {
StringPiece temp(*pipes);
alignString = temp;
}
if (++pipes) {
StringPiece str(*pipes); //counts
}
bool isLHSEmpty = (sourcePhraseString.find_first_not_of(" \t", 0) == string::npos);
if (isLHSEmpty && !staticData.IsWordDeletionEnabled()) {
TRACE_ERR( ruleTable.GetFilePath() << ":" << count << ": pt entry contains empty target, skipping\n");
continue;
}
scoreVector.clear();
for (util::TokenIter<util::AnyCharacter, true> s(scoreString, " \t"); s; ++s) {
int processed;
float score = converter.StringToFloat(s->data(), s->length(), &processed);
UTIL_THROW_IF(isnan(score), util::Exception, "Bad score " << *s << " on line " << count);
scoreVector.push_back(FloorScore(TransformScore(score)));
}
const size_t numScoreComponents = ruleTable.GetNumScoreComponents();
if (scoreVector.size() != numScoreComponents) {
stringstream strme;
strme << "Size of scoreVector != number (" << scoreVector.size() << "!="
<< numScoreComponents << ") of score components on line " << count;
UserMessage::Add(strme.str());
abort();
}
// parse source & find pt node
// constituent labels
Word *sourceLHS;
Word *targetLHS;
// create target phrase obj
TargetPhrase *targetPhrase = new TargetPhrase();
targetPhrase->CreateFromString(Output, output, targetPhraseString, factorDelimiter, &targetLHS);
// source
Phrase sourcePhrase;
sourcePhrase.CreateFromString(Input, input, sourcePhraseString, factorDelimiter, &sourceLHS);
// rest of target phrase
targetPhrase->SetAlignmentInfo(alignString);
targetPhrase->SetTargetLHS(targetLHS);
//targetPhrase->SetDebugOutput(string("New Format pt ") + line);
if (++pipes) {
StringPiece sparseString(*pipes);
targetPhrase->SetSparseScore(&ruleTable, sparseString);
}
//.........这里部分代码省略.........
示例3: Load
bool HyperTreeLoader::Load(AllOptions const& opts,
const std::vector<FactorType> &input,
const std::vector<FactorType> &output,
const std::string &inFile,
const RuleTableFF &ff,
HyperTree &trie,
boost::unordered_set<std::size_t> &sourceTermSet)
{
PrintUserTime(std::string("Start loading HyperTree"));
sourceTermSet.clear();
std::size_t count = 0;
std::ostream *progress = NULL;
IFVERBOSE(1) progress = &std::cerr;
util::FilePiece in(inFile.c_str(), progress);
// reused variables
std::vector<float> scoreVector;
StringPiece line;
double_conversion::StringToDoubleConverter converter(double_conversion::StringToDoubleConverter::NO_FLAGS, NAN, NAN, "inf", "nan");
HyperPathLoader hyperPathLoader;
Phrase dummySourcePhrase;
{
Word *lhs = NULL;
dummySourcePhrase.CreateFromString(Input, input, "hello", &lhs);
delete lhs;
}
while(true) {
try {
line = in.ReadLine();
} catch (const util::EndOfFileException &e) {
break;
}
util::TokenIter<util::MultiCharacter> pipes(line, "|||");
StringPiece sourceString(*pipes);
StringPiece targetString(*++pipes);
StringPiece scoreString(*++pipes);
StringPiece alignString;
if (++pipes) {
StringPiece temp(*pipes);
alignString = temp;
}
++pipes; // counts
scoreVector.clear();
for (util::TokenIter<util::AnyCharacter, true> s(scoreString, " \t"); s; ++s) {
int processed;
float score = converter.StringToFloat(s->data(), s->length(), &processed);
UTIL_THROW_IF2(std::isnan(score), "Bad score " << *s << " on line " << count);
scoreVector.push_back(FloorScore(TransformScore(score)));
}
const std::size_t numScoreComponents = ff.GetNumScoreComponents();
if (scoreVector.size() != numScoreComponents) {
UTIL_THROW2("Size of scoreVector != number (" << scoreVector.size() << "!="
<< numScoreComponents << ") of score components on line " << count);
}
// Source-side
HyperPath sourceFragment;
hyperPathLoader.Load(sourceString, sourceFragment);
ExtractSourceTerminalSetFromHyperPath(sourceFragment, sourceTermSet);
// Target-side
TargetPhrase *targetPhrase = new TargetPhrase(&ff);
Word *targetLHS = NULL;
targetPhrase->CreateFromString(Output, output, targetString, &targetLHS);
targetPhrase->SetTargetLHS(targetLHS);
targetPhrase->SetAlignmentInfo(alignString);
if (++pipes) {
StringPiece sparseString(*pipes);
targetPhrase->SetSparseScore(&ff, sparseString);
}
if (++pipes) {
StringPiece propertiesString(*pipes);
targetPhrase->SetProperties(propertiesString);
}
targetPhrase->GetScoreBreakdown().Assign(&ff, scoreVector);
targetPhrase->EvaluateInIsolation(dummySourcePhrase,
ff.GetFeaturesToApply());
// Add rule to trie.
TargetPhraseCollection::shared_ptr phraseColl
= GetOrCreateTargetPhraseCollection(trie, sourceFragment);
phraseColl->Add(targetPhrase);
count++;
}
//.........这里部分代码省略.........
示例4: Load
bool RuleTrieLoader::Load(const std::vector<FactorType> &input,
const std::vector<FactorType> &output,
const std::string &inFile,
const RuleTableFF &ff,
RuleTrie &trie)
{
PrintUserTime(std::string("Start loading text phrase table. Moses format"));
const StaticData &staticData = StaticData::Instance();
// const std::string &factorDelimiter = staticData.GetFactorDelimiter();
std::size_t count = 0;
std::ostream *progress = NULL;
IFVERBOSE(1) progress = &std::cerr;
util::FilePiece in(inFile.c_str(), progress);
// reused variables
std::vector<float> scoreVector;
StringPiece line;
double_conversion::StringToDoubleConverter converter(double_conversion::StringToDoubleConverter::NO_FLAGS, NAN, NAN, "inf", "nan");
while(true) {
try {
line = in.ReadLine();
} catch (const util::EndOfFileException &e) {
break;
}
util::TokenIter<util::MultiCharacter> pipes(line, "|||");
StringPiece sourcePhraseString(*pipes);
StringPiece targetPhraseString(*++pipes);
StringPiece scoreString(*++pipes);
StringPiece alignString;
if (++pipes) {
StringPiece temp(*pipes);
alignString = temp;
}
if (++pipes) {
StringPiece str(*pipes); //counts
}
bool isLHSEmpty = (sourcePhraseString.find_first_not_of(" \t", 0) == std::string::npos);
if (isLHSEmpty && !staticData.IsWordDeletionEnabled()) {
TRACE_ERR( ff.GetFilePath() << ":" << count << ": pt entry contains empty target, skipping\n");
continue;
}
scoreVector.clear();
for (util::TokenIter<util::AnyCharacter, true> s(scoreString, " \t"); s; ++s) {
int processed;
float score = converter.StringToFloat(s->data(), s->length(), &processed);
UTIL_THROW_IF2(std::isnan(score), "Bad score " << *s << " on line " << count);
scoreVector.push_back(FloorScore(TransformScore(score)));
}
const std::size_t numScoreComponents = ff.GetNumScoreComponents();
if (scoreVector.size() != numScoreComponents) {
UTIL_THROW2("Size of scoreVector != number (" << scoreVector.size() << "!="
<< numScoreComponents << ") of score components on line " << count);
}
// parse source & find pt node
// constituent labels
Word *sourceLHS = NULL;
Word *targetLHS;
// create target phrase obj
TargetPhrase *targetPhrase = new TargetPhrase(&ff);
// targetPhrase->CreateFromString(Output, output, targetPhraseString, factorDelimiter, &targetLHS);
targetPhrase->CreateFromString(Output, output, targetPhraseString, &targetLHS);
// source
Phrase sourcePhrase;
// sourcePhrase.CreateFromString(Input, input, sourcePhraseString, factorDelimiter, &sourceLHS);
sourcePhrase.CreateFromString(Input, input, sourcePhraseString, &sourceLHS);
// rest of target phrase
targetPhrase->SetAlignmentInfo(alignString);
targetPhrase->SetTargetLHS(targetLHS);
//targetPhrase->SetDebugOutput(string("New Format pt ") + line);
if (++pipes) {
StringPiece sparseString(*pipes);
targetPhrase->SetSparseScore(&ff, sparseString);
}
if (++pipes) {
StringPiece propertiesString(*pipes);
targetPhrase->SetProperties(propertiesString);
}
targetPhrase->GetScoreBreakdown().Assign(&ff, scoreVector);
targetPhrase->EvaluateInIsolation(sourcePhrase, ff.GetFeaturesToApply());
TargetPhraseCollection &phraseColl = GetOrCreateTargetPhraseCollection(
trie, *sourceLHS, sourcePhrase);
//.........这里部分代码省略.........
示例5: main
// Command-line tool that loads a compact phrase table and, for every source
// phrase read from standard input (one per line), prints its translations.
//
// Options:
//   -n <int>   number of scores per entry (default 5)
//   -t <path>  phrase table to load (required)
//   -a         load and print alignments
//   -c         print only the number of translations for each source phrase
//
// Any unknown or incomplete option, or a missing -t, calls usage().
// NOTE(review): the code after each usage() call assumes usage() does not
// return (e.g. argv[++i] is read right after it) - confirm it exits.
//
// Returns 1 if the phrase table cannot be loaded.
int main(int argc, char **argv)
{
    int nscores = 5;
    std::string ttable = "";
    bool useAlignments = false;
    bool reportCounts = false;

    for(int i = 1; i < argc; i++) {
        if(!strcmp(argv[i], "-n")) {
            if(i + 1 == argc)
                usage();
            nscores = atoi(argv[++i]);
        } else if(!strcmp(argv[i], "-t")) {
            if(i + 1 == argc)
                usage();
            ttable = argv[++i];
        } else if(!strcmp(argv[i], "-a")) {
            useAlignments = true;
        } else if (!strcmp(argv[i], "-c")) {
            reportCounts = true;
        } else {
            usage();
        }
    }

    if(ttable == "")
        usage();

    // Single-factor setup: factor 0 on both sides, all weights zero.
    std::vector<FactorType> input(1, 0);
    std::vector<FactorType> output(1, 0);
    std::vector<float> weight(nscores, 0);

    LMList lmList;

    // Fill in the parameters that LoadData() is given, without a config file.
    Parameter *parameter = new Parameter();
    const_cast<std::vector<std::string>&>(parameter->GetParam("factor-delimiter")).resize(1, "||dummy_string||");
    const_cast<std::vector<std::string>&>(parameter->GetParam("input-factors")).resize(1, "0");
    const_cast<std::vector<std::string>&>(parameter->GetParam("verbose")).resize(1, "0");
    const_cast<std::vector<std::string>&>(parameter->GetParam("weight-w")).resize(1, "0");
    const_cast<std::vector<std::string>&>(parameter->GetParam("weight-d")).resize(1, "0");
    StaticData::InstanceNonConst().LoadData(parameter);

    SparsePhraseDictionaryFeature *spdf = NULL;
    PhraseDictionaryFeature pdf(Compact, spdf, nscores, nscores, input, output, ttable, weight, 0, 0, "", "");
    PhraseDictionaryCompact pdc(nscores, Compact, &pdf, false, useAlignments);

    // Check the result explicitly: an assert() would vanish under NDEBUG and
    // let a release build query a table that failed to load.
    const bool ret = pdc.Load(input, output, ttable, weight, 0, lmList, 0);
    if(!ret) {
        std::cerr << "Failed to load phrase table: " << ttable << std::endl;
        return 1;
    }

    std::string line;
    while(getline(std::cin, line)) {
        Phrase sourcePhrase;
        sourcePhrase.CreateFromString(input, line, "||dummy_string||");

        TargetPhraseVectorPtr decodedPhraseColl
            = pdc.GetTargetPhraseCollectionRaw(sourcePhrase);

        if(decodedPhraseColl != NULL) {
            if(reportCounts) {
                std::cout << sourcePhrase << decodedPhraseColl->size() << std::endl;
            } else {
                // One line per translation: source ||| target |||[ align |||] scores
                for(TargetPhraseVector::iterator it = decodedPhraseColl->begin(); it != decodedPhraseColl->end(); ++it) {
                    TargetPhrase &tp = *it;
                    std::cout << sourcePhrase << "||| ";
                    std::cout << static_cast<const Phrase&>(tp) << "|||";

                    if(useAlignments)
                        std::cout << " " << tp.GetAlignTerm() << "|||";

                    // Scores are exponentiated before being printed.
                    std::vector<float> scores = tp.GetScoreBreakdown().GetScoresForProducer(&pdf);
                    for(size_t i = 0; i < scores.size(); i++)
                        std::cout << " " << exp(scores[i]);
                    std::cout << std::endl;
                }
            }
        } else if(reportCounts) {
            // No entry for this source phrase: report a count of zero.
            std::cout << sourcePhrase << 0 << std::endl;
        }

        std::cout.flush();
    }
}
示例6: testCalcScore
void testCalcScore() {
double p_the = -1.383059;
double p_licenses = -2.360783;
double p_for = -1.661813;
double p_most = -2.360783;
// double p_software = -1.62042;
double p_the_licenses = -0.9625873;
double p_licenses_for = -1.661557;
double p_for_most = -0.4526253;
// double p_most_software = -1.70295;
double p_the_licenses_for = p_the_licenses + p_licenses_for;
// double p_licenses_for_most = p_licenses_for + p_for_most;
// the
{
Phrase phrase;
BOOST_CHECK( phrase.GetSize() == 0 );
std::vector<FactorType> outputFactorOrder;
outputFactorOrder.push_back(0);
phrase.CreateFromString(
outputFactorOrder,
"the",
StaticData::Instance().GetFactorDelimiter());
BOOST_CHECK( phrase.GetSize() == 1 );
float fullScore;
float ngramScore;
size_t oovCount;
backwardLM->CalcScore(phrase, fullScore, ngramScore, oovCount);
BOOST_CHECK( oovCount == 0 );
SLOPPY_CHECK_CLOSE( TransformLMScore(p_the), fullScore, 0.01);
SLOPPY_CHECK_CLOSE( TransformLMScore( 0.0 ), ngramScore, 0.01);
}
// the licenses
{
Phrase phrase;
BOOST_CHECK( phrase.GetSize() == 0 );
std::vector<FactorType> outputFactorOrder;
outputFactorOrder.push_back(0);
phrase.CreateFromString(
outputFactorOrder,
"the licenses",
StaticData::Instance().GetFactorDelimiter());
BOOST_CHECK( phrase.GetSize() == 2 );
float fullScore;
float ngramScore;
size_t oovCount;
backwardLM->CalcScore(phrase, fullScore, ngramScore, oovCount);
BOOST_CHECK( oovCount == 0 );
SLOPPY_CHECK_CLOSE( TransformLMScore(p_licenses + p_the_licenses), fullScore, 0.01);
SLOPPY_CHECK_CLOSE( TransformLMScore( 0.0 ), ngramScore, 0.01);
}
// the licenses for
{
Phrase phrase;
BOOST_CHECK( phrase.GetSize() == 0 );
std::vector<FactorType> outputFactorOrder;
outputFactorOrder.push_back(0);
phrase.CreateFromString(
outputFactorOrder,
"the licenses for",
StaticData::Instance().GetFactorDelimiter());
BOOST_CHECK( phrase.GetSize() == 3 );
float fullScore;
float ngramScore;
size_t oovCount;
backwardLM->CalcScore(phrase, fullScore, ngramScore, oovCount);
BOOST_CHECK( oovCount == 0 );
SLOPPY_CHECK_CLOSE( TransformLMScore( p_the_licenses_for ), ngramScore, 0.01);
SLOPPY_CHECK_CLOSE( TransformLMScore(p_for + p_licenses_for + p_the_licenses), fullScore, 0.01);
}
// the licenses for most
{
Phrase phrase;
BOOST_CHECK( phrase.GetSize() == 0 );
std::vector<FactorType> outputFactorOrder;
outputFactorOrder.push_back(0);
phrase.CreateFromString(
//.........这里部分代码省略.........
示例7: testEvaluate
void testEvaluate() {
FFState *nextState;
FFState *prevState = const_cast< FFState * >(backwardLM->EmptyHypothesisState( *dummyInput ));
double p_most = -2.360783;
double p_for = -1.661813;
double p_licenses = -2.360783;
double p_the = -1.383059;
double p_eos = -1.457693;
double p_most_for = -0.4526253;
double p_for_licenses = -1.661557;
double p_licenses_the = -0.9625873;
double p_the_eos = -1.940311;
// the
{
Phrase phrase;
BOOST_CHECK( phrase.GetSize() == 0 );
std::vector<FactorType> outputFactorOrder;
outputFactorOrder.push_back(0);
phrase.CreateFromString(
outputFactorOrder,
"the",
StaticData::Instance().GetFactorDelimiter());
BOOST_CHECK( phrase.GetSize() == 1 );
float score;
nextState = backwardLM->Evaluate(phrase, prevState, score);
// p(the) * p(</s> | the) / p(</s>)
SLOPPY_CHECK_CLOSE( (p_the + p_the_eos - p_eos), score, 0.01);
delete prevState;
prevState = nextState;
}
// the licenses
{
Phrase phrase;
BOOST_CHECK( phrase.GetSize() == 0 );
std::vector<FactorType> outputFactorOrder;
outputFactorOrder.push_back(0);
phrase.CreateFromString(
outputFactorOrder,
"licenses",
StaticData::Instance().GetFactorDelimiter());
BOOST_CHECK( phrase.GetSize() == 1 );
float score;
nextState = backwardLM->Evaluate(phrase, prevState, score);
// p(licenses) * p(licenses | the) / p(the)
SLOPPY_CHECK_CLOSE( (p_licenses + p_licenses_the - p_the), score, 0.01);
delete prevState;
prevState = nextState;
}
// the licenses for
{
Phrase phrase;
BOOST_CHECK( phrase.GetSize() == 0 );
std::vector<FactorType> outputFactorOrder;
outputFactorOrder.push_back(0);
phrase.CreateFromString(
outputFactorOrder,
"for",
StaticData::Instance().GetFactorDelimiter());
BOOST_CHECK( phrase.GetSize() == 1 );
float score;
nextState = backwardLM->Evaluate(phrase, prevState, score);
// p(for) * p(for | licenses) / p(licenses)
SLOPPY_CHECK_CLOSE( (p_for + p_for_licenses - p_licenses), score, 0.01);
delete prevState;
prevState = nextState;
}
// the licenses for most
{
Phrase phrase;
BOOST_CHECK( phrase.GetSize() == 0 );
//.........这里部分代码省略.........