本文整理汇总了C++中Word::CreateFromString方法的典型用法代码示例。如果您正苦于以下问题：C++ Word::CreateFromString方法的具体用法是什么？C++ Word::CreateFromString怎么用？有哪些C++ Word::CreateFromString的使用例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类Word的用法示例。
在下文中一共展示了Word::CreateFromString方法的12个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: ParseVertex
// Build a forest vertex from its textual description.
//
// Two input shapes are handled:
//  - no '[' anywhere in s: the whole string becomes the vertex symbol
//    (created with the non-terminal flag set to false) and the span is left
//    at (0, 0) for the caller to fill in;
//  - "<label>[<start>,<end>]": the text before the last '[' becomes the
//    symbol (created with the non-terminal flag set to true) and the two
//    numbers become the span, each shifted up by one.
//
// s            textual vertex description
// factorOrder  factor order forwarded to Word::CreateFromString
//
// Returns a Forest::Vertex allocated with new.
Syntax::F2S::Forest::Vertex *ForestInput::ParseVertex(
  const StringPiece &s, const std::vector<FactorType>& factorOrder)
{
  using Syntax::F2S::Forest;

  Word label;
  const std::size_t bracketPos = s.rfind('[');

  if (bracketPos != std::string::npos) {
    // Everything in front of the last '[' is the symbol itself.
    label.CreateFromString(Input, factorOrder, s.substr(0, bracketPos), true);

    // The search for ',' starts one character past the first character
    // that follows '['.
    const std::size_t numberStart = bracketPos + 1;
    const std::size_t commaPos = s.find(',', numberStart + 1);

    std::string digits;
    s.substr(numberStart, commaPos - numberStart).CopyToString(&digits);
    const std::size_t spanStart = std::atoi(digits.c_str());

    // The length (size - commaPos - 2) leaves out the final character of s.
    s.substr(commaPos + 1, s.size() - commaPos - 2).CopyToString(&digits);
    const std::size_t spanEnd = std::atoi(digits.c_str());

    // Offset the span by 1 to allow for <s> in first position.
    Range span(spanStart + 1, spanEnd + 1);
    return new Forest::Vertex(Syntax::PVertex(span, label));
  }

  // No span in the text: the caller will fill it in later.
  label.CreateFromString(Input, factorOrder, s, false);
  Range span(0, 0);
  return new Forest::Vertex(Syntax::PVertex(span, label));
}
示例2: Tokenize
// Turn one token into Word objects and append them to phrase.
//
// A token counts as a non-terminal when it starts with "[" and ends with "]".
//  - Terminal: the token becomes a single word.
//  - Non-terminal with no further "[" at index 2 or later (an lhs): the token
//    becomes a single word.
//  - Non-terminal with a further "[": the token is split at that bracket.
//    The part before it is added when addSourceNonTerm is true, the part
//    starting at it is added when addTargetNonTerm is true.
//
// Every word is created through the vocabulary of onDiskWrapper, allocated
// with new, and passed to phrase.AddWord().
void Tokenize(OnDiskPt::Phrase &phrase
              , const std::string &token, bool addSourceNonTerm, bool addTargetNonTerm
              , OnDiskPt::OnDiskWrapper &onDiskWrapper)
{
  const size_t tokenLen = token.size();

  // The closing bracket is only inspected when the opening one matched.
  const bool isNonTerm = token.compare(0, 1, "[") == 0
                         && token.compare(tokenLen - 1, 1, "]") == 0;

  if (!isNonTerm) {
    // plain terminal
    Word *terminal = new Word();
    terminal->CreateFromString(token, onDiskWrapper.GetVocab());
    phrase.AddWord(terminal);
    return;
  }

  const size_t secondBracket = token.find_first_of("[", 2);
  std::string piece = token.substr(0, secondBracket);

  if (secondBracket == std::string::npos) {
    // lhs - the token holds exactly one word
    Word *lhsWord = new Word();
    lhsWord->CreateFromString(piece, onDiskWrapper.GetVocab());
    phrase.AddWord(lhsWord);
    return;
  }

  // source & target non-terminals packed into one token
  if (addSourceNonTerm) {
    Word *sourceWord = new Word();
    sourceWord->CreateFromString(piece, onDiskWrapper.GetVocab());
    phrase.AddWord(sourceWord);
  }

  piece = token.substr(secondBracket, tokenLen - secondBracket);
  if (addTargetNonTerm) {
    Word *targetWord = new Word();
    targetWord->CreateFromString(piece, onDiskWrapper.GetVocab());
    phrase.AddWord(targetWord);
  }
}
示例3: Lookup
// Provide a fallback translation for every single-word input path.
//
// For each path in inputPathQueue:
//  - if its source phrase has exactly one word, a one-word target phrase
//    "UNK:<source word>:UNK" is created, given LOWEST_SCORE for this feature,
//    evaluated, wrapped in a new TargetPhrases collection (also recorded in
//    m_targetPhrases) and stored in the path's lookup slot for m_ptId;
//  - otherwise the lookup slot is set to (NULL, NULL).
//
// inputPathQueue  paths to process; the pointed-to paths are modified.
void UnknownWordPenalty::Lookup(const std::vector<InputPath*> &inputPathQueue)
{
  // Scratch accumulator handed to FeatureFunction::Evaluate for every path.
  // Nothing in this function stores it, so it is released before returning
  // (the original never freed it).
  // NOTE(review): assumes FeatureFunction::Evaluate does not retain the
  // address of this object - confirm against its definition.
  Scores *estimatedFutureScore = new Scores();

  for (size_t i = 0; i < inputPathQueue.size(); ++i) {
    InputPath &path = *inputPathQueue[i];
    PhraseTableLookup &ptLookup = path.GetPtLookup(m_ptId);

    const Phrase &source = path.GetPhrase();
    if (source.GetSize() == 1) {
      const Word &sourceWord = source.GetWord(0);
      string str = sourceWord.ToString();
      str = "UNK:" + str + ":UNK";

      Word targetWord;
      targetWord.CreateFromString(str);

      TargetPhrase *tp = new TargetPhrase(1);
      tp->Set(0, targetWord);
      tp->GetScores().Add(*this, LOWEST_SCORE);

      FeatureFunction::Evaluate(source, *tp, *estimatedFutureScore);

      // The collection is recorded in m_targetPhrases as well as in the
      // path's lookup entry.
      TargetPhrases *tpColl = new TargetPhrases();
      m_targetPhrases.push_back(tpColl);
      tpColl->Add(tp);
      ptLookup.Set(tpColl, NULL);
    } else {
      ptLookup.Set(NULL, NULL);
    }
  }

  delete estimatedFutureScore;
}
示例4: Load
// Load the vocabulary (word <-> id mapping) from a stream.
//
// Expected layout: the first line holds the number of entries; each
// following line holds a word, optionally followed by its numeric id.
// Reading stops after that many entries or at end of stream, whichever
// comes first. Both maps are cleared before loading.
//
// vcbin      stream to read from
// direction  factor direction forwarded to Word::CreateFromString
// factors    factor list forwarded to Word::CreateFromString
// closed     value stored in m_closed after loading
//
// Returns true. Failures are reported through UTIL_THROW_IF2: when the first
// line cannot be read, or when an id or a word occurs twice.
bool Vocab::Load(FileHandler* vcbin, const FactorDirection& direction,
                 const FactorList& factors, bool closed)
{
  // load vocab id -> word mapping
  m_words2ids.clear(); // reset mapping
  m_ids2words.clear();
  std::string line, word_str;

  std::istream &ret = getline(*vcbin, line);
  UTIL_THROW_IF2(!ret, "Couldn't read file");
  std::istringstream first(line.c_str());
  uint32_t vcbsize(0);
  first >> vcbsize;
  uint32_t loadedsize = 0;
  while (loadedsize++ < vcbsize && getline(*vcbin, line)) {
    std::istringstream entry(line.c_str());
    entry >> word_str;
    Word word;
    word.CreateFromString( direction, factors, word_str, false); // TODO set correctly isNonTerminal

    // Start every entry from 0 so that a line without an id is detected
    // reliably: before C++11 a failed extraction leaves the target unchanged,
    // which would otherwise expose an indeterminate value on the first entry
    // or the previous entry's id on later ones.
    wordID_t id = 0;
    entry >> id;
    // may be no id (i.e. file may just be a word list)
    if (id == 0 && word != GetkOOVWord())
      id = m_ids2words.size() + 1; // assign ids sequentially starting from 1
    UTIL_THROW_IF2(m_ids2words.count(id) != 0 || m_words2ids.count(word) != 0,
                   "Error");

    m_ids2words[id] = word;
    m_words2ids[word] = id;
  }
  m_closed = closed; // once loaded fix vocab ?
  std::cerr << "Loaded vocab with " << m_ids2words.size() << " words." << std::endl;
  return true;
}
示例5: GetWordID
// Return the id of a word given as a plain string.
//
// The string is converted into a Word on the Input side, using factor 0
// only and with the non-terminal flag set to false; the id is then obtained
// from the GetWordID overload that takes a Word.
wordID_t Vocab::GetWordID(const std::string& word_str)
{
  FactorList singleFactor;
  singleFactor.push_back(0);

  Word lookupKey;
  lookupKey.CreateFromString(Input, singleFactor, word_str, false);

  return GetWordID(lookupKey);
}
示例6: GetWordID
// Return the wordID_t index of a word given as a (factored) string.
//
// word_str       textual form of the word
// direction      factor direction forwarded to Word::CreateFromString
// factors        factor list forwarded to Word::CreateFromString
// isNonTerminal  non-terminal flag forwarded to Word::CreateFromString
//
// The resulting Word is passed to the GetWordID overload that takes a Word.
wordID_t Vocab::GetWordID(const std::string& word_str,
                          const FactorDirection& direction, const FactorList& factors, bool isNonTerminal)
{
  Word lookupKey;
  lookupKey.CreateFromString(direction, factors, word_str, isNonTerminal);

  return GetWordID(lookupKey);
}
示例7: CreateFromStringNewFormat
// Fill this phrase and its left-hand side from a "new format" rule string.
//
// phraseString is tokenized; every token except the last becomes a word of
// this phrase and the last token, which must be of the form "[X]", becomes
// lhs. A token that starts with "[" and ends with "]" is a non-terminal of
// the form "[source][target]": for direction == Input the source label is
// kept, otherwise the target label. m_arity counts the non-terminals added.
//
// direction        selects which half of a non-terminal pair is used and is
//                  forwarded to Word::CreateFromString
// factorOrder      forwarded to Word::CreateFromString
// phraseString     the rule string to parse
// factorDelimiter  not used by this function
// lhs              receives the left-hand-side non-terminal
//
// An empty token list is a caller error: it trips an assert in debug builds
// and, in release builds, returns with m_arity == 0 and lhs untouched rather
// than indexing past the end of the token vector.
void Phrase::CreateFromStringNewFormat(FactorDirection direction
                                       , const std::vector<FactorType> &factorOrder
                                       , const std::string &phraseString
                                       , const std::string &factorDelimiter
                                       , Word &lhs)
{
  m_arity = 0;

  // parse
  std::vector<std::string> annotatedWordVector;
  Tokenize(annotatedWordVector, phraseString);
  // KOMMA|none ART|Def.Z NN|Neut.NotGen.Sg VVFIN|none
  //    to
  // "KOMMA|none" "ART|Def.Z" "NN|Neut.NotGen.Sg" "VVFIN|none"

  // With no tokens, size() - 1 would wrap around and back() would be
  // undefined, so bail out before touching the vector.
  assert(!annotatedWordVector.empty());
  if (annotatedWordVector.empty()) {
    return;
  }

  // Every token but the last is a right-hand-side word.
  const size_t numRhsWords = annotatedWordVector.size() - 1;
  for (size_t phrasePos = 0 ; phrasePos < numRhsWords ; phrasePos++) {
    std::string &annotatedWord = annotatedWordVector[phrasePos];
    bool isNonTerminal;
    if (annotatedWord.substr(0, 1) == "[" && annotatedWord.substr(annotatedWord.size()-1, 1) == "]") {
      // non-term
      isNonTerminal = true;

      // position of the bracket that opens the second label
      size_t nextPos = annotatedWord.find("[", 1);
      assert(nextPos != std::string::npos);

      if (direction == Input)
        annotatedWord = annotatedWord.substr(1, nextPos - 2);
      else
        annotatedWord = annotatedWord.substr(nextPos + 1, annotatedWord.size() - nextPos - 2);

      m_arity++;
    } else {
      isNonTerminal = false;
    }

    Word &word = AddWord();
    word.CreateFromString(direction, factorOrder, annotatedWord, isNonTerminal);
  }

  // lhs: strip the surrounding brackets from the last token
  std::string &annotatedWord = annotatedWordVector.back();
  assert(annotatedWord.substr(0, 1) == "[" && annotatedWord.substr(annotatedWord.size()-1, 1) == "]");
  annotatedWord = annotatedWord.substr(1, annotatedWord.size() - 2);

  lhs.CreateFromString(direction, factorOrder, annotatedWord, true);
  assert(lhs.IsNonTerminal());
}
示例8: InitSpecialWord
// Build a "special" vocabulary word from its string form.
//
// The word is created as an Input word with a single factor (index 0,
// holding the special word string) and with isNonTerminal = false.
// Input is an enum defined in ../typedef.h.
//
// Open questions carried over from the original author:
//  - word comparison can fail because the isNonTerminal argument given to
//    CreateFromString may not match that of properly created words;
//  - the special word is an Input word, but what about Output words?
//  - the Input/Output direction is currently not stored in class Word;
//    that may change in the future.
const Word Vocab::InitSpecialWord( const std::string& word_str)
{
  FactorList surfaceOnly;
  surfaceOnly.push_back(0); // the special word string is the first factor

  Word special;
  special.CreateFromString(Input, surfaceOnly, word_str, false);
  return special;
}
示例9: DecodeCollection
//.........这里部分代码省略.........
}
else if (type == 2)
{
size_t rank = DecodeREncSymbol2Rank(symbol);
size_t srcPos = DecodeREncSymbol2Position(symbol);
if(srcPos >= sourceWords.size())
return TargetPhraseVectorPtr();
wordString = GetTargetSymbol(GetTranslation(sourceWords[srcPos], rank));
if(m_phraseDictionary.m_useAlignmentInfo)
{
size_t trgPos = targetPhrase->GetSize();
alignment.insert(AlignPoint(srcPos, trgPos));
}
}
else if(type == 3)
{
size_t rank = DecodeREncSymbol3(symbol);
size_t srcPos = targetPhrase->GetSize();
if(srcPos >= sourceWords.size())
return TargetPhraseVectorPtr();
wordString = GetTargetSymbol(GetTranslation(sourceWords[srcPos], rank));
if(m_phraseDictionary.m_useAlignmentInfo)
{
size_t trgPos = srcPos;
alignment.insert(AlignPoint(srcPos, trgPos));
}
}
Word word;
word.CreateFromString(Output, *m_output, wordString, false);
targetPhrase->AddWord(word);
}
else if(m_coding == PREnc)
{
// if the symbol is just a word
if(GetPREncType(symbol) == 1)
{
unsigned decodedSymbol = DecodePREncSymbol1(symbol);
Word word;
word.CreateFromString(Output, *m_output,
GetTargetSymbol(decodedSymbol), false);
targetPhrase->AddWord(word);
}
// if the symbol is a subphrase pointer
else
{
int left = DecodePREncSymbol2Left(symbol);
int right = DecodePREncSymbol2Right(symbol);
unsigned rank = DecodePREncSymbol2Rank(symbol);
int srcStart = left + targetPhrase->GetSize();
int srcEnd = srcSize - right - 1;
// false positive consistency check
if(0 > srcStart || srcStart > srcEnd || unsigned(srcEnd) >= srcSize)
return TargetPhraseVectorPtr();
// false positive consistency check
if(m_maxRank && rank > m_maxRank)
return TargetPhraseVectorPtr();
示例10: Process
void ChartParserUnknown::Process(const Word &sourceWord, const WordsRange &range, ChartParserCallback &to)
{
// unknown word, add as trans opt
const StaticData &staticData = StaticData::Instance();
const UnknownWordPenaltyProducer &unknownWordPenaltyProducer = UnknownWordPenaltyProducer::Instance();
size_t isDigit = 0;
if (staticData.GetDropUnknown()) {
const Factor *f = sourceWord[0]; // TODO hack. shouldn't know which factor is surface
const StringPiece s = f->GetString();
isDigit = s.find_first_of("0123456789");
if (isDigit == string::npos)
isDigit = 0;
else
isDigit = 1;
// modify the starting bitmap
}
Phrase* unksrc = new Phrase(1);
unksrc->AddWord() = sourceWord;
Word &newWord = unksrc->GetWord(0);
newWord.SetIsOOV(true);
m_unksrcs.push_back(unksrc);
//TranslationOption *transOpt;
if (! staticData.GetDropUnknown() || isDigit) {
// loop
const UnknownLHSList &lhsList = staticData.GetUnknownLHS();
UnknownLHSList::const_iterator iterLHS;
for (iterLHS = lhsList.begin(); iterLHS != lhsList.end(); ++iterLHS) {
const string &targetLHSStr = iterLHS->first;
float prob = iterLHS->second;
// lhs
//const Word &sourceLHS = staticData.GetInputDefaultNonTerminal();
Word *targetLHS = new Word(true);
targetLHS->CreateFromString(Output, staticData.GetOutputFactorOrder(), targetLHSStr, true);
UTIL_THROW_IF2(targetLHS->GetFactor(0) == NULL, "Null factor for target LHS");
// add to dictionary
TargetPhrase *targetPhrase = new TargetPhrase();
Word &targetWord = targetPhrase->AddWord();
targetWord.CreateUnknownWord(sourceWord);
// scores
float unknownScore = FloorScore(TransformScore(prob));
targetPhrase->GetScoreBreakdown().Assign(&unknownWordPenaltyProducer, unknownScore);
targetPhrase->Evaluate(*unksrc);
targetPhrase->SetTargetLHS(targetLHS);
targetPhrase->SetAlignmentInfo("0-0");
if (staticData.IsDetailedTreeFragmentsTranslationReportingEnabled()) {
targetPhrase->SetProperty("Tree","[ " + (*targetLHS)[0]->GetString().as_string() + " "+sourceWord[0]->GetString().as_string()+" ]");
}
// chart rule
to.AddPhraseOOV(*targetPhrase, m_cacheTargetPhraseCollection, range);
} // for (iterLHS
} else {
// drop source word. create blank trans opt
float unknownScore = FloorScore(-numeric_limits<float>::infinity());
TargetPhrase *targetPhrase = new TargetPhrase();
// loop
const UnknownLHSList &lhsList = staticData.GetUnknownLHS();
UnknownLHSList::const_iterator iterLHS;
for (iterLHS = lhsList.begin(); iterLHS != lhsList.end(); ++iterLHS) {
const string &targetLHSStr = iterLHS->first;
//float prob = iterLHS->second;
Word *targetLHS = new Word(true);
targetLHS->CreateFromString(Output, staticData.GetOutputFactorOrder(), targetLHSStr, true);
UTIL_THROW_IF2(targetLHS->GetFactor(0) == NULL, "Null factor for target LHS");
targetPhrase->GetScoreBreakdown().Assign(&unknownWordPenaltyProducer, unknownScore);
targetPhrase->Evaluate(*unksrc);
targetPhrase->SetTargetLHS(targetLHS);
// chart rule
to.AddPhraseOOV(*targetPhrase, m_cacheTargetPhraseCollection, range);
}
}
}
示例11: ProcessAndStripXMLTags
//.........这里部分代码省略.........
}
// assemble remaining information about tag
size_t startPos = openedTag.second.first;
string tagContent = openedTag.second.second;
size_t endPos = wordPos;
// span attribute overwrites position
string span = ParseXmlTagAttribute(tagContent,"span");
if (! span.empty()) {
vector<string> ij = Tokenize(span, "-");
if (ij.size() != 1 && ij.size() != 2) {
TRACE_ERR("ERROR: span attribute must be of the form \"i-j\" or \"i\": " << line << endl);
return false;
}
startPos = atoi(ij[0].c_str());
if (ij.size() == 1) endPos = startPos + 1;
else endPos = atoi(ij[1].c_str()) + 1;
}
VERBOSE(3,"XML TAG " << tagName << " (" << tagContent << ") spanning " << startPos << " to " << (endPos-1) << " complete, commence processing" << endl);
if (startPos >= endPos) {
TRACE_ERR("ERROR: tag " << tagName << " must span at least one word: " << line << endl);
return false;
}
// may be either a input span label ("label"), or a specified output translation "translation"
string label = ParseXmlTagAttribute(tagContent,"label");
string translation = ParseXmlTagAttribute(tagContent,"translation");
// specified label
if (translation.length() == 0 && label.length() > 0) {
WordsRange range(startPos,endPos-1); // really?
XMLParseOutput item(label, range);
sourceLabels.push_back(item);
}
// specified translations -> vector of phrases, separated by "||"
if (translation.length() > 0 && StaticData::Instance().GetXmlInputType() != XmlIgnore) {
vector<string> altTexts = TokenizeMultiCharSeparator(translation, "||");
vector<string> altLabel = TokenizeMultiCharSeparator(label, "||");
vector<string> altProbs = TokenizeMultiCharSeparator(ParseXmlTagAttribute(tagContent,"prob"), "||");
//TRACE_ERR("number of translations: " << altTexts.size() << endl);
for (size_t i=0; i<altTexts.size(); ++i) {
// set target phrase
TargetPhrase targetPhrase;
targetPhrase.CreateFromString(Output, outputFactorOrder,altTexts[i],factorDelimiter, NULL);
// set constituent label
string targetLHSstr;
if (altLabel.size() > i && altLabel[i].size() > 0) {
targetLHSstr = altLabel[i];
} else {
const UnknownLHSList &lhsList = StaticData::Instance().GetUnknownLHS();
UnknownLHSList::const_iterator iterLHS = lhsList.begin();
targetLHSstr = iterLHS->first;
}
Word *targetLHS = new Word(true);
targetLHS->CreateFromString(Output, outputFactorOrder, targetLHSstr, true);
CHECK(targetLHS->GetFactor(0) != NULL);
targetPhrase.SetTargetLHS(targetLHS);
// not tested
Phrase sourcePhrase = this->GetSubString(WordsRange(startPos,endPos-1));
// get probability
float probValue = 1;
if (altProbs.size() > i && altProbs[i].size() > 0) {
probValue = Scan<float>(altProbs[i]);
}
// convert from prob to log-prob
float scoreValue = FloorScore(TransformScore(probValue));
targetPhrase.SetXMLScore(scoreValue);
targetPhrase.Evaluate(sourcePhrase);
// set span and create XmlOption
WordsRange range(startPos+1,endPos);
XmlOption *option = new XmlOption(range,targetPhrase);
CHECK(option);
xmlOptions.push_back(option);
VERBOSE(2,"xml translation = [" << range << "] " << targetLHSstr << " -> " << altTexts[i] << " prob: " << probValue << endl);
}
altTexts.clear();
altProbs.clear();
}
}
}
}
// we are done. check if there are tags that are still open
if (tagStack.size() > 0) {
TRACE_ERR("ERROR: some opened tags were never closed: " << line << endl);
return false;
}
// return de-xml'ed sentence in line
line = cleanLine;
return true;
}
示例12: Read
//! populate this InputType with data from in stream
int ForestInput::
Read(std::istream &in,
std::vector<FactorType> const& factorOrder,
AllOptions const& opts)
{
using Syntax::F2S::Forest;
m_forest = boost::make_shared<Forest>();
m_rootVertex = NULL;
m_vertexSet.clear();
std::string line;
if (std::getline(in, line, '\n').eof()) {
return 0;
}
// The first line contains the sentence number. We ignore this and skip
// straight to the second line, which contains the sentence string.
std::string sentence;
std::getline(in, sentence);
// If the next line is blank then there was a parse failure. Otherwise,
// the next line and any subsequent non-blank lines contain hyperedges.
std::getline(in, line);
if (line == "") {
// Parse failure. We treat this as an empty sentence.
sentence = "";
// The next line will be blank too.
std::getline(in, line);
} else {
do {
ParseHyperedgeLine(line, factorOrder);
std::getline(in, line);
} while (line != "");
}
// Do base class Read().
// TODO Check if this is actually necessary. TreeInput does it, but I'm
// not sure ForestInput needs to.
std::stringstream strme;
strme << "<s> " << sentence << " </s>" << std::endl;
Sentence::Read(strme, factorOrder, opts);
// Find the maximum end position of any vertex (0 if forest is empty).
std::size_t maxEnd = FindMaxEnd(*m_forest);
// Determine which vertices are the top vertices.
std::vector<Forest::Vertex *> topVertices;
if (!m_forest->vertices.empty()) {
FindTopVertices(*m_forest, topVertices);
assert(topVertices.size() >= 1);
}
// Add <s> vertex.
Forest::Vertex *startSymbol = NULL;
{
Word symbol;
symbol.CreateFromString(Input, factorOrder, "<s>", false);
Syntax::PVertex pvertex(Range(0, 0), symbol);
startSymbol = new Forest::Vertex(pvertex);
m_forest->vertices.push_back(startSymbol);
}
// Add </s> vertex.
Forest::Vertex *endSymbol = NULL;
{
Word symbol;
symbol.CreateFromString(Input, factorOrder, "</s>", false);
Syntax::PVertex pvertex(Range(maxEnd+1, maxEnd+1), symbol);
endSymbol = new Forest::Vertex(pvertex);
m_forest->vertices.push_back(endSymbol);
}
// Add root vertex.
{
Word symbol;
symbol.CreateFromString(Input, factorOrder, "Q", true);
Syntax::PVertex pvertex(Range(0, maxEnd+1), symbol);
m_rootVertex = new Forest::Vertex(pvertex);
m_forest->vertices.push_back(m_rootVertex);
}
// Add root's incoming hyperedges.
if (topVertices.empty()) {
Forest::Hyperedge *e = new Forest::Hyperedge();
e->head = m_rootVertex;
e->tail.push_back(startSymbol);
e->tail.push_back(endSymbol);
m_rootVertex->incoming.push_back(e);
} else {
// Add a hyperedge between [Q] and each top vertex.
for (std::vector<Forest::Vertex *>::const_iterator
p = topVertices.begin(); p != topVertices.end(); ++p) {
Forest::Hyperedge *e = new Forest::Hyperedge();
e->head = m_rootVertex;
e->tail.push_back(startSymbol);
e->tail.push_back(*p);
e->tail.push_back(endSymbol);
m_rootVertex->incoming.push_back(e);
//.........这里部分代码省略.........