当前位置: 首页>>代码示例>>C++>>正文


C++ Word::CreateFromString方法代码示例

本文整理汇总了C++中Word::CreateFromString方法的典型用法代码示例。如果您正苦于以下问题:C++ Word::CreateFromString方法的具体用法?C++ Word::CreateFromString怎么用?C++ Word::CreateFromString使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在Word的用法示例。


在下文中一共展示了Word::CreateFromString方法的12个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。

示例1: span

/// Builds a forest vertex from its textual form.
///
/// Two shapes are recognised, distinguished by the presence of a '[':
///  - no '[' at all: the whole string becomes the symbol (created with
///    isNonTerminal = false) and the span is a placeholder (0,0) that the
///    caller is expected to fill in;
///  - "LABEL[start,end]": the text before the last '[' becomes the symbol
///    (created with isNonTerminal = true) and the two numbers give the span.
///
/// @param s            textual vertex description
/// @param factorOrder  factor order forwarded to Word::CreateFromString
/// @return a vertex allocated with new (presumably owned by the caller --
///         TODO confirm against call sites)
///
/// NOTE(review): the numeric fields are read with atoi and no validation, so
/// malformed input is not reported here.
Syntax::F2S::Forest::Vertex *ForestInput::ParseVertex(
  const StringPiece &s, const std::vector<FactorType>& factorOrder)
{
  using Syntax::F2S::Forest;

  Word symbol;
  const std::size_t bracketPos = s.rfind('[');

  if (bracketPos != std::string::npos) {
    symbol.CreateFromString(Input, factorOrder, s.substr(0, bracketPos), true);

    // Locate the "start,end" pair that follows the bracket.
    const std::size_t numberPos = bracketPos + 1;
    const std::size_t commaPos = s.find(',', numberPos + 1);

    std::string buffer;
    s.substr(numberPos, commaPos - numberPos).CopyToString(&buffer);
    const std::size_t start = std::atoi(buffer.c_str());

    // The trailing -2 skips the comma itself and the closing bracket.
    s.substr(commaPos + 1, s.size() - commaPos - 2).CopyToString(&buffer);
    const std::size_t end = std::atoi(buffer.c_str());

    // Shift both ends by one so that position 0 stays free for <s>.
    Range span(start + 1, end + 1);
    return new Forest::Vertex(Syntax::PVertex(span, symbol));
  }

  symbol.CreateFromString(Input, factorOrder, s, false);
  // Placeholder span: the caller will fill in the real one.
  Range span(0, 0);
  return new Forest::Vertex(Syntax::PVertex(span, symbol));
}
开发者ID:hschreib,项目名称:mosesdecoder,代码行数:25,代码来源:ForestInput.cpp

示例2: Tokenize

// Turns one textual token into on-disk Word objects and appends them to
// `phrase`.
//
//  - A token that is not wrapped in '[' ... ']' is a terminal: one word is
//    created from the whole token.
//  - A bracketed token with no further '[' from index 2 onwards is a lone
//    label (the lhs): one word is created from the whole token.
//  - Otherwise the token is split at that second '[' into a source part and
//    a target part; each part is added only if the matching flag is set.
//
// Every word is allocated with new and passed to phrase.AddWord().
// NOTE(review): presumably the phrase takes ownership -- confirm.
void Tokenize(OnDiskPt::Phrase &phrase
							, const std::string &token, bool addSourceNonTerm, bool addTargetNonTerm
							, OnDiskPt::OnDiskWrapper &onDiskWrapper)
{
	const size_t length = token.size();

	// The closing-bracket test is only evaluated when the token starts with
	// '[', which also guarantees the token is non-empty at that point.
	const bool isNonTerm = token.compare(0, 1, "[") == 0
			&& token.compare(length - 1, 1, "]") == 0;

	if (!isNonTerm)
	{ // term
		Word *terminal = new Word();
		terminal->CreateFromString(token, onDiskWrapper.GetVocab());
		phrase.AddWord(terminal);
		return;
	}

	// non-term: look for the start of a second bracketed label
	const size_t secondBracket = token.find_first_of("[", 2);
	string label = token.substr(0, secondBracket);

	if (secondBracket == string::npos)
	{ // lhs - only 1 word
		Word *lhs = new Word();
		lhs->CreateFromString(label, onDiskWrapper.GetVocab());
		phrase.AddWord(lhs);
		return;
	}

	// source & target non-terms
	if (addSourceNonTerm)
	{
		Word *sourceLabel = new Word();
		sourceLabel->CreateFromString(label, onDiskWrapper.GetVocab());
		phrase.AddWord(sourceLabel);
	}

	label = token.substr(secondBracket, length - secondBracket);
	if (addTargetNonTerm)
	{
		Word *targetLabel = new Word();
		targetLabel->CreateFromString(label, onDiskWrapper.GetVocab());
		phrase.AddWord(targetLabel);
	}
}
开发者ID:obo,项目名称:Moses-Extensions-at-UFAL,代码行数:52,代码来源:Main.cpp

示例3: Lookup

/// Fills in the phrase-table lookup slot of this feature (m_ptId) for every
/// input path in the queue.
///
/// For a source phrase of exactly one word a single-word target phrase
/// "UNK:<word>:UNK" is created, scored with LOWEST_SCORE for this feature,
/// evaluated, wrapped in a new TargetPhrases collection (which is also
/// recorded in m_targetPhrases) and stored in the path's lookup slot.
/// For any other source length the slot is set to (NULL, NULL).
///
/// @param inputPathQueue  paths to process; each element is dereferenced, so
///                        none may be NULL
void UnknownWordPenalty::Lookup(const std::vector<InputPath*> &inputPathQueue)
{
  // Scratch accumulator required by FeatureFunction::Evaluate. Nothing in
  // this function reads it afterwards, so it is released before returning
  // (it was previously leaked on every call).
  Scores *estimatedFutureScore = new Scores();

  for (size_t i = 0; i < inputPathQueue.size(); ++i) {
    InputPath &path = *inputPathQueue[i];
    PhraseTableLookup &ptLookup = path.GetPtLookup(m_ptId);

    const Phrase &source = path.GetPhrase();
    if (source.GetSize() == 1) {
      const Word &sourceWord = source.GetWord(0);
      string str = sourceWord.ToString();
      str = "UNK:" + str + ":UNK";

      Word targetWord;
      targetWord.CreateFromString(str);

      TargetPhrase *tp = new TargetPhrase(1);
      tp->Set(0, targetWord);
      tp->GetScores().Add(*this, LOWEST_SCORE);

      FeatureFunction::Evaluate(source, *tp, *estimatedFutureScore);

      // Record the collection in m_targetPhrases before handing it to the
      // lookup slot.
      TargetPhrases *tpColl = new TargetPhrases();
      m_targetPhrases.push_back(tpColl);
      tpColl->Add(tp);

      ptLookup.Set(tpColl, NULL);
    } else {
      // Multi-word (or empty) source phrases get no unknown-word entry.
      ptLookup.Set(NULL, NULL);
    }
  }

  delete estimatedFutureScore;
}
开发者ID:arvs,项目名称:mosesdecoder,代码行数:33,代码来源:UnknownWordPenalty.cpp

示例4: Load

/// Loads the word <-> id mapping from a vocabulary stream, replacing any
/// mapping currently held.
///
/// Expected layout: the first line holds the number of entries; each
/// following line holds a word, optionally followed by its numeric id.
/// When a line carries no id (plain word list) and the word is not the OOV
/// word, ids are assigned sequentially starting from 1.
///
/// @param vcbin      stream to read from
/// @param direction  factor direction forwarded to Word::CreateFromString
/// @param factors    factor list forwarded to Word::CreateFromString
/// @param closed     stored in m_closed after loading
/// @return always true; failures are reported through UTIL_THROW_IF2
bool Vocab::Load(FileHandler* vcbin, const FactorDirection& direction,
                 const FactorList& factors, bool closed)
{
  // load vocab id -> word mapping
  m_words2ids.clear();	// reset mapping
  m_ids2words.clear();
  std::string line, word_str;
  wordID_t id;

  std::istream &ret = getline(*vcbin, line);
  UTIL_THROW_IF2(!ret, "Couldn't read file");
  std::istringstream first(line.c_str());
  uint32_t vcbsize(0);
  first >> vcbsize;
  uint32_t loadedsize = 0;
  while (loadedsize++ < vcbsize && getline(*vcbin, line)) {
    std::istringstream entry(line.c_str());
    entry >> word_str;
    Word word;
    word.CreateFromString( direction, factors, word_str, false); // TODO set correctly isNonTerminal
    // Reset before extraction: when the line holds only a word the stream is
    // already at EOF, the extraction fails without writing to `id`, and the
    // check below would otherwise see an uninitialised or stale value.
    id = 0;
    entry >> id;
    // may be no id (i.e. file may just be a word list)
    if (id == 0 && word != GetkOOVWord())
      id = m_ids2words.size() + 1;	// assign ids sequentially starting from 1
    // Both the id and the word must be new to the mapping.
    UTIL_THROW_IF2(m_ids2words.count(id) != 0 || m_words2ids.count(word) != 0,
                   "Error");

    m_ids2words[id] = word;
    m_words2ids[word] = id;
  }
  m_closed = closed;	// once loaded fix vocab ?
  std::cerr << "Loaded vocab with " << m_ids2words.size() << " words." << std::endl;
  return true;
}
开发者ID:sleepcry,项目名称:mosesdecoder,代码行数:34,代码来源:vocab.cpp

示例5: GetWordID

	// Convenience overload: builds an Input-direction, non-terminal=false word
	// from `word_str` using only factor 0, then returns the id that the
	// Word-based GetWordID overload reports for it.
	wordID_t Vocab::GetWordID(const std::string& word_str) {
		FactorList singleFactor;
		singleFactor.push_back(0);

		Word lookupWord;
		lookupWord.CreateFromString(Input, singleFactor, word_str, false);

		return GetWordID(lookupWord);
	}
开发者ID:poetzhangzi,项目名称:test,代码行数:7,代码来源:vocab.cpp

示例6: GetWordID

// Returns the wordID_t for a word given in its (factored) string form.
// The string is first turned into a Word with the supplied direction,
// factor list and non-terminal flag; the actual lookup is delegated to the
// Word-based GetWordID overload.
wordID_t Vocab::GetWordID(const std::string& word_str,
                          const FactorDirection& direction, const FactorList& factors, bool isNonTerminal)
{
  Word factoredWord;
  factoredWord.CreateFromString(direction, factors, word_str, isNonTerminal);

  return GetWordID(factoredWord);
}
开发者ID:sleepcry,项目名称:mosesdecoder,代码行数:9,代码来源:vocab.cpp

示例7: CreateFromStringNewFormat

// Parses one side of a rule written in the "new format" and appends its
// words to this phrase.
//
// The string is split into tokens. Every token except the last is a
// right-hand-side symbol; the last token is the bracketed left-hand side
// and is written to `lhs` instead of being added to the phrase.
//
// A right-hand-side token wrapped in '[' ... ']' is a non-terminal that
// carries two bracketed labels: the first is used when direction == Input,
// the second otherwise. Each non-terminal increments m_arity.
//
// direction        which of the two labels a non-terminal token contributes
// factorOrder      forwarded to Word::CreateFromString
// phraseString     text to parse; must contain at least the lhs token
// factorDelimiter  not used by this function
// lhs              receives the left-hand-side non-terminal
void Phrase::CreateFromStringNewFormat(FactorDirection direction
																			 , const std::vector<FactorType> &factorOrder
																			 , const std::string &phraseString
																			 , const std::string &factorDelimiter
																			 , Word &lhs)
{
	m_arity = 0;
	
	// parse
	vector<string> annotatedWordVector;
	Tokenize(annotatedWordVector, phraseString);
	// KOMMA|none ART|Def.Z NN|Neut.NotGen.Sg VVFIN|none 
	//		to
	// "KOMMA|none" "ART|Def.Z" "NN|Neut.NotGen.Sg" "VVFIN|none"
	
	// With no tokens at all, size() - 1 below would wrap around (unsigned)
	// and back() would be undefined behaviour. Treat it as a caller error in
	// debug builds and as a no-op otherwise.
	assert(!annotatedWordVector.empty());
	if (annotatedWordVector.empty())
		return;
	
	// The last token is the lhs and is handled after the loop.
	const size_t numRhsWords = annotatedWordVector.size() - 1;
	
	for (size_t phrasePos = 0 ; phrasePos < numRhsWords ; phrasePos++)
	{
		string &annotatedWord = annotatedWordVector[phrasePos];
		bool isNonTerminal;
		if (annotatedWord.substr(0, 1) == "[" && annotatedWord.substr(annotatedWord.size()-1, 1) == "]")
		{ // non-term
			isNonTerminal = true;
			
			// start of the second bracketed label
			size_t nextPos = annotatedWord.find("[", 1);
			assert(nextPos != string::npos);
			
			// keep only the label that belongs to the requested direction,
			// without its surrounding brackets
			if (direction == Input)
				annotatedWord = annotatedWord.substr(1, nextPos - 2);
			else
				annotatedWord = annotatedWord.substr(nextPos + 1, annotatedWord.size() - nextPos - 2);
			
			m_arity++;
		}
		else
		{
			isNonTerminal = false;
		}
		
		Word &word = AddWord();
		word.CreateFromString(direction, factorOrder, annotatedWord, isNonTerminal);		
		
	}
	
	// lhs
	string &annotatedWord = annotatedWordVector.back();
	assert(annotatedWord.substr(0, 1) == "[" && annotatedWord.substr(annotatedWord.size()-1, 1) == "]");
	// strip the surrounding brackets
	annotatedWord = annotatedWord.substr(1, annotatedWord.size() - 2);
	
	lhs.CreateFromString(direction, factorOrder, annotatedWord, true);		
	assert(lhs.IsNonTerminal());
}
开发者ID:poetzhangzi,项目名称:test,代码行数:51,代码来源:Phrase.cpp

示例8: InitSpecialWord

// Builds a "special" vocabulary word from its string form.
// The word is created as an Input-direction word with a single factor
// (factor 0 holds the string) and isNonTerminal = false.
const Word Vocab::InitSpecialWord( const std::string& word_str)
{
  // the special word string is stored as the first (and only) factor
  FactorList surfaceOnly;
  surfaceOnly.push_back(0);

  Word special;
  special.CreateFromString( Input, surfaceOnly, word_str, false ); // Input is enum defined in ../typedef.h

  // TODO not sure if this will work properly:
  // 	- comparing against words created elsewhere can fail when their
  // 		isNonTerminal argument to CreateFromString differs from the one here
  // 	- the special word is an Input word; what about Output words?
  // 		- the Input/Output direction is currently not stored in class Word,
  // 		  but that may change in the future
  return special;
}
开发者ID:sleepcry,项目名称:mosesdecoder,代码行数:14,代码来源:vocab.cpp

示例9: DecodeCollection


//.........这里部分代码省略.........
          }
          else if (type == 2)
          {
            size_t rank = DecodeREncSymbol2Rank(symbol);
            size_t srcPos = DecodeREncSymbol2Position(symbol);
            
            if(srcPos >= sourceWords.size())
              return TargetPhraseVectorPtr();  
            
            wordString = GetTargetSymbol(GetTranslation(sourceWords[srcPos], rank));
            if(m_phraseDictionary.m_useAlignmentInfo)
            {
              size_t trgPos = targetPhrase->GetSize();
              alignment.insert(AlignPoint(srcPos, trgPos));
            }
          }
          else if(type == 3)
          {
            size_t rank = DecodeREncSymbol3(symbol);
            size_t srcPos = targetPhrase->GetSize();
            
            if(srcPos >= sourceWords.size())
              return TargetPhraseVectorPtr();  
                            
            wordString = GetTargetSymbol(GetTranslation(sourceWords[srcPos], rank));   
            if(m_phraseDictionary.m_useAlignmentInfo)
            {
              size_t trgPos = srcPos;
              alignment.insert(AlignPoint(srcPos, trgPos));
            }
          }
          
          Word word;
          word.CreateFromString(Output, *m_output, wordString, false);
          targetPhrase->AddWord(word);
        }
        else if(m_coding == PREnc)
        {
          // if the symbol is just a word
          if(GetPREncType(symbol) == 1)
          {
            unsigned decodedSymbol = DecodePREncSymbol1(symbol);
     
            Word word;
            word.CreateFromString(Output, *m_output,
                                  GetTargetSymbol(decodedSymbol), false);
            targetPhrase->AddWord(word);
          }
          // if the symbol is a subphrase pointer
          else
          {
            int left = DecodePREncSymbol2Left(symbol);
            int right = DecodePREncSymbol2Right(symbol);
            unsigned rank = DecodePREncSymbol2Rank(symbol);
            
            int srcStart = left + targetPhrase->GetSize();
            int srcEnd   = srcSize - right - 1;
            
            // false positive consistency check
            if(0 > srcStart || srcStart > srcEnd || unsigned(srcEnd) >= srcSize)
              return TargetPhraseVectorPtr();
            
            // false positive consistency check
            if(m_maxRank && rank > m_maxRank)
                return TargetPhraseVectorPtr();
            
开发者ID:Avmb,项目名称:mosesdecoder,代码行数:66,代码来源:PhraseDecoder.cpp

示例10: Process

void ChartParserUnknown::Process(const Word &sourceWord, const WordsRange &range, ChartParserCallback &to)
{
  // unknown word, add as trans opt
  const StaticData &staticData = StaticData::Instance();
  const UnknownWordPenaltyProducer &unknownWordPenaltyProducer = UnknownWordPenaltyProducer::Instance();

  size_t isDigit = 0;
  if (staticData.GetDropUnknown()) {
    const Factor *f = sourceWord[0]; // TODO hack. shouldn't know which factor is surface
    const StringPiece s = f->GetString();
    isDigit = s.find_first_of("0123456789");
    if (isDigit == string::npos)
      isDigit = 0;
    else
      isDigit = 1;
    // modify the starting bitmap
  }

  Phrase* unksrc = new Phrase(1);
  unksrc->AddWord() = sourceWord;
  Word &newWord = unksrc->GetWord(0);
  newWord.SetIsOOV(true);

  m_unksrcs.push_back(unksrc);

  //TranslationOption *transOpt;
  if (! staticData.GetDropUnknown() || isDigit) {
    // loop
    const UnknownLHSList &lhsList = staticData.GetUnknownLHS();
    UnknownLHSList::const_iterator iterLHS;
    for (iterLHS = lhsList.begin(); iterLHS != lhsList.end(); ++iterLHS) {
      const string &targetLHSStr = iterLHS->first;
      float prob = iterLHS->second;

      // lhs
      //const Word &sourceLHS = staticData.GetInputDefaultNonTerminal();
      Word *targetLHS = new Word(true);

      targetLHS->CreateFromString(Output, staticData.GetOutputFactorOrder(), targetLHSStr, true);
      UTIL_THROW_IF2(targetLHS->GetFactor(0) == NULL, "Null factor for target LHS");

      // add to dictionary
      TargetPhrase *targetPhrase = new TargetPhrase();
      Word &targetWord = targetPhrase->AddWord();
      targetWord.CreateUnknownWord(sourceWord);

      // scores
      float unknownScore = FloorScore(TransformScore(prob));

      targetPhrase->GetScoreBreakdown().Assign(&unknownWordPenaltyProducer, unknownScore);
      targetPhrase->Evaluate(*unksrc);

      targetPhrase->SetTargetLHS(targetLHS);
      targetPhrase->SetAlignmentInfo("0-0");
      if (staticData.IsDetailedTreeFragmentsTranslationReportingEnabled()) {
        targetPhrase->SetProperty("Tree","[ " + (*targetLHS)[0]->GetString().as_string() + " "+sourceWord[0]->GetString().as_string()+" ]");
      }

      // chart rule
      to.AddPhraseOOV(*targetPhrase, m_cacheTargetPhraseCollection, range);
    } // for (iterLHS
  } else {
    // drop source word. create blank trans opt
    float unknownScore = FloorScore(-numeric_limits<float>::infinity());

    TargetPhrase *targetPhrase = new TargetPhrase();
    // loop
    const UnknownLHSList &lhsList = staticData.GetUnknownLHS();
    UnknownLHSList::const_iterator iterLHS;
    for (iterLHS = lhsList.begin(); iterLHS != lhsList.end(); ++iterLHS) {
      const string &targetLHSStr = iterLHS->first;
      //float prob = iterLHS->second;

      Word *targetLHS = new Word(true);
      targetLHS->CreateFromString(Output, staticData.GetOutputFactorOrder(), targetLHSStr, true);
      UTIL_THROW_IF2(targetLHS->GetFactor(0) == NULL, "Null factor for target LHS");

      targetPhrase->GetScoreBreakdown().Assign(&unknownWordPenaltyProducer, unknownScore);
      targetPhrase->Evaluate(*unksrc);

      targetPhrase->SetTargetLHS(targetLHS);

      // chart rule
      to.AddPhraseOOV(*targetPhrase, m_cacheTargetPhraseCollection, range);
    }
  }
}
开发者ID:BinaryBlob,项目名称:mosesdecoder,代码行数:87,代码来源:ChartParser.cpp

示例11: ProcessAndStripXMLTags


//.........这里部分代码省略.........
        }

        // assemble remaining information about tag
        size_t startPos = openedTag.second.first;
        string tagContent = openedTag.second.second;
        size_t endPos = wordPos;

        // span attribute overwrites position
        string span = ParseXmlTagAttribute(tagContent,"span");
        if (! span.empty()) {
          vector<string> ij = Tokenize(span, "-");
          if (ij.size() != 1 && ij.size() != 2) {
            TRACE_ERR("ERROR: span attribute must be of the form \"i-j\" or \"i\": " << line << endl);
            return false;
          }
          startPos = atoi(ij[0].c_str());
          if (ij.size() == 1) endPos = startPos + 1;
          else endPos = atoi(ij[1].c_str()) + 1;
        }

        VERBOSE(3,"XML TAG " << tagName << " (" << tagContent << ") spanning " << startPos << " to " << (endPos-1) << " complete, commence processing" << endl);

        if (startPos >= endPos) {
          TRACE_ERR("ERROR: tag " << tagName << " must span at least one word: " << line << endl);
          return false;
        }

        // may be either a input span label ("label"), or a specified output translation "translation"
        string label = ParseXmlTagAttribute(tagContent,"label");
        string translation = ParseXmlTagAttribute(tagContent,"translation");

        // specified label
        if (translation.length() == 0 && label.length() > 0) {
          WordsRange range(startPos,endPos-1); // really?
          XMLParseOutput item(label, range);
          sourceLabels.push_back(item);
        }

        // specified translations -> vector of phrases, separated by "||"
        if (translation.length() > 0 && StaticData::Instance().GetXmlInputType() != XmlIgnore) {
          vector<string> altTexts = TokenizeMultiCharSeparator(translation, "||");
          vector<string> altLabel = TokenizeMultiCharSeparator(label, "||");
          vector<string> altProbs = TokenizeMultiCharSeparator(ParseXmlTagAttribute(tagContent,"prob"), "||");
          //TRACE_ERR("number of translations: " << altTexts.size() << endl);
          for (size_t i=0; i<altTexts.size(); ++i) {
            // set target phrase
            TargetPhrase targetPhrase;
            targetPhrase.CreateFromString(Output, outputFactorOrder,altTexts[i],factorDelimiter, NULL);

            // set constituent label
            string targetLHSstr;
            if (altLabel.size() > i && altLabel[i].size() > 0) {
              targetLHSstr = altLabel[i];
            } else {
              const UnknownLHSList &lhsList = StaticData::Instance().GetUnknownLHS();
              UnknownLHSList::const_iterator iterLHS = lhsList.begin();
              targetLHSstr = iterLHS->first;
            }
            Word *targetLHS = new Word(true);
            targetLHS->CreateFromString(Output, outputFactorOrder, targetLHSstr, true);
            CHECK(targetLHS->GetFactor(0) != NULL);
            targetPhrase.SetTargetLHS(targetLHS);

            // not tested
            Phrase sourcePhrase = this->GetSubString(WordsRange(startPos,endPos-1));

            // get probability
            float probValue = 1;
            if (altProbs.size() > i && altProbs[i].size() > 0) {
              probValue = Scan<float>(altProbs[i]);
            }
            // convert from prob to log-prob
            float scoreValue = FloorScore(TransformScore(probValue));
            targetPhrase.SetXMLScore(scoreValue);
            targetPhrase.Evaluate(sourcePhrase);

            // set span and create XmlOption
            WordsRange range(startPos+1,endPos);
            XmlOption *option = new XmlOption(range,targetPhrase);
            CHECK(option);
            xmlOptions.push_back(option);

            VERBOSE(2,"xml translation = [" << range << "] " << targetLHSstr << " -> " << altTexts[i] << " prob: " << probValue << endl);
          }
          altTexts.clear();
          altProbs.clear();
        }
      }
    }
  }
  // we are done. check if there are tags that are still open
  if (tagStack.size() > 0) {
    TRACE_ERR("ERROR: some opened tags were never closed: " << line << endl);
    return false;
  }

  // return de-xml'ed sentence in line
  line = cleanLine;
  return true;
}
开发者ID:akartbayev,项目名称:mosesdecoder,代码行数:101,代码来源:TreeInput.cpp

示例12: pvertex

//! populate this InputType with data from in stream
int ForestInput::
Read(std::istream &in,
     std::vector<FactorType> const& factorOrder,
     AllOptions const& opts)
{
  using Syntax::F2S::Forest;

  m_forest = boost::make_shared<Forest>();
  m_rootVertex = NULL;
  m_vertexSet.clear();

  std::string line;
  if (std::getline(in, line, '\n').eof()) {
    return 0;
  }

  // The first line contains the sentence number.  We ignore this and skip
  // straight to the second line, which contains the sentence string.
  std::string sentence;
  std::getline(in, sentence);

  // If the next line is blank then there was a parse failure.  Otherwise,
  // the next line and any subsequent non-blank lines contain hyperedges.
  std::getline(in, line);
  if (line == "") {
    // Parse failure.  We treat this as an empty sentence.
    sentence = "";
    // The next line will be blank too.
    std::getline(in, line);
  } else {
    do {
      ParseHyperedgeLine(line, factorOrder);
      std::getline(in, line);
    } while (line != "");
  }

  // Do base class Read().
  // TODO Check if this is actually necessary.  TreeInput does it, but I'm
  // not sure ForestInput needs to.
  std::stringstream strme;
  strme << "<s> " << sentence << " </s>" << std::endl;
  Sentence::Read(strme, factorOrder, opts);

  // Find the maximum end position of any vertex (0 if forest is empty).
  std::size_t maxEnd = FindMaxEnd(*m_forest);

  // Determine which vertices are the top vertices.
  std::vector<Forest::Vertex *> topVertices;
  if (!m_forest->vertices.empty()) {
    FindTopVertices(*m_forest, topVertices);
    assert(topVertices.size() >= 1);
  }

  // Add <s> vertex.
  Forest::Vertex *startSymbol = NULL;
  {
    Word symbol;
    symbol.CreateFromString(Input, factorOrder, "<s>", false);
    Syntax::PVertex pvertex(Range(0, 0), symbol);
    startSymbol = new Forest::Vertex(pvertex);
    m_forest->vertices.push_back(startSymbol);
  }

  // Add </s> vertex.
  Forest::Vertex *endSymbol = NULL;
  {
    Word symbol;
    symbol.CreateFromString(Input, factorOrder, "</s>", false);
    Syntax::PVertex pvertex(Range(maxEnd+1, maxEnd+1), symbol);
    endSymbol = new Forest::Vertex(pvertex);
    m_forest->vertices.push_back(endSymbol);
  }

  // Add root vertex.
  {
    Word symbol;
    symbol.CreateFromString(Input, factorOrder, "Q", true);
    Syntax::PVertex pvertex(Range(0, maxEnd+1), symbol);
    m_rootVertex = new Forest::Vertex(pvertex);
    m_forest->vertices.push_back(m_rootVertex);
  }

  // Add root's incoming hyperedges.
  if (topVertices.empty()) {
    Forest::Hyperedge *e = new Forest::Hyperedge();
    e->head = m_rootVertex;
    e->tail.push_back(startSymbol);
    e->tail.push_back(endSymbol);
    m_rootVertex->incoming.push_back(e);
  } else {
    // Add a hyperedge between [Q] and each top vertex.
    for (std::vector<Forest::Vertex *>::const_iterator
         p = topVertices.begin(); p != topVertices.end(); ++p) {
      Forest::Hyperedge *e = new Forest::Hyperedge();
      e->head = m_rootVertex;
      e->tail.push_back(startSymbol);
      e->tail.push_back(*p);
      e->tail.push_back(endSymbol);
      m_rootVertex->incoming.push_back(e);
//.........这里部分代码省略.........
开发者ID:hschreib,项目名称:mosesdecoder,代码行数:101,代码来源:ForestInput.cpp


注:本文中的Word::CreateFromString方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。