当前位置: 首页>>代码示例>>C++>>正文


C++ Sentence::clear方法代码示例

本文整理汇总了C++中Sentence::clear方法的典型用法代码示例。如果您正苦于以下问题:C++ Sentence::clear方法的具体用法?C++ Sentence::clear怎么用?C++ Sentence::clear使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在Sentence的用法示例。


在下文中一共展示了Sentence::clear方法的3个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。

示例1: readCorpus

void readCorpus(const string &fn, SentenceCollection &sc) {
  ifstream f(fn.c_str());
  if (!f.is_open()) {
    cerr << "can't open \"" << fn << endl;
    throw;
  }

  string s;
  Sentence sent;
  while (getline(f, s)) {
    //cerr << "reading line \"" << s << "\"" << endl;
    if ("sent" == s) {
      Token t("SentBegin", makeVariants("SBEG"));
      sent.push_back(t);
      continue;
    } else if ("/sent" == s) {
      Token t("SentEnd", makeVariants("SEND"));
      sent.push_back(t);
      sc.push_back(sent);
      sent.clear();
      continue;
    } else {
      vector<string> fields;
      split(s, '\t', fields);
      if (fields.size() < 2) 
        continue;      

      int id;
      string word;

      stringstream ss(s);
      ss >> id >> word;

      set<MorphInterp> variants;
      for (size_t i = 2; i < fields.size(); i++) {
        if (0 == fields[i].size())
          continue;

        stringstream ss(fields[i]);
        unsigned int lemmaId;
        string lemma;
        ss >> lemmaId >> lemma;
        
        string sgrm;
        string t;
        while (ss >> t) { 
          if (sgrm.size() > 0) sgrm += " ";
          sgrm += t; 
        }

        MorphInterp ts(lemmaId, sgrm); 
        if (0 == ts.size()) {
          cerr << "\"" << s << "\" - \"" << sgrm << "\"" << sgrm.size() << endl;
          throw;
        }
        variants.insert(ts);
      }

      Token t(word, variants);
      sent.push_back(t, id);
    }
  }

  if (sent.size() > 0) 
    sc.push_back(sent);
}
开发者ID:OpenCorpora,项目名称:opencorpora,代码行数:66,代码来源:corpora_io.cpp

示例2: test1

    void test1()
    {
        Grammar::Rules rules;
        Grammar::Symbols symbols;

        const SymbolValue LETTER = 0;
        const SymbolValue WORD = 1;
        const SymbolValue SPACES = 3;
        const SymbolValue PHRASE = 4;
        const SymbolValue ORWORD = 5;
        const SymbolValue SELECT = 6;

        Symbol letter(NONTERMINAL, LETTER);
        Symbol word(NONTERMINAL, WORD);
        Symbol spaces(NONTERMINAL, SPACES);
        Symbol phrase(NONTERMINAL, PHRASE);
        Symbol orword(NONTERMINAL, ORWORD);
        Symbol select(NONTERMINAL, SELECT);

        symbols.push_back(letter);
        symbols.push_back(word);
        symbols.push_back(spaces);
        symbols.push_back(Symbol('a'));
        symbols.push_back(Symbol('b'));
        symbols.push_back(Symbol('c'));
        symbols.push_back(Symbol('d'));

        rules.push_back( Rule(letter, sentence("a")) );
        rules.push_back( Rule(letter, sentence("b")) );
        rules.push_back( Rule(letter, sentence("c")) );
        rules.push_back( Rule(letter, sentence("d")) );
        rules.push_back( Rule(letter, sentence("r")) );
        rules.push_back( Rule(letter, sentence("o")) );
        rules.push_back( Rule(spaces, sentence(" ")) );

        Sentence sentence;
        sentence.push_back(spaces);
        sentence.push_back(Symbol(' '));
        rules.push_back( Rule(spaces, sentence) );
        sentence.clear();

        sentence.push_back(letter);
        rules.push_back( Rule(word, sentence) );
        sentence.clear();

        sentence.push_back(word);
        sentence.push_back(letter);
        rules.push_back( Rule(word, sentence) );
        sentence.clear();

        sentence.push_back(spaces);
        sentence.push_back(Symbol('o'));
        sentence.push_back(Symbol('r'));
        sentence.push_back(spaces);
        rules.push_back( Rule(orword, sentence) );
        sentence.clear();

        sentence.push_back(word);
        sentence.push_back(orword);
        sentence.push_back(word);
        rules.push_back( Rule(select, sentence) );
        sentence.clear();

        sentence.push_back(select);
        rules.push_back( Rule(phrase, sentence) );
        sentence.clear();

        Parser parser(Grammar(symbols, rules, phrase));

        CPPUNIT_ASSERT(parser.parse("abbb or bc"));
        CPPUNIT_ASSERT(parser.parse("abbb  or  bc"));
        CPPUNIT_ASSERT(parser.parse("or or cdddddd"));
        CPPUNIT_ASSERT(parser.parse("cdddddd or or"));
        CPPUNIT_ASSERT(!parser.parse("aa or"));
        CPPUNIT_ASSERT(!parser.parse("or aa"));
        CPPUNIT_ASSERT(!parser.parse("abbb or bc or"));
    }
开发者ID:xffox,项目名称:piclex,代码行数:77,代码来源:ParserTest.cpp

示例3: tokenize

/**
 * Splits the file at filePath_ into sentences and rebuilds the vocabulary.
 *
 * Words are read whitespace-separated and lower-cased. A word equal to
 * "Book" causes the rest of its line to be skipped. A word whose last
 * character is '.', ';' or '?' ends the current sentence. Words ending in a
 * digit are dropped. Any other trailing non-letter character is stripped
 * before the word is stored.
 *
 * After reading, only the voc_size_ most frequent words are kept and mapped
 * into word_to_index_ / index_to_word_ with consecutive indices starting
 * at 3.
 *
 * The parsing rules are specific to one dataset layout and are not meant to
 * be reused for other datasets.
 *
 * @return the dataset, one Sentence per detected sentence. Words read after
 *         the last sentence terminator are not included.
 */
vector <Sentence> Dataset<DType>::tokenize()
{
  fstream dataFile;
  dataFile.open(filePath_, ios::in);

  // result: stores the dataset sentence by sentence
  vector <Sentence> result;

  // dictionary: every word seen in the dataset with its occurrence count
  map<string, int> dictionary;
  Sentence sentence;
  string word;

  // Loop on the extraction itself: testing eof() before reading would
  // process the last word twice and would never stop if the file failed
  // to open.
  while (dataFile >> word)
    {
      if (word == "Book")
        {
          string line;
          getline(dataFile, line);
          continue;
        }

      // A successful extraction never yields an empty word, so back() is safe.
      const char terminator = word.back();

      // tolower is only defined for values representable as unsigned char.
      transform(word.begin(), word.end(), word.begin(),
                [](unsigned char c) { return static_cast<char>(::tolower(c)); });

      const bool endsSentence =
        terminator == '.' || terminator == ';' || terminator == '?';

      if (!endsSentence)
        {
          const bool endsWithLetter =
            (terminator >= 97 && terminator <= 122) ||
            (terminator >= 65 && terminator <= 90);
          if (endsWithLetter)
            {
              // the word is a normal word
              sentence.add_word(word);
              dictionary[word] += 1;
              continue;
            }
          if (terminator >= '0' && terminator <= '9')
            {
              continue;
            }
        }

      // Trailing punctuation (sentence-ending or not) is stripped.
      const string stripped = word.substr(0, word.length() - 1);
      sentence.add_word(stripped);
      dictionary[stripped] += 1;

      if (endsSentence)
        {
          result.push_back(sentence);
          sentence.clear();
        }
    }

  // select voc_size_ most frequently used words
  // to form our vocabulary
  vector <pair<string ,int>> dict(dictionary.begin(), dictionary.end());

  // std::sort requires a strict weak ordering, so '>=' is not allowed here.
  // Ties on the count are broken by the word itself to keep the selection
  // deterministic.
  auto cmp = [](pair<string, int> const &a, pair <string, int> const &b)
    {
      if (a.second != b.second)
        return a.second > b.second;
      return a.first < b.first;
    };
  sort(dict.begin(), dict.end(), cmp);

  for(size_t i = voc_size_; i < dict.size(); ++i)
    dictionary.erase(dict[i].first);

  // dictionary: stores the most frequently used voc_size_ words
  // we remap them to DType (without word2vec)
  // NOTE(review): indices start at 3; presumably 0-2 are reserved for
  // special tokens -- confirm against the rest of the class.
  int val = 3;
  for(const auto &entry : dictionary)
    {
      word_to_index_[entry.first] = static_cast<DType>(val);
      index_to_word_[static_cast<DType>(val)] = entry.first;
      val++;
    }

  dataFile.close();

  return result;
}
开发者ID:xuehy,项目名称:toy-RNN,代码行数:90,代码来源:dataset.cpp


注:本文中的Sentence::clear方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。