本文整理汇总了C++中Sentence::clear方法的典型用法代码示例。如果您正苦于以下问题：C++ Sentence::clear方法的具体用法？C++ Sentence::clear怎么用？C++ Sentence::clear使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类Sentence的用法示例。
在下文中一共展示了Sentence::clear方法的3个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: readCorpus
void readCorpus(const string &fn, SentenceCollection &sc) {
ifstream f(fn.c_str());
if (!f.is_open()) {
cerr << "can't open \"" << fn << endl;
throw;
}
string s;
Sentence sent;
while (getline(f, s)) {
//cerr << "reading line \"" << s << "\"" << endl;
if ("sent" == s) {
Token t("SentBegin", makeVariants("SBEG"));
sent.push_back(t);
continue;
} else if ("/sent" == s) {
Token t("SentEnd", makeVariants("SEND"));
sent.push_back(t);
sc.push_back(sent);
sent.clear();
continue;
} else {
vector<string> fields;
split(s, '\t', fields);
if (fields.size() < 2)
continue;
int id;
string word;
stringstream ss(s);
ss >> id >> word;
set<MorphInterp> variants;
for (size_t i = 2; i < fields.size(); i++) {
if (0 == fields[i].size())
continue;
stringstream ss(fields[i]);
unsigned int lemmaId;
string lemma;
ss >> lemmaId >> lemma;
string sgrm;
string t;
while (ss >> t) {
if (sgrm.size() > 0) sgrm += " ";
sgrm += t;
}
MorphInterp ts(lemmaId, sgrm);
if (0 == ts.size()) {
cerr << "\"" << s << "\" - \"" << sgrm << "\"" << sgrm.size() << endl;
throw;
}
variants.insert(ts);
}
Token t(word, variants);
sent.push_back(t, id);
}
}
if (sent.size() > 0)
sc.push_back(sent);
}
示例2: test1
void test1()
{
Grammar::Rules rules;
Grammar::Symbols symbols;
const SymbolValue LETTER = 0;
const SymbolValue WORD = 1;
const SymbolValue SPACES = 3;
const SymbolValue PHRASE = 4;
const SymbolValue ORWORD = 5;
const SymbolValue SELECT = 6;
Symbol letter(NONTERMINAL, LETTER);
Symbol word(NONTERMINAL, WORD);
Symbol spaces(NONTERMINAL, SPACES);
Symbol phrase(NONTERMINAL, PHRASE);
Symbol orword(NONTERMINAL, ORWORD);
Symbol select(NONTERMINAL, SELECT);
symbols.push_back(letter);
symbols.push_back(word);
symbols.push_back(spaces);
symbols.push_back(Symbol('a'));
symbols.push_back(Symbol('b'));
symbols.push_back(Symbol('c'));
symbols.push_back(Symbol('d'));
rules.push_back( Rule(letter, sentence("a")) );
rules.push_back( Rule(letter, sentence("b")) );
rules.push_back( Rule(letter, sentence("c")) );
rules.push_back( Rule(letter, sentence("d")) );
rules.push_back( Rule(letter, sentence("r")) );
rules.push_back( Rule(letter, sentence("o")) );
rules.push_back( Rule(spaces, sentence(" ")) );
Sentence sentence;
sentence.push_back(spaces);
sentence.push_back(Symbol(' '));
rules.push_back( Rule(spaces, sentence) );
sentence.clear();
sentence.push_back(letter);
rules.push_back( Rule(word, sentence) );
sentence.clear();
sentence.push_back(word);
sentence.push_back(letter);
rules.push_back( Rule(word, sentence) );
sentence.clear();
sentence.push_back(spaces);
sentence.push_back(Symbol('o'));
sentence.push_back(Symbol('r'));
sentence.push_back(spaces);
rules.push_back( Rule(orword, sentence) );
sentence.clear();
sentence.push_back(word);
sentence.push_back(orword);
sentence.push_back(word);
rules.push_back( Rule(select, sentence) );
sentence.clear();
sentence.push_back(select);
rules.push_back( Rule(phrase, sentence) );
sentence.clear();
Parser parser(Grammar(symbols, rules, phrase));
CPPUNIT_ASSERT(parser.parse("abbb or bc"));
CPPUNIT_ASSERT(parser.parse("abbb or bc"));
CPPUNIT_ASSERT(parser.parse("or or cdddddd"));
CPPUNIT_ASSERT(parser.parse("cdddddd or or"));
CPPUNIT_ASSERT(!parser.parse("aa or"));
CPPUNIT_ASSERT(!parser.parse("or aa"));
CPPUNIT_ASSERT(!parser.parse("abbb or bc or"));
}
示例3: Dataset::tokenize
/**
 * Splits the text file at filePath_ into sentences and rebuilds the
 * vocabulary maps word_to_index_ / index_to_word_.
 *
 * Tokenisation rules (whitespace-separated words):
 *   - the word "Book" (checked before lower-casing) discards the rest of its
 *     line;
 *   - the last character is inspected before the word is lower-cased;
 *   - a word ending in '.', ';' or '?' is stored without that character and
 *     closes the current sentence;
 *   - a word ending in an ASCII letter is stored unchanged;
 *   - a word ending in a digit is dropped;
 *   - any other trailing character is stripped and the remainder is stored.
 * Words read after the last sentence terminator are not part of the result.
 *
 * Vocabulary: only the voc_size_ most frequent words are kept (ties are
 * broken alphabetically); they are numbered starting at 3 in the map's key
 * order. Indices 0..2 are presumably reserved for special tokens - confirm
 * against the rest of the class.
 *
 * If the file cannot be opened, the result is empty and both maps receive no
 * entries.
 *
 * @return the sentences of the dataset, in file order
 */
vector <Sentence> Dataset<DType>::tokenize()
{
    // however, this function can only be dataset specific
    // it can not be reused with other datasets
    fstream dataFile;
    dataFile.open(filePath_, ios::in);

    // result: stores the dataset sentence by sentence
    vector <Sentence> result;
    // dictionary: stores all words that appeared in the dataset
    // with their numbers of occurrences
    map<string, int> dictionary;
    Sentence sentence;
    string word;

    // Loop on the extraction itself: testing eof() beforehand handles the last
    // word twice and never terminates when the file could not be opened.
    while (dataFile >> word)
    {
        if (word == "Book")
        {
            string line;
            getline(dataFile, line);
            continue;
        }

        // A successful extraction never yields an empty word, so back() is safe.
        const char terminator = word.back();
        // tolower requires a value representable as unsigned char.
        transform(word.begin(), word.end(), word.begin(),
                  [](unsigned char c) { return static_cast<char>(::tolower(c)); });

        const bool endsSentence =
            terminator == '.' || terminator == ';' || terminator == '?';

        if (!endsSentence)
        {
            const bool endsWithLetter =
                (terminator >= 'a' && terminator <= 'z') ||
                (terminator >= 'A' && terminator <= 'Z');
            if (endsWithLetter)
            {
                // the word is a normal word
                sentence.add_word(word);
                dictionary[word] += 1;
                continue;
            }
            if (terminator >= '0' && terminator <= '9')
            {
                continue;
            }
        }

        // Trailing punctuation (sentence-ending or not) is removed.
        const string stripped = word.substr(0, word.length() - 1);
        sentence.add_word(stripped);
        dictionary[stripped] += 1;

        if (endsSentence)
        {
            result.push_back(sentence);
            sentence.clear();
        }
    }

    // select voc_size_ most frequently used words
    // to form our vocabulary
    vector <pair<string, int>> dict(dictionary.begin(), dictionary.end());
    // std::sort needs a strict weak ordering: `>=` is not one (it reports
    // equal elements as ordered both ways), which is undefined behaviour.
    auto cmp = [](pair<string, int> const &a, pair<string, int> const &b)
    {
        if (a.second != b.second)
            return a.second > b.second;
        return a.first < b.first;
    };
    sort(dict.begin(), dict.end(), cmp);
    for (size_t i = voc_size_; i < dict.size(); ++i)
        dictionary.erase(dict[i].first);

    // dictionary: stores the most frequently used voc_size_ words
    // we remap them to DType (without word2vec)
    int val = 3;
    for (const auto &entry : dictionary)
    {
        word_to_index_[entry.first] = static_cast<DType>(val);
        index_to_word_[static_cast<DType>(val)] = entry.first;
        val++;
    }

    dataFile.close();
    return result;
}