本文整理汇总了C++中Sentence::add_word方法的典型用法代码示例。如果您正苦于以下问题：C++ Sentence::add_word方法的具体用法？C++ Sentence::add_word怎么用？C++ Sentence::add_word使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类Sentence的用法示例。
在下文中一共展示了Sentence::add_word方法的1个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: tokenize
/**
 * Reads the text file at filePath_, splits it into sentences of lower-cased
 * words, and builds the vocabulary from the most frequent words.
 *
 * Parsing rules (specific to the dataset this was written for):
 *  - a token equal to "Book" discards the rest of its line;
 *  - a token ending in '.', ';' or '?' closes the current sentence
 *    (that last character is stripped);
 *  - a token ending in an ASCII letter is taken as-is;
 *  - a token ending in a digit is skipped;
 *  - any other token has its last character stripped.
 * Tokens that become empty after stripping are ignored.
 *
 * Side effects: the voc_size_ most frequent words (ties broken
 * alphabetically) are written to word_to_index_ / index_to_word_, numbered
 * from 3 upwards in alphabetical order.
 *
 * @return the sentences in file order. Words following the last sentence
 *         terminator are counted for the vocabulary but are not part of any
 *         returned sentence. If the file cannot be opened, the result is
 *         empty and no vocabulary entries are added.
 */
vector <Sentence> Dataset<DType>::tokenize()
{
    // however, this function can only be dataset specific
    // it can not be reused with other datasets
    fstream dataFile;
    dataFile.open(filePath_, ios::in);

    // result: stores the dataset sentence by sentence
    vector <Sentence> result;
    // dictionary: stores all words that appeared in the dataset
    // with their numbers of occurrences
    map<string, int> dictionary;
    Sentence sentence;

    // Appends a token to the current sentence and counts it. A token that
    // consisted only of punctuation is empty after stripping and is skipped.
    auto record = [&sentence, &dictionary](const string &token)
    {
        if (token.empty())
            return;
        sentence.add_word(token);
        dictionary[token] += 1;
    };

    string word;
    // Test the extraction itself rather than eof(): this never processes a
    // stale token after the last read and terminates if the file failed to
    // open. A successful extraction also guarantees word is non-empty.
    while (dataFile >> word)
    {
        if (word == "Book")
        {
            // skip the remainder of the line
            string line;
            getline(dataFile, line);
            continue;
        }

        // classify by the original last character, before lower-casing
        const char terminator = word.back();
        // tolower is only defined for unsigned char values (and EOF), so
        // route every byte through unsigned char first
        transform(word.begin(), word.end(), word.begin(),
                  [](unsigned char c) { return static_cast<char>(::tolower(c)); });

        const bool endsSentence =
            terminator == '.' || terminator == ';' || terminator == '?';
        const bool endsWithLetter =
            (terminator >= 'a' && terminator <= 'z') ||
            (terminator >= 'A' && terminator <= 'Z');
        const bool endsWithDigit = terminator >= '0' && terminator <= '9';

        if (endsSentence)
        {
            // last word of a sentence: drop the terminator, close the sentence
            word.pop_back();
            record(word);
            result.push_back(sentence);
            sentence.clear();
        }
        else if (endsWithLetter)
        {
            // the word is a normal word
            record(word);
        }
        else if (endsWithDigit)
        {
            continue;
        }
        else
        {
            // trailing punctuation such as ',' or ':' is stripped
            word.pop_back();
            record(word);
        }
    }
    dataFile.close();

    // select voc_size_ most frequently used words
    // to form our vocabulary
    vector <pair<string, int>> dict(dictionary.begin(), dictionary.end());
    // Must be a strict weak ordering for sort(): '>' rather than '>='.
    // Equal counts are ordered alphabetically so the cut-off is deterministic.
    auto cmp = [](pair<string, int> const &a, pair<string, int> const &b)
    {
        if (a.second != b.second)
            return a.second > b.second;
        return a.first < b.first;
    };
    sort(dict.begin(), dict.end(), cmp);
    for (size_t i = voc_size_; i < dict.size(); ++i)
        dictionary.erase(dict[i].first);

    // dictionary: now stores the most frequently used voc_size_ words
    // we remap them to DType (without word2vec)
    // NOTE(review): numbering starts at 3; indices 0-2 are presumably
    // reserved for special tokens - confirm against the rest of the class.
    int val = 3;
    for (const auto &entry : dictionary)
    {
        word_to_index_[entry.first] = static_cast<DType>(val);
        index_to_word_[static_cast<DType>(val)] = entry.first;
        ++val;
    }
    return result;
}