本文整理汇总了C++中Vocabulary::total_cnt方法的典型用法代码示例。如果您正苦于以下问题:C++ Vocabulary::total_cnt方法的具体用法?C++ Vocabulary::total_cnt怎么用?C++ Vocabulary::total_cnt使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类Vocabulary的用法示例。
在下文中一共展示了Vocabulary::total_cnt方法的2个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: parse_sentence
// Tokenizes `sentence` on whitespace, looks each token up in `vocab`, and
// appends the surviving word ids to `words`, optionally discarding frequent
// words via sub-sampling.
//
// Parameters:
//   sentence        whitespace-separated input text
//   vocab           vocabulary used for id lookup and per-word counts
//   subsample_thres sub-sampling threshold; a value <= 0 disables sub-sampling
//   p_seed          per-thread PRNG state for rand_r (updated in place)
//   words           output: ids of the kept tokens are appended here
//
// Returns the number of in-vocabulary tokens seen, including those that
// sub-sampling discarded.
int parse_sentence(const string& sentence, const Vocabulary& vocab, real subsample_thres, unsigned* p_seed, vector<uint64_t>* words) {
istringstream token_stream(sentence);
const uint64_t corpus_total = vocab.total_cnt();
int seen_in_vocab = 0;
for (string token; token_stream >> token; ) {
    uint64_t token_id;
    if (!vocab.find_word_id(token, &token_id)) {
        continue;  // out-of-vocabulary token: skip and do not count it
    }
    ++seen_in_vocab;
    bool keep = true;
    if (subsample_thres > 0) {
        // t = thres / word_frequency; keep probability works out to
        // sqrt(t) + t, matching the original word2vec C code rather than
        // the paper's plain sqrt(t).
        const double t = subsample_thres * corpus_total / vocab.get_word_cnt(token_id);
        const double keep_prob = (sqrt(1 / t) + 1) * t;
        const double draw = static_cast<real>(rand_r(p_seed)) / RAND_MAX;
        keep = !(keep_prob < draw);
    }
    if (keep) {
        words->push_back(token_id);
    }
}
return seen_in_vocab;
}
示例2: main
//......... part of the code is omitted here (start of main / option parsing) .........
// NOTE(review): this is the tail of main(); `arg`, `val`, `train_para`,
// `min_count`, `hidden_layer_size` and the file-name strings are declared in
// the omitted portion above this fragment.
}
else if (arg == "-min-count") {
// Words occurring fewer than min_count times are dropped from the vocabulary.
min_count = atoi(val);
}
else if (arg == "-alpha") {
// Initial learning rate; stays negative ("unset") unless given, and a
// per-model default is chosen below.
train_para.alpha = atof(val);
}
else if (arg == "-save-vocab") {
save_vocab_file = val;
}
else if (arg == "-read-vocab") {
// NOTE(review): read_vocab_file is parsed and echoed below but never used to
// load a vocabulary in this visible fragment — confirm whether the omitted
// code honors it, otherwise the option is silently ignored.
read_vocab_file = val;
}
else {
// NOTE(review): typo in the user-facing message ("unknow" -> "unknown");
// left byte-identical here because this is a documentation-only pass.
cerr << "unknow argument: '" << arg << "'" << endl;
return -1;
}
}
// A negative alpha means "not set on the command line": fall back to the
// defaults of the original word2vec implementation (0.05 CBOW, 0.025 skip-gram).
if (train_para.alpha < 0) {
if (train_para.type == CBOW) {
train_para.alpha = 0.05;
}
else {
train_para.alpha = 0.025;
}
}
// Echo the effective configuration so a run is reproducible from its log.
cerr << "parameters:" << endl
<< "size = " << hidden_layer_size << endl
<< "type = " << ((train_para.type==CBOW)?"cbow":"skip-gram") << endl
<< "algo = " << ((train_para.algo==HIER_SOFTMAX)?"hs":"neg sampling") << endl
<< "neg sampling cnt = " << train_para.neg_sample_cnt << endl
<< "window = " << train_para.window_size << endl
<< "subsample thres = " << train_para.subsample_thres << endl
<< "thread = " << train_para.thread_cnt << endl
<< "iter = " << train_para.iter_cnt << endl
<< "min count = " << min_count << endl
<< "alpha = " << train_para.alpha << endl
<< "save vocab = " << save_vocab_file << endl
<< "read vocab = " << read_vocab_file << endl
<< "training file = " << train_file << endl
<< "word vector file = " << vector_file << endl
<< endl;
print_log("start ...");
ifstream ifs_train(train_file.c_str());
if (!ifs_train) {
cerr << "can't open: " << train_file << endl;
return -1;
}
Vocabulary vocab;
HuffmanTree* huffman_tree = NULL;
// First pass over the corpus: build the vocabulary, pruning rare words.
vocab.parse(ifs_train, min_count);
cerr << "vocab size = " << vocab.size() << ", total words count = " << vocab.total_cnt() << endl;
print_log("calc vocab finished ...");
ifs_train.close();
// Optionally persist the vocabulary for later reuse.
if (!save_vocab_file.empty()) {
ofstream ofs_vocab(save_vocab_file.c_str());
if (!ofs_vocab) {
cerr << "can't write to " << save_vocab_file << endl;
return -1;
}
vocab.save(ofs_vocab);
print_log("save vocab finished ...");
}
// Algorithm-specific setup: unigram sampling table for negative sampling,
// or a Huffman tree for hierarchical softmax.
if (train_para.algo == NEG_SAMPLING) {
vocab.init_sampling_table();
print_log("init sampling table finished ...");
}
else if (train_para.algo == HIER_SOFTMAX) {
huffman_tree = new HuffmanTree(vocab.vocab());
print_log("grow huffman tree finished ...");
}
Net net(vocab.size(), hidden_layer_size);
print_log("net init finished ...");
// NOTE(review): when algo == NEG_SAMPLING, huffman_tree is still NULL here,
// so `*huffman_tree` dereferences a null pointer (undefined behavior) even
// though the reference may never be used inside train(). This should take a
// pointer, or a dummy tree should be supplied — confirm against train()'s
// definition.
if (!train(train_file, vocab, *huffman_tree, net, train_para)) {
cerr << "training failed" << endl;
return -1;
}
print_log("training finished ...");
ofstream ofs_result(vector_file.c_str());
if (!ofs_result) {
cerr << "can't write to " << vector_file << endl;
return -1;
}
save_word_vec(ofs_result, net, vocab);
ofs_result.close();
print_log("saving word vector finished ...");
// Safe even when NULL (delete on a null pointer is a no-op).
delete huffman_tree;
// Falling off the end of main() implicitly returns 0.
}