本文整理汇总了C++中Vocabulary::size方法的典型用法代码示例。如果您正苦于以下问题:C++ Vocabulary::size方法的具体用法?C++ Vocabulary::size怎么用?C++ Vocabulary::size使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 Vocabulary 的用法示例。
在下文中一共展示了Vocabulary::size方法的2个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: make_da
// Build this shard's embedded double-array trie from a sorted n-gram tree file.
// Only n-grams whose leading word hashes (modulo datotal) to this shard's id
// (daid) are inserted; n-gram values are interned through value_array_index.
//
// @param pathtotreefile    path to the n-gram tree file (read via TreeFile)
// @param value_array_index interning index mapping float values to value ids
// @param vocab             vocabulary used to decode the tree file
//
// NOTE(review): word id 1 appears to be a special boundary/terminal symbol
// ("<#>" per the original comment) — confirm against the Vocabulary class.
void EmbeddedDA::make_da(std::string &pathtotreefile, ValueArrayIndex *value_array_index, Vocabulary &vocab)
{
    TreeFile tf(pathtotreefile, vocab);
    size_t unigram_type = vocab.size();
    resize_array(unigram_type*20);
    da_array[0].base.base_val = 0;

    size_t total = tf.get_totalsize();
    logger << "EmbeddedDA[" << daid << "] total=" << total << Logger::endi;
    size_t order = tf.get_ngramorder();

    // Sibling buffers sized for the worst case: one slot per unigram type
    // plus one for the terminal symbol.
    int *words = new int[unigram_type+1];
    float *values = new float[unigram_type+1];
    int *history = new int[order-1];
    size_t wordssize = 0;
    size_t historysize = 0;
    // Index into words/values of the terminal symbol for the current
    // context, or (size_t)-1 when none has been seen yet.
    size_t terminal_pos=(size_t)-1;
    memset(history, 0, sizeof(int)*(order-1));

    // BUG FIX: total/10 is 0 whenever total < 10, so the original
    // "(i+1) % tenpercent" divided by zero on the first iteration.
    // Guard tenpercent before using it as a modulus.
    size_t tenpercent = total / 10;
    for(size_t i = 0; i < total; i++){
        if(tenpercent != 0 && (i+1) % tenpercent == 0){
            logger << "EmbeddedDA[" << daid << "] " << (i+1)/tenpercent << "0% done." << Logger::endi;
        }
        unsigned short n;
        VocabId *ngram;
        float value;
        // get_ngram allocates ngram with new[]; this function owns and
        // must delete it on every path.
        tf.get_ngram(n,ngram,value);

        // Shard filter: skip n-grams that belong to a different EmbeddedDA.
        if(n==2 && ngram[0]==1 && ngram[1]%datotal!=daid){
            delete [] ngram;
            continue;
        }else if(ngram[0]%datotal!=daid && ngram[0]!=1){
            delete [] ngram;
            continue;
        }

        // Context changed: flush the sibling list collected for the previous
        // context before starting a new one.
        if(historysize != (size_t)n-1
           || memcmp(history, ngram, sizeof(int)*(n-1))!=0){
            unsigned now=0;
            for(size_t j = 0; j < historysize; j++){
                now = get_pos(history[j], now);
            }
            if(historysize!=0 && history[historysize-1]==1){ // context ends for <#>.
                det_base(words, values, wordssize, now);
            }else{
                det_base(words, NULL, wordssize, now);
                if(historysize!=0 && terminal_pos!=(size_t)-1){
                    unsigned terminal=get_terminal(now);
                    value_id[terminal] = value_array_index->lookup(values[terminal_pos]);
                }
            }
            memcpy(history, ngram, sizeof(int)*(n-1));
            historysize = n-1;
            wordssize=0;
            terminal_pos=(size_t)-1;
        }
        if(ngram[n-1]==1){
            // Remember where the terminal symbol lands in the sibling list.
            terminal_pos=wordssize;
        }
        words[wordssize]=ngram[n-1];
        values[wordssize]=value;
        wordssize++;
        delete [] ngram;
    }

    // Flush the final pending context (same logic as inside the loop).
    unsigned now=0;
    for(size_t j = 0; j < historysize; j++){
        now = get_pos(history[j], now);
    }
    if(historysize!=0 && history[historysize-1]==1){
        det_base(words, values, wordssize, now);
    }else{
        det_base(words, NULL, wordssize, now);
        if(historysize!=0 && terminal_pos!=(size_t)-1){
            unsigned terminal=get_terminal(now);
            value_id[terminal] = value_array_index->lookup(values[terminal_pos]);
        }
    }
    replace_value();

    delete [] history;
    delete [] words;
    delete [] values;
}
示例2: main
int main(int argc, char **argv) {
uint64_t hidden_layer_size = 100;
int min_count = 5;
TrainPara train_para;
string save_vocab_file;
string read_vocab_file;
string train_file;
string vector_file;
if (argc < 3) {
cerr << usage << endl;
return -1;
}
train_file = argv[argc - 2];
vector_file = argv[argc - 1];
for (int i = 1; i < argc - 2; i += 2) {
string arg = argv[i];
const char* val = argv[i + 1];
if (arg == "-size") {
hidden_layer_size = atoi(val);
}
else if (arg == "-type") {
if (string(val) == "cbow") {
train_para.type = CBOW;
}
else if (string(val) == "skip-gram") {
train_para.type = SKIP_GRAM;
}
else {
cerr << "unknown -type: " << val << endl;;
return -1;
}
}
else if (arg == "-algo") {
if (string(val) == "ns") {
train_para.algo = NEG_SAMPLING;
}
else if (string(val) == "hs") {
train_para.algo = HIER_SOFTMAX;
}
else {
cerr << "unknown -algo: " << val << endl;;
return -1;
}
}
else if (arg == "-neg-sample") {
train_para.neg_sample_cnt = atoi(val);
}
else if (arg == "-window") {
train_para.window_size = atoi(val);
}
else if (arg == "-subsample") {
train_para.subsample_thres = atof(val);
}
else if (arg == "-thread") {
train_para.thread_cnt = atoi(val);
}
else if (arg == "-iter") {
train_para.iter_cnt = atoi(val);
}
else if (arg == "-min-count") {
min_count = atoi(val);
}
else if (arg == "-alpha") {
train_para.alpha = atof(val);
}
else if (arg == "-save-vocab") {
save_vocab_file = val;
}
else if (arg == "-read-vocab") {
read_vocab_file = val;
}
else {
cerr << "unknow argument: '" << arg << "'" << endl;
return -1;
}
}
if (train_para.alpha < 0) {
if (train_para.type == CBOW) {
train_para.alpha = 0.05;
}
else {
train_para.alpha = 0.025;
}
}
cerr << "parameters:" << endl
<< "size = " << hidden_layer_size << endl
<< "type = " << ((train_para.type==CBOW)?"cbow":"skip-gram") << endl
<< "algo = " << ((train_para.algo==HIER_SOFTMAX)?"hs":"neg sampling") << endl
<< "neg sampling cnt = " << train_para.neg_sample_cnt << endl
<< "window = " << train_para.window_size << endl
<< "subsample thres = " << train_para.subsample_thres << endl
<< "thread = " << train_para.thread_cnt << endl
<< "iter = " << train_para.iter_cnt << endl
<< "min count = " << min_count << endl
<< "alpha = " << train_para.alpha << endl
//.........这里部分代码省略.........