当前位置: 首页>>代码示例>>C++>>正文


C++ Vocabulary::init_sampling_table方法代码示例

本文整理汇总了C++中Vocabulary::init_sampling_table方法的典型用法代码示例。如果您正苦于以下问题:C++ Vocabulary::init_sampling_table方法的具体用法是什么?C++ Vocabulary::init_sampling_table怎么用?有哪些C++ Vocabulary::init_sampling_table的使用例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类Vocabulary的用法示例。


下文共展示了Vocabulary::init_sampling_table方法的1个代码示例,示例默认按受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。

示例1: main


//.........这里部分代码省略.........
        }
        else if (arg == "-min-count") {
            min_count = atoi(val);
        }
        else if (arg == "-alpha") {
            train_para.alpha = atof(val);
        }
        else if (arg == "-save-vocab") {
            save_vocab_file = val;
        }
        else if (arg == "-read-vocab") {
            read_vocab_file = val;
        }
        else {
            cerr << "unknow argument: '" << arg << "'" << endl;
            return -1;
        }
    }

    if (train_para.alpha < 0) {
        if (train_para.type == CBOW) {
            train_para.alpha = 0.05;
        }
        else {
            train_para.alpha = 0.025;
        }
    }

    cerr << "parameters:" << endl
         << "size = " << hidden_layer_size << endl
         << "type = " << ((train_para.type==CBOW)?"cbow":"skip-gram") << endl
         << "algo = " << ((train_para.algo==HIER_SOFTMAX)?"hs":"neg sampling") << endl
         << "neg sampling cnt = " << train_para.neg_sample_cnt << endl
         << "window = " << train_para.window_size << endl
         << "subsample thres = " << train_para.subsample_thres << endl
         << "thread = " << train_para.thread_cnt << endl
         << "iter = " << train_para.iter_cnt << endl
         << "min count = " << min_count << endl
         << "alpha = " << train_para.alpha << endl
         << "save vocab = " << save_vocab_file << endl
         << "read vocab = " << read_vocab_file << endl
         << "training file = " << train_file << endl
         << "word vector file = " << vector_file << endl
         << endl;
    print_log("start ...");

    ifstream ifs_train(train_file.c_str());
    if (!ifs_train) {
        cerr << "can't open: " << train_file << endl;
        return -1;
    }
    
    Vocabulary vocab;
    HuffmanTree* huffman_tree = NULL;
    vocab.parse(ifs_train, min_count);
    cerr << "vocab size = " << vocab.size() << ", total words count = " << vocab.total_cnt() << endl;
    print_log("calc vocab finished ...");
    ifs_train.close();

    if (!save_vocab_file.empty()) {
        ofstream ofs_vocab(save_vocab_file.c_str());
        if (!ofs_vocab) {
            cerr << "can't write to " << save_vocab_file << endl;
            return -1;
        }
        vocab.save(ofs_vocab);
        print_log("save vocab finished ...");
    }

    if (train_para.algo == NEG_SAMPLING) {
        vocab.init_sampling_table();
        print_log("init sampling table finished ...");
    }
    else if (train_para.algo == HIER_SOFTMAX) {
        huffman_tree = new HuffmanTree(vocab.vocab());
        print_log("grow huffman tree finished ...");
    }


    Net net(vocab.size(), hidden_layer_size);
    print_log("net init finished ...");

    if (!train(train_file, vocab, *huffman_tree, net, train_para)) {
        cerr << "training failed" << endl;
        return -1;
    }
    print_log("training finished ...");


    ofstream ofs_result(vector_file.c_str());
    if (!ofs_result) {
        cerr << "can't write to " << vector_file << endl;
        return -1;
    }
    save_word_vec(ofs_result, net, vocab);
    ofs_result.close();
    print_log("saving word vector finished ...");

    delete huffman_tree;
}
开发者ID:yong-wang,项目名称:word2vecPlus,代码行数:101,代码来源:main.cpp


注:本文中的Vocabulary::init_sampling_table方法示例由纯净天空整理自GitHub、MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。