当前位置: 首页>>代码示例>>C++>>正文


C++ utils::Vocab方法代码示例

本文整理汇总了C++中utils::Vocab方法的典型用法代码示例。如果您正苦于以下问题:C++ utils::Vocab方法的具体用法?C++ utils::Vocab怎么用?C++ utils::Vocab使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在utils的用法示例。


在下文中一共展示了utils::Vocab方法的2个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。

示例1: main

// Entry point of the sentiment-analysis example (excerpt; the remainder of the
// function is not shown here).
//
// Visible steps, in order:
//   1. parse the gflags command line,
//   2. translate the memory-penalty curve flag into an integer code,
//   3. load the training trees and build the word vocabulary, either from
//      pretrained vectors or from the training data,
//   4. convert the training and validation trees into indexed minibatches,
//   5. build a new StackedGatedModel or load one from disk,
//   6. print a configuration summary,
//   7. create per-thread shallow copies of the model parameters.
int main (int argc,  char* argv[]) {
    GFLAGS_NAMESPACE::SetUsageMessage(
        "\n"
        "Sentiment Analysis using single LSTM\n"
        "------------------------------------\n"
        "\n"
        " @author Jonathan Raiman\n"
        " @date April 7th 2015"
    );
    GFLAGS_NAMESPACE::ParseCommandLineFlags(&argc, &argv, true);

    // Cap the patience flag at 40.
    if (FLAGS_patience > 40) FLAGS_patience = 40;

    // Map the textual curve name onto an integer code:
    // "flat" -> 0, "linear" -> 1, "square" -> 2.
    // NOTE(review): in the final else branch the variable is never assigned;
    // presumably utils::assert2(false, ...) does not return -- confirm.
    int memory_penalty_curve_type;
    if (FLAGS_memory_penalty_curve == "flat") {
        memory_penalty_curve_type = 0;
    } else if (FLAGS_memory_penalty_curve == "linear") {
        memory_penalty_curve_type = 1;
    } else if (FLAGS_memory_penalty_curve == "square") {
        memory_penalty_curve_type = 2;
    } else {
        utils::assert2(false, "memory_penalty_curve can only be flat, linear, or square.");
    }

    // Neither of these two locals is used in the visible part of the function.
    auto epochs = FLAGS_epochs;
    int rampup_time = 10;

    auto sentiment_treebank = SST::load(FLAGS_train);
    // Start with a placeholder embedding and an empty vocabulary. glove::load
    // receives pointers to both, presumably to fill them in -- confirm.
    auto embedding          = Mat<REAL_t>(100, 0);
    auto word_vocab         = Vocab();
    // With pretrained vectors the vocabulary comes from the vector file;
    // otherwise it is derived from the training trees using FLAGS_min_occurence.
    // NOTE(review): 50000 is presumably a cap on the number of vectors read -- confirm.
    if (!FLAGS_pretrained_vectors.empty())
        glove::load(FLAGS_pretrained_vectors, &embedding, &word_vocab, 50000);
    else
        word_vocab = SST::get_vocabulary(sentiment_treebank, FLAGS_min_occurence);
    auto vocab_size     = word_vocab.size();
    auto dataset        = SST::convert_trees_to_indexed_minibatches(
        word_vocab,
        sentiment_treebank,
        FLAGS_minibatch
    );
    // The validation trees are loaded inline and indexed with the same vocabulary.
    auto validation_set = SST::convert_trees_to_indexed_minibatches(
        word_vocab,
        SST::load(FLAGS_validation),
        FLAGS_minibatch
    );

    // `pool` is declared outside this excerpt. No matching delete is visible here.
    pool = new ThreadPool(FLAGS_j);
    // Create a model with an embedding, and several stacks:

    // Use at least one stack regardless of the flag value.
    auto stack_size  = std::max(FLAGS_stack_size, 1);

    // When FLAGS_load is empty a fresh model is constructed, otherwise it is
    // loaded from FLAGS_load. With pretrained vectors the first constructor
    // argument is 0 and the second is embedding.dims(1); the pretrained
    // embedding itself is assigned to the model further below. The shortcut
    // argument is forced to false unless stack_size > 1.
    auto model = FLAGS_load.empty() ? StackedGatedModel<REAL_t>(
        FLAGS_pretrained_vectors.empty() ? word_vocab.size() : 0,
        FLAGS_pretrained_vectors.empty() ? FLAGS_hidden : embedding.dims(1),
        FLAGS_hidden,
        stack_size,
        SST::label_names.size(),
        (FLAGS_shortcut && stack_size > 1) ? FLAGS_shortcut : false,
        FLAGS_memory_feeds_gates,
        FLAGS_memory_penalty) : StackedGatedModel<REAL_t>::load(FLAGS_load);

    // Tell the user that the shortcut flag was dropped.
    if (FLAGS_shortcut && stack_size == 1) {
        std::cout << "shortcut flag ignored: Shortcut connections only take effect with stack size > 1" << std::endl;
    }

    // don't send the input vector to the
    // decoder:
    model.input_vector_to_decoder(false);

    std::cout << "model.input_vector_to_decoder() = " << model.input_vector_to_decoder() << std::endl;

    // Configuration summary. The "# training examples" figure is the number of
    // minibatches times the minibatch size, minus the shortfall of the last
    // minibatch.
    // NOTE(review): both sentiment_treebank and dataset are indexed at
    // size() - 1 with no emptiness check visible in this excerpt.
    std::cout << " Unique Trees Loaded : " << sentiment_treebank.size() << std::endl
              << "        Example tree : " << *sentiment_treebank[sentiment_treebank.size()-1] << std::endl
              << "     Vocabulary size : " << vocab_size << std::endl
              << "      minibatch size : " << FLAGS_minibatch << std::endl
              << "   number of threads : " << FLAGS_j << std::endl
              << "        Dropout type : " << (FLAGS_fast_dropout ? "fast" : "default") << std::endl
              << "        Dropout Prob : " << FLAGS_dropout << std::endl
              << " Max training epochs : " << FLAGS_epochs << std::endl
              << "   First Hidden Size : " << model.hidden_sizes[0] << std::endl
              << "           LSTM type : " << (model.memory_feeds_gates ? "Graves 2013" : "Zaremba 2014") << std::endl
              << "          Stack size : " << model.hidden_sizes.size() << std::endl
              << " # training examples : " << dataset.size() * FLAGS_minibatch - (FLAGS_minibatch - dataset[dataset.size() - 1].size()) << std::endl
              << "     validation obj. : " << (FLAGS_validation_metric == 0 ? "overall" : "root") << std::endl
              << " # layers -> decoder : " << model.decoder.matrices.size() << std::endl
              << "              Solver : " << FLAGS_solver << std::endl;
    if (FLAGS_embedding_learning_rate > 0)
        std::cout << " Embedding step size : " << FLAGS_embedding_learning_rate << std::endl;

    // Replace the model's embedding with the pretrained one.
    if (!FLAGS_pretrained_vectors.empty()) {
        std::cout << "  Pretrained Vectors : " << FLAGS_pretrained_vectors << std::endl;
        model.embedding = embedding;
    }

    vector<vector<Mat<REAL_t>>>       thread_params;
    vector<vector<Mat<REAL_t>>>       thread_embedding_params;
    vector<StackedGatedModel<REAL_t>> thread_models;
    // Build FLAGS_j shallow copies of the model. The predicate is true only for
    // the matrix whose underlying memory is the model's embedding memory;
    // presumably this is what routes it into thread_embedding_params -- confirm
    // against utils::shallow_copy_multi_params.
    std::tie(thread_models, thread_embedding_params, thread_params) = utils::shallow_copy_multi_params(model, FLAGS_j, [&model](const Mat<REAL_t>& mat) {
        return &mat.w().memory() == &model.embedding.w().memory();
    });
//.........这里部分代码省略.........
开发者ID:byzhang,项目名称:dali-examples,代码行数:101,代码来源:sparse_lstm_sentiment.cpp

示例2: main

// Entry point of the named-entity-recognition example (excerpt; the function
// is cut off in the middle of its last visible statement).
//
// Visible steps, in order:
//   1. parse the gflags command line,
//   2. translate the memory-penalty curve flag into an integer code,
//   3. load the training data and build the word and label vocabularies,
//   4. convert the training and validation data into indexed minibatches,
//   5. build a new StackedGatedModel or load one from disk,
//   6. print a configuration summary,
//   7. begin creating per-thread shallow copies of the model parameters.
int main (int argc,  char* argv[]) {
    GFLAGS_NAMESPACE::SetUsageMessage(
        "\n"
        "Named Entity Recognition using single LSTM\n"
        "------------------------------------------\n"
        "\n"
        " @author Jonathan Raiman\n"
        " @date April 7th 2015"
    );
    GFLAGS_NAMESPACE::ParseCommandLineFlags(&argc, &argv, true);

    // Map the textual curve name onto an integer code:
    // "flat" -> 0, "linear" -> 1, "square" -> 2.
    // NOTE(review): in the final else branch the variable is never assigned;
    // presumably utils::assert2(false, ...) does not return -- confirm.
    int memory_penalty_curve_type;
    if (FLAGS_memory_penalty_curve        == "flat") {
        memory_penalty_curve_type = 0;
    } else if (FLAGS_memory_penalty_curve == "linear") {
        memory_penalty_curve_type = 1;
    } else if (FLAGS_memory_penalty_curve == "square") {
        memory_penalty_curve_type = 2;
    } else {
        utils::assert2(false, "memory_penalty_curve can only be flat, linear, or square.");
    }

    // Neither of these two locals is used in the visible part of the function.
    auto epochs = FLAGS_epochs;
    int rampup_time = 10;

    auto ner_data       = NER::load(FLAGS_train);
    // Start with a placeholder embedding and an empty vocabulary. glove::load
    // receives pointers to both, presumably to fill them in -- confirm.
    auto embedding      = Mat<REAL_t>(100, 0);
    auto word_vocab     = Vocab();
    // With pretrained vectors the vocabulary comes from the vector file;
    // otherwise it is built from the training data using FLAGS_min_occurence.
    // NOTE(review): 50000 is presumably a cap on the number of vectors read,
    // and the boolean passed to Vocab(...) presumably controls an unknown-word
    // entry -- confirm both against the library.
    if (!FLAGS_pretrained_vectors.empty()) {
        glove::load(FLAGS_pretrained_vectors, &embedding, &word_vocab, 50000);
    } else {
        word_vocab = Vocab(NER::get_vocabulary(ner_data, FLAGS_min_occurence), true);
    }
    auto label_vocab    = Vocab(NER::get_label_vocabulary(ner_data), false);
    auto vocab_size     = word_vocab.size();
    auto dataset        = NER::convert_to_indexed_minibatches(
        word_vocab,
        label_vocab,
        ner_data,
        FLAGS_minibatch
    );
    // The validation set is declared with the same type as the training set
    // and filled inside the scoped block below, so that ner_valid_data goes
    // out of scope as soon as the conversion is done.
    decltype(dataset) validation_set;
    {
        auto ner_valid_data = NER::load(FLAGS_validation);
        validation_set = NER::convert_to_indexed_minibatches(
            word_vocab,
            label_vocab,
            ner_valid_data,
            FLAGS_minibatch
        );
    }

    // `pool` is declared outside this excerpt. No matching delete is visible here.
    pool = new ThreadPool(FLAGS_j);
    // Create a model with an embedding, and several stacks:

    // Use at least one stack regardless of the flag value.
    auto stack_size  = std::max(FLAGS_stack_size, 1);
    // When FLAGS_load is empty a fresh model is constructed, otherwise it is
    // loaded from FLAGS_load. With pretrained vectors the first constructor
    // argument is 0 and the second is embedding.dims(1); the pretrained
    // embedding itself is assigned to the model further below. The shortcut
    // argument is forced to false unless stack_size > 1.
    auto model = FLAGS_load.empty() ? StackedGatedModel<REAL_t>(
        FLAGS_pretrained_vectors.empty() ? word_vocab.size() : 0,
        FLAGS_pretrained_vectors.empty() ? FLAGS_hidden : embedding.dims(1),
        FLAGS_hidden,
        stack_size,
        label_vocab.size(),
        (FLAGS_shortcut && stack_size > 1) ? FLAGS_shortcut : false,
        FLAGS_memory_feeds_gates,
        FLAGS_memory_penalty) : StackedGatedModel<REAL_t>::load(FLAGS_load);

    // Tell the user that the shortcut flag was dropped.
    if (FLAGS_shortcut && stack_size == 1)
        std::cout << "shortcut flag ignored: Shortcut connections only take effect with stack size > 1" << std::endl;
    // don't send the input vector to the
    // decoder:
    model.input_vector_to_decoder(false);
    // Stop here on an empty dataset; the summary below indexes
    // dataset[dataset.size() - 1].
    if (dataset.size() == 0) utils::exit_with_message("Dataset is empty");

    // Configuration summary. The "# training examples" figure is the number of
    // minibatches times the minibatch size, minus the shortfall of the last
    // minibatch.
    std::cout << "     Vocabulary size : " << vocab_size << std::endl
              << "      minibatch size : " << FLAGS_minibatch << std::endl
              << "   number of threads : " << FLAGS_j << std::endl
              << "        Dropout type : " << (FLAGS_fast_dropout ? "fast" : "default") << std::endl
              << "        Dropout Prob : " << FLAGS_dropout << std::endl
              << " Max training epochs : " << FLAGS_epochs << std::endl
              << "   First Hidden Size : " << model.hidden_sizes[0] << std::endl
              << "           LSTM type : " << (model.memory_feeds_gates ? "Graves 2013" : "Zaremba 2014") << std::endl
              << "          Stack size : " << model.hidden_sizes.size() << std::endl
              << " # training examples : " << dataset.size() * FLAGS_minibatch - (FLAGS_minibatch - dataset[dataset.size() - 1].size()) << std::endl
              << " # layers -> decoder : " << model.decoder.matrices.size() << std::endl
              << "              Solver : " << FLAGS_solver << std::endl;
    if (FLAGS_embedding_learning_rate > 0)
        std::cout << " Embedding step size : " << FLAGS_embedding_learning_rate << std::endl;

    // Replace the model's embedding with the pretrained one.
    if (!FLAGS_pretrained_vectors.empty()) {
        std::cout << "  Pretrained Vectors : " << FLAGS_pretrained_vectors << std::endl;
        model.embedding = embedding;
    }


    vector<vector<Mat<REAL_t>>> thread_params;
    vector<vector<Mat<REAL_t>>> thread_embedding_params;
    // what needs to be optimized:
    vector<StackedGatedModel<REAL_t>> thread_models;
    // Build FLAGS_j shallow copies of the model. The body of the predicate
    // lambda is not visible in this excerpt.
    std::tie(thread_models, thread_embedding_params, thread_params) = utils::shallow_copy_multi_params(model, FLAGS_j, [&model](const Mat<REAL_t>& mat) {
//.........这里部分代码省略.........
开发者ID:codeaudit,项目名称:Dali,代码行数:101,代码来源:sparse_ner.cpp


注:本文中的utils::Vocab方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。