本文整理汇总了C++中utils::Vocab方法的典型用法代码示例。如果您正苦于以下问题：C++ utils::Vocab方法的具体用法？C++ utils::Vocab怎么用？C++ utils::Vocab使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类utils的用法示例。
在下文中一共展示了utils::Vocab方法的2个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: main
// Entry point of the sentiment-analysis trainer. This is an excerpt: the rest
// of the function is omitted at the end. The visible part parses the
// command-line flags, loads the SST trees, builds the vocabulary, the
// minibatches and the StackedGatedModel, prints the configuration, and asks
// utils::shallow_copy_multi_params for per-thread copies of the model.
int main (int argc, char* argv[]) {
GFLAGS_NAMESPACE::SetUsageMessage(
"\n"
"Sentiment Analysis using single LSTM\n"
"------------------------------------\n"
"\n"
" @author Jonathan Raiman\n"
" @date April 7th 2015"
);
GFLAGS_NAMESPACE::ParseCommandLineFlags(&argc, &argv, true);
// Clamp the patience flag to at most 40.
if (FLAGS_patience > 40) FLAGS_patience = 40;
// Translate the curve name given on the command line into an integer code:
// 0 = flat, 1 = linear, 2 = square.
int memory_penalty_curve_type;
if (FLAGS_memory_penalty_curve == "flat") {
memory_penalty_curve_type = 0;
} else if (FLAGS_memory_penalty_curve == "linear") {
memory_penalty_curve_type = 1;
} else if (FLAGS_memory_penalty_curve == "square") {
memory_penalty_curve_type = 2;
} else {
// NOTE(review): memory_penalty_curve_type stays uninitialized on this path;
// presumably utils::assert2 does not return when the condition is false —
// confirm.
utils::assert2(false, "memory_penalty_curve can only be flat, linear, or square.");
}
// epochs and rampup_time are not read in the visible part of the function.
auto epochs = FLAGS_epochs;
int rampup_time = 10;
auto sentiment_treebank = SST::load(FLAGS_train);
// embedding and word_vocab are handed to glove::load below when pretrained
// vectors are requested; otherwise the vocabulary is derived from the
// training trees using FLAGS_min_occurence.
auto embedding = Mat<REAL_t>(100, 0);
auto word_vocab = Vocab();
if (!FLAGS_pretrained_vectors.empty())
// NOTE(review): 50000 is presumably a cap on the number of vectors read —
// confirm against glove::load.
glove::load(FLAGS_pretrained_vectors, &embedding, &word_vocab, 50000);
else
word_vocab = SST::get_vocabulary(sentiment_treebank, FLAGS_min_occurence);
auto vocab_size = word_vocab.size();
// Index the training trees and the validation trees with the same vocabulary
// and minibatch size.
auto dataset = SST::convert_trees_to_indexed_minibatches(
word_vocab,
sentiment_treebank,
FLAGS_minibatch
);
auto validation_set = SST::convert_trees_to_indexed_minibatches(
word_vocab,
SST::load(FLAGS_validation),
FLAGS_minibatch
);
// NOTE(review): pool is not declared in this excerpt (presumably a file-level
// ThreadPool pointer); it is allocated with raw new and no matching delete is
// visible here.
pool = new ThreadPool(FLAGS_j);
// Create a model with an embedding, and several stacks:
// (the stack size is forced to be at least 1)
auto stack_size = std::max(FLAGS_stack_size, 1);
// A new model is constructed unless FLAGS_load is set, in which case the
// model comes from StackedGatedModel<REAL_t>::load. With pretrained vectors
// the first argument is 0 and the second is embedding.dims(1); the embedding
// itself is assigned to the model further down.
auto model = FLAGS_load.empty() ? StackedGatedModel<REAL_t>(
FLAGS_pretrained_vectors.empty() ? word_vocab.size() : 0,
FLAGS_pretrained_vectors.empty() ? FLAGS_hidden : embedding.dims(1),
FLAGS_hidden,
stack_size,
SST::label_names.size(),
(FLAGS_shortcut && stack_size > 1) ? FLAGS_shortcut : false,
FLAGS_memory_feeds_gates,
FLAGS_memory_penalty) : StackedGatedModel<REAL_t>::load(FLAGS_load);
// The shortcut flag is dropped above when stack_size == 1; tell the user.
if (FLAGS_shortcut && stack_size == 1) {
std::cout << "shortcut flag ignored: Shortcut connections only take effect with stack size > 1" << std::endl;
}
// don't send the input vector to the
// decoder:
model.input_vector_to_decoder(false);
std::cout << "model.input_vector_to_decoder() = " << model.input_vector_to_decoder() << std::endl;
// Print the run configuration. The "# training examples" figure is
// dataset.size() * FLAGS_minibatch minus the shortfall of the last minibatch.
// NOTE(review): sentiment_treebank[size()-1] and dataset[size()-1] are read
// without checking that those containers are non-empty.
std::cout << " Unique Trees Loaded : " << sentiment_treebank.size() << std::endl
<< " Example tree : " << *sentiment_treebank[sentiment_treebank.size()-1] << std::endl
<< " Vocabulary size : " << vocab_size << std::endl
<< " minibatch size : " << FLAGS_minibatch << std::endl
<< " number of threads : " << FLAGS_j << std::endl
<< " Dropout type : " << (FLAGS_fast_dropout ? "fast" : "default") << std::endl
<< " Dropout Prob : " << FLAGS_dropout << std::endl
<< " Max training epochs : " << FLAGS_epochs << std::endl
<< " First Hidden Size : " << model.hidden_sizes[0] << std::endl
<< " LSTM type : " << (model.memory_feeds_gates ? "Graves 2013" : "Zaremba 2014") << std::endl
<< " Stack size : " << model.hidden_sizes.size() << std::endl
<< " # training examples : " << dataset.size() * FLAGS_minibatch - (FLAGS_minibatch - dataset[dataset.size() - 1].size()) << std::endl
<< " validation obj. : " << (FLAGS_validation_metric == 0 ? "overall" : "root") << std::endl
<< " # layers -> decoder : " << model.decoder.matrices.size() << std::endl
<< " Solver : " << FLAGS_solver << std::endl;
if (FLAGS_embedding_learning_rate > 0)
std::cout << " Embedding step size : " << FLAGS_embedding_learning_rate << std::endl;
// With pretrained vectors, install the loaded embedding into the model.
if (!FLAGS_pretrained_vectors.empty()) {
std::cout << " Pretrained Vectors : " << FLAGS_pretrained_vectors << std::endl;
model.embedding = embedding;
}
vector<vector<Mat<REAL_t>>> thread_params;
vector<vector<Mat<REAL_t>>> thread_embedding_params;
vector<StackedGatedModel<REAL_t>> thread_models;
// Request FLAGS_j copies of the model. The predicate returns true for a
// matrix whose w().memory() object is the very same object as that of
// model.embedding (an address comparison).
// NOTE(review): presumably this is what routes the embedding into
// thread_embedding_params rather than thread_params — confirm against
// utils::shallow_copy_multi_params.
std::tie(thread_models, thread_embedding_params, thread_params) = utils::shallow_copy_multi_params(model, FLAGS_j, [&model](const Mat<REAL_t>& mat) {
return &mat.w().memory() == &model.embedding.w().memory();
});
//......... part of the code is omitted here .........
示例2: main
// Entry point of the named-entity-recognition trainer. This is an excerpt: the
// function is cut off inside the final lambda. The visible part parses the
// command-line flags, loads the NER data, builds the word and label
// vocabularies, the minibatches and the StackedGatedModel, prints the
// configuration, and starts the call that creates per-thread model copies.
int main (int argc, char* argv[]) {
GFLAGS_NAMESPACE::SetUsageMessage(
"\n"
"Named Entity Recognition using single LSTM\n"
"------------------------------------------\n"
"\n"
" @author Jonathan Raiman\n"
" @date April 7th 2015"
);
GFLAGS_NAMESPACE::ParseCommandLineFlags(&argc, &argv, true);
// Translate the curve name given on the command line into an integer code:
// 0 = flat, 1 = linear, 2 = square.
int memory_penalty_curve_type;
if (FLAGS_memory_penalty_curve == "flat") {
memory_penalty_curve_type = 0;
} else if (FLAGS_memory_penalty_curve == "linear") {
memory_penalty_curve_type = 1;
} else if (FLAGS_memory_penalty_curve == "square") {
memory_penalty_curve_type = 2;
} else {
// NOTE(review): memory_penalty_curve_type stays uninitialized on this path;
// presumably utils::assert2 does not return when the condition is false —
// confirm.
utils::assert2(false, "memory_penalty_curve can only be flat, linear, or square.");
}
// epochs and rampup_time are not read in the visible part of the function.
auto epochs = FLAGS_epochs;
int rampup_time = 10;
auto ner_data = NER::load(FLAGS_train);
// embedding and word_vocab are handed to glove::load when pretrained vectors
// are requested; otherwise the word vocabulary is built from the training
// data using FLAGS_min_occurence.
auto embedding = Mat<REAL_t>(100, 0);
auto word_vocab = Vocab();
if (!FLAGS_pretrained_vectors.empty()) {
// NOTE(review): 50000 is presumably a cap on the number of vectors read —
// confirm against glove::load.
glove::load(FLAGS_pretrained_vectors, &embedding, &word_vocab, 50000);
} else {
// NOTE(review): the boolean passed to Vocab differs between the word (true)
// and label (false) vocabularies; its meaning is not visible here —
// presumably it controls an unknown-word entry, confirm against Vocab.
word_vocab = Vocab(NER::get_vocabulary(ner_data, FLAGS_min_occurence), true);
}
auto label_vocab = Vocab(NER::get_label_vocabulary(ner_data), false);
auto vocab_size = word_vocab.size();
auto dataset = NER::convert_to_indexed_minibatches(
word_vocab,
label_vocab,
ner_data,
FLAGS_minibatch
);
// No validation set yet...
decltype(dataset) validation_set;
// The raw validation data lives only inside this scope; just the indexed
// minibatches are kept.
{
auto ner_valid_data = NER::load(FLAGS_validation);
validation_set = NER::convert_to_indexed_minibatches(
word_vocab,
label_vocab,
ner_valid_data,
FLAGS_minibatch
);
}
// NOTE(review): pool is not declared in this excerpt (presumably a file-level
// ThreadPool pointer); it is allocated with raw new and no matching delete is
// visible here.
pool = new ThreadPool(FLAGS_j);
// Create a model with an embedding, and several stacks:
// (the stack size is forced to be at least 1)
auto stack_size = std::max(FLAGS_stack_size, 1);
// A new model is constructed unless FLAGS_load is set, in which case the
// model comes from StackedGatedModel<REAL_t>::load. With pretrained vectors
// the first argument is 0 and the second is embedding.dims(1); the embedding
// itself is assigned to the model further down.
auto model = FLAGS_load.empty() ? StackedGatedModel<REAL_t>(
FLAGS_pretrained_vectors.empty() ? word_vocab.size() : 0,
FLAGS_pretrained_vectors.empty() ? FLAGS_hidden : embedding.dims(1),
FLAGS_hidden,
stack_size,
label_vocab.size(),
(FLAGS_shortcut && stack_size > 1) ? FLAGS_shortcut : false,
FLAGS_memory_feeds_gates,
FLAGS_memory_penalty) : StackedGatedModel<REAL_t>::load(FLAGS_load);
// The shortcut flag is dropped above when stack_size == 1; tell the user.
if (FLAGS_shortcut && stack_size == 1)
std::cout << "shortcut flag ignored: Shortcut connections only take effect with stack size > 1" << std::endl;
// don't send the input vector to the
// decoder:
model.input_vector_to_decoder(false);
// Bail out on an empty dataset before dataset[dataset.size() - 1] is read in
// the summary below.
if (dataset.size() == 0) utils::exit_with_message("Dataset is empty");
// Print the run configuration. The "# training examples" figure is
// dataset.size() * FLAGS_minibatch minus the shortfall of the last minibatch.
std::cout << " Vocabulary size : " << vocab_size << std::endl
<< " minibatch size : " << FLAGS_minibatch << std::endl
<< " number of threads : " << FLAGS_j << std::endl
<< " Dropout type : " << (FLAGS_fast_dropout ? "fast" : "default") << std::endl
<< " Dropout Prob : " << FLAGS_dropout << std::endl
<< " Max training epochs : " << FLAGS_epochs << std::endl
<< " First Hidden Size : " << model.hidden_sizes[0] << std::endl
<< " LSTM type : " << (model.memory_feeds_gates ? "Graves 2013" : "Zaremba 2014") << std::endl
<< " Stack size : " << model.hidden_sizes.size() << std::endl
<< " # training examples : " << dataset.size() * FLAGS_minibatch - (FLAGS_minibatch - dataset[dataset.size() - 1].size()) << std::endl
<< " # layers -> decoder : " << model.decoder.matrices.size() << std::endl
<< " Solver : " << FLAGS_solver << std::endl;
if (FLAGS_embedding_learning_rate > 0)
std::cout << " Embedding step size : " << FLAGS_embedding_learning_rate << std::endl;
// With pretrained vectors, install the loaded embedding into the model.
if (!FLAGS_pretrained_vectors.empty()) {
std::cout << " Pretrained Vectors : " << FLAGS_pretrained_vectors << std::endl;
model.embedding = embedding;
}
vector<vector<Mat<REAL_t>>> thread_params;
vector<vector<Mat<REAL_t>>> thread_embedding_params;
// what needs to be optimized:
vector<StackedGatedModel<REAL_t>> thread_models;
// Request FLAGS_j copies of the model; the body of the predicate lambda is
// cut off in this excerpt.
std::tie(thread_models, thread_embedding_params, thread_params) = utils::shallow_copy_multi_params(model, FLAGS_j, [&model](const Mat<REAL_t>& mat) {
//......... part of the code is omitted here .........