本文整理汇总了C++中Corpus::size方法的典型用法代码示例。如果您正苦于以下问题：C++ Corpus::size方法的具体用法？C++ Corpus::size怎么用？C++ Corpus::size使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类Corpus的用法示例。
在下文中一共展示了Corpus::size方法的4个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: main
//Reads the corpus file, the output folder, the minimum and the maximum number of clusters and runs the EM algorithm.
int main(int argc, char **argv) {
const char* info = "printinfo";
if(strcmp(argv[2],info) == 0){
Corpus *c = new Corpus(argv[1]);
cout << "Corpus Loaded - Unique Terms = " << c->vocsize << endl;
cout << "Total Terms = " << c->terms << endl;
cout << "Total Articles = " << c->size() << endl;
double avg = (double)c->terms/(double)c->size();
cout << "avg = " << avg << endl;
std::tr1::unordered_map<string,int>::iterator it;
string outfile = "Vocabulary.txt";
ofstream out;
out.open(outfile.c_str());
for(it=c->id2word.begin(); it != c->id2word.end(); it++){
if(c->df[it->second] > 3){
out << it->first << endl;
}
}
out.close();
return 0;
}
long pi = 3.141592653589793;
if(argc < 6)
cout << "Usage: ./em Cropus_File Output_Folder min_number_of_clusters max_number_of_clusters max_em_iterations" << endl;
int key=15;
long double likelihood=0.0,L=0;
Corpus *c = new Corpus(argv[1]);
int minC = atoi(argv[3]);
int maxC = atoi(argv[4]);
int MaxIter = atoi(argv[5]);
long double likelihoods[maxC+1];
cout << "Corpus Loaded - Unique Terms = " << c->vocsize << endl;
//OMPED Iterations in order to accelerate the process
#pragma omp parallel for
for(unsigned j=minC; j <= maxC; j++){
EM *em = new EM(j,c,MaxIter,string(argv[2]));
likelihoods[j] = em->run();
em->~EM();
}
string outfile = string(argv[2])+"/likelihoods.txt";
ofstream out;
out.open(outfile.c_str());
for(unsigned i = minC; i <= maxC; i++){
double d = (i*(c->vocsize-1))+(i-1);
long double penalty = (d/2.0)*log2(c->terms);
long double dr = ((d/2.0)*(2*pi));
long double bic = -likelihoods[i] + penalty;
cout << i << " " << -likelihoods[i] << " " << penalty << " " << bic << endl;
out << i << " " << -likelihoods[i] << " " << penalty << " " << bic << endl;
}
out.close();
return 0;
}
示例2: remap
// Passes every stored id of the corpus through alph.new_id().
// For each index below corp.size() the entries of corp.label(index) are
// rewritten first, followed by the entries of corp.input(index).
void remap(PhonemeAlphabet& alph, Corpus& corp) {
  for (unsigned seq = 0; seq < corp.size(); ++seq) {
    // Label side of this sequence.
    for (auto& entry : corp.label(seq)) {
      entry.id = alph.new_id(entry.id);
    }
    // Input side of the same sequence.
    for (auto& entry : corp.input(seq)) {
      entry.id = alph.new_id(entry.id);
    }
  }
}
示例3: Print
static void Print(const Rung& r, const Corpus& source, const Corpus& target,
const PrintParams& params) {
if(r.i == source.size() && r.j == target.size())
return;
if(r.score < params.printThreshold)
return;
if(params.print11 && (r.bead[0] != 1 || r.bead[1] != 1))
return;
if(!params.printUnaligned && (r.bead[0] == 0 || r.bead[1] == 0))
return;
const Sentence& s1 = source(r.i, r.i + r.bead[0] - 1);
const Sentence& s2 = target(r.j, r.j + r.bead[1] - 1);
if(params.printIds) std::cout << r.i << " " << r.j << "\t";
if(params.printBeads) std::cout << r.bead << "\t";
if(params.printScores) std::cout << r.score << "\t";
std::cout << s1 << "\t" << s2 << std::endl;
}
示例4: init_tool
bool init_tool(int argc, const char** argv, Options* opts) {
*opts = Options::parse_options(argc, argv);
if(!Options::has_required(*opts))
return false;
COLOR_ENABLED = !opts->has_opt("no-color");
FORCE_SCALE = opts->has_opt("force-scale");
SMOOTH = opts->has_opt("smooth");
SCALE_ENERGY = opts->has_opt("energy");
PRINT_SCALE = opts->has_opt("print-scale");
REPORT_PROGRESS = opts->has_opt("progress");
VLOG = std::ofstream(opts->get_opt<std::string>("vlog", "vlog.log"));
crf.label_alphabet = &alphabet_synth;
baseline_crf.label_alphabet = &alphabet_synth;
build_data(*opts);
pre_process(alphabet_synth, corpus_synth);
pre_process(alphabet_test, corpus_test);
alphabet_synth.optimize();
remap(alphabet_synth, corpus_synth);
alphabet_test.optimize();
remap(alphabet_test, corpus_test);
auto testSize = opts->get_opt<unsigned>("test-corpus-size", 10);
for(auto i = testSize; i < corpus_test.size(); i++)
corpus_eval.add(corpus_test.input(i), corpus_test.label(i));
corpus_test.set_max_size(testSize);
INFO("Synth sequences = " << corpus_synth.size());
INFO("Test sequences = " << corpus_test.size());
INFO("Eval sequences = " << corpus_eval.size());
return true;
}