本文整理汇总了C++中Corpus类的典型用法代码示例。如果您正苦于以下问题:C++ Corpus类的具体用法?C++ Corpus怎么用?C++ Corpus使用的例子?那么,这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了Corpus类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: load
/// Rebuilds the per-word storage layout for `corpus`, with at most `K` slots per word.
///
/// For every word id w in [0, corpus.getV()):
///   lengths_[w] = min(corpus.getWordCount(w), K)
///   indices_[w] = sum of lengths_[0 .. w-1]   (offset of word w inside data_)
/// data_ is then sized to the sum of all lengths and zero-filled.
/// Finally M_ is set to ceil(log2(K)) and mask_ to 2^M_ - 1.
///
/// @param K       upper bound on the number of slots reserved per word.
/// @param corpus  source of the vocabulary size and per-word counts.
void load(unsigned int K, const Corpus& corpus) {
  K_ = K;
  V_ = corpus.getV();
  indices_.resize(V_);
  lengths_.resize(V_);

  // An empty vocabulary leaves indices_/lengths_ empty; touching element 0
  // would be out of bounds, so the layout pass is skipped entirely.
  unsigned int total = 0;
  if (V_ > 0) {
    indices_[0] = 0;
    lengths_[0] = min(corpus.getWordCount(0), K);
    total = lengths_[0];
    for (unsigned int ii = 1; ii < V_; ++ii) {
      lengths_[ii] = min(corpus.getWordCount(ii), K);
      // Each word starts right after the previous word's slots.
      indices_[ii] = indices_[ii - 1] + lengths_[ii - 1];
      total += lengths_[ii];
    }
  }

  data_.resize(total);
  for (unsigned int ii = 0; ii < total; ++ii) {
    data_[ii] = 0;
  }

  // Set up M_ and mask_.
  // log2(0) is -infinity, so K <= 1 is handled explicitly (K == 1 gives 0 either way).
  if (K > 1) {
    M_ = ceil(log2(K));
  } else {
    M_ = 0;
  }
  mask_ = (1L << (M_)) - 1;
}
示例2: App
/// Runs one fixed-configuration LDA experiment end to end.
///
/// Steps, in order: seed the RNG from the wall clock, configure `LDA`, load the
/// corpus from "lda_data", split it into train/test with `SplitData`, run EM with
/// the "seeded" start type, log the resulting `alpha`, run inference on the test
/// split, and write the results to the files "gamma" and "phi".
void App() {
  // time() works on time_t; going through a `long*` is not portable to
  // platforms where time_t is not `long`.
  seedMT(time(NULL));

  float em_converged = 1e-4;
  int em_max_iter = 20;
  int em_estimate_alpha = 1;  // 1 indicates estimate alpha, 0 uses the given value
  int var_max_iter = 30;
  double var_converged = 1e-6;
  double initial_alpha = 0.1;
  int n_topic = 30;

  LDA lda;
  lda.Init(em_converged, em_max_iter, em_estimate_alpha, var_max_iter,
           var_converged, initial_alpha, n_topic);

  Corpus cor;
  // Str data = "../../data/ap.dat";
  Str data = "lda_data";
  cor.LoadData(data);

  Corpus train;
  Corpus test;
  double p = 0.8;  // presumably the fraction assigned to `train` - confirm in SplitData
  SplitData(cor, p, &train, &test);

  Str type = "seeded";
  LdaModel m;
  lda.RunEM(type, train, test, &m);
  LOG(INFO) << m.alpha;

  VVReal gamma;
  VVVReal phi;
  lda.Infer(test, m, &gamma, &phi);
  WriteStrToFile(Join(gamma, " ", "\n"), "gamma");
  WriteStrToFile(Join(phi, " ", "\n", "\n\n"), "phi");
}
示例3: calcularDesempenho
/**
 * Computes the hit rate for one attribute.
 *
 * Every token of every sentence is visited; a token counts as a hit when its
 * value for `atributo_padrao` equals its value for `atributo_teste`.
 *
 * @param corpus          corpus whose tokens are compared.
 * @param atributo_padrao index of the reference attribute.
 * @param atributo_teste  index of the attribute under evaluation.
 * @return a single-element vector holding hits / total tokens, or 0 when the
 *         corpus has no tokens (instead of the NaN a 0/0 division would give).
 */
vector<float> AvaliadorAcuracia::calcularDesempenho( Corpus &corpus, int atributo_padrao, int atributo_teste )
{
    vector<float> vectorAcuracia;
    const int row = corpus.pegarQtdSentencas();
    int acertos = 0;
    int totalTokens = 0;

    // `register` was dropped here: the keyword was removed in C++17.
    for ( int i = 0; i < row; ++i )
    {
        const int column = corpus.pegarQtdTokens( i );
        for ( int j = 0; j < column; ++j )
        {
            if ( corpus.pegarValor(i,j,atributo_padrao) == corpus.pegarValor(i,j,atributo_teste) )
                ++acertos;
            ++totalTokens;
        }
    }

    // Guard the division so an empty corpus does not produce NaN.
    const float acuracia = ( totalTokens > 0 )
        ? static_cast<float>( acertos ) / totalTokens
        : 0.0f;
    vectorAcuracia.push_back( acuracia );
    return vectorAcuracia;
}
示例4: fullOrderedCoverFromCorpus
/// Builds an OrderedCover holding, for every document in `corpus`, the result of
/// fullOrderedDocCoverFromDoc() on that document, keyed by the document's key.
OrderedCover fullOrderedCoverFromCorpus(Corpus const& corpus) {
  std::map<std::string, OrderedDocCover> coverByDoc;
  for (auto const& entry : corpus) {
    // end() is passed as the insertion hint, exactly as each entry is visited.
    coverByDoc.insert(coverByDoc.end(),
                      {entry.first, fullOrderedDocCoverFromDoc(entry.second)});
  }
  return OrderedCover(coverByDoc);
}
示例5: writeCorpusToStream
void writeCorpusToStream(std::ostream& os, Corpus const& corpus) {
for (auto i=corpus.begin() ; i!=corpus.end() ; ++i) {
if (i!=corpus.begin()) {
os << std::endl;
}
os << i->first << std::endl;
writeDocToStream(os, i->second, i->first);
}
}
示例6: main
//Reads the corpus file, the output folder, the minimum and the maximum number of clusters and runs the EM algorithm.
int main(int argc, char **argv) {
const char* info = "printinfo";
if(strcmp(argv[2],info) == 0){
Corpus *c = new Corpus(argv[1]);
cout << "Corpus Loaded - Unique Terms = " << c->vocsize << endl;
cout << "Total Terms = " << c->terms << endl;
cout << "Total Articles = " << c->size() << endl;
double avg = (double)c->terms/(double)c->size();
cout << "avg = " << avg << endl;
std::tr1::unordered_map<string,int>::iterator it;
string outfile = "Vocabulary.txt";
ofstream out;
out.open(outfile.c_str());
for(it=c->id2word.begin(); it != c->id2word.end(); it++){
if(c->df[it->second] > 3){
out << it->first << endl;
}
}
out.close();
return 0;
}
long pi = 3.141592653589793;
if(argc < 6)
cout << "Usage: ./em Cropus_File Output_Folder min_number_of_clusters max_number_of_clusters max_em_iterations" << endl;
int key=15;
long double likelihood=0.0,L=0;
Corpus *c = new Corpus(argv[1]);
int minC = atoi(argv[3]);
int maxC = atoi(argv[4]);
int MaxIter = atoi(argv[5]);
long double likelihoods[maxC+1];
cout << "Corpus Loaded - Unique Terms = " << c->vocsize << endl;
//OMPED Iterations in order to accelerate the process
#pragma omp parallel for
for(unsigned j=minC; j <= maxC; j++){
EM *em = new EM(j,c,MaxIter,string(argv[2]));
likelihoods[j] = em->run();
em->~EM();
}
string outfile = string(argv[2])+"/likelihoods.txt";
ofstream out;
out.open(outfile.c_str());
for(unsigned i = minC; i <= maxC; i++){
double d = (i*(c->vocsize-1))+(i-1);
long double penalty = (d/2.0)*log2(c->terms);
long double dr = ((d/2.0)*(2*pi));
long double bic = -likelihoods[i] + penalty;
cout << i << " " << -likelihoods[i] << " " << penalty << " " << bic << endl;
out << i << " " << -likelihoods[i] << " " << penalty << " " << bic << endl;
}
out.close();
return 0;
}
示例7: main
// Round-trip check: build a Corpus from TEST_DATA, write it to TMP_DATA and
// assert that the two files compare equal.
int main()
{
    Corpus corpus( Corpus::construct( TEST_DATA ) );
    corpus.write( TMP_DATA );

    // The written file must match the input file.
    assert( file_equal( TEST_DATA, TMP_DATA ) );

    return 0;
}
示例8: remap
/// Rewrites the `id` of every element in every label and input sequence of
/// `corp` to the value returned by `alph.new_id()` for the old id.
/// For each sequence the labels are processed before the inputs.
void remap(PhonemeAlphabet& alph, Corpus& corp) {
  for (unsigned seq = 0; seq < corp.size(); ++seq) {
    for (auto& phon : corp.label(seq)) {
      phon.id = alph.new_id(phon.id);
    }
    for (auto& phon : corp.input(seq)) {
      phon.id = alph.new_id(phon.id);
    }
  }
}
示例9: corpusSample
/// Returns a new Corpus containing only the documents of `corpus` whose keys are
/// listed in `docIds`.
///
/// @throws std::runtime_error (ERR_DOCUMENT_NOT_FOUND) as soon as an id is not
///         present in `corpus`.
///
/// NOTE(review): the dynamic exception specification is kept so the definition
/// still matches its declaration elsewhere; such specifications were removed in C++17.
Corpus
corpusSample(Corpus const& corpus, std::vector<std::string> const& docIds)
throw(std::runtime_error) {
  Corpus picked;
  for (auto const& docId : docIds) {
    auto found = corpus.find(docId);
    if (found == corpus.end()) {
      throw std::runtime_error(ERR_DOCUMENT_NOT_FOUND);
    }
    picked[docId] = found->second;
  }
  return picked;
}
示例10: init_tool
namespace tool {

// Shared tool state, filled in by init_tool().
Corpus corpus_synth;
Corpus corpus_test;
Corpus corpus_eval;
CRF crf;
BaselineCRF baseline_crf;
PhonemeAlphabet alphabet_synth;
PhonemeAlphabet alphabet_test;
StringLabelProvider labels_synth;
StringLabelProvider labels_test;
StringLabelProvider labels_all;
std::ofstream VLOG;

/// Parses the command line into `*opts` and prepares the shared tool state.
///
/// After option parsing this sets the global flags, opens the verbose log,
/// points both CRFs at `alphabet_synth`, builds and pre-processes the corpora,
/// optimizes and remaps both alphabets, then moves every test sequence with
/// index >= "test-corpus-size" (default 10) into `corpus_eval` and caps
/// `corpus_test` at that size.
///
/// @return false when Options::has_required() rejects the parsed options,
///         true otherwise.
bool init_tool(int argc, const char** argv, Options* opts) {
  *opts = Options::parse_options(argc, argv);
  if (!Options::has_required(*opts)) {
    return false;
  }

  // Flags taken from the command line.
  COLOR_ENABLED = !opts->has_opt("no-color");
  FORCE_SCALE = opts->has_opt("force-scale");
  SMOOTH = opts->has_opt("smooth");
  SCALE_ENERGY = opts->has_opt("energy");
  PRINT_SCALE = opts->has_opt("print-scale");
  REPORT_PROGRESS = opts->has_opt("progress");
  VLOG = std::ofstream(opts->get_opt<std::string>("vlog", "vlog.log"));

  // Both CRFs use the synth alphabet for labels.
  crf.label_alphabet = &alphabet_synth;
  baseline_crf.label_alphabet = &alphabet_synth;

  build_data(*opts);
  pre_process(alphabet_synth, corpus_synth);
  pre_process(alphabet_test, corpus_test);

  alphabet_synth.optimize();
  remap(alphabet_synth, corpus_synth);
  alphabet_test.optimize();
  remap(alphabet_test, corpus_test);

  // Everything past the first `testCount` test sequences becomes eval data.
  auto testCount = opts->get_opt<unsigned>("test-corpus-size", 10);
  for (auto idx = testCount; idx < corpus_test.size(); ++idx) {
    corpus_eval.add(corpus_test.input(idx), corpus_test.label(idx));
  }
  corpus_test.set_max_size(testCount);

  INFO("Synth sequences = " << corpus_synth.size());
  INFO("Test sequences = " << corpus_test.size());
  INFO("Eval sequences = " << corpus_eval.size());
  return true;
}

}
示例11: readCorpusFromStream
/// Reads documents from `is` with readDocFromStream() until the stream stops
/// being good, storing each one in the returned Corpus under its document name.
///
/// The document produced by the final read (the one after which the stream is
/// no longer good) is stored as well, unless its name is already present.
Corpus readCorpusFromStream(std::istream& is) {
  Corpus corpus;
  std::string docName;
  Document doc;

  // good() is true exactly when neither eof() nor fail() is.
  for (doc = readDocFromStream(is, docName);
       is.good();
       doc = readDocFromStream(is, docName)) {
    corpus[docName] = doc;
  }

  // Keep the last document read unless an entry with that name already exists.
  if (corpus.find(docName) == corpus.end()) {
    corpus[docName] = doc;
  }
  return corpus;
}
示例12: calcularDesempenho
/**
 * Builds the confusion matrix between two attributes of a corpus.
 *
 * The matrix is returned flattened in row-major order: the cell at
 * [true class index * number of classes + answered class index] counts the
 * tokens whose `atributo_padrao` value is the first class and whose
 * `atributo_teste` value is the second. Class indices are positions in
 * `classes`; if a value occurs more than once there, the last position wins.
 * The result is also stored in `ultimaMatriz`.
 *
 * @param corpus          corpus whose tokens are tallied.
 * @param atributo_padrao index of the reference attribute.
 * @param atributo_teste  index of the attribute under evaluation.
 * @return the flattened matrix, numeroClasses * numeroClasses entries.
 * @throws const char* when a token value is not found in `classes`.
 */
vector<float> AvaliadorMatrizConfusao::calcularDesempenho( Corpus &corpus, int atributo_padrao, int atributo_teste )
{
    const int row = corpus.pegarQtdSentencas();
    const int numeroClasses = classes.size();
    const int tam = numeroClasses*numeroClasses;
    int posVerdadeiro, posResposta;
    string resposta, verdade;

    // Sized and zero-filled in one step.
    vector<float> vectorMatriz( tam, 0.0f );

    // `register` was dropped from the loops: the keyword was removed in C++17.
    for ( int i = 0; i < row; ++i )
    {
        const int column = corpus.pegarQtdTokens( i );
        for ( int j = 0; j < column; ++j )
        {
            verdade = corpus(i,j,atributo_padrao);
            resposta = corpus(i,j,atributo_teste);

            // Look up both values in `classes`; -1 marks "not found".
            posVerdadeiro = posResposta = -1;
            for ( int c = 0; c < numeroClasses; ++c ){
                if (classes[c]==verdade)
                    posVerdadeiro = c;
                if (classes[c]==resposta)
                    posResposta = c;
            }

            if (posVerdadeiro == -1 || posResposta == -1 ){
                cout << "Classe não encontrada, uma exceção será gerada.";
                throw "Classe não encontrada pela matriz de confusão";
            }

            vectorMatriz[posVerdadeiro*numeroClasses+posResposta]++;
        }
    }

    ultimaMatriz = vectorMatriz;
    return vectorMatriz;
}
示例13: criarAtributos
// For each of the `numatributos` entries in `atributo`, asks the corpus to
// create an attribute named "New" + <entry>, passing "0" as the second argument.
void ProcessadorAttDisc::criarAtributos(Corpus &objCorpus){
    int idx = 0;
    while ( idx < numatributos )
    {
        objCorpus.criarAtributo("New"+atributo[idx],"0");
        ++idx;
    }
}
示例14: main
// Trains a MedSTC model and reports its accuracy on the test set.
//
// Positional arguments read by this function:
//   argv[1] -> param.NTOPICS      argv[2] -> param.INITIAL_C
//   argv[3] -> param.LAMBDA       argv[4] -> param.RHO
//   argv[5] -> settings file      argv[6] -> param.NFOLDS (optional, default 1)
//   argv[7] -> param.DELTA_ELL (optional)
// With fewer than five arguments the usage text is printed instead.
// Always returns 0.
int main(int argc, char* argv[])
{
    seedMT( time(NULL) );

    // argv[1] through argv[5] are read unconditionally below, so all five must exist.
    if ( argc > 5 )
    {
        Corpus* c = new Corpus();
        Params param;
        param.read_settings( argv[5] ); //"settings.txt");
        param.NTOPICS = atoi(argv[1]);
        param.INITIAL_C = atof(argv[2]);
        param.LAMBDA = atof(argv[3]);
        param.RHO = atof(argv[4]);
        param.NFOLDS = 1;
        if ( argc > 6 ) param.NFOLDS = atoi(argv[6]);
        if ( argc > 7 ) param.DELTA_ELL = atof(argv[7]);

        c->read_data(param.train_filename, param.NLABELS);

        // The output directory name encodes the run configuration.
        // snprintf bounds the write to the buffer size.
        char dir[512];
        snprintf(dir, sizeof(dir), "s%d_c%d_f%d_s%d", param.NTOPICS, (int)param.INITIAL_C,
                 param.NFOLDS, param.SUPERVISED);
        mkdir(dir,0755);

        MedSTC model;
        model.train("random", dir, c, &param);

        // testing.
        Corpus *tstC = new Corpus();
        tstC->read_data(param.test_filename, param.NLABELS);
        MedSTC evlModel;
        double dAcc = evlModel.sparse_coding(dir, tstC, &param);
        printf("Accuracy: %.3f\n", dAcc);

        delete tstC;
        delete c;
    } else {
        printf("usage : MedSTC est [initial alpha] [k] [labels] [random/seeded/*] [directory]\n");
        printf(" MedSTC cv [foldnum] [foldix] [initial alpha] [k] [labels] [settings] [data] [random/seeded/*] [directory]\n");
        printf(" MedSTC inf [settings] [model] [data] [name]\n");
    }
    return 0;
}
示例15: MGRTMApp
void MGRTMApp() {
ml::Converged converged;
converged.em_converged_ = 1e-4;
converged.em_max_iter_ = 100;
converged.var_converged_ = 1e-4;
converged.var_max_iter_ = 10;
int rho = 3;
VarMGRTM var;
var.Init(converged,rho);
var.Load(FLAGS_net_path, FLAGS_cor_path, FLAGS_neg_times);
Str path(FLAGS_cor_path);
Corpus cor;
cor.LoadData(path);
MGRTM m;
m.Init(2, FLAGS_local_topic, FLAGS_global_topic, cor.TermNum(), 1, 0.01, 0.01);
var.RunEM(&m);
}