当前位置: 首页>>代码示例>>C++>>正文


C++ Corpus类代码示例

本文整理汇总了C++中Corpus的典型用法代码示例。如果您正苦于以下问题:C++ Corpus类的具体用法?C++ Corpus怎么用?C++ Corpus使用的例子?那么, 这里精选的类代码示例或许可以为您提供帮助。


在下文中一共展示了Corpus类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。

示例1: load

  /**
   * Sizes and zeroes the packed per-word storage for the given corpus.
   *
   * Word ii is given min(corpus.getWordCount(ii), K) slots in data_;
   * indices_[ii] is the offset of its first slot and lengths_[ii] the number
   * of slots. M_ and mask_ are derived from K (M_ bits, mask_ = 2^M_ - 1).
   *
   * @param K      slot cap per word (presumably the number of topics -- TODO confirm)
   * @param corpus source of the vocabulary size (getV) and per-word counts
   */
  void load(unsigned int K, const Corpus& corpus) {
		K_ = K;
		V_ = corpus.getV();

		indices_.resize(V_);
		lengths_.resize(V_);

		// Running prefix sum: the offset of word ii is the total number of slots
		// handed out to words 0..ii-1. Starting the loop at 0 (instead of
		// special-casing element 0) keeps an empty vocabulary from writing
		// past the end of the zero-sized vectors.
		unsigned int total = 0;
		for (unsigned int ii = 0; ii < V_; ++ii) {
			indices_[ii] = total;
			lengths_[ii] = min(corpus.getWordCount(ii), K);
			total += lengths_[ii];
		}

		data_.resize(total);
		for (unsigned int ii = 0; ii < total; ++ii) {
			data_[ii] = 0;
		}

		// Set up M_ and mask_. log2(0) is -infinity and cannot be converted to
		// an integer, so K <= 1 is mapped to zero bits explicitly (for K == 1
		// this equals the original ceil(log2(1)) == 0).
		if (K > 1) {
			M_ = ceil(log2(K));
		} else {
			M_ = 0;
		}
		mask_ = (1L << (M_)) - 1;
	}
开发者ID:rforge,项目名称:rtm,代码行数:26,代码来源:sparseLDA.cpp

示例2: App

// Driver: loads the corpus stored in "lda_data", splits it with ratio 0.8,
// runs "seeded" EM for an LDA model, logs the resulting alpha, then runs
// inference on the test split and writes the results to the files "gamma"
// and "phi".
void App() {
  // time() takes a time_t*; the previous `long` variable only compiled on
  // platforms where time_t happens to be long.
  time_t now = time(NULL);
  seedMT(now);

  // EM / variational settings handed to LDA::Init.
  float em_converged = 1e-4;
  int em_max_iter = 20;
  int em_estimate_alpha = 1; //1 indicate estimate alpha and 0 use given value
  int var_max_iter = 30;
  double var_converged = 1e-6;
  double initial_alpha = 0.1;
  int n_topic = 30;
  LDA lda;
  lda.Init(em_converged, em_max_iter, em_estimate_alpha, var_max_iter,
                         var_converged, initial_alpha, n_topic);

  Corpus cor;
  //Str data = "../../data/ap.dat";
  Str data = "lda_data";
  cor.LoadData(data);

  // NOTE(review): p is presumably the fraction of documents that goes to
  // `train` -- confirm against SplitData.
  Corpus train;
  Corpus test;
  double p = 0.8;
  SplitData(cor, p, &train, &test);

  Str type = "seeded";
  LdaModel m;
  lda.RunEM(type, train, test, &m);

  LOG(INFO) << m.alpha;

  VVReal gamma;
  VVVReal phi;
  lda.Infer(test, m, &gamma, &phi);
  WriteStrToFile(Join(gamma, " ", "\n"), "gamma");
  WriteStrToFile(Join(phi, " ", "\n", "\n\n"), "phi");
}
开发者ID:lijiankou,项目名称:bigdata,代码行数:33,代码来源:lda_app.cpp

示例3: calcularDesempenho

/**
 * Computes the fraction of tokens whose value for attribute `atributo_teste`
 * equals their value for attribute `atributo_padrao`.
 *
 * @param corpus          corpus whose sentences and tokens are scanned
 * @param atributo_padrao index of the reference attribute
 * @param atributo_teste  index of the attribute being evaluated
 * @return a one-element vector holding matches / total tokens, or 0 when the
 *         corpus contains no tokens (previously this was 0/0, i.e. NaN)
 */
vector<float> AvaliadorAcuracia::calcularDesempenho( Corpus &corpus, int atributo_padrao, int atributo_teste )
{
    vector<float> vectorAcuracia;
    const int qtdSentencas = corpus.pegarQtdSentencas();
    int acertos = 0, totalTokens = 0;

    // `register` was dropped from the loop counters: the specifier was
    // removed from the language in C++17.
    for ( int i = 0; i < qtdSentencas; ++i )
    {
        const int qtdTokens = corpus.pegarQtdTokens( i );

        for ( int j = 0; j < qtdTokens; ++j )
        {
            if ( corpus.pegarValor(i,j,atributo_padrao) == corpus.pegarValor(i,j,atributo_teste) )
                ++acertos;

            ++totalTokens;
        }
    }

    // Guard the division so an empty corpus reports 0 instead of NaN.
    const float acuracia = ( totalTokens > 0 )
        ? static_cast<float>( acertos ) / totalTokens
        : 0.0f;
    vectorAcuracia.push_back( acuracia );

    return vectorAcuracia;
}
开发者ID:cristianommsilva,项目名称:machine-learning-ip-ime,代码行数:28,代码来源:avaliador_acuracia.cpp

示例4: fullOrderedCoverFromCorpus

// Builds an OrderedCover that maps each corpus entry's key to the result of
// fullOrderedDocCoverFromDoc() applied to that entry's document.
OrderedCover fullOrderedCoverFromCorpus(Corpus const& corpus) {
    typedef std::map<std::string, OrderedDocCover> CoverMap;

    CoverMap covers;
    for (auto const& entry : corpus) {
        // Insert with the end iterator as the position hint, as each new
        // element is offered at the back of the map.
        covers.insert(covers.end(),
                      CoverMap::value_type(entry.first,
                                           fullOrderedDocCoverFromDoc(entry.second)));
    }
    return OrderedCover(covers);
}
开发者ID:estnltk,项目名称:pfe,代码行数:8,代码来源:Corpus.cpp

示例5: writeCorpusToStream

void writeCorpusToStream(std::ostream& os, Corpus const& corpus) {
    for (auto i=corpus.begin() ; i!=corpus.end() ; ++i) {
        if (i!=corpus.begin()) {
            os << std::endl;
        }
        os << i->first << std::endl;
        writeDocToStream(os, i->second, i->first);
    }
}
开发者ID:estnltk,项目名称:pfe,代码行数:9,代码来源:Corpus.cpp

示例6: main

//Reads the corpus file, the output folder, the minimum and the maximum number of clusters and runs the EM algorithm.
int main(int argc, char **argv) {

  const char* info = "printinfo";
  if(strcmp(argv[2],info) == 0){
     Corpus *c = new Corpus(argv[1]);
     cout << "Corpus Loaded - Unique Terms = " << c->vocsize << endl;
     cout << "Total Terms = " << c->terms << endl;
     cout << "Total Articles = " << c->size() << endl;
     double avg = (double)c->terms/(double)c->size();
     cout << "avg = " << avg << endl;
     std::tr1::unordered_map<string,int>::iterator it;
     string outfile = "Vocabulary.txt";
     ofstream out;
     out.open(outfile.c_str());
     for(it=c->id2word.begin(); it != c->id2word.end(); it++){
       if(c->df[it->second] > 3){
	out << it->first << endl;
       }
    }
    out.close();
    return 0;
  }
  long pi = 3.141592653589793;
  if(argc < 6)
    cout << "Usage: ./em Cropus_File Output_Folder min_number_of_clusters max_number_of_clusters max_em_iterations" << endl;
  int key=15;
  long double likelihood=0.0,L=0;
  
  Corpus *c = new Corpus(argv[1]);
  int minC = atoi(argv[3]);
  int maxC = atoi(argv[4]);
  int MaxIter = atoi(argv[5]);
  long double likelihoods[maxC+1];
  cout << "Corpus Loaded - Unique Terms = " << c->vocsize << endl;
//OMPED Iterations in order to accelerate the process
#pragma omp parallel for
  for(unsigned j=minC; j <= maxC; j++){
    EM *em = new EM(j,c,MaxIter,string(argv[2]));
    likelihoods[j] = em->run();
    em->~EM();
  }
  
  string outfile = string(argv[2])+"/likelihoods.txt";
  ofstream out;
  out.open(outfile.c_str());
  for(unsigned i = minC; i <= maxC; i++){
    double d = (i*(c->vocsize-1))+(i-1);
    long double penalty = (d/2.0)*log2(c->terms);
    long double dr = ((d/2.0)*(2*pi));
    long double bic = -likelihoods[i] + penalty;
    cout << i << " " << -likelihoods[i] << " " << penalty << " " << bic << endl;
    out << i << " " << -likelihoods[i] << " " << penalty << " " << bic << endl;
  }
  out.close();
  return 0;
}
开发者ID:f1r3w1nd,项目名称:EM,代码行数:57,代码来源:main.cpp

示例7: main

// Round-trip test: builds a Corpus from TEST_DATA, writes it to TMP_DATA and
// checks that the two files are equal.
//
// Returns 0 when the files match and 1 otherwise. The assert still aborts in
// debug builds; the explicit exit code keeps the test meaningful when assert
// is compiled out with NDEBUG (previously such a build always returned 0).
int main()
{
    Corpus corpus = Corpus::construct( TEST_DATA );

    corpus.write( TMP_DATA );

    // Assert files are equal
    const bool identical = file_equal( TEST_DATA, TMP_DATA );
    assert( identical );

    return identical ? 0 : 1;
}
开发者ID:arunchaganty,项目名称:ctm-cvb,代码行数:11,代码来源:read_corpus.cpp

示例8: remap

// Replaces, in place, the id of every element in each label sequence and each
// input sequence of the corpus with alph.new_id(old id). For every sequence
// index the labels are rewritten first, then the inputs.
void remap(PhonemeAlphabet& alph, Corpus& corp) {
  unsigned seq = 0;
  while(seq < corp.size()) {
    for(auto& item : corp.label(seq)) {
      item.id = alph.new_id(item.id);
    }

    for(auto& item : corp.input(seq)) {
      item.id = alph.new_id(item.id);
    }

    ++seq;
  }
}
开发者ID:ivanzamanov,项目名称:mini-crf,代码行数:11,代码来源:tool.cpp

示例9: corpusSample

// Returns a new Corpus holding copies of the documents whose ids are listed
// in `docIds`. Throws std::runtime_error(ERR_DOCUMENT_NOT_FOUND) as soon as
// an id that is not present in `corpus` is encountered.
Corpus
corpusSample(Corpus const& corpus, std::vector<std::string> const& docIds)
throw(std::runtime_error) {
    Corpus subset;
    for (std::string const& docId : docIds) {
        auto const found = corpus.find(docId);
        if (found == corpus.end()) {
            throw std::runtime_error(ERR_DOCUMENT_NOT_FOUND);
        }
        subset[docId] = found->second;
    }
    return subset;
}
开发者ID:estnltk,项目名称:pfe,代码行数:14,代码来源:Corpus.cpp

示例10: init_tool

// Global state of the command-line tool plus its one-time initialisation.
namespace tool {
  // Corpora filled/partitioned by init_tool(): corpus_eval receives the
  // entries of corpus_test from index "test-corpus-size" onward.
  Corpus corpus_synth, corpus_test, corpus_eval;

  // Both models have their label_alphabet pointed at alphabet_synth in init_tool().
  CRF crf;
  BaselineCRF baseline_crf;
  PhonemeAlphabet alphabet_synth, alphabet_test;

  StringLabelProvider labels_synth;
  StringLabelProvider labels_test;
  StringLabelProvider labels_all;

  // Log stream; opened by init_tool() on the file named by the "vlog" option.
  std::ofstream VLOG;

  // Parses the command line into *opts and prepares the globals above.
  //
  // Returns false, before touching any other state, when
  // Options::has_required() rejects the parsed options; returns true once
  // initialisation has completed.
  bool init_tool(int argc, const char** argv, Options* opts) {
    *opts = Options::parse_options(argc, argv);
    if(!Options::has_required(*opts))
      return false;
    // Feature switches: each is set from the presence of the named option
    // (colour output is on unless "no-color" is given).
    COLOR_ENABLED = !opts->has_opt("no-color");
    FORCE_SCALE = opts->has_opt("force-scale");
    SMOOTH = opts->has_opt("smooth");
    SCALE_ENERGY = opts->has_opt("energy");
    PRINT_SCALE = opts->has_opt("print-scale");
    REPORT_PROGRESS = opts->has_opt("progress");

    // NOTE(review): "vlog.log" is presumably the default used when the
    // "vlog" option is absent -- confirm against Options::get_opt.
    VLOG = std::ofstream(opts->get_opt<std::string>("vlog", "vlog.log"));

    crf.label_alphabet = &alphabet_synth;
    baseline_crf.label_alphabet = &alphabet_synth;
    build_data(*opts);

    pre_process(alphabet_synth, corpus_synth);
    pre_process(alphabet_test, corpus_test);
  
    // Each alphabet is optimized before its corpus is remapped onto the
    // ids reported by new_id().
    alphabet_synth.optimize();
    remap(alphabet_synth, corpus_synth);

    alphabet_test.optimize();
    remap(alphabet_test, corpus_test);

    // Entries of corpus_test at index >= testSize are copied into
    // corpus_eval. NOTE(review): set_max_size(testSize) presumably limits
    // corpus_test to its first testSize entries -- confirm.
    auto testSize = opts->get_opt<unsigned>("test-corpus-size", 10);
    for(auto i = testSize; i < corpus_test.size(); i++)
      corpus_eval.add(corpus_test.input(i), corpus_test.label(i));
    corpus_test.set_max_size(testSize);

    INFO("Synth sequences = " << corpus_synth.size());
    INFO("Test sequences = " << corpus_test.size());
    INFO("Eval sequences = " << corpus_eval.size());
    return true;
  }
}
开发者ID:ivanzamanov,项目名称:mini-crf,代码行数:50,代码来源:tool.cpp

示例11: readCorpusFromStream

// Reads documents from `is` with readDocFromStream() until the stream reports
// EOF or failure, storing each one in the returned Corpus under the name that
// readDocFromStream() reported for it.
Corpus readCorpusFromStream(std::istream& is) {
    Corpus result;
    std::string name;
    Document current;

    // Every document whose read left the stream in a good state is stored.
    for (current = readDocFromStream(is, name);
         !(is.eof() || is.fail());
         current = readDocFromStream(is, name)) {
        result[name] = current;
    }

    // The read that hit EOF/failure may still have produced a document; keep
    // it unless an entry with that name has already been stored.
    if (result.find(name) == result.end()) {
        result[name] = current;
    }
    return result;
}
开发者ID:estnltk,项目名称:pfe,代码行数:15,代码来源:Corpus.cpp

示例12: calcularDesempenho

/**
 * Builds the confusion matrix between the reference attribute and the
 * evaluated attribute, counting tokens per (true class, answered class) pair.
 *
 * The matrix is returned flattened: the count for true class t and answered
 * class r (both indices into `classes`) is stored at t * classes.size() + r.
 * The result is also saved in `ultimaMatriz`.
 *
 * @param corpus          corpus whose tokens are scanned
 * @param atributo_padrao index of the reference ("truth") attribute
 * @param atributo_teste  index of the attribute being evaluated
 * @return the flattened matrix of counts
 * @throws const char* when a token carries a value, for either attribute,
 *         that is not listed in `classes`
 */
vector<float> AvaliadorMatrizConfusao::calcularDesempenho( Corpus &corpus, int atributo_padrao, int atributo_teste )
{
    const int row = corpus.pegarQtdSentencas();
    const int numeroClasses = classes.size();
    const int tam = numeroClasses*numeroClasses;
    string resposta, verdade;

    // Sized constructor zero-fills every cell; replaces resize + manual loop.
    vector<float> vectorMatriz( tam, 0.0f );

    // `register` was dropped from the loop counters: the specifier was
    // removed from the language in C++17.
    for ( int i = 0; i < row; ++i )
    {
        const int column = corpus.pegarQtdTokens( i );

        for ( int j = 0; j < column; ++j )
        {
            verdade = corpus(i,j,atributo_padrao);
            resposta = corpus(i,j,atributo_teste);

            // Map both values to their class index; when a class appears
            // more than once in `classes`, the last occurrence wins.
            int posVerdadeiro = -1, posResposta = -1;
            for ( int c = 0; c < numeroClasses; ++c ){
                if (classes[c]==verdade)
                    posVerdadeiro = c;
                if (classes[c]==resposta)
                    posResposta = c;

            }
            if (posVerdadeiro == -1 || posResposta == -1 ){
                cout << "Classe não encontrada, uma exceção será gerada.";
                throw "Classe não encontrada pela matriz de confusão";
            }

            vectorMatriz[posVerdadeiro*numeroClasses+posResposta]++;
        }
    }

    ultimaMatriz = vectorMatriz;
    return vectorMatriz;
}
开发者ID:duartejulio,项目名称:fama,代码行数:47,代码来源:avaliadormatrizconfusao.cpp

示例13: criarAtributos

// For each of the first `numatributos` entries of `atributo`, creates a
// corpus attribute named "New" + that entry, passing "0" as the second
// argument of criarAtributo.
void ProcessadorAttDisc::criarAtributos(Corpus &objCorpus){
    int indice = 0;
    while (indice < numatributos)
    {
        objCorpus.criarAtributo("New" + atributo[indice], "0");
        ++indice;
    }
}
开发者ID:duartejulio,项目名称:fama,代码行数:8,代码来源:processadorattdisc.cpp

示例14: main

// Trains a MedSTC model on the training file named in the settings, then
// evaluates a second model instance on the test file and prints its accuracy.
//
// Arguments actually read by this function:
//   argv[1] NTOPICS, argv[2] INITIAL_C, argv[3] LAMBDA, argv[4] RHO,
//   argv[5] settings file, optional argv[6] NFOLDS, optional argv[7] DELTA_ELL.
// With fewer than five arguments the usage text is printed instead.
int main(int argc, char* argv[])
{
	seedMT( time(NULL) );

	// argv[1]..argv[5] are all read below, so at least five arguments are
	// required; the former `argc > 1` test let shorter command lines read
	// past the end of argv.
	if ( argc > 5 )
	{
		Corpus* c = new Corpus();
		Params param;

		param.read_settings( argv[5] ); //"settings.txt");
		param.NTOPICS    = atoi(argv[1]);
		param.INITIAL_C  = atof(argv[2]);
		param.LAMBDA     = atof(argv[3]);
		param.RHO        = atof(argv[4]);
		param.NFOLDS	 = 1;
		if ( argc > 6 ) param.NFOLDS     = atoi(argv[6]);
		if ( argc > 7 ) param.DELTA_ELL  = atof(argv[7]);

		c->read_data(param.train_filename, param.NLABELS);

		// Output directory name encodes the main settings; snprintf bounds
		// the write to the buffer.
		char dir[512];
		snprintf(dir, sizeof(dir), "s%d_c%d_f%d_s%d", param.NTOPICS, (int)param.INITIAL_C, 
			param.NFOLDS, param.SUPERVISED);
		mkdir(dir,0755);

		MedSTC model;
		model.train("random", dir, c, &param);

		// testing.
		Corpus *tstC = new Corpus();
		tstC->read_data(param.test_filename, param.NLABELS);
		MedSTC evlModel;
		double dAcc = evlModel.sparse_coding(dir, tstC, &param);
		printf("Accuracy: %.3f\n", dAcc);

		delete tstC;
		delete c;
	} else {
		printf("usage : MedSTC est [initial alpha] [k] [labels] [random/seeded/*] [directory]\n");
		printf("        MedSTC cv [foldnum] [foldix] [initial alpha] [k] [labels] [settings] [data] [random/seeded/*] [directory]\n");
		printf("        MedSTC inf [settings] [model] [data] [name]\n");
	}

	return 0;
}
开发者ID:aykutfirat,项目名称:MedSTC-Mac,代码行数:44,代码来源:main.cpp

示例15: MGRTMApp

void MGRTMApp() {
  ml::Converged converged;
  converged.em_converged_ = 1e-4;
  converged.em_max_iter_ = 100;
  converged.var_converged_ = 1e-4;
  converged.var_max_iter_ = 10;
  int rho = 3;
           
  VarMGRTM var;
  var.Init(converged,rho);
  var.Load(FLAGS_net_path, FLAGS_cor_path, FLAGS_neg_times);
                    
  Str path(FLAGS_cor_path);
  Corpus cor;
  cor.LoadData(path);
                           
  MGRTM m;
  m.Init(2, FLAGS_local_topic, FLAGS_global_topic, cor.TermNum(), 1, 0.01, 0.01);
  var.RunEM(&m);
}
开发者ID:lijiankou,项目名称:mllib-1,代码行数:20,代码来源:estimate.cpp


注:本文中的Corpus类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。