当前位置: 首页>>代码示例>>C++>>正文


C++ api::QueryEnvironment类代码示例

本文整理汇总了C++中indri::api::QueryEnvironment的典型用法代码示例。如果您正苦于以下问题:C++ QueryEnvironment类的具体用法?C++ QueryEnvironment怎么用?C++ QueryEnvironment使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。


在下文中一共展示了QueryEnvironment类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。

示例1: findConceptScorePrf

// Scores a single candidate concept against a fixed working set of top-ranked
// documents.
//
// The concept (conceptSty, conceptStr) is registered for query qId, the query
// reformulator builds the query text that includes it, and that query is run
// restricted to topDocIds. The returned value is the sum of the scores of all
// results.
//
// Results are looked up through wsuIr::expander::Utility::runQuery_results_*
// first; the query is only executed (and its results stored through the same
// helpers) when no stored results exist for this query/working-set pair.
//
// Throws std::runtime_error when the reformulator yields no query text, or
// when the number of scored results differs from the number of documents in
// topDocIds.
double indri::query::ConceptSelectorFuns::findConceptScorePrf(string conceptSty, string conceptStr, string qId, std::vector<lemur::api::DOCID_T> topDocIds,
        indri::api::QueryEnvironment & env,
        indri::query::QueryReformulator * queryReformulator,
        vector<string> resourceNames_)
{
    // Publish the single candidate concept for this query id; the reformulator
    // call below is made after this assignment, exactly as before.
    vector<pair<string, string> > tmp = {make_pair(conceptSty, conceptStr)};
    vector<pair<string, vector<pair<string, string> > > > candConcepts_ = {make_pair(qId, tmp)};
    oneResourceConceptsParams.oneResourceConcepts = candConcepts_;

    vector<pair<string, string> > queriesText = queryReformulator->testOneConceptAddition2OneQuery(conceptSty, conceptStr, qId, resourceNames_);

    std::vector< indri::api::ScoredExtentResult > results_;
    if(wsuIr::expander::Utility::runQuery_results_isExist(queriesText, topDocIds))
    {
        results_ = wsuIr::expander::Utility::runQuery_results_get(queriesText, topDocIds);
    }
    else
    {
        // front() on an empty vector is undefined behaviour, so fail loudly
        // instead of reading past the container.
        if(queriesText.empty())
            throw runtime_error("RunQUery.cpp: no query text was generated for the concept");

        // Run the reformulated query on the working set of top-ranked documents.
        results_ = env.runQuery(queriesText.front().second, topDocIds, topDocIds.size());
        wsuIr::expander::Utility::runQuery_results_store(queriesText, topDocIds, results_);
    }

    if(results_.size() != topDocIds.size())
        throw runtime_error("RunQUery.cpp: some of top-ranked documents are not scored");

    // The concept score is the sum of the per-document scores.
    double conceptScore = 0;
    for(const auto& r: results_)
    {
        conceptScore += r.score;
    }

    return conceptScore;
}
开发者ID:teanalab,项目名称:MRF-L,代码行数:35,代码来源:ConceptSelectorFuns.cpp

示例2: open_indexes

// Configures the query environment from the parameter tree:
//   "index"  entries are added as indexes via addIndex(),
//   "server" entries are added via addServer(),
//   "rule"   entries, when copy_parameters_to_string_vector() reports any,
//            are installed as the scoring rules.
static void open_indexes( indri::api::QueryEnvironment& environment, 
                          indri::api::Parameters& param ) {
  // Index entries.
  if( param.exists( "index" ) ) {
    indri::api::Parameters indexList = param["index"];
    unsigned int n = 0;
    while( n < indexList.size() ) {
      environment.addIndex( std::string(indexList[n]) );
      ++n;
    }
  }

  // Server entries.
  if( param.exists( "server" ) ) {
    indri::api::Parameters serverList = param["server"];
    unsigned int n = 0;
    while( n < serverList.size() ) {
      environment.addServer( std::string(serverList[n]) );
      ++n;
    }
  }

  // Scoring rules are only installed when the helper reports success.
  std::vector<std::string> rules;
  if( !copy_parameters_to_string_vector( rules, param, "rule" ) )
    return;
  environment.setScoringRules( rules );
}
开发者ID:blaze3j,项目名称:DocHunt,代码行数:18,代码来源:clarity.cpp

示例3: clarity

// Computes the query clarity score (in bits) without printing the terms.
//
//   clarity = SUM_w { P(w|Q) * log2( P(w|Q) / P(w) ) }
//
// where P(w|Q) is the weight of a gram (a probability produced by any model,
// e.g. a relevance model) and P(w) = cf(w) / |C| is the collection
// probability of the gram's first term. The sum is normalised by the total
// weight of the grams that were visited (or by 1 when that total is zero).
//
// query    - unused by the computation; kept for interface compatibility.
// env      - supplies stemCount() and termCount() for P(w).
// grams    - weighted grams; only terms[0] of each gram is used.
// numTerms - maximum number of grams to visit. It is converted to size_t for
//            the comparison, exactly as the original implicit conversion did,
//            so a negative value results in every gram being visited.
static double clarity( const std::string& query, 
                       indri::api::QueryEnvironment & env, 
                       const std::vector<indri::query::RelevanceModel::Gram*>& grams, int numTerms ) {

  const size_t requested = static_cast<size_t>( numTerms );
  double sum=0, ln_Pr=0;
  for( size_t j=0; j < requested && j < grams.size(); j++ ) {
    const std::string& t = grams[j]->terms[0];
    // the relevance model uses stemmed terms, so use stemCount
    double pw = ((double)env.stemCount(t)/(double)env.termCount());
    double pwq = grams[j]->weight;
    sum += pwq;
    // A zero-weight gram contributes nothing (p*log(p/q) -> 0 as p -> 0).
    // Evaluating 0*log(0) directly would yield NaN and poison the whole sum.
    if( pwq != 0 )
      ln_Pr += (pwq)*log(pwq/pw);
  }
  // Normalise by the visited weight mass and convert from nats to bits.
  return (ln_Pr/(sum ? sum : 1.0)/log(2.0));
}
开发者ID:blaze3j,项目名称:DocHunt,代码行数:21,代码来源:clarity.cpp

示例4: init

void matIR::QueryStats::init(const std::string& query, indri::api::QueryEnvironment& environment)
{

    // Extract only the terms from the query and add to the vector
    indri::api::QueryParserWrapper *parser = indri::api::QueryParserFactory::get(query, "indri");
    indri::lang::ScoredExtentNode* rootNode = parser->query();
    indri::lang::RawScorerNodeExtractor extractor;
    rootNode->walk(extractor);
    std::vector<indri::lang::RawScorerNode*>& scorerNodes = extractor.getScorerNodes();

    for (int i = 0; i < scorerNodes.size(); i++){
        std::string qterm = environment.stemTerm(scorerNodes[i]->queryText());
        queryString.push_back(qterm);
        if(environment.stemCount(qterm) == 0)
            continue;
        if( _queryTokens.find(qterm) == _queryTokens.end() )
            _queryTokens.insert(make_pair( qterm, 1));
        else
            _queryTokens[qterm] += 1;
    }

    // Initialize vectors


    _query_collectionFrequency.set_size(_queryTokens.size());
    _query_documentFrequency.set_size(_queryTokens.size());



    // Now obtain the statistics
    int i = 0;
    map<std::string, int>::const_iterator iter;
    for (iter=_queryTokens.begin(); iter != _queryTokens.end(); ++iter) {
        std::string stem = environment.stemTerm(iter->first);
        _query_collectionFrequency(i) = (double) environment.stemCount(stem);
        _query_documentFrequency(i) = (double) environment.documentStemCount(stem);
        ++i;

    }
}
开发者ID:semanticpc,项目名称:matIR,代码行数:40,代码来源:QueryStats.cpp

示例5: updateQueryDetails

    // Records the stems of `query` in resultData.
    //
    // The query is parsed with the "indri" parser and every raw scorer node
    // is stemmed. Stems whose stemCount() is zero are ignored. For the
    // others, resultData.queryStems holds the number of occurrences and
    // resultData.queryStemOrder lists each distinct stem once, in order of
    // first appearance.
    void updateQueryDetails(indri::api::QueryEnvironment& environment,
                            Results& resultData,
                            string query){

        indri::api::QueryParserWrapper *queryParser = indri::api::QueryParserFactory::get(query, "indri");
        indri::lang::ScoredExtentNode* parseTree = queryParser->query();
        indri::lang::RawScorerNodeExtractor scorerExtractor;
        parseTree->walk(scorerExtractor);
        vector<indri::lang::RawScorerNode*>& rawNodes = scorerExtractor.getScorerNodes();

        for (int n = 0; n < rawNodes.size(); ++n){
            string stemmed = environment.stemTerm(rawNodes[n]->queryText());

            // Only stems that occur in the collection are recorded.
            if(environment.stemCount(stemmed) != 0){
                if( resultData.queryStems.find(stemmed) != resultData.queryStems.end() ){
                    // Seen before: bump the occurrence count.
                    resultData.queryStems[stemmed] += 1;
                }
                else{
                    // First occurrence: start the count and remember the order.
                    resultData.queryStems.insert(make_pair( stemmed, 1));
                    resultData.queryStemOrder.push_back(stemmed);
                }
            }
        }
    }
开发者ID:semanticpc,项目名称:indriR,代码行数:22,代码来源:indriRetOld.cpp

示例6: normConceptScorePrf

// Scores every candidate concept with findConceptScorePrf() against the
// documents named in topDocsNames and returns the concepts keyed by their
// min-max normalised score, (score - min) / (max - min).
//
// concatenatedGoodConcepts - (type, string) pairs of the candidate concepts.
// qId                      - id of the query the concepts belong to.
// topDocsNames             - "docno" metadata values of the working set.
// env                      - query environment used for lookups and scoring.
// queryReformulator, resourceNames_ - forwarded to findConceptScorePrf().
//
// When every concept has the same score (including the single-concept case)
// the range is zero and all normalised scores are reported as 0 rather than
// the NaN that 0/0 would produce; NaN keys would also break the ordering of
// the returned multimap.
multimap<double, pair<string, string> > indri::query::ConceptSelectorFuns::normConceptScorePrf(
                                                            vector<pair<string, string> > concatenatedGoodConcepts,
                                                            string qId,
                                                            vector<string> topDocsNames,
                                                            indri::api::QueryEnvironment & env,
                                                            indri::query::QueryReformulator * queryReformulator,
                                                            vector<string> resourceNames_)
{
    std::vector<lemur::api::DOCID_T> topDocIds = env.documentIDsFromMetadata("docno", topDocsNames);
    multimap<double, pair<string, string>, std::greater<double> > scoredConcepts_;
    for(const auto& concStyStrPair: concatenatedGoodConcepts) // for each extracted concept
    {
        const string& conceptSty = concStyStrPair.first;
        const string& conceptStr = concStyStrPair.second;
        double conceptScore = indri::query::ConceptSelectorFuns::findConceptScorePrf(conceptSty,
                                                                                        conceptStr,
                                                                                        qId,
                                                                                        topDocIds,
                                                                                        env,
                                                                                        queryReformulator,
                                                                                        resourceNames_);

        scoredConcepts_.insert(make_pair(conceptScore, make_pair(conceptSty, conceptStr)));
        cout << "indri::query::ConceptSelectorFuns::normConceptScorePrf: conceptScore = " << conceptStr << " -> " << conceptScore << endl;
    }

    // Start the maximum at -infinity, not 0: the scores are sums of retrieval
    // scores and can all be negative, in which case a 0 start would never be
    // replaced and the normalisation would use a wrong upper bound.
    double max_sc = -std::numeric_limits<double>::infinity();
    double min_sc = std::numeric_limits<double>::infinity();
    for (const auto& sc: scoredConcepts_)
    {
        max_sc = max(max_sc, sc.first);
        min_sc = min(min_sc, sc.first);
    }
    cout << "indri::query::ConceptSelectorFuns::normConceptScorePrf: min_sc, max_sc: " << min_sc << ", " << max_sc << endl;

    // min-max normalize scores in scoredConcepts_
    const double range_sc = max_sc - min_sc;
    multimap<double, pair<string, string> > scoredConcepts_norm;
    for (auto itSc = scoredConcepts_.begin(); itSc != scoredConcepts_.end(); ++itSc)
    {
        // Guard the degenerate zero-width range to avoid 0/0.
        double conceptScore = (range_sc > 0) ? (itSc->first - min_sc)/range_sc : 0.0;
        scoredConcepts_norm.insert(make_pair(conceptScore, make_pair((itSc->second).first, (itSc->second).second)));
        cout << "indri::query::ConceptSelectorFuns::normConceptScorePrf: scoredConcepts_norm: scoredConcepts_ = " << itSc->first  << endl;
        cout << "indri::query::ConceptSelectorFuns::normConceptScorePrf: scoredConcepts_norm: conceptScore = " << conceptScore << " =  (" << itSc->first << " - " << min_sc << " )/( " << max_sc << " - " << min_sc << " )" << endl;
    }

    return scoredConcepts_norm;
}
开发者ID:teanalab,项目名称:MRF-L,代码行数:47,代码来源:ConceptSelectorFuns.cpp

示例7: convert_docnoscore_to_binary

// Converts a text file of whitespace-separated "<docno> <score>" records into
// binary records written to outfile.
//
// For every record the docno is resolved to an internal document id through
// the "docno" metadata field. Records whose docno is unknown are skipped
// silently. Each remaining record is written as sizeof(UINT32) bytes of the
// document id followed by sizeof(double) bytes of the score.
//
// outfile - destination file, written through a 1 MB sequential buffer.
// infile  - path of the text file to read.
// env     - query environment used for the docno lookup.
//
// Reading stops at the first record that cannot be parsed (or immediately if
// the file cannot be opened).
void convert_docnoscore_to_binary( indri::file::File& outfile, const std::string& infile, indri::api::QueryEnvironment& env ) {
  const std::string docnoName = "docno";

  // Automatic storage releases the buffer on every exit path, including
  // exceptions thrown by the metadata lookup.
  indri::file::SequentialWriteBuffer outb( outfile, 1024*1024 );
  std::ifstream in( infile.c_str(), std::ifstream::in );

  std::string docno;
  double score = 0;

  // Testing the extraction itself (instead of eof()) ends the loop on any
  // failure - unopened file, malformed record, end of input - and still
  // processes a final record that is not followed by a newline.
  while( in >> docno >> score ) {
    std::cout << "looking up: " << docno << " " << score << std::endl;

    std::vector<std::string> docnoValues( 1, docno );
    std::vector<lemur::api::DOCID_T> result = env.documentIDsFromMetadata( docnoName, docnoValues );

    if( result.empty() ) {
      //      LEMUR_THROW( LEMUR_IO_ERROR, "No document exists with docno: " + docno );
      continue; // allow entries that don't exist and ignore silently.
    }

    int document = result[0];
    std::cout << document << std::endl;

    outb.write( (const void*) &document, sizeof(UINT32) );
    outb.write( (const void*) &score, sizeof(double) );
  }

  outb.flush();
  in.close();
}
开发者ID:blaze3j,项目名称:DocHunt,代码行数:40,代码来源:makeprior.cpp

示例8: generateResults

    // Runs `_query` as an annotated query limited to `_documentLimit`
    // results and caches the outcome in the object's members.
    //
    // All per-query state is reset first. After the query has run, the
    // results are passed through _logtoposterior(), the internal document
    // ids and scores are collected, and the "docno" metadata of the
    // documents is fetched into extDocIDs. When `stats` is true the query
    // stems, gram counts and statistics are rebuilt as well.
    //
    // Always returns a wrapped `true`.
    SEXP generateResults(string _qno, string _query, int _documentLimit, bool stats) {

        // Reset everything left over from a previous query.
        resultsData = resultsData_nullCopy;
        results.clear();
        _gramTable.clear();
        terms.clear();
        extDocIDs.clear();
        scores.clear();
        documentIDs.clear();

        // Remember the request.
        qno = _qno;
        query = _query;
        documentLimit = _documentLimit;

        // Run the query and post-process the scores.
        qa = environment.runAnnotatedQuery(query, _documentLimit);
        results = qa->getResults();
        _logtoposterior(results);

        // Split the results into parallel id / score vectors.
        for (size_t rank = 0; rank != results.size(); ++rank){
            documentIDs.push_back(results[rank].document);
            scores.push_back(results[rank].score);
        }
        extDocIDs = environment.documentMetadata(documentIDs, "docno");

        if(!stats)
            return Rcpp::wrap(true);

        updateQueryDetails(environment, resultsData, query);
        countGrams();
        buildStats();
        return Rcpp::wrap(true);
    }
开发者ID:semanticpc,项目名称:indriR,代码行数:35,代码来源:indriRetOld.cpp

示例9: buildStats

    void buildStats() {
        HGram::iterator iter;
        resultsData.tfMatrix = arma::zeros<arma::mat>(results.size(),
                                                      _gramTable.size());
        // Initialize the
        resultsData.dfVector.set_size(_gramTable.size());
        resultsData.ctfVector.set_size(_gramTable.size());

        int tmpTermID = -1;
        for( iter = _gramTable.begin(); iter != _gramTable.end(); iter++ ) {
            double gramCount = 0;
            ++tmpTermID;
            Gram* gram = *iter->first;
            GramCounts* gramCounts = *iter->second;
            gram->internal_termID = tmpTermID;
            terms.push_back(gram->term);
             if( resultsData.queryStems.find(gram->term) != resultsData.queryStems.end() )
                resultsData.queryStemIndex[gram->term] = tmpTermID;

            resultsData.ctfVector(tmpTermID) = environment.stemCount(gram->term);
            resultsData.dfVector(tmpTermID) =  environment.documentStemCount(gram->term);
            size_t c, r;
            for( r = 0, c = 0; r < results.size() && c < gramCounts->counts.size(); r++ ) {
                if( gramCounts->counts[c].first == r ) {
                    resultsData.tfMatrix(r, tmpTermID) = gramCounts->counts[c].second;
                    c++;
                }
            }
        }
        _gramTable.clear();
    }
开发者ID:semanticpc,项目名称:indriR,代码行数:31,代码来源:indriRetOld.cpp

示例10: runQuery

    // Runs `_query` and returns the ranking as a TREC-style data frame with
    // the columns topic, q0, docID, rank, score and runID.
    //
    // _qno           - topic id, repeated in the "topic" column.
    // _query         - query text passed to runAnnotatedQuery().
    // _documentLimit - maximum number of results requested.
    // _runid         - run tag, repeated in the "runID" column.
    //
    // Every column has exactly one entry per returned result. The query may
    // return fewer results than `_documentLimit`; sizing the constant
    // columns and the rank column by the limit would then produce columns
    // of different lengths.
    SEXP runQuery(string _qno, string _query, int _documentLimit, string _runid="default"){
        indri::api::QueryAnnotation* qa = environment.runAnnotatedQuery(_query, _documentLimit);

        // Copy the results out so the annotation can be released right away.
        std::vector<indri::api::ScoredExtentResult> results = qa->getResults();
        delete qa;
        //_logtoposterior(results);

        const size_t resultCount = results.size();

        // Extract Documents
        std::vector<lemur::api::DOCID_T> documentIDs;
        std::vector<double> scores;
        documentIDs.reserve(resultCount);
        scores.reserve(resultCount);
        for (size_t i = 0; i < resultCount; i++){
            documentIDs.push_back(results[i].document);
            scores.push_back(results[i].score);
        }

        // Constant columns, one entry per result.
        vector<string> res_qno(resultCount, _qno);
        vector<string> res_q0(resultCount, "Q0");
        vector<string> res_runid(resultCount, _runid);

        // Ranks 1..resultCount, filled by hand so that an empty result list
        // yields an empty column.
        Rcpp::IntegerVector res_rank(resultCount);
        for (size_t i = 0; i < resultCount; i++)
            res_rank[i] = static_cast<int>(i) + 1;

        std::vector<string> extDocIDs = environment.documentMetadata(documentIDs, "docno");
        return Rcpp::DataFrame::create( Named("topic")= res_qno,
                Named("q0")= res_q0, Named("docID")=  wrap(extDocIDs),
                Named("rank")= res_rank,
                Named("score")= wrap(scores),
                Named("runID")= res_runid);
    }
开发者ID:semanticpc,项目名称:indriR,代码行数:33,代码来源:indriRetOld.cpp

示例11: Index

 // Attaches `_indexPath` to the query environment: as a server when
 // `_server` is true, otherwise as an index.
 //
 // Failures are reported to R: std::exception instances are forwarded with
 // forward_exception_to_r(), Lemur exceptions and anything else are turned
 // into an R error with a fixed message.
 Index(string _indexPath, bool _server) {
     try {
         if (!_server) {
             environment.addIndex(_indexPath);
         } else {
             environment.addServer(_indexPath);
         }
     } catch (std::exception &stdError) {
         forward_exception_to_r(stdError);
     } catch (lemur::api::Exception&) {
         ::Rf_error("Unable to open index");
     } catch (...) {
         ::Rf_error("Caught unhandled exception");
     }
 }
开发者ID:semanticpc,项目名称:indriR,代码行数:12,代码来源:indriRetOld.cpp

示例12: getDocTermMatrix

    // Returns the document-term matrix of the current results under the
    // requested term weighting, with the external document ids and the
    // terms attached as dimnames.
    //
    //   "tf"            - the raw tfMatrix.
    //   "tf_normalized" - tfMatrix with every row divided element-wise by
    //                     arma::sum(tfMatrix, 0).
    //   "tfidf"         - tfMatrix with every row multiplied element-wise by
    //                     log((documentCount + 1) / (df + 0.5)).
    //   "idf"           - recognised but not implemented.
    //
    // For "idf" and for any unrecognised weighting R's NULL is returned.
    // Previously control fell off the end of this non-void function in
    // those cases, which is undefined behaviour.
    SEXP getDocTermMatrix(string termWeighting){
        Rcpp::List dimnms = Rcpp::List::create(extDocIDs, terms);
        if(termWeighting == "tf"){
            NumericMatrix d = Rcpp::wrap(resultsData.tfMatrix);
            d.attr("dimnames") = dimnms;
            return d;
        }else if(termWeighting == "tf_normalized"){
            arma::mat tfnorm = resultsData.tfMatrix;
            // NOTE(review): sum(X, 0) produces one total per column, while
            // the variable name suggests per-document (row) totals - confirm
            // which normalisation is intended.
            arma::rowvec docLen = arma::sum(tfnorm, 0);
            tfnorm.each_row() /= docLen;
            NumericMatrix d = Rcpp::wrap(tfnorm);
            d.attr("dimnames") = dimnms;
            return d;
        }else if(termWeighting == "tfidf"){
            arma::mat tfidfMat = resultsData.tfMatrix;
            arma::vec idf = arma::log((environment.documentCount() + 1) /
                    (resultsData.dfVector + 0.5));
            tfidfMat.each_row() %= idf.t();
            NumericMatrix d = Rcpp::wrap(tfidfMat);
            d.attr("dimnames") = dimnms;
            return d;
        }else if(termWeighting == "idf"){
            // Not implemented yet; handled by the common return below.
        }

        // Unsupported weighting: hand back NULL instead of leaving the
        // return value undefined.
        return R_NilValue;
    }
开发者ID:semanticpc,项目名称:indriR,代码行数:27,代码来源:indriRetOld.cpp

示例13: _runQuery

  // Runs the query, expanding it if necessary.  Will print output as well if verbose is on.
  void _runQuery( std::stringstream& output, const std::string& query,
                  const std::string &queryType, const std::vector<std::string> &workingSet, std::vector<std::string> relFBDocs ) {
    try {
      if( _printQuery ) output << "# query: " << query << std::endl;
      std::vector<lemur::api::DOCID_T> docids;;
      if (workingSet.size() > 0) 
        docids = _environment.documentIDsFromMetadata("docno", workingSet);

      if (relFBDocs.size() == 0) {
          if( _printSnippets ) {
            if (workingSet.size() > 0) 
              _annotation = _environment.runAnnotatedQuery( query, docids, _initialRequested, queryType ); 
            else
              _annotation = _environment.runAnnotatedQuery( query, _initialRequested );
            _results = _annotation->getResults();
          } else {
            if (workingSet.size() > 0)
              _results = _environment.runQuery( query, docids, _initialRequested, queryType );
            else
              _results = _environment.runQuery( query, _initialRequested, queryType );
          }
      }
      
      if( _expander ) {
        std::vector<indri::api::ScoredExtentResult> fbDocs;
        if (relFBDocs.size() > 0) {
          docids = _environment.documentIDsFromMetadata("docno", relFBDocs);
          for (size_t i = 0; i < docids.size(); i++) {
            indri::api::ScoredExtentResult r(0.0, docids[i]);
            fbDocs.push_back(r);
          }
        }
        std::string expandedQuery;
        if (relFBDocs.size() != 0)
          expandedQuery = _expander->expand( query, fbDocs );
        else
          expandedQuery = _expander->expand( query, _results );
        if( _printQuery ) output << "# expanded: " << expandedQuery << std::endl;
        if (workingSet.size() > 0) {
          docids = _environment.documentIDsFromMetadata("docno", workingSet);
          _results = _environment.runQuery( expandedQuery, docids, _requested, queryType );
        } else {
          _results = _environment.runQuery( expandedQuery, _requested, queryType );
        }
      }
    }
    catch( lemur::api::Exception& e )
    {
      _results.clear();
      LEMUR_RETHROW(e, "QueryThread::_runQuery Exception");
    }
  }
开发者ID:wangxuemin,项目名称:coding,代码行数:53,代码来源:IndriRunQuery.cpp

示例14: countGrams

    // Counts, for every query result, how often each valid stem occurs
    // inside the result's extent and accumulates the counts in _gramTable.
    //
    // A result whose `end` is 0 is treated as covering the whole document.
    // Positions that are out of vocabulary (stem index 0) or whose stem
    // fails isValid() are skipped. Each gram's `counts` list holds
    // (result index, occurrences) pairs in ascending result order.
    //
    // The document vectors fetched from the environment are deleted before
    // returning.
    void countGrams() {
        std::vector<indri::api::DocumentVector*> vectors =
                environment.documentVectors( documentIDs );
        // for each query result
        for( size_t i=0; i< results.size(); i++ ) {
            indri::api::ScoredExtentResult& result = results[i];
            indri::api::DocumentVector* v = vectors[i];
            std::vector<int>& positions = v->positions();
            std::vector<std::string>& stems = v->stems();
            if (result.end == 0) result.end = positions.size();
            // for each word position in the extent
            for( int j = result.begin; j < result.end; j++ ) {
                // Reject unusable positions before allocating anything: the
                // allocation used to happen first and was leaked on this
                // path.
                if( positions[ j ] == 0 || (! isValid(stems[ positions[ j ] ])) )
                    continue;

                // Build a candidate entry for this stem.
                GramCounts* newCounts = new GramCounts;
                newCounts->gram.term =  stems[ positions[ j ] ] ;

                GramCounts** gramCounts = _gramTable.find( &newCounts->gram );
                if( gramCounts == 0 ) {
                    // First sighting: the table keeps the new entry.
                    _gramTable.insert( &newCounts->gram, newCounts );
                    gramCounts = &newCounts;
                } else {
                    // Already known: the candidate is not needed.
                    delete newCounts;
                }
                if( (*gramCounts)->counts.size() && (*gramCounts)->counts.back().first == i ) {
                    // we already have some counts going for this query result, so just add this one
                    (*gramCounts)->counts.back().second++;
                } else {
                    // no counts yet in this document, so add an entry
                    (*gramCounts)->counts.push_back( std::make_pair( i, 1 ) );
                }
            }
        }
        for (unsigned int i = 0; i < vectors.size(); i++)
            delete vectors[i];
    }
开发者ID:semanticpc,项目名称:indriR,代码行数:54,代码来源:indriRetOld.cpp

示例15: generateSnippets

    // Builds one snippet per cached result document and returns them as a
    // character vector named by the external document ids.
    //
    // html - passed to the SnippetBuilder constructor.
    //
    // Uses the documentIDs, extDocIDs and query annotation (qa) cached by
    // the last query. The parsed documents fetched from the environment are
    // released once the snippet strings have been built, mirroring how
    // countGrams() releases its document vectors.
    SEXP generateSnippets(bool html){
        vector<string> snippetString;
        vector< indri::api::ParsedDocument* > pdocuments = environment.documents(documentIDs);
        indri::api::SnippetBuilder sp(html);

        for( size_t row=0; row < documentIDs.size(); row++ )
            snippetString.push_back(sp.build(documentIDs[row], pdocuments[row], qa));

        // The snippets are independent strings now, so the parsed documents
        // are no longer needed.
        for( size_t row=0; row < pdocuments.size(); row++ )
            delete pdocuments[row];

        CharacterVector c = wrap(snippetString);
        c.attr("names") = extDocIDs;
        return c;
    }
开发者ID:semanticpc,项目名称:indriR,代码行数:11,代码来源:indriRetOld.cpp


注:本文中的indri::api::QueryEnvironment类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。