当前位置: 首页>>代码示例>>Java>>正文


Java MultiFields.getTermDocsEnum方法代码示例

本文整理汇总了 Java 中 org.apache.lucene.index.MultiFields.getTermDocsEnum 方法的典型用法代码示例。如果您正苦于以下问题：Java MultiFields.getTermDocsEnum 方法的具体用法？Java MultiFields.getTermDocsEnum 怎么用？Java MultiFields.getTermDocsEnum 使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 org.apache.lucene.index.MultiFields 的用法示例。


在下文中一共展示了MultiFields.getTermDocsEnum方法的5个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: getFeatures

import org.apache.lucene.index.MultiFields; //导入方法依赖的package包/类
/**
 * Computes the two features of a phrase within a single document: the value of
 * {@code Commons.calcTfIdf} and the value of {@code Commons.calcFirstOccurrence}.
 *
 * @param ir        reader over the index to consult
 * @param fieldName field whose postings are read
 * @param rawPhrase term bytes of the phrase
 * @param docId     document in which the phrase is expected to occur
 * @param docSize   forwarded to both feature calculations
 * @param numDocs   forwarded to the TF-IDF calculation
 * @param inc       when true, both the document frequency and {@code numDocs} are
 *                  incremented by one before the TF-IDF calculation
 * @return a two-element array {@code {tfIdf, firstOccurrence}}, or {@code null} when the
 *         phrase occurs fewer than two times in the document
 * @throws IOException      if reading the index fails
 * @throws RuntimeException if the phrase has no postings for {@code docId}
 */
static double[] getFeatures(IndexReader ir, String fieldName, BytesRef rawPhrase, int docId, int docSize, int numDocs, boolean inc)
    throws IOException {
  PostingsEnum de = MultiFields.getTermDocsEnum(ir, fieldName, rawPhrase);
  // The lookup yields null when the field or term is absent; fail with a clear message
  // instead of a bare NullPointerException.
  if(de == null){
    throw new RuntimeException("no postings for the phrase in field " + fieldName);
  }
  int ret = de.advance(docId);
  if(ret == PostingsEnum.NO_MORE_DOCS){
    throw new RuntimeException("no more docs...");
  }
  // advance() stops at the first document >= docId, so landing on a later document means
  // the phrase does not occur in docId; reading freq() there would describe the wrong doc.
  if(ret != docId){
    throw new RuntimeException("phrase does not occur in doc " + docId + " (advanced to " + ret + ")");
  }

  int freq = de.freq();
  // Phrases occurring fewer than two times in the document are not scored.
  if(freq < 2) return null;

  PostingsEnum pe = MultiFields.getTermPositionsEnum(ir, fieldName, rawPhrase);
  if(pe == null){
    throw new RuntimeException("no positions for the phrase in field " + fieldName);
  }
  int ret2 = pe.advance(docId);
  if(ret2 == PostingsEnum.NO_MORE_DOCS){
    throw new RuntimeException("no more docs...");
  }
  if(ret2 != docId){
    throw new RuntimeException("phrase does not occur in doc " + docId + " (advanced to " + ret2 + ")");
  }

  double[] features = new double[2];
  // The first call to nextPosition() yields the first occurrence within the document.
  int pos = pe.nextPosition();
  int docFreq = ir.docFreq(new Term(fieldName, rawPhrase));
  if(inc){
    docFreq++;
    numDocs++;
  }
  features[0] = Commons.calcTfIdf(freq, docSize, docFreq, numDocs);
  features[1] = Commons.calcFirstOccurrence(pos, docSize);

  return features;
}
 
开发者ID:kojisekig,项目名称:KEA-lucene,代码行数:32,代码来源:KeyphraseExtractor2.java

示例2: getDoc

import org.apache.lucene.index.MultiFields; //导入方法依赖的package包/类
/**
 * Looks up the term {@code s} in the field named by
 * {@code SyntacticSynsConfig.getSynsTargetFieldName()} and loads the document of the
 * last posting found for it.
 *
 * @param s      term text to search for (used as-is, no normalization)
 * @param reader index reader to search and to load the document from
 * @return the stored document of the last matching posting, or {@code null} when the
 *         term has no postings
 * @throws IOException if reading the index fails
 */
private Document getDoc(String s, IndexReader reader) throws IOException {
    //TODO: normalize s?
    BytesRef termBytes = new BytesRef(s);
    PostingsEnum postings = MultiFields.getTermDocsEnum(reader,
            SyntacticSynsConfig.getSynsTargetFieldName(), termBytes);

    // A null enum means the search term was not found.
    if (postings == null) {
        return null;
    }

    // Walk every posting, remembering the last doc id and how many were seen.
    int matchCount = 0;
    int lastDocId = -1;
    for (int current = postings.nextDoc();
         current != PostingsEnum.NO_MORE_DOCS;
         current = postings.nextDoc()) {
        lastDocId = current;
        matchCount++;
    }

    if (matchCount > 1) {
        //TODO: log or do something "there should only be one key term!"
    }

    if (lastDocId < 0) {
        return null;
    }
    System.out.println(lastDocId);
    return reader.document(lastDocId);
}
 
开发者ID:tballison,项目名称:lucene-addons,代码行数:29,代码来源:SearchSingleTerm.java

示例3: buildModel

import org.apache.lucene.index.MultiFields; //导入方法依赖的package包/类
/**
 * Builds a {@code KEAModel} from the index at {@code LUCENE_INDEX_DIR} by scanning the
 * 1-gram, 2-gram and 3-gram fields and adding every phrase that occurs more than once in a
 * document, together with its frequency and first position in that document.
 *
 * @param knownKeyphrases passed through to the {@code KEAModel} constructor
 * @return the populated model
 * @throws IOException if opening or reading the index fails
 */
static KEAModel buildModel(Map<String, Set<String>> knownKeyphrases) throws IOException {

  Directory indexDir = Commons.getLuceneDirectory(LUCENE_INDEX_DIR);
  IndexReader ir = DirectoryReader.open(indexDir);

  try{
    // Constructed inside the try so the reader is still closed if the constructor throws.
    KEAModel model = new KEAModel(ir, knownKeyphrases);

    for(int n = 1; n <= 3; n++){
      System.out.printf("%s : building %d-gram model\n", new Date().toString(), n);
      String fieldName = Commons.getFieldName(FIELD_NAME, n);
      Terms terms = MultiFields.getTerms(ir, fieldName);
      // A field with no indexed terms yields null; nothing to add for this n.
      if(terms == null) continue;

      TermsEnum te = terms.iterator();
      for(BytesRef rawPhrase = te.next(); rawPhrase != null; rawPhrase = te.next()){
        String phrase = rawPhrase.utf8ToString();
        // use KEAStopFilter instead
        //if(stopWords(phrase, n)) continue;

        //System.out.printf("%s ", phrase);
        PostingsEnum de = MultiFields.getTermDocsEnum(ir, fieldName, rawPhrase);
        if(de == null) continue;

        while(de.nextDoc() != PostingsEnum.NO_MORE_DOCS){
          int docId = de.docID();
          int freq = de.freq();
          // Let's consider only terms that occurs more than one time in the document
          // KEA papers said "To reduce the size of the training set, we discard any phrase that occurs only once in the document."
          if(freq > 1){
            PostingsEnum pe = MultiFields.getTermPositionsEnum(ir, fieldName, rawPhrase);
            // advance() stops at the first doc >= docId; anything other than docId itself
            // (including a null enum) means no position is available for this document.
            int ret = (pe == null) ? PostingsEnum.NO_MORE_DOCS : pe.advance(docId);
            if(ret != docId){
              System.out.printf("(NO_MORE_DOCS) %d\n", docId);
            }
            else{
              // get first position of the term in the doc (first occurrence)
              int pos = pe.nextPosition();
              model.add(docId, fieldName, phrase, freq, pos);
            }
          }
        }
      }
    }

    return model;
  }
  finally{
    IOUtils.closeWhileHandlingException(ir);
  }
}
 
开发者ID:kojisekig,项目名称:KEA-lucene,代码行数:47,代码来源:KEAModelBuilder.java

示例4: getOrdinal

import org.apache.lucene.index.MultiFields; //导入方法依赖的package包/类
/**
 * Resolves the ordinal of a category: the root ordinal for an empty path, otherwise a
 * cache lookup followed, on a miss, by a term lookup in the index.
 *
 * @param cp category label to resolve
 * @return the category's ordinal, or {@code TaxonomyReader.INVALID_ORDINAL} when it is
 *         not known to this reader
 * @throws IOException if reading the index fails
 */
@Override
public int getOrdinal(FacetLabel cp) throws IOException {
  ensureOpen();
  if (cp.length == 0) {
    return ROOT_ORDINAL;
  }

  // Consult the LRU cache first.
  synchronized (ordinalCache) {
    Integer cached = ordinalCache.get(cp);
    if (cached != null) {
      // The cache is shared with DTR instances allocated from doOpenIfChanged, so a hit
      // is only valid when the ordinal is one this instance recognizes. An ordinal at or
      // beyond maxDoc is unknown to this instance, and searching on disk would not find
      // it either.
      int cachedOrdinal = cached.intValue();
      return cachedOrdinal < indexReader.maxDoc()
          ? cachedOrdinal
          : TaxonomyReader.INVALID_ORDINAL;
    }
  }

  // Cache miss: look the category up on disk.
  BytesRef pathBytes = new BytesRef(FacetsConfig.pathToString(cp.components, cp.length));
  DocsEnum postings = MultiFields.getTermDocsEnum(indexReader, null, Consts.FULL, pathBytes, 0);
  if (postings == null || postings.nextDoc() == DocIdSetIterator.NO_MORE_DOCS) {
    // Only the existence of a category is cached, never its absence. The caches are
    // shared with new DTR instances allocated from doOpenIfChanged, so caching a miss
    // could accidentally tell a newer generation that a category does not exist.
    return TaxonomyReader.INVALID_ORDINAL;
  }

  int ordinal = postings.docID();
  synchronized (ordinalCache) {
    ordinalCache.put(cp, Integer.valueOf(ordinal));
  }
  return ordinal;
}
 
开发者ID:europeana,项目名称:search,代码行数:46,代码来源:DirectoryTaxonomyReader.java

示例5: getOrdinal

import org.apache.lucene.index.MultiFields; //导入方法依赖的package包/类
/**
 * Resolves the ordinal of a category path: the root ordinal for an empty path, otherwise
 * a cache lookup followed, on a miss, by a term lookup in the index.
 *
 * @param cp category path to resolve
 * @return the category's ordinal, or {@code TaxonomyReader.INVALID_ORDINAL} when it is
 *         not known to this reader
 * @throws IOException if reading the index fails
 */
@Override
public int getOrdinal(CategoryPath cp) throws IOException {
  ensureOpen();
  if (cp.length == 0) {
    return ROOT_ORDINAL;
  }

  // Consult the LRU cache first.
  synchronized (ordinalCache) {
    Integer cached = ordinalCache.get(cp);
    if (cached != null) {
      // The cache is shared with DTR instances allocated from doOpenIfChanged, so a hit
      // is only valid when the ordinal is one this instance recognizes. An ordinal at or
      // beyond maxDoc is unknown to this instance, and searching on disk would not find
      // it either.
      int cachedOrdinal = cached.intValue();
      return cachedOrdinal < indexReader.maxDoc()
          ? cachedOrdinal
          : TaxonomyReader.INVALID_ORDINAL;
    }
  }

  // Cache miss: look the category up on disk.
  BytesRef pathBytes = new BytesRef(cp.toString(delimiter));
  DocsEnum postings = MultiFields.getTermDocsEnum(indexReader, null, Consts.FULL, pathBytes, 0);
  if (postings == null || postings.nextDoc() == DocIdSetIterator.NO_MORE_DOCS) {
    // Only the existence of a category is cached, never its absence. The caches are
    // shared with new DTR instances allocated from doOpenIfChanged, so caching a miss
    // could accidentally tell a newer generation that a category does not exist.
    return TaxonomyReader.INVALID_ORDINAL;
  }

  int ordinal = postings.docID();
  synchronized (ordinalCache) {
    ordinalCache.put(cp, Integer.valueOf(ordinal));
  }
  return ordinal;
}
 
开发者ID:pkarmstr,项目名称:NYBC,代码行数:46,代码来源:DirectoryTaxonomyReader.java


注:本文中的org.apache.lucene.index.MultiFields.getTermDocsEnum方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。