

Java MultiFields.getFields Method Code Examples

This article collects typical usage examples of the Java method org.apache.lucene.index.MultiFields.getFields. If you are looking for how to use MultiFields.getFields, what it is for, or real-world examples of it, the curated code samples below should help. You can also explore further usage examples of the org.apache.lucene.index.MultiFields class that this method belongs to.


Eleven code examples of the MultiFields.getFields method are shown below, sorted by popularity by default.
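
Before the individual examples, here is a minimal, self-contained sketch of the pattern they all share: MultiFields.getFields returns a merged Fields view over every segment of an IndexReader, which can then be walked field by field and term by term. It assumes the Lucene 4.x API used in the samples below (Fields is Iterable over field names, and a TermsEnum is obtained via terms.iterator(null)); the class and method names MultiFieldsGetFieldsSketch and dumpFields are illustrative only and do not belong to any of the quoted projects.

import java.io.IOException;

import org.apache.lucene.index.Fields;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;

public class MultiFieldsGetFieldsSketch {

  /** Prints each indexed field together with the number of unique terms it contains. */
  public static void dumpFields(IndexReader reader) throws IOException {
    Fields fields = MultiFields.getFields(reader); // merged view over all segments
    if (fields == null) {
      return; // an index with no postings yields null
    }
    for (String fieldName : fields) {             // Fields iterates over field names
      Terms terms = fields.terms(fieldName);
      if (terms == null) {
        continue;                                 // field has no inverted terms
      }
      TermsEnum termsEnum = terms.iterator(null); // Lucene 4.x signature, as in the examples below
      long uniqueTerms = 0;
      BytesRef term;
      while ((term = termsEnum.next()) != null) {
        uniqueTerms++;
      }
      System.out.println(fieldName + ": " + uniqueTerms + " unique terms");
    }
  }
}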

Example 1: assertNormsEquals

import org.apache.lucene.index.MultiFields; // import the package/class the method depends on
/** 
 * checks that norms are the same across all fields 
 */
public void assertNormsEquals(String info, IndexReader leftReader, IndexReader rightReader) throws IOException {
  Fields leftFields = MultiFields.getFields(leftReader);
  Fields rightFields = MultiFields.getFields(rightReader);
  // Fields could be null if there are no postings,
  // but then it must be null for both
  if (leftFields == null || rightFields == null) {
    assertNull(info, leftFields);
    assertNull(info, rightFields);
    return;
  }
  
  for (String field : leftFields) {
    NumericDocValues leftNorms = MultiDocValues.getNormValues(leftReader, field);
    NumericDocValues rightNorms = MultiDocValues.getNormValues(rightReader, field);
    if (leftNorms != null && rightNorms != null) {
      assertDocValuesEquals(info, leftReader.maxDoc(), leftNorms, rightNorms);
    } else {
      assertNull(info, leftNorms);
      assertNull(info, rightNorms);
    }
  }
}
 
Developer ID: europeana, Project: search, Lines of code: 26, Source file: LuceneTestCase.java

Example 2: getFirstMatch

import org.apache.lucene.index.MultiFields; // import the package/class the method depends on
protected int getFirstMatch(IndexReader r, Term t) throws IOException {
  Fields fields = MultiFields.getFields(r);
  if (fields == null) return -1;
  Terms terms = fields.terms(t.field());
  if (terms == null) return -1;
  BytesRef termBytes = t.bytes();
  final TermsEnum termsEnum = terms.iterator(null);
  if (!termsEnum.seekExact(termBytes)) {
    return -1;
  }
  DocsEnum docs = termsEnum.docs(MultiFields.getLiveDocs(r), null, DocsEnum.FLAG_NONE);
  int id = docs.nextDoc();
  if (id != DocIdSetIterator.NO_MORE_DOCS) {
    int next = docs.nextDoc();
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, next);
  }
  return id == DocIdSetIterator.NO_MORE_DOCS ? -1 : id;
}
 
Developer ID: europeana, Project: search, Lines of code: 19, Source file: TestRTGBase.java

Example 3: LuceneUtils

import org.apache.lucene.index.MultiFields; // import the package/class the method depends on
/**
 * @param flagConfig Contains all information necessary for configuring LuceneUtils.
 *        {@link FlagConfig#luceneindexpath()} must be non-empty. 
 */
public LuceneUtils(FlagConfig flagConfig) throws IOException {
  if (flagConfig.luceneindexpath().isEmpty()) {
    throw new IllegalArgumentException(
        "-luceneindexpath is a required argument for initializing LuceneUtils instance.");
  }

  this.compositeReader = DirectoryReader.open(
      FSDirectory.open(FileSystems.getDefault().getPath(flagConfig.luceneindexpath())));
  this.leafReader = SlowCompositeReaderWrapper.wrap(compositeReader);
  MultiFields.getFields(compositeReader);
  this.flagConfig = flagConfig;
  if (!flagConfig.stoplistfile().isEmpty())
    loadStopWords(flagConfig.stoplistfile());

  if (!flagConfig.startlistfile().isEmpty())
    loadStartWords(flagConfig.startlistfile());

  VerbatimLogger.info("Initialized LuceneUtils from Lucene index in directory: " + flagConfig.luceneindexpath() + "\n");
  VerbatimLogger.info("Fields in index are: " + String.join(", ", this.getFieldNames()) + "\n");
}
 
Developer ID: semanticvectors, Project: semanticvectors, Lines of code: 25, Source file: LuceneUtils.java

Example 4: getIdfs

import org.apache.lucene.index.MultiFields; // import the package/class the method depends on
/**
 * Computes the inverse document frequency (IDF) of every term in the index.
 *
 * @param reader the index reader to collect terms from
 * @return map from each term to its inverse document frequency
 * @throws IOException if the index cannot be read
 */
public Map<String, Float> getIdfs(IndexReader reader) throws IOException {
    Fields fields = MultiFields.getFields(reader); // get the fields of the index
    for (String field : fields) {
        TermsEnum termEnum = MultiFields.getTerms(reader, field).iterator(null);
        BytesRef bytesRef;
        while ((bytesRef = termEnum.next()) != null) {
            if (termEnum.seekExact(bytesRef)) {
                String term = bytesRef.utf8ToString();
                float idf = tfidfSIM.idf(termEnum.docFreq(), reader.numDocs());
                inverseDocFreq.put(term, idf);
                System.out.println(term + " idf= " + idf);
            }
        }
    }
    return inverseDocFreq;
}
 
Developer ID: usc-isi-i2, Project: eswc-2015-semantic-typing, Lines of code: 31, Source file: TfIdfSearcher.java

Example 5: getFirstMatch

import org.apache.lucene.index.MultiFields; // import the package/class the method depends on
protected int getFirstMatch(IndexReader r, Term t) throws IOException {
  Fields fields = MultiFields.getFields(r);
  if (fields == null) return -1;
  Terms terms = fields.terms(t.field());
  if (terms == null) return -1;
  BytesRef termBytes = t.bytes();
  final TermsEnum termsEnum = terms.iterator(null);
  if (!termsEnum.seekExact(termBytes, false)) {
    return -1;
  }
  DocsEnum docs = termsEnum.docs(MultiFields.getLiveDocs(r), null, DocsEnum.FLAG_NONE);
  int id = docs.nextDoc();
  if (id != DocIdSetIterator.NO_MORE_DOCS) {
    int next = docs.nextDoc();
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, next);
  }
  return id == DocIdSetIterator.NO_MORE_DOCS ? -1 : id;
}
 
Developer ID: pkarmstr, Project: NYBC, Lines of code: 19, Source file: TestRTGBase.java

Example 6: generateFields

import org.apache.lucene.index.MultiFields; // import the package/class the method depends on
/**
 * Here we could go overboard and use a pre-generated indexed random document for a given Item,
 * but for now we'd prefer to simply return the id as the content of the document and that for
 * every field.
 */
private static Fields generateFields(String[] fieldNames, String text) throws IOException {
    MemoryIndex index = new MemoryIndex();
    for (String fieldName : fieldNames) {
        index.addField(fieldName, text, new WhitespaceAnalyzer());
    }
    return MultiFields.getFields(index.createSearcher().getIndexReader());
}
 
Developer ID: justor, Project: elasticsearch_my, Lines of code: 13, Source file: MoreLikeThisQueryBuilderTests.java

Example 7: dummy

import org.apache.lucene.index.MultiFields; // import the package/class the method depends on
private void dummy() throws IOException {
    Fields fields = MultiFields.getFields(this.reader);
    Terms terms = fields.terms("field");
    TermsEnum iterator = terms.iterator(null);
    BytesRef byteRef = null;
    while ((byteRef = iterator.next()) != null) {
        String term = new String(byteRef.bytes, byteRef.offset,
                byteRef.length);
        Term termInstance = new Term("tokens", term);
        long termFreq = this.reader.totalTermFreq(termInstance);
        this.TermFreqMap.put(term, termFreq);
        System.out.println(termFreq);
    }
}
 
Developer ID: Mondego, Project: SourcererCC, Lines of code: 15, Source file: TermFreq.java

Example 8: searchGenesInVcfFiles

import org.apache.lucene.index.MultiFields; // import the package/class the method depends on
public Set<String> searchGenesInVcfFiles(String gene, List<VcfFile> vcfFiles)
        throws IOException {
    if (CollectionUtils.isEmpty(vcfFiles)) {
        return Collections.emptySet();
    }

    BooleanQuery.Builder builder = new BooleanQuery.Builder();

    PrefixQuery geneIdPrefixQuery = new PrefixQuery(
            new Term(FeatureIndexFields.GENE_ID.getFieldName(), gene.toLowerCase()));
    PrefixQuery geneNamePrefixQuery = new PrefixQuery(
            new Term(FeatureIndexFields.GENE_NAME.getFieldName(), gene.toLowerCase()));
    BooleanQuery.Builder geneIdOrNameQuery = new BooleanQuery.Builder();
    geneIdOrNameQuery.add(geneIdPrefixQuery, BooleanClause.Occur.SHOULD);
    geneIdOrNameQuery.add(geneNamePrefixQuery, BooleanClause.Occur.SHOULD);

    builder.add(geneIdOrNameQuery.build(), BooleanClause.Occur.MUST);
    BooleanQuery query = builder.build();

    Set<String> geneIds = new HashSet<>();

    SimpleFSDirectory[] indexes = fileManager.getIndexesForFiles(vcfFiles);

    try (MultiReader reader = openMultiReader(indexes)) {
        if (reader.numDocs() == 0) {
            return Collections.emptySet();
        }
        if (StringUtils.isEmpty(gene)) {
            Fields fields = MultiFields.getFields(reader);
            fetchTermValues(geneIds, fields, FeatureIndexFields.GENE_ID.getFieldName());
            fetchTermValues(geneIds, fields, FeatureIndexFields.GENE_NAME.getFieldName());
        } else {
            IndexSearcher searcher = new IndexSearcher(reader);
            final TopDocs docs = searcher.search(query, reader.numDocs());
            final ScoreDoc[] hits = docs.scoreDocs;
            geneIds = fetchGeneIds(hits, searcher);
        }

    } catch (IOException e) {
        LOGGER.error(getMessage(MessagesConstants.ERROR_FEATURE_INDEX_SEARCH_FAILED), e);
        return Collections.emptySet();
    }

    return geneIds;
}
 
Developer ID: epam, Project: NGB, Lines of code: 46, Source file: FeatureIndexDao.java

Example 9: testReadTokens

import org.apache.lucene.index.MultiFields; // import the package/class the method depends on
/**
 * Test ReadTokensTask
 */
public void testReadTokens() throws Exception {

  // We will call ReadTokens on this many docs
  final int NUM_DOCS = 20;

  // Read tokens from first NUM_DOCS docs from Reuters and
  // then build index from the same docs
  String algLines1[] = {
    "# ----- properties ",
    "analyzer=org.apache.lucene.analysis.core.WhitespaceAnalyzer",
    "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
    "docs.file=" + getReuters20LinesFile(),
    "# ----- alg ",
    "{ReadTokens}: " + NUM_DOCS,
    "ResetSystemErase",
    "CreateIndex",
    "{AddDoc}: " + NUM_DOCS,
    "CloseIndex",
  };

  // Run algo
  Benchmark benchmark = execBenchmark(algLines1);

  List<TaskStats> stats = benchmark.getRunData().getPoints().taskStats();

  // Count how many tokens all ReadTokens saw
  int totalTokenCount1 = 0;
  for (final TaskStats stat : stats) {
    if (stat.getTask().getName().equals("ReadTokens")) {
      totalTokenCount1 += stat.getCount();
    }
  }

  // Separately count how many tokens are actually in the index:
  IndexReader reader = DirectoryReader.open(benchmark.getRunData().getDirectory());
  assertEquals(NUM_DOCS, reader.numDocs());

  int totalTokenCount2 = 0;

  Fields fields = MultiFields.getFields(reader);

  for (String fieldName : fields) {
    if (fieldName.equals(DocMaker.ID_FIELD) || fieldName.equals(DocMaker.DATE_MSEC_FIELD) || fieldName.equals(DocMaker.TIME_SEC_FIELD)) {
      continue;
    }
    Terms terms = fields.terms(fieldName);
    if (terms == null) {
      continue;
    }
    TermsEnum termsEnum = terms.iterator(null);
    DocsEnum docs = null;
    while(termsEnum.next() != null) {
      docs = TestUtil.docs(random(), termsEnum, MultiFields.getLiveDocs(reader), docs, DocsEnum.FLAG_FREQS);
      while(docs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
        totalTokenCount2 += docs.freq();
      }
    }
  }
  reader.close();

  // Make sure they are the same
  assertEquals(totalTokenCount1, totalTokenCount2);
}
 
Developer ID: europeana, Project: search, Lines of code: 67, Source file: TestPerfTasksLogic.java

Example 10: getTfIdfs

import org.apache.lucene.index.MultiFields; // import the package/class the method depends on
public Map<String, HashMap<Integer, Float>> getTfIdfs() throws IOException {
    float tf, idf, tfidf_score;

    Fields fields = MultiFields.getFields(indexReader); // get the fields of the index
    for (String field : fields) {
        TermsEnum termEnum = MultiFields.getTerms(indexReader, field).iterator(null);
        BytesRef bytesRef;
        while ((bytesRef = termEnum.next()) != null) {
            if (termEnum.seekExact(bytesRef)) {
                String term = bytesRef.utf8ToString();
                idf = tfidfSIM.idf(termEnum.docFreq(), indexReader.numDocs());
                inverseDocFreq.put(term, idf);

                System.out.println("Term = " + term);
                //System.out.println("idf= " + idf);

                HashMap<Integer, Float> docTfIdf = new HashMap<Integer, Float>();

                DocsEnum docsEnum = termEnum.docs(liveDocs, null);
                if (docsEnum != null) {
                    int doc;
                    while ((doc = docsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
                        tf = tfidfSIM.tf(docsEnum.freq());
                        tfidf_score = tf * idf;

                        docTfIdf.put(docsEnum.docID(), tfidf_score);

                        System.out.println("doc= " + docsEnum.docID() + " tfidf_score= " + tfidf_score);
                    }

                    tf_Idf_Weights.put(term, docTfIdf);
                }
            }
        }
    }

    return tf_Idf_Weights;
}
 
Developer ID: usc-isi-i2, Project: eswc-2015-semantic-typing, Lines of code: 47, Source file: TfIdfSearcher.java

Example 11: testReadTokens

import org.apache.lucene.index.MultiFields; // import the package/class the method depends on
/**
 * Test ReadTokensTask
 */
public void testReadTokens() throws Exception {

  // We will call ReadTokens on this many docs
  final int NUM_DOCS = 20;

  // Read tokens from first NUM_DOCS docs from Reuters and
  // then build index from the same docs
  String algLines1[] = {
    "# ----- properties ",
    "analyzer=org.apache.lucene.analysis.core.WhitespaceAnalyzer",
    "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
    "docs.file=" + getReuters20LinesFile(),
    "# ----- alg ",
    "{ReadTokens}: " + NUM_DOCS,
    "ResetSystemErase",
    "CreateIndex",
    "{AddDoc}: " + NUM_DOCS,
    "CloseIndex",
  };

  // Run algo
  Benchmark benchmark = execBenchmark(algLines1);

  List<TaskStats> stats = benchmark.getRunData().getPoints().taskStats();

  // Count how many tokens all ReadTokens saw
  int totalTokenCount1 = 0;
  for (final TaskStats stat : stats) {
    if (stat.getTask().getName().equals("ReadTokens")) {
      totalTokenCount1 += stat.getCount();
    }
  }

  // Separately count how many tokens are actually in the index:
  IndexReader reader = DirectoryReader.open(benchmark.getRunData().getDirectory());
  assertEquals(NUM_DOCS, reader.numDocs());

  int totalTokenCount2 = 0;

  Fields fields = MultiFields.getFields(reader);

  for (String fieldName : fields) {
    if (fieldName.equals(DocMaker.ID_FIELD) || fieldName.equals(DocMaker.DATE_MSEC_FIELD) || fieldName.equals(DocMaker.TIME_SEC_FIELD)) {
      continue;
    }
    Terms terms = fields.terms(fieldName);
    if (terms == null) {
      continue;
    }
    TermsEnum termsEnum = terms.iterator(null);
    DocsEnum docs = null;
    while(termsEnum.next() != null) {
      docs = _TestUtil.docs(random(), termsEnum, MultiFields.getLiveDocs(reader), docs, DocsEnum.FLAG_FREQS);
      while(docs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
        totalTokenCount2 += docs.freq();
      }
    }
  }
  reader.close();

  // Make sure they are the same
  assertEquals(totalTokenCount1, totalTokenCount2);
}
 
Developer ID: pkarmstr, Project: NYBC, Lines of code: 67, Source file: TestPerfTasksLogic.java


Note: The org.apache.lucene.index.MultiFields.getFields examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other open-source code and documentation platforms. The code snippets were selected from open-source projects contributed by their respective developers, and copyright remains with the original authors. Please consult the corresponding project's license before distributing or reusing this code; do not reproduce without permission.