当前位置: 首页>>代码示例>>Java>>正文


Java MultiFields类代码示例

本文整理汇总了Java中org.apache.lucene.index.MultiFields的典型用法代码示例。如果您正苦于以下问题:Java MultiFields类的具体用法?Java MultiFields怎么用?Java MultiFields使用的例子?那么,这里精选的类代码示例或许可以为您提供帮助。


MultiFields类属于org.apache.lucene.index包,在下文中一共展示了MultiFields类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: testBuildWordScorer

import org.apache.lucene.index.MultiFields; //导入依赖的package包/类
/**
 * Test the WordScorer emitted by the smoothing model.
 *
 * <p>Indexes a single document containing the text {@code "someText"} in the field
 * {@code "field"}, opens a reader directly on the writer, builds a {@code WordScorer}
 * from the model under test and hands it to {@code assertWordScorer} for verification.
 *
 * @throws IOException if indexing or reading the in-memory index fails
 */
public void testBuildWordScorer() throws IOException {
    SmoothingModel testModel = createTestModel();
    Map<String, Analyzer> mapping = new HashMap<>();
    mapping.put("field", new WhitespaceAnalyzer());
    PerFieldAnalyzerWrapper wrapper = new PerFieldAnalyzerWrapper(new WhitespaceAnalyzer(), mapping);

    // The directory, writer and reader all hold resources; close them even when an
    // assertion fails. Resources are released in reverse order: reader, writer, directory.
    try (RAMDirectory directory = new RAMDirectory();
         IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(wrapper))) {
        Document doc = new Document();
        doc.add(new Field("field", "someText", TextField.TYPE_NOT_STORED));
        writer.addDocument(doc);

        try (DirectoryReader ir = DirectoryReader.open(writer)) {
            WordScorer wordScorer = testModel.buildWordScorerFactory().newScorer(ir, MultiFields.getTerms(ir, "field"), "field", 0.9d,
                    BytesRefs.toBytesRef(" "));
            assertWordScorer(wordScorer, testModel);
        }
    }
}
 
开发者ID:justor,项目名称:elasticsearch_my,代码行数:19,代码来源:SmoothingModelTestCase.java

示例2: QueryAutoStopWordAnalyzer

import org.apache.lucene.index.MultiFields; //导入依赖的package包/类
/**
 * Builds a QueryAutoStopWordAnalyzer whose stopwords are derived from the index:
 * for every requested field, each term whose document frequency is strictly
 * greater than {@code maxDocFreq} becomes a stopword for that field.
 *
 * <p>A field that has no terms in the index is still registered, with an empty
 * stopword set.
 *
 * @param delegate Analyzer whose TokenStream will be filtered
 * @param indexReader IndexReader to identify the stopwords from
 * @param fields Selection of fields to calculate stopwords for
 * @param maxDocFreq Document frequency terms should be above in order to be stopwords
 * @throws IOException Can be thrown while reading from the IndexReader
 */
public QueryAutoStopWordAnalyzer(
    Analyzer delegate,
    IndexReader indexReader,
    Collection<String> fields,
    int maxDocFreq) throws IOException {
  super(delegate.getReuseStrategy());
  this.delegate = delegate;

  for (String fieldName : fields) {
    final Set<String> frequentTerms = new HashSet<>();
    final Terms fieldTerms = MultiFields.getTerms(indexReader, fieldName);
    if (fieldTerms != null) {
      // Scratch buffer reused for every UTF-8 -> String conversion of this field.
      final CharsRefBuilder scratch = new CharsRefBuilder();
      final TermsEnum cursor = fieldTerms.iterator(null);
      for (BytesRef term = cursor.next(); term != null; term = cursor.next()) {
        if (cursor.docFreq() <= maxDocFreq) {
          continue; // not frequent enough to count as a stopword
        }
        scratch.copyUTF8Bytes(term);
        frequentTerms.add(scratch.toString());
      }
    }
    stopWordsPerField.put(fieldName, frequentTerms);
  }
}
 
开发者ID:lamsfoundation,项目名称:lams,代码行数:37,代码来源:QueryAutoStopWordAnalyzer.java

示例3: visitMatchingTerms

import org.apache.lucene.index.MultiFields; //导入依赖的package包/类
/**
 * Reports this term to the visitor only when it is actually present in the index
 * for the given field.
 *
 * <p>The presence check exists for symmetry with the other SimpleTerm
 * implementations. Nothing is reported when the field has no terms or when the
 * exact term text is not found.
 *
 * @param reader index to look the term up in
 * @param fieldName field whose terms are consulted
 * @param mtv visitor notified with the Lucene term on an exact match
 * @throws IOException if the index cannot be read
 */
@Override
public void visitMatchingTerms(
  IndexReader reader,
  String fieldName,
  MatchingTermVisitor mtv) throws IOException
{
  final Terms fieldTerms = MultiFields.getTerms(reader, fieldName);
  if (fieldTerms == null) {
    return;
  }

  final TermsEnum cursor = fieldTerms.iterator(null);
  final TermsEnum.SeekStatus outcome = cursor.seekCeil(new BytesRef(getTermText()));
  // Only an exact hit counts; landing on a later term (NOT_FOUND) or running off the end (END) does not.
  if (TermsEnum.SeekStatus.FOUND == outcome) {
    mtv.visitMatchingTerm(getLuceneTerm(fieldName));
  }
}
 
开发者ID:lamsfoundation,项目名称:lams,代码行数:18,代码来源:SrndTermQuery.java

示例4: shardOperation

import org.apache.lucene.index.MultiFields; //导入依赖的package包/类
/**
 * Computes field statistics for every requested field on a single shard.
 *
 * <p>A field without a mapping is rejected with an {@link IllegalArgumentException}.
 * A mapped field that has no terms on this shard is silently left out of the result.
 *
 * @param request shard-level request naming the shard and the fields of interest
 * @return response carrying the shard id and the statistics gathered per field
 */
@Override
protected FieldStatsShardResponse shardOperation(FieldStatsShardRequest request) {
    final ShardId shardId = request.shardId();
    final Map<String, FieldStats> statsByField = new HashMap<>();
    final IndexService indexService = indicesService.indexServiceSafe(shardId.getIndex());
    final MapperService mappers = indexService.mapperService();
    final IndexShard indexShard = indexService.shardSafe(shardId.id());
    try (Engine.Searcher searcher = indexShard.acquireSearcher("fieldstats")) {
        for (String field : request.getFields()) {
            final MappedFieldType fieldType = mappers.fullName(field);
            if (fieldType == null) {
                throw new IllegalArgumentException("field [" + field + "] doesn't exist");
            }
            final IndexReader reader = searcher.reader();
            final Terms terms = MultiFields.getTerms(reader, field);
            if (terms == null) {
                continue; // mapped, but nothing indexed for it on this shard
            }
            statsByField.put(field, fieldType.stats(terms, reader.maxDoc()));
        }
    } catch (IOException e) {
        throw ExceptionsHelper.convertToElastic(e);
    }
    return new FieldStatsShardResponse(shardId, statsByField);
}
 
开发者ID:baidu,项目名称:Elasticsearch,代码行数:26,代码来源:TransportFieldStatsTransportAction.java

示例5: testSeekCeilNotFound

import org.apache.lucene.index.MultiFields; //导入依赖的package包/类
/**
 * Verifies that seekCeil on a term that is absent from the index reports
 * NOT_FOUND and positions the enum on the next larger term.
 *
 * <p>The index holds the empty string plus 36 single-character terms starting at
 * {@code 'a'} and their {@code "a"}-prefixed variants. The probe is the single byte
 * {@code 0x22}, which is greater than the empty term and smaller than {@code "a"},
 * so the enum must end up on {@code "a"} at ordinal 1.
 */
public void testSeekCeilNotFound() throws Exception {
  Directory dir = newDirectory();
  RandomIndexWriter w = new RandomIndexWriter(random(), dir);

  // Get empty string in there!
  Document emptyTermDoc = new Document();
  emptyTermDoc.add(newStringField("field", "", Field.Store.NO));
  w.addDocument(emptyTermDoc);

  for (int offset = 0; offset < 36; offset++) {
    final char letter = (char) (97 + offset);
    final String single = "" + letter;
    final String prefixed = "a" + letter;
    Document doc = new Document();
    doc.add(newTextField("field", single + " " + prefixed, Field.Store.NO));
    w.addDocument(doc);
  }

  w.forceMerge(1);
  IndexReader r = w.getReader();
  TermsEnum te = MultiFields.getTerms(r, "field").iterator(null);

  final BytesRef probe = new BytesRef(new byte[] {0x22});
  assertEquals(TermsEnum.SeekStatus.NOT_FOUND, te.seekCeil(probe));
  assertEquals("a", te.term().utf8ToString());
  assertEquals(1L, te.ord());

  r.close();
  w.close();
  dir.close();
}
 
开发者ID:europeana,项目名称:search,代码行数:27,代码来源:TestOrdsBlockTree.java

示例6: assignClass

import org.apache.lucene.index.MultiFields; //导入依赖的package包/类
/**
 * {@inheritDoc}
 *
 * <p>Every term of the class field is treated as a candidate class. The candidate
 * with the highest sum of log prior and log likelihood wins, and the reported score
 * is {@code 10 / |best log value|}.
 *
 * <p>When the class field has no terms at all (including the case where the field
 * is missing from the index) there are no candidates: the result then carries an
 * empty {@code BytesRef} and a vanishingly small score, instead of failing with a
 * {@code NullPointerException}.
 *
 * @throws IOException if the classifier has not been trained yet, or if reading
 *         the index fails
 */
@Override
public ClassificationResult<BytesRef> assignClass(String inputDocument) throws IOException {
  if (atomicReader == null) {
    throw new IOException("You must first call Classifier#train");
  }
  double max = - Double.MAX_VALUE;
  BytesRef foundClass = new BytesRef();

  Terms terms = MultiFields.getTerms(atomicReader, classFieldName);
  String[] tokenizedDoc = tokenizeDoc(inputDocument);
  // MultiFields.getTerms returns null when the field has no terms; treat that as
  // "no candidate classes" rather than dereferencing null.
  if (terms != null) {
    TermsEnum termsEnum = terms.iterator(null);
    BytesRef next;
    while ((next = termsEnum.next()) != null) {
      double clVal = calculateLogPrior(next) + calculateLogLikelihood(tokenizedDoc, next);
      if (clVal > max) {
        max = clVal;
        // The enum may reuse the returned BytesRef, so keep a private copy.
        foundClass = BytesRef.deepCopyOf(next);
      }
    }
  }
  double score = 10 / Math.abs(max);
  return new ClassificationResult<>(foundClass, score);
}
 
开发者ID:europeana,项目名称:search,代码行数:26,代码来源:SimpleNaiveBayesClassifier.java

示例7: assertNormsEquals

import org.apache.lucene.index.MultiFields; //导入依赖的package包/类
/**
 * Asserts that both readers expose identical norms for every field of the left reader.
 *
 * <p>If either reader has no fields at all, both must have none. For each field,
 * either both readers provide norms (which are then compared document by document
 * up to the left reader's {@code maxDoc}) or neither does.
 *
 * @param info message prefix used in assertion failures
 * @param leftReader reader whose field list drives the comparison
 * @param rightReader reader compared against {@code leftReader}
 * @throws IOException if either index cannot be read
 */
public void assertNormsEquals(String info, IndexReader leftReader, IndexReader rightReader) throws IOException {
  final Fields lhsFields = MultiFields.getFields(leftReader);
  final Fields rhsFields = MultiFields.getFields(rightReader);
  // Fields may be null when there are no postings, but then both sides must be null.
  final boolean bothPresent = lhsFields != null && rhsFields != null;
  if (!bothPresent) {
    assertNull(info, lhsFields);
    assertNull(info, rhsFields);
    return;
  }

  for (String fieldName : lhsFields) {
    final NumericDocValues lhsNorms = MultiDocValues.getNormValues(leftReader, fieldName);
    final NumericDocValues rhsNorms = MultiDocValues.getNormValues(rightReader, fieldName);
    if (lhsNorms == null || rhsNorms == null) {
      // Norms missing on one side must be missing on the other as well.
      assertNull(info, lhsNorms);
      assertNull(info, rhsNorms);
      continue;
    }
    assertDocValuesEquals(info, leftReader.maxDoc(), lhsNorms, rhsNorms);
  }
}
 
开发者ID:europeana,项目名称:search,代码行数:26,代码来源:LuceneTestCase.java

示例8: countTerms

import org.apache.lucene.index.MultiFields; //导入依赖的package包/类
/**
 * Counts the terms the given query enumerates for its field, asserting along the
 * way that they are returned in strictly ascending order.
 *
 * @param q multi-term query whose terms enum is walked
 * @return number of terms enumerated, or 0 when the field has no terms
 */
private int countTerms(MultiTermQuery q) throws Exception {
  final Terms fieldTerms = MultiFields.getTerms(reader, q.getField());
  if (fieldTerms == null) {
    return 0;
  }

  final TermsEnum matching = q.getTermsEnum(fieldTerms);
  assertNotNull(matching);

  int seen = 0;
  BytesRef previous = null;
  for (BytesRef current = matching.next(); current != null; current = matching.next()) {
    seen++;
    if (previous != null) {
      assertTrue(previous.compareTo(current) < 0);
    }
    // Copy, because the enum may reuse the BytesRef it hands out.
    previous = BytesRef.deepCopyOf(current);
  }
  // LUCENE-3314: the results after next() already returned null are undefined,
  // so next() is deliberately not called again here.
  return seen;
}
 
开发者ID:europeana,项目名称:search,代码行数:20,代码来源:TestNumericRangeQuery32.java

示例9: testAllDocs

import org.apache.lucene.index.MultiFields; //导入依赖的package包/类
/**
 * Checks which TermsEnum implementation a TermRangeQuery picks and how many
 * documents it matches on a four-document index ("A".."D").
 *
 * <p>Ranges that are open at both ends, or that start at the empty string, must
 * not use a TermRangeTermsEnum and must match all four documents. A range with
 * the lower bound "B" must use a TermRangeTermsEnum and match three.
 */
public void testAllDocs() throws Exception {
  initializeIndex(new String[]{"A", "B", "C", "D"});
  IndexReader reader = DirectoryReader.open(dir);
  IndexSearcher searcher = newSearcher(reader);

  // Unbounded on both sides, bounds flagged inclusive.
  TermRangeQuery unboundedInclusive = new TermRangeQuery("content", null, null, true, true);
  Terms terms = MultiFields.getTerms(searcher.getIndexReader(), "content");
  assertFalse(unboundedInclusive.getTermsEnum(terms) instanceof TermRangeTermsEnum);
  assertEquals(4, searcher.search(unboundedInclusive, null, 1000).scoreDocs.length);

  // Unbounded on both sides, bounds flagged exclusive.
  TermRangeQuery unboundedExclusive = new TermRangeQuery("content", null, null, false, false);
  assertFalse(unboundedExclusive.getTermsEnum(terms) instanceof TermRangeTermsEnum);
  assertEquals(4, searcher.search(unboundedExclusive, null, 1000).scoreDocs.length);

  // Lower bound is the empty string, which every term is greater than or equal to.
  TermRangeQuery fromEmptyString = TermRangeQuery.newStringRange("content", "", null, true, false);
  assertFalse(fromEmptyString.getTermsEnum(terms) instanceof TermRangeTermsEnum);
  assertEquals(4, searcher.search(fromEmptyString, null, 1000).scoreDocs.length);

  // And now another one: a real lower bound, which has to filter terms.
  TermRangeQuery fromB = TermRangeQuery.newStringRange("content", "B", null, true, false);
  assertTrue(fromB.getTermsEnum(terms) instanceof TermRangeTermsEnum);
  assertEquals(3, searcher.search(fromB, null, 1000).scoreDocs.length);

  reader.close();
}
 
开发者ID:europeana,项目名称:search,代码行数:21,代码来源:TestTermRangeQuery.java

示例10: testPrefixTerm

import org.apache.lucene.index.MultiFields; //导入依赖的package包/类
/**
 * Tests the terms enum chosen by a WildcardQuery over an index holding the terms
 * "prefix" and "prefixx".
 *
 * <p>A pattern with only a trailing {@code *} must enumerate through a
 * PrefixTermsEnum. The bare pattern {@code *} must use neither a PrefixTermsEnum
 * nor an enum whose simple class name contains "AutomatonTermsEnum". Both
 * patterns must match both documents.
 */
public void testPrefixTerm() throws IOException {
  Directory indexStore = getIndexStore("field", new String[]{"prefix", "prefixx"});
  IndexReader reader = DirectoryReader.open(indexStore);
  IndexSearcher searcher = newSearcher(reader);

  MultiTermQuery trailingStar = new WildcardQuery(new Term("field", "prefix*"));
  assertMatches(searcher, trailingStar, 2);
  Terms terms = MultiFields.getTerms(searcher.getIndexReader(), "field");
  assertTrue(trailingStar.getTermsEnum(terms) instanceof PrefixTermsEnum);

  MultiTermQuery starOnly = new WildcardQuery(new Term("field", "*"));
  assertMatches(searcher, starOnly, 2);
  assertFalse(starOnly.getTermsEnum(terms) instanceof PrefixTermsEnum);
  String enumClassName = starOnly.getTermsEnum(terms).getClass().getSimpleName();
  assertFalse(enumClassName.contains("AutomatonTermsEnum"));

  reader.close();
  indexStore.close();
}
 
开发者ID:europeana,项目名称:search,代码行数:23,代码来源:TestWildcard.java

示例11: getFirstMatch

import org.apache.lucene.index.MultiFields; //导入依赖的package包/类
/**
 * Returns the id of the single live document containing the given term.
 *
 * <p>Asserts that at most one live document matches.
 *
 * @param r reader to search
 * @param t term to look up
 * @return the matching document id, or -1 when the index has no fields, the field
 *         has no terms, the term is absent, or no live document contains it
 * @throws IOException if the index cannot be read
 */
protected int getFirstMatch(IndexReader r, Term t) throws IOException {
  final Fields fields = MultiFields.getFields(r);
  final Terms terms = (fields == null) ? null : fields.terms(t.field());
  if (terms == null) {
    return -1;
  }

  final BytesRef termBytes = t.bytes();
  final TermsEnum termsEnum = terms.iterator(null);
  if (!termsEnum.seekExact(termBytes)) {
    return -1;
  }

  final DocsEnum postings = termsEnum.docs(MultiFields.getLiveDocs(r), null, DocsEnum.FLAG_NONE);
  final int firstDoc = postings.nextDoc();
  if (firstDoc == DocIdSetIterator.NO_MORE_DOCS) {
    return -1;
  }
  // A second hit would mean the term is not unique among live documents.
  assertEquals(DocIdSetIterator.NO_MORE_DOCS, postings.nextDoc());
  return firstDoc;
}
 
开发者ID:europeana,项目名称:search,代码行数:19,代码来源:TestRTGBase.java

示例12: iterateAllDocs

import org.apache.lucene.index.MultiFields; //导入依赖的package包/类
/**
 * Creates an iterator over every live document id of the searcher's reader.
 *
 * <p>When the reader reports no live-docs bitset, the plain iterator over
 * {@code [0, maxDoc)} is returned; otherwise that iterator is filtered through
 * the bitset.
 *
 * @return iterator over the document ids that pass the live-docs check
 */
private DocIdSetIterator iterateAllDocs()
{
    final IndexReader indexReader = searcher.getIndexReader();
    final Bits live = MultiFields.getLiveDocs( indexReader );
    final DocIdSetIterator everyDoc = DocIdSetIterator.all( indexReader.maxDoc() );

    if ( live != null )
    {
        return new FilteredDocIdSetIterator( everyDoc )
        {
            @Override
            protected boolean match( int doc )
            {
                return live.get( doc );
            }
        };
    }

    return everyDoc;
}
 
开发者ID:neo4j-contrib,项目名称:neo4j-lucene5-index,代码行数:20,代码来源:LuceneAllDocumentsReader.java

示例13: LuceneUtils

import org.apache.lucene.index.MultiFields; //导入依赖的package包/类
/**
 * @param flagConfig Contains all information necessary for configuring LuceneUtils.
 *        {@link FlagConfig#luceneindexpath()} must be non-empty. 
 */
public LuceneUtils(FlagConfig flagConfig) throws IOException {
  if (flagConfig.luceneindexpath().isEmpty()) {
    throw new IllegalArgumentException(
        "-luceneindexpath is a required argument for initializing LuceneUtils instance.");
  }

  this.compositeReader = DirectoryReader.open(
      FSDirectory.open(FileSystems.getDefault().getPath(flagConfig.luceneindexpath())));
  this.leafReader = SlowCompositeReaderWrapper.wrap(compositeReader);
  MultiFields.getFields(compositeReader);
  this.flagConfig = flagConfig;
  if (!flagConfig.stoplistfile().isEmpty())
    loadStopWords(flagConfig.stoplistfile());

  if (!flagConfig.startlistfile().isEmpty())
    loadStartWords(flagConfig.startlistfile());

  VerbatimLogger.info("Initialized LuceneUtils from Lucene index in directory: " + flagConfig.luceneindexpath() + "\n");
  VerbatimLogger.info("Fields in index are: " + String.join(", ", this.getFieldNames()) + "\n");
}
 
开发者ID:semanticvectors,项目名称:semanticvectors,代码行数:25,代码来源:LuceneUtils.java

示例14: testSearchSpeed

import org.apache.lucene.index.MultiFields; //导入依赖的package包/类
/**
 * Indexes the test collection with the given feature, then measures the average
 * search time per query document and prints it to standard output.
 *
 * <p>The index is copied into memory before searching. Every non-deleted document
 * whose identifier appears in {@code queries} is used as a query.
 *
 * @param images not read by this method
 * @param featureClass feature implementation used for both indexing and searching
 * @throws IOException if the index cannot be read
 */
private void testSearchSpeed(ArrayList<String> images, final Class featureClass) throws IOException {
    parallelIndexer = new ParallelIndexer(8, indexPath, testExtensive, true) {
        @Override
        public void addBuilders(ChainedDocumentBuilder builder) {
            builder.addBuilder(new GenericDocumentBuilder(featureClass, "feature"));
        }
    };
    parallelIndexer.run();

    double queryCount = 0d;
    long ms;
    // Both directories and the reader hold resources; release them even if a search throws.
    try (FSDirectory onDisk = FSDirectory.open(new File(indexPath));
         RAMDirectory inMemory = new RAMDirectory(onDisk, IOContext.READONCE);
         IndexReader reader = DirectoryReader.open(inMemory)) {
        // Null when the index has no deletions.
        Bits liveDocs = MultiFields.getLiveDocs(reader);
        ImageSearcher searcher = new GenericFastImageSearcher(100, featureClass, "feature");
        ms = System.currentTimeMillis();
        for (int i = 0; i < reader.maxDoc(); i++) {
            if (liveDocs != null && !liveDocs.get(i)) continue; // if it is deleted, just ignore it.
            // Load the stored document once and reuse it as the query below.
            Document candidate = reader.document(i);
            String fileName = getIDfromFileName(candidate.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]);
            if (queries.keySet().contains(fileName)) {
                queryCount += 1d;
                // ok, we've got a query here for a document ...
                // Only the elapsed time matters; the hits themselves are not inspected.
                searcher.search(candidate, reader);
            }
        }
        ms = System.currentTimeMillis() - ms;
    }
    System.out.printf("%s \t %3.1f \n", featureClass.getName().substring(featureClass.getName().lastIndexOf('.')+1), (double) ms / queryCount);
}
 
开发者ID:fish2000,项目名称:lire,代码行数:27,代码来源:TestUCID.java

示例15: getIdfs

import org.apache.lucene.index.MultiFields; //导入依赖的package包/类
/**
 * Computes the inverse document frequency of every term in every field of the
 * index, stores the values in {@code inverseDocFreq} and prints each one to
 * standard output.
 *
 * <p>The map is keyed by term text only, so a term that occurs in several fields
 * keeps the value computed for the field visited last.
 *
 * @param reader index to read terms and document frequencies from
 * @return Map of term and its inverse document frequency; unchanged when the
 *         index has no fields
 * @throws IOException if the index cannot be read
 */
public Map<String, Float> getIdfs(IndexReader reader) throws IOException
{
    Fields fields = MultiFields.getFields(reader); //get the fields of the index
    if (fields == null)
    {
        // No postings in the index: nothing to compute.
        return inverseDocFreq;
    }

    for (String field : fields)
    {
        Terms terms = fields.terms(field);
        if (terms == null)
        {
            continue; // field listed but without terms
        }
        TermsEnum termEnum = terms.iterator(null);

        BytesRef bytesRef;
        while ((bytesRef = termEnum.next()) != null)
        {
            // next() already positions the enum on the term, so docFreq() can be
            // read directly without seeking to the same term again.
            String term = bytesRef.utf8ToString();
            float idf = tfidfSIM.idf( termEnum.docFreq(), reader.numDocs() );
            inverseDocFreq.put(term, idf);
            System.out.println(term +" idf= "+ idf);
        }
    }

    return inverseDocFreq;
}
 
开发者ID:usc-isi-i2,项目名称:eswc-2015-semantic-typing,代码行数:31,代码来源:TfIdfSearcher.java


注:本文中的org.apache.lucene.index.MultiFields类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。