本文整理汇总了Java中org.apache.lucene.index.MultiFields类的典型用法代码示例。如果您正苦于以下问题：Java MultiFields类的具体用法？Java MultiFields怎么用？Java MultiFields使用的例子？那么，这里精选的类代码示例或许可以为您提供帮助。
MultiFields类属于org.apache.lucene.index包，在下文中一共展示了MultiFields类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: testBuildWordScorer
import org.apache.lucene.index.MultiFields; //导入依赖的package包/类
/**
 * Tests the WordScorer emitted by the smoothing model.
 *
 * <p>Indexes a single document containing the term {@code someText} in
 * {@code "field"} into an in-memory directory, builds a word scorer from the
 * model returned by {@code createTestModel()}, and hands both to
 * {@code assertWordScorer}.
 *
 * @throws IOException if indexing or opening the reader fails
 */
public void testBuildWordScorer() throws IOException {
    SmoothingModel testModel = createTestModel();
    Map<String, Analyzer> mapping = new HashMap<>();
    mapping.put("field", new WhitespaceAnalyzer());
    PerFieldAnalyzerWrapper wrapper = new PerFieldAnalyzerWrapper(new WhitespaceAnalyzer(), mapping);
    // try-with-resources: the directory, writer and reader are released even
    // when an assertion below fails (the original never closed any of them).
    try (RAMDirectory directory = new RAMDirectory();
         IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(wrapper))) {
        Document doc = new Document();
        doc.add(new Field("field", "someText", TextField.TYPE_NOT_STORED));
        writer.addDocument(doc);
        try (DirectoryReader ir = DirectoryReader.open(writer)) {
            WordScorer wordScorer = testModel.buildWordScorerFactory().newScorer(
                    ir, MultiFields.getTerms(ir, "field"), "field", 0.9d, BytesRefs.toBytesRef(" "));
            assertWordScorer(wordScorer, testModel);
        }
    }
}
示例2: QueryAutoStopWordAnalyzer
import org.apache.lucene.index.MultiFields; //导入依赖的package包/类
/**
 * Builds a QueryAutoStopWordAnalyzer whose stop words are computed per field:
 * for each of the given fields, every indexed term whose document frequency
 * is strictly greater than {@code maxDocFreq} becomes a stop word.
 *
 * <p>A field without terms in the reader still gets an (empty) stop word set.
 *
 * @param delegate Analyzer whose TokenStream will be filtered
 * @param indexReader IndexReader the stop words are derived from
 * @param fields Fields for which stop words are calculated
 * @param maxDocFreq Document frequency a term must exceed to be a stop word
 * @throws IOException Can be thrown while reading from the IndexReader
 */
public QueryAutoStopWordAnalyzer(
    Analyzer delegate,
    IndexReader indexReader,
    Collection<String> fields,
    int maxDocFreq) throws IOException {
  super(delegate.getReuseStrategy());
  this.delegate = delegate;

  for (String fieldName : fields) {
    Set<String> frequentTerms = new HashSet<>();
    Terms fieldTerms = MultiFields.getTerms(indexReader, fieldName);
    // Scratch buffer used to decode the UTF-8 term bytes into chars.
    CharsRefBuilder scratch = new CharsRefBuilder();

    if (fieldTerms != null) {
      TermsEnum termsEnum = fieldTerms.iterator(null);
      for (BytesRef term = termsEnum.next(); term != null; term = termsEnum.next()) {
        if (termsEnum.docFreq() <= maxDocFreq) {
          continue;
        }
        scratch.copyUTF8Bytes(term);
        frequentTerms.add(scratch.toString());
      }
    }

    stopWordsPerField.put(fieldName, frequentTerms);
  }
}
示例3: visitMatchingTerms
import org.apache.lucene.index.MultiFields; //导入依赖的package包/类
/**
 * Reports this term to the visitor if, and only if, it is present in the
 * given field of the index.
 *
 * @param reader index to look the term up in
 * @param fieldName field whose term dictionary is consulted
 * @param mtv visitor notified with the Lucene term when it exists
 * @throws IOException if the term dictionary cannot be read
 */
@Override
public void visitMatchingTerms(
    IndexReader reader,
    String fieldName,
    MatchingTermVisitor mtv) throws IOException
{
  /* check term presence in index here for symmetry with other SimpleTerm's */
  Terms fieldTerms = MultiFields.getTerms(reader, fieldName);
  if (fieldTerms == null) {
    // The field has no terms at all, so nothing can match.
    return;
  }

  TermsEnum dictionary = fieldTerms.iterator(null);
  BytesRef wanted = new BytesRef(getTermText());
  boolean present = dictionary.seekCeil(wanted) == TermsEnum.SeekStatus.FOUND;
  if (present) {
    mtv.visitMatchingTerm(getLuceneTerm(fieldName));
  }
}
示例4: shardOperation
import org.apache.lucene.index.MultiFields; //导入依赖的package包/类
/**
 * Collects field statistics on a single shard for every field named in the
 * request.
 *
 * <p>Fields that are mapped but have no terms in the shard are left out of
 * the response; a field without a mapping is rejected.
 *
 * @param request shard-level request carrying the shard id and field names
 * @return response holding the statistics gathered per field
 * @throws IllegalArgumentException if a requested field has no mapping
 */
@Override
protected FieldStatsShardResponse shardOperation(FieldStatsShardRequest request) {
    ShardId shardId = request.shardId();
    Map<String, FieldStats> statsByField = new HashMap<>();
    IndexService indexService = indicesService.indexServiceSafe(shardId.getIndex());
    MapperService mappers = indexService.mapperService();
    IndexShard indexShard = indexService.shardSafe(shardId.id());

    try (Engine.Searcher searcher = indexShard.acquireSearcher("fieldstats")) {
        for (String field : request.getFields()) {
            MappedFieldType mappedType = mappers.fullName(field);
            if (mappedType == null) {
                throw new IllegalArgumentException("field [" + field + "] doesn't exist");
            }

            IndexReader reader = searcher.reader();
            Terms terms = MultiFields.getTerms(reader, field);
            if (terms == null) {
                // Mapped, but nothing indexed for it on this shard.
                continue;
            }
            statsByField.put(field, mappedType.stats(terms, reader.maxDoc()));
        }
    } catch (IOException e) {
        throw ExceptionsHelper.convertToElastic(e);
    }

    return new FieldStatsShardResponse(shardId, statsByField);
}
示例5: testSeekCeilNotFound
import org.apache.lucene.index.MultiFields; //导入依赖的package包/类
/**
 * Verifies that {@code seekCeil} on a term that is absent from the index
 * reports NOT_FOUND and leaves the enum positioned on the next term.
 *
 * <p>The index holds the empty string plus 36 documents with the terms
 * {@code (char) (97 + i)} and {@code "a" + (char) (97 + i)}; seeking to the
 * single byte 0x22 is expected to land on {@code "a"} at ordinal 1.
 */
public void testSeekCeilNotFound() throws Exception {
    Directory directory = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random(), directory);

    // Get empty string in there!
    Document emptyTermDoc = new Document();
    emptyTermDoc.add(newStringField("field", "", Field.Store.NO));
    writer.addDocument(emptyTermDoc);

    for (int i = 0; i < 36; i++) {
        char letter = (char) (97 + i);
        String single = String.valueOf(letter);
        String prefixed = "a" + letter;
        Document letterDoc = new Document();
        letterDoc.add(newTextField("field", single + " " + prefixed, Field.Store.NO));
        writer.addDocument(letterDoc);
    }

    writer.forceMerge(1);
    IndexReader indexReader = writer.getReader();
    TermsEnum termsEnum = MultiFields.getTerms(indexReader, "field").iterator(null);

    BytesRef missingTerm = new BytesRef(new byte[] {0x22});
    assertEquals(TermsEnum.SeekStatus.NOT_FOUND, termsEnum.seekCeil(missingTerm));
    assertEquals("a", termsEnum.term().utf8ToString());
    assertEquals(1L, termsEnum.ord());

    indexReader.close();
    writer.close();
    directory.close();
}
示例6: assignClass
import org.apache.lucene.index.MultiFields; //导入依赖的package包/类
/**
 * {@inheritDoc}
 *
 * <p>Every term of the class field is treated as a candidate class; the one
 * with the highest sum of log prior and log likelihood for the tokenized
 * input wins. The reported score is {@code 10 / |max|}, where {@code max} is
 * that highest sum.
 *
 * <p>If the class field has no terms at all, no candidate is evaluated and
 * the result carries an empty {@link BytesRef}, exactly as when the term
 * enumeration is empty.
 *
 * @throws IOException if the classifier has not been trained yet, or if
 *         reading from the index fails
 */
@Override
public ClassificationResult<BytesRef> assignClass(String inputDocument) throws IOException {
  if (atomicReader == null) {
    throw new IOException("You must first call Classifier#train");
  }
  double max = - Double.MAX_VALUE;
  BytesRef foundClass = new BytesRef();
  // MultiFields.getTerms returns null when the field has no terms; guard
  // against it instead of failing with a NullPointerException.
  Terms terms = MultiFields.getTerms(atomicReader, classFieldName);
  TermsEnum termsEnum = terms == null ? null : terms.iterator(null);
  String[] tokenizedDoc = tokenizeDoc(inputDocument);
  if (termsEnum != null) {
    BytesRef next;
    while ((next = termsEnum.next()) != null) {
      double clVal = calculateLogPrior(next) + calculateLogLikelihood(tokenizedDoc, next);
      if (clVal > max) {
        max = clVal;
        // The enum reuses its BytesRef, so keep a private copy of the winner.
        foundClass = BytesRef.deepCopyOf(next);
      }
    }
  }
  double score = 10 / Math.abs(max);
  return new ClassificationResult<>(foundClass, score);
}
示例7: assertNormsEquals
import org.apache.lucene.index.MultiFields; //导入依赖的package包/类
/**
 * Asserts that both readers expose identical norms for every field of the
 * left reader.
 *
 * @param info message attached to every assertion
 * @param leftReader reader whose fields drive the comparison
 * @param rightReader reader compared against {@code leftReader}
 * @throws IOException if either reader cannot be read
 */
public void assertNormsEquals(String info, IndexReader leftReader, IndexReader rightReader) throws IOException {
    Fields leftFields = MultiFields.getFields(leftReader);
    Fields rightFields = MultiFields.getFields(rightReader);

    // Fields could be null if there are no postings,
    // but then it must be null for both
    boolean eitherSideEmpty = leftFields == null || rightFields == null;
    if (eitherSideEmpty) {
        assertNull(info, leftFields);
        assertNull(info, rightFields);
        return;
    }

    for (String field : leftFields) {
        NumericDocValues leftNorms = MultiDocValues.getNormValues(leftReader, field);
        NumericDocValues rightNorms = MultiDocValues.getNormValues(rightReader, field);

        if (leftNorms == null || rightNorms == null) {
            // Norms missing on one side must be missing on the other too.
            assertNull(info, leftNorms);
            assertNull(info, rightNorms);
            continue;
        }
        assertDocValuesEquals(info, leftReader.maxDoc(), leftNorms, rightNorms);
    }
}
示例8: countTerms
import org.apache.lucene.index.MultiFields; //导入依赖的package包/类
/**
 * Counts the terms the given query enumerates on its field, asserting along
 * the way that they come back in strictly increasing order.
 *
 * @param q multi-term query whose terms enum is walked
 * @return number of enumerated terms, or 0 when the field has no terms
 */
private int countTerms(MultiTermQuery q) throws Exception {
    final Terms fieldTerms = MultiFields.getTerms(reader, q.getField());
    if (fieldTerms == null) {
        return 0;
    }

    final TermsEnum matching = q.getTermsEnum(fieldTerms);
    assertNotNull(matching);

    int seen = 0;
    BytesRef previous = null;
    for (BytesRef current = matching.next(); current != null; current = matching.next()) {
        seen++;
        if (previous != null) {
            assertTrue(previous.compareTo(current) < 0);
        }
        // The enum may reuse its BytesRef, so remember a copy.
        previous = BytesRef.deepCopyOf(current);
    }
    // LUCENE-3314: the results after next() already returned null are undefined,
    // so next() is deliberately not called again here.
    return seen;
}
示例9: testAllDocs
import org.apache.lucene.index.MultiFields; //导入依赖的package包/类
/**
 * Checks which terms enum implementation TermRangeQuery picks for ranges
 * over a four-document index ("A".."D"), and how many documents each range
 * matches: ranges with no effective lower bound must not use
 * TermRangeTermsEnum and match all 4 documents, while a range starting at
 * "B" must use it and match 3.
 */
public void testAllDocs() throws Exception {
    initializeIndex(new String[]{"A", "B", "C", "D"});
    IndexReader indexReader = DirectoryReader.open(dir);
    IndexSearcher searcher = newSearcher(indexReader);

    // Both bounds open, both marked inclusive.
    TermRangeQuery rangeQuery = new TermRangeQuery("content", null, null, true, true);
    Terms contentTerms = MultiFields.getTerms(searcher.getIndexReader(), "content");
    assertFalse(rangeQuery.getTermsEnum(contentTerms) instanceof TermRangeTermsEnum);
    assertEquals(4, searcher.search(rangeQuery, null, 1000).scoreDocs.length);

    // Both bounds open, both marked exclusive.
    rangeQuery = new TermRangeQuery("content", null, null, false, false);
    assertFalse(rangeQuery.getTermsEnum(contentTerms) instanceof TermRangeTermsEnum);
    assertEquals(4, searcher.search(rangeQuery, null, 1000).scoreDocs.length);

    // Empty-string lower bound, inclusive.
    rangeQuery = TermRangeQuery.newStringRange("content", "", null, true, false);
    assertFalse(rangeQuery.getTermsEnum(contentTerms) instanceof TermRangeTermsEnum);
    assertEquals(4, searcher.search(rangeQuery, null, 1000).scoreDocs.length);

    // and now another one
    rangeQuery = TermRangeQuery.newStringRange("content", "B", null, true, false);
    assertTrue(rangeQuery.getTermsEnum(contentTerms) instanceof TermRangeTermsEnum);
    assertEquals(3, searcher.search(rangeQuery, null, 1000).scoreDocs.length);

    indexReader.close();
}
示例10: testPrefixTerm
import org.apache.lucene.index.MultiFields; //导入依赖的package包/类
/**
 * Tests that a WildcardQuery whose term only has a trailing {@code *} is
 * rewritten to a single PrefixQuery, preserving boost and rewriteMethod,
 * and that a lone {@code *} uses neither a PrefixTermsEnum nor an
 * AutomatonTermsEnum.
 */
public void testPrefixTerm() throws IOException {
    Directory store = getIndexStore("field", new String[]{"prefix", "prefixx"});
    IndexReader indexReader = DirectoryReader.open(store);
    IndexSearcher searcher = newSearcher(indexReader);

    // Trailing wildcard only: should behave like a prefix query.
    MultiTermQuery wildcard = new WildcardQuery(new Term("field", "prefix*"));
    assertMatches(searcher, wildcard, 2);
    Terms fieldTerms = MultiFields.getTerms(searcher.getIndexReader(), "field");
    assertTrue(wildcard.getTermsEnum(fieldTerms) instanceof PrefixTermsEnum);

    // Match-everything pattern: neither prefix nor automaton enumeration.
    wildcard = new WildcardQuery(new Term("field", "*"));
    assertMatches(searcher, wildcard, 2);
    assertFalse(wildcard.getTermsEnum(fieldTerms) instanceof PrefixTermsEnum);
    String enumClassName = wildcard.getTermsEnum(fieldTerms).getClass().getSimpleName();
    assertFalse(enumClassName.contains("AutomatonTermsEnum"));

    indexReader.close();
    store.close();
}
示例11: getFirstMatch
import org.apache.lucene.index.MultiFields; //导入依赖的package包/类
/**
 * Looks up the single live document containing the given term.
 *
 * <p>Asserts that at most one document matches.
 *
 * @param r reader to search
 * @param t term to look up
 * @return the matching document id, or -1 when the reader has no fields, the
 *         field has no terms, the term is absent, or no live document
 *         contains it
 * @throws IOException if the index cannot be read
 */
protected int getFirstMatch(IndexReader r, Term t) throws IOException {
    Fields allFields = MultiFields.getFields(r);
    if (allFields == null) {
        return -1;
    }

    Terms fieldTerms = allFields.terms(t.field());
    if (fieldTerms == null) {
        return -1;
    }

    BytesRef wanted = t.bytes();
    final TermsEnum dictionary = fieldTerms.iterator(null);
    if (!dictionary.seekExact(wanted)) {
        return -1;
    }

    DocsEnum postings = dictionary.docs(MultiFields.getLiveDocs(r), null, DocsEnum.FLAG_NONE);
    int firstDoc = postings.nextDoc();
    if (firstDoc == DocIdSetIterator.NO_MORE_DOCS) {
        return -1;
    }

    // There must not be a second match for the term.
    int secondDoc = postings.nextDoc();
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, secondDoc);
    return firstDoc;
}
示例12: iterateAllDocs
import org.apache.lucene.index.MultiFields; //导入依赖的package包/类
/**
 * Creates an iterator over every document id below the reader's
 * {@code maxDoc()}. When the reader reports a live-docs bit set, ids whose
 * bit is not set are filtered out.
 *
 * @return iterator over all document ids, restricted to live documents when
 *         live-docs information is available
 */
private DocIdSetIterator iterateAllDocs()
{
    IndexReader indexReader = searcher.getIndexReader();
    final Bits live = MultiFields.getLiveDocs( indexReader );
    final DocIdSetIterator everyDoc = DocIdSetIterator.all( indexReader.maxDoc() );

    if ( live != null )
    {
        return new FilteredDocIdSetIterator( everyDoc )
        {
            @Override
            protected boolean match( int doc )
            {
                return live.get( doc );
            }
        };
    }

    // No live-docs bit set was reported: hand back the unfiltered iterator.
    return everyDoc;
}
示例13: LuceneUtils
import org.apache.lucene.index.MultiFields; //导入依赖的package包/类
/**
 * Opens the Lucene index named by the flag configuration and loads the
 * optional stop-word and start-word lists.
 *
 * @param flagConfig Contains all information necessary for configuring LuceneUtils.
 *        {@link FlagConfig#luceneindexpath()} must be non-empty.
 * @throws IllegalArgumentException if {@code luceneindexpath} is empty
 * @throws IOException if the index cannot be opened or read
 */
public LuceneUtils(FlagConfig flagConfig) throws IOException {
    if (flagConfig.luceneindexpath().isEmpty()) {
        throw new IllegalArgumentException(
            "-luceneindexpath is a required argument for initializing LuceneUtils instance.");
    }

    this.compositeReader =
        DirectoryReader.open(
            FSDirectory.open(
                FileSystems.getDefault().getPath(flagConfig.luceneindexpath())));
    this.leafReader = SlowCompositeReaderWrapper.wrap(compositeReader);
    // NOTE(review): the result is discarded; kept because the call can throw.
    MultiFields.getFields(compositeReader);
    this.flagConfig = flagConfig;

    if (!flagConfig.stoplistfile().isEmpty()) {
        loadStopWords(flagConfig.stoplistfile());
    }
    if (!flagConfig.startlistfile().isEmpty()) {
        loadStartWords(flagConfig.startlistfile());
    }

    VerbatimLogger.info(
        "Initialized LuceneUtils from Lucene index in directory: " + flagConfig.luceneindexpath() + "\n");
    VerbatimLogger.info(
        "Fields in index are: " + String.join(", ", this.getFieldNames()) + "\n");
}
示例14: testSearchSpeed
import org.apache.lucene.index.MultiFields; //导入依赖的package包/类
/**
 * Indexes the test images with the given feature class, then runs one search
 * for every live document whose identifier is a key of {@code queries}, and
 * prints the feature's simple class name with the average time per query in
 * milliseconds.
 *
 * @param images not used by this method
 * @param featureClass feature implementation to index and search with
 * @throws IOException if indexing, opening the index or searching fails
 */
private void testSearchSpeed(ArrayList<String> images, final Class featureClass) throws IOException {
    parallelIndexer = new ParallelIndexer(8, indexPath, testExtensive, true) {
        @Override
        public void addBuilders(ChainedDocumentBuilder builder) {
            builder.addBuilder(new GenericDocumentBuilder(featureClass, "feature"));
        }
    };
    parallelIndexer.run();
    IndexReader reader = DirectoryReader.open(new RAMDirectory(FSDirectory.open(new File(indexPath)), IOContext.READONCE));
    try {
        // Null when the index has no deletions, so check it directly rather
        // than relying on hasDeletions() to avoid a NullPointerException.
        Bits liveDocs = MultiFields.getLiveDocs(reader);
        double queryCount = 0d;
        ImageSearcher searcher = new GenericFastImageSearcher(100, featureClass, "feature");
        long ms = System.currentTimeMillis();
        for (int i = 0; i < reader.maxDoc(); i++) {
            if (liveDocs != null && !liveDocs.get(i)) continue; // if it is deleted, just ignore it.
            String fileName = getIDfromFileName(reader.document(i).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]);
            if (queries.keySet().contains(fileName)) {
                queryCount += 1d;
                // ok, we've got a query here for a document ...
                Document queryDoc = reader.document(i);
                // Only the elapsed time matters; the hits are discarded.
                searcher.search(queryDoc, reader);
            }
        }
        ms = System.currentTimeMillis() - ms;
        System.out.printf("%s \t %3.1f \n", featureClass.getName().substring(featureClass.getName().lastIndexOf('.')+1), (double) ms / queryCount);
    } finally {
        // The original never released the reader.
        reader.close();
    }
}
示例15: getIdfs
import org.apache.lucene.index.MultiFields; //导入依赖的package包/类
/**
 * Computes the inverse document frequency of every term in every field of
 * the index, stores it in {@code inverseDocFreq} keyed by the term text, and
 * prints each value to standard output.
 *
 * <p>A term that occurs in several fields keeps the value of the field
 * visited last, because entries are keyed by term text only.
 *
 * @param reader index to read the terms and document counts from
 * @return Map of term and its inverse document frequency; unchanged when the
 *         index has no fields
 * @throws IOException if the index cannot be read
 */
public Map<String, Float> getIdfs(IndexReader reader) throws IOException
{
    Fields fields = MultiFields.getFields(reader); //get the fields of the index
    if (fields == null)
    {
        // No postings in the index: there is nothing to compute.
        return inverseDocFreq;
    }

    final int numDocs = reader.numDocs();
    for (String field: fields)
    {
        // getTerms may return null for a field without terms.
        org.apache.lucene.index.Terms terms = MultiFields.getTerms(reader, field);
        if (terms == null)
        {
            continue;
        }

        TermsEnum termEnum = terms.iterator(null);
        BytesRef bytesRef;
        // next() already positions the enum on the returned term, so no
        // additional seekExact is needed before reading docFreq().
        while ((bytesRef = termEnum.next()) != null)
        {
            String term = bytesRef.utf8ToString();
            float idf = tfidfSIM.idf( termEnum.docFreq(), numDocs );
            inverseDocFreq.put(term, idf);
            System.out.println(term +" idf= "+ idf);
        }
    }
    return inverseDocFreq;
}