This article collects typical usage examples of the Java class org.apache.lucene.analysis.Analyzer. If you are unsure what the Analyzer class does or how to use it, the curated examples below should help.
The Analyzer class belongs to the org.apache.lucene.analysis package. Fifteen code examples are shown below, ordered by popularity.
Example 1: generateTermVectors
import org.apache.lucene.analysis.Analyzer; // import the required package/class
private Fields generateTermVectors(Collection<GetField> getFields, boolean withOffsets, @Nullable Map<String, String> perFieldAnalyzer, Set<String> fields)
        throws IOException {
    /* store document in memory index */
    MemoryIndex index = new MemoryIndex(withOffsets);
    for (GetField getField : getFields) {
        String field = getField.getName();
        if (fields.contains(field) == false) {
            // some fields are returned even when not asked for, e.g. _timestamp
            continue;
        }
        Analyzer analyzer = getAnalyzerAtField(field, perFieldAnalyzer);
        for (Object text : getField.getValues()) {
            index.addField(field, text.toString(), analyzer);
        }
    }
    /* and read vectors from it */
    return MultiFields.getFields(index.createSearcher().getIndexReader());
}
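
For context, here is a minimal standalone sketch of the same MemoryIndex round-trip (the field name, text, and StandardAnalyzer are illustrative assumptions; it also assumes the Lucene 5.x/6.x-era API used above, where MultiFields.getFields still exists):

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.memory.MemoryIndex;

private static Fields termVectorsSketch() throws IOException {
    MemoryIndex index = new MemoryIndex(true); // true = store offsets
    index.addField("body", "the quick brown fox", new StandardAnalyzer());
    // read the indexed fields (and thus the term vectors) back out of the in-memory index
    return MultiFields.getFields(index.createSearcher().getIndexReader());
}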
Example 2: createTransactionalIndex
import org.apache.lucene.analysis.Analyzer; // import the required package/class
/**
 * Creates a new {@link Index.Transactional} for the given folder with the given Lucene Analyzer.
 * The returned {@link Index} is not cached; the next call with the same arguments returns a different
 * instance of {@link Index}. The caller is responsible for caching the returned {@link Index}.
 * @param cacheFolder the folder in which the index is stored
 * @param analyzer the Lucene Analyzer used to split fields into tokens
 * @param isWritable <code>false</code> if the index will be used as read-only
 * @return the created {@link Index.Transactional}
 * @throws IOException in case of an IO problem
 * @since 2.27.1
 */
@NonNull
public static Index.Transactional createTransactionalIndex(final @NonNull File cacheFolder, final @NonNull Analyzer analyzer, boolean isWritable) throws IOException {
    Parameters.notNull("cacheFolder", cacheFolder); //NOI18N
    Parameters.notNull("analyzer", analyzer); //NOI18N
    if (!cacheFolder.canRead()) {
        throw new IOException(String.format("Cannot read cache folder: %s.", cacheFolder.getAbsolutePath())); //NOI18N
    }
    if (isWritable && !cacheFolder.canWrite()) {
        throw new IOException(String.format("Cannot write to cache folder: %s.", cacheFolder.getAbsolutePath())); //NOI18N
    }
    final Index.Transactional index = factory.createIndex(cacheFolder, analyzer);
    assert index != null;
    indexes.put(cacheFolder, new Ref(cacheFolder, index));
    return index;
}
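
A hypothetical caller could look like this (the enclosing IndexManager class name follows the NetBeans parsing.lucene module this snippet appears to come from; the folder path and KeywordAnalyzer are illustrative assumptions):

File cacheFolder = new File("/var/cache/my-index"); // illustrative path
Analyzer analyzer = new KeywordAnalyzer();          // illustrative analyzer
Index.Transactional index = IndexManager.createTransactionalIndex(cacheFolder, analyzer, true);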
Example 3: searchIndex
import org.apache.lucene.analysis.Analyzer; // import the required package/class
/**
 * Search the index.
 *
 * @param typeId the index type id (selects the game or soft index)
 * @param keywords the search keywords
 * @return the matching documents
 * @throws Exception
 */
public List<Document> searchIndex(Integer typeId, String keywords) throws Exception {
    // 1. init searcher
    Analyzer analyzer = new PaodingAnalyzer();
    IndexReader reader = IndexReader.open(typeId == appConfig.getGameTypeId() ? appConfig.getGameIndexDir()
            : appConfig.getSoftIndexDir());
    BooleanClause.Occur[] flags = new BooleanClause.Occur[] { BooleanClause.Occur.SHOULD,
            BooleanClause.Occur.SHOULD };
    Query query = MultiFieldQueryParser.parse(keywords, appConfig.getQueryFields(), flags, analyzer);
    query = query.rewrite(reader);
    // 2. search
    List<Document> docs = new ArrayList<Document>();
    Hits hits = (typeId == appConfig.getGameTypeId() ? gameSearcher.search(query, Sort.RELEVANCE)
            : softSearcher.search(query, Sort.RELEVANCE));
    for (int i = 0; i < hits.length(); i++) {
        docs.add(hits.doc(i));
    }
    // 3. return
    reader.close();
    return docs;
}
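
Note that this example depends on long-removed APIs (IndexReader.open, Hits, and the third-party PaodingAnalyzer). A rough modern-Lucene equivalent, as a sketch with illustrative index path, field names, and analyzer, might be:

String[] queryFields = {"title", "content"};        // illustrative field names
BooleanClause.Occur[] flags = {BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD};
Analyzer analyzer = new StandardAnalyzer();         // stands in for PaodingAnalyzer
IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get("/path/to/index")));
IndexSearcher searcher = new IndexSearcher(reader);
Query query = MultiFieldQueryParser.parse(keywords, queryFields, flags, analyzer);
TopDocs topDocs = searcher.search(query, 20);       // TopDocs replaces the removed Hits API
List<Document> docs = new ArrayList<>();
for (ScoreDoc sd : topDocs.scoreDocs) {
    docs.add(searcher.doc(sd.doc));
}
reader.close();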
Example 4: DocumentFieldMappers
import org.apache.lucene.analysis.Analyzer; // import the required package/class
public DocumentFieldMappers(Collection<FieldMapper> mappers, Analyzer defaultIndex, Analyzer defaultSearch, Analyzer defaultSearchQuote) {
    Map<String, FieldMapper> fieldMappers = new HashMap<>();
    Map<String, Analyzer> indexAnalyzers = new HashMap<>();
    Map<String, Analyzer> searchAnalyzers = new HashMap<>();
    Map<String, Analyzer> searchQuoteAnalyzers = new HashMap<>();
    for (FieldMapper mapper : mappers) {
        fieldMappers.put(mapper.name(), mapper);
        MappedFieldType fieldType = mapper.fieldType();
        put(indexAnalyzers, fieldType.names().indexName(), fieldType.indexAnalyzer(), defaultIndex);
        put(searchAnalyzers, fieldType.names().indexName(), fieldType.searchAnalyzer(), defaultSearch);
        put(searchQuoteAnalyzers, fieldType.names().indexName(), fieldType.searchQuoteAnalyzer(), defaultSearchQuote);
    }
    this.fieldMappers = Collections.unmodifiableMap(fieldMappers);
    this.indexAnalyzer = new FieldNameAnalyzer(indexAnalyzers);
    this.searchAnalyzer = new FieldNameAnalyzer(searchAnalyzers);
    this.searchQuoteAnalyzer = new FieldNameAnalyzer(searchQuoteAnalyzers);
}
Example 5: testBuildWordScorer
import org.apache.lucene.analysis.Analyzer; // import the required package/class
/**
 * Test the WordScorer emitted by the smoothing model
 */
public void testBuildWordScorer() throws IOException {
    SmoothingModel testModel = createTestModel();
    Map<String, Analyzer> mapping = new HashMap<>();
    mapping.put("field", new WhitespaceAnalyzer());
    PerFieldAnalyzerWrapper wrapper = new PerFieldAnalyzerWrapper(new WhitespaceAnalyzer(), mapping);
    IndexWriter writer = new IndexWriter(new RAMDirectory(), new IndexWriterConfig(wrapper));
    Document doc = new Document();
    doc.add(new Field("field", "someText", TextField.TYPE_NOT_STORED));
    writer.addDocument(doc);
    DirectoryReader ir = DirectoryReader.open(writer);
    WordScorer wordScorer = testModel.buildWordScorerFactory().newScorer(ir, MultiFields.getTerms(ir, "field"), "field", 0.9d,
            BytesRefs.toBytesRef(" "));
    assertWordScorer(wordScorer, testModel);
}
Example 6: findGoodEndForNoHighlightExcerpt
import org.apache.lucene.analysis.Analyzer; // import the required package/class
private static int findGoodEndForNoHighlightExcerpt(int noMatchSize, Analyzer analyzer, String fieldName, String contents) throws IOException {
    try (TokenStream tokenStream = analyzer.tokenStream(fieldName, contents)) {
        if (!tokenStream.hasAttribute(OffsetAttribute.class)) {
            // Can't split on term boundaries without offsets
            return -1;
        }
        int end = -1;
        tokenStream.reset();
        while (tokenStream.incrementToken()) {
            OffsetAttribute attr = tokenStream.getAttribute(OffsetAttribute.class);
            if (attr.endOffset() >= noMatchSize) {
                // Jump to the end of this token if it wouldn't put us past the boundary
                if (attr.endOffset() == noMatchSize) {
                    end = noMatchSize;
                }
                return end;
            }
            end = attr.endOffset();
        }
        tokenStream.end();
        // We've exhausted the token stream so we should just highlight everything.
        return end;
    }
}
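
A hypothetical call site, with an illustrative excerpt limit, field name, and analyzer:

Analyzer analyzer = new StandardAnalyzer(); // illustrative analyzer
String contents = "A plain stored field value that is longer than the excerpt limit.";
int end = findGoodEndForNoHighlightExcerpt(30, analyzer, "body", contents);
// -1 means the analyzer produced no offsets, so there is no clean cut point
String excerpt = end < 0 ? "" : contents.substring(0, end);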
Example 7: testSimple
import org.apache.lucene.analysis.Analyzer; // import the required package/class
public void testSimple() throws IOException {
    Analyzer analyzer = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            Tokenizer t = new MockTokenizer(MockTokenizer.WHITESPACE, false);
            return new TokenStreamComponents(t, new UniqueTokenFilter(t));
        }
    };
    TokenStream test = analyzer.tokenStream("test", "this test with test");
    test.reset();
    CharTermAttribute termAttribute = test.addAttribute(CharTermAttribute.class);
    assertThat(test.incrementToken(), equalTo(true));
    assertThat(termAttribute.toString(), equalTo("this"));
    assertThat(test.incrementToken(), equalTo(true));
    assertThat(termAttribute.toString(), equalTo("test"));
    assertThat(test.incrementToken(), equalTo(true));
    assertThat(termAttribute.toString(), equalTo("with"));
    assertThat(test.incrementToken(), equalTo(false));
    test.end();   // complete the TokenStream contract: end() then close()
    test.close();
}
Example 8: DirectCandidateGenerator
import org.apache.lucene.analysis.Analyzer; // import the required package/class
public DirectCandidateGenerator(DirectSpellChecker spellchecker, String field, SuggestMode suggestMode, IndexReader reader,
        double nonErrorLikelihood, int numCandidates, Analyzer preFilter, Analyzer postFilter, Terms terms) throws IOException {
    if (terms == null) {
        throw new IllegalArgumentException("generator field [" + field + "] doesn't exist");
    }
    this.spellchecker = spellchecker;
    this.field = field;
    this.numCandidates = numCandidates;
    this.suggestMode = suggestMode;
    this.reader = reader;
    final long dictSize = terms.getSumTotalTermFreq();
    this.useTotalTermFrequency = dictSize != -1;
    this.dictSize = dictSize == -1 ? reader.maxDoc() : dictSize;
    this.preFilter = preFilter;
    this.postFilter = postFilter;
    this.nonErrorLikelihood = nonErrorLikelihood;
    float thresholdFrequency = spellchecker.getThresholdFrequency();
    this.frequencyPlateau = thresholdFrequency >= 1.0f ? (int) thresholdFrequency : (int) (dictSize * thresholdFrequency);
    termsEnum = terms.iterator();
}
Example 9: testCommonTermsQuery
import org.apache.lucene.analysis.Analyzer; // import the required package/class
public void testCommonTermsQuery() throws IOException {
    Directory dir = newDirectory();
    String value = "The quick brown fox.";
    Analyzer analyzer = new StandardAnalyzer();
    IndexReader ir = indexOneDoc(dir, "text", value, analyzer);
    CommonTermsQuery query = new CommonTermsQuery(BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD, 128);
    query.add(new Term("text", "quick"));
    query.add(new Term("text", "brown"));
    query.add(new Term("text", "fox"));
    IndexSearcher searcher = newSearcher(ir);
    TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
    assertThat(topDocs.totalHits, equalTo(1));
    int docId = topDocs.scoreDocs[0].doc;
    CustomPassageFormatter passageFormatter = new CustomPassageFormatter("<b>", "</b>", new DefaultEncoder());
    CustomUnifiedHighlighter highlighter = new CustomUnifiedHighlighter(searcher, analyzer,
            passageFormatter, null, value, false);
    Snippet[] snippets = highlighter.highlightField("text", query, docId, 5);
    assertThat(snippets.length, equalTo(1));
    assertThat(snippets[0].getText(), equalTo("The <b>quick</b> <b>brown</b> <b>fox</b>."));
    ir.close();
    dir.close();
}
Example 10: parse
import org.apache.lucene.analysis.Analyzer; // import the required package/class
/**
 * @deprecated Use {@link #parse(String, String[], BooleanClause.Occur[], Analyzer)}
 */
@Deprecated
public static Query parse(Version matchVersion, String query, String[] fields,
        BooleanClause.Occur[] flags, Analyzer analyzer) throws ParseException {
    if (fields.length != flags.length)
        throw new IllegalArgumentException("fields.length != flags.length");
    BooleanQuery bQuery = new BooleanQuery();
    for (int i = 0; i < fields.length; i++) {
        QueryParser qp = new QueryParser(matchVersion, fields[i], analyzer);
        Query q = qp.parse(query);
        if (q != null && // q never null, just being defensive
                (!(q instanceof BooleanQuery) || ((BooleanQuery) q).getClauses().length > 0)) {
            bQuery.add(q, flags[i]);
        }
    }
    return bQuery;
}
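
For comparison, the non-deprecated overload named in the Javadoc takes no Version argument and can be called like this (field names, flags, and query text are illustrative):

String[] fields = {"title", "body"};
BooleanClause.Occur[] flags = {BooleanClause.Occur.SHOULD, BooleanClause.Occur.MUST};
Query q = MultiFieldQueryParser.parse("analyzer tokens", fields, flags, new StandardAnalyzer());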
Example 11: assertLuceneAnalyzersAreNotClosed
import org.apache.lucene.analysis.Analyzer; // import the required package/class
private void assertLuceneAnalyzersAreNotClosed(Map<PreBuiltAnalyzers, List<Version>> loadedAnalyzers) throws IOException {
    for (Map.Entry<PreBuiltAnalyzers, List<Version>> preBuiltAnalyzerEntry : loadedAnalyzers.entrySet()) {
        for (Version version : preBuiltAnalyzerEntry.getValue()) {
            Analyzer analyzer = preBuiltAnalyzerEntry.getKey().getCache().get(version);
            try (TokenStream stream = analyzer.tokenStream("foo", "bar")) {
                stream.reset();
                while (stream.incrementToken()) {
                    // consume the entire stream; a closed analyzer would fail here
                }
                stream.end();
            }
        }
    }
}
Example 12: indexInit
import org.apache.lucene.analysis.Analyzer; // import the required package/class
public void indexInit() throws Exception {
    Analyzer analyzer = new IKAnalyzer();
    // Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_44);
    this.indexSettings = new LuceneIndexSettings(analyzer);
    this.indexSettings.createFSDirectory("f:\\file");
    this.luceneIndex = new LuceneIndex(this.indexSettings);
    this.luceneIndexSearch = new LuceneIndexSearch(indexSettings, new LuceneResultCollector(indexSettings));
}
Example 13: tokenStream
import org.apache.lucene.analysis.Analyzer; // import the required package/class
@Override
public TokenStream tokenStream(Analyzer analyzer, TokenStream previous) {
    if (fieldType().indexOptions() != IndexOptions.NONE) {
        return getCachedStream().setIntValue(number);
    }
    return null;
}
Example 14: getAnalyzer
import org.apache.lucene.analysis.Analyzer; // import the required package/class
/**
 * Returns the {@link Locale}-specific {@link Analyzer}.
 * If the {@link Locale} is null, then the {@link #DEFAULT_ANALYZER} is returned.
 * @param locale the {@link Locale} whose {@link Analyzer} to get
 * @return the {@link Analyzer}, or null if no {@link Analyzer} was defined for the given {@link Locale}
 */
Analyzer getAnalyzer(final Locale locale) {
    if (locale == null) {
        return DEFAULT_ANALYZER;
    }
    return ANALYZERS.get(locale.getLanguage());
}
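
A hypothetical lookup using the documented null fallback:

Analyzer analyzer = getAnalyzer(Locale.GERMAN); // may be null if no German analyzer is registered
if (analyzer == null) {
    analyzer = getAnalyzer(null);               // a null Locale yields DEFAULT_ANALYZER
}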
Example 15: getAnalyzers
import org.apache.lucene.analysis.Analyzer; // import the required package/class
@Override
public Map<String, AnalysisModule.AnalysisProvider<AnalyzerProvider<? extends Analyzer>>> getAnalyzers() {
    Map<String, AnalysisModule.AnalysisProvider<AnalyzerProvider<? extends Analyzer>>> extra = new HashMap<>();
    extra.put("jieba_search", JiebaAnalyzerProvider::getJiebaSearchAnalyzerProvider);
    extra.put("jieba_index", JiebaAnalyzerProvider::getJiebaIndexAnalyzerProvider);
    return extra;
}