This page collects typical usage examples of the Java class org.apache.lucene.analysis.core.WhitespaceAnalyzer. If you are wondering what WhitespaceAnalyzer is for, how to use it, or where to find working examples, the hand-picked code samples below should help.
WhitespaceAnalyzer belongs to the org.apache.lucene.analysis.core package. Fifteen code examples of the class are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Java code samples.
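Before diving into the examples, here is a minimal self-contained sketch (not taken from the collected examples below) illustrating what WhitespaceAnalyzer actually does: it splits the input on whitespace only, with no lowercasing, stemming, or stop-word removal, so case and punctuation survive intact.

import java.io.IOException;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class WhitespaceAnalyzerDemo {
    public static void main(String[] args) throws IOException {
        try (WhitespaceAnalyzer analyzer = new WhitespaceAnalyzer();
             TokenStream ts = analyzer.tokenStream("field", "Hello World foo-BAR")) {
            CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
            ts.reset();                        // mandatory before the first incrementToken()
            while (ts.incrementToken()) {
                System.out.println(term);      // prints: Hello, World, foo-BAR (one per line)
            }
            ts.end();                          // finalize offsets
        }
    }
}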
Example 1: testRamDirectory
import org.apache.lucene.analysis.core.WhitespaceAnalyzer; // import the required package/class
public void testRamDirectory() throws IOException {
    long start = System.currentTimeMillis();
    IndexWriterConfig indexWriterConfig = new IndexWriterConfig(new WhitespaceAnalyzer())
            .setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    RAMDirectory ramDirectory = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(ramDirectory, indexWriterConfig);
    for (int i = 0; i < 10000000; i++) {
        indexWriter.addDocument(addDocument(i));
    }
    indexWriter.commit();
    indexWriter.close();
    long end = System.currentTimeMillis();
    log.error("RamDirectory consumes {}s!", (end - start) / 1000);

    start = System.currentTimeMillis();
    IndexSearcher indexSearcher = new IndexSearcher(DirectoryReader.open(ramDirectory));
    int total = 0;
    for (int i = 0; i < 10000000; i++) {
        TermQuery key1 = new TermQuery(new Term("key1", "key" + i));
        TopDocs search = indexSearcher.search(key1, 10);
        total += search.totalHits;
    }
    System.out.println(total);
    end = System.currentTimeMillis();
    log.error("RamDirectory search consumes {}ms!", (end - start));
}
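Both this example and Example 2 below call an addDocument(int) helper that is not shown on this page. Judging from the TermQuery in the search loop, a plausible minimal version looks like this (only the "key1" field name is confirmed by the code above; the StringField type and Field.Store.NO are assumptions):

// hypothetical helper assumed by Examples 1 and 2: one document per id, carrying
// a single indexed, un-analyzed "key1" term that the search loop's TermQuery can match
private Document addDocument(int i) {
    Document doc = new Document();
    doc.add(new StringField("key1", "key" + i, Field.Store.NO));
    return doc;
}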
Example 2: testMMapDirectory
import org.apache.lucene.analysis.core.WhitespaceAnalyzer; // import the required package/class
public void testMMapDirectory() throws IOException {
    long start = System.currentTimeMillis();
    IndexWriterConfig indexWriterConfig = new IndexWriterConfig(new WhitespaceAnalyzer())
            .setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    FSDirectory open = FSDirectory.open(Paths.get("E:/testlucene"));
    IndexWriter indexWriter = new IndexWriter(open, indexWriterConfig);
    for (int i = 0; i < 10000000; i++) {
        indexWriter.addDocument(addDocument(i));
    }
    indexWriter.commit();
    indexWriter.close();
    long end = System.currentTimeMillis();
    log.error("MMapDirectory consumes {}s!", (end - start) / 1000);

    start = System.currentTimeMillis();
    IndexSearcher indexSearcher = new IndexSearcher(DirectoryReader.open(open));
    int total = 0;
    for (int i = 0; i < 10000000; i++) {
        TermQuery key1 = new TermQuery(new Term("key1", "key" + i));
        TopDocs search = indexSearcher.search(key1, 10);
        total += search.totalHits;
    }
    System.out.println(total);
    end = System.currentTimeMillis();
    log.error("MMapDirectory search consumes {}ms!", (end - start));
}
Example 3: testCreateMultiDocumentSearcher
import org.apache.lucene.analysis.core.WhitespaceAnalyzer; // import the required package/class
public void testCreateMultiDocumentSearcher() throws Exception {
    int numDocs = randomIntBetween(2, 8);
    List<ParseContext.Document> docs = new ArrayList<>(numDocs);
    for (int i = 0; i < numDocs; i++) {
        docs.add(new ParseContext.Document());
    }
    Analyzer analyzer = new WhitespaceAnalyzer();
    ParsedDocument parsedDocument = new ParsedDocument(null, null, "_id", "_type", null, docs, null, null, null);
    IndexSearcher indexSearcher = PercolateQueryBuilder.createMultiDocumentSearcher(analyzer, parsedDocument);
    assertThat(indexSearcher.getIndexReader().numDocs(), equalTo(numDocs));

    // ensure that any query gets modified so that the nested docs are never included as hits:
    Query query = new MatchAllDocsQuery();
    BooleanQuery result = (BooleanQuery) indexSearcher.createNormalizedWeight(query, true).getQuery();
    assertThat(result.clauses().size(), equalTo(2));
    assertThat(result.clauses().get(0).getQuery(), sameInstance(query));
    assertThat(result.clauses().get(0).getOccur(), equalTo(BooleanClause.Occur.MUST));
    assertThat(result.clauses().get(1).getOccur(), equalTo(BooleanClause.Occur.MUST_NOT));
}
Example 4: testBuildWordScorer
import org.apache.lucene.analysis.core.WhitespaceAnalyzer; // import the required package/class
/**
 * Test the WordScorer emitted by the smoothing model
 */
public void testBuildWordScorer() throws IOException {
    SmoothingModel testModel = createTestModel();
    Map<String, Analyzer> mapping = new HashMap<>();
    mapping.put("field", new WhitespaceAnalyzer());
    PerFieldAnalyzerWrapper wrapper = new PerFieldAnalyzerWrapper(new WhitespaceAnalyzer(), mapping);
    IndexWriter writer = new IndexWriter(new RAMDirectory(), new IndexWriterConfig(wrapper));
    Document doc = new Document();
    doc.add(new Field("field", "someText", TextField.TYPE_NOT_STORED));
    writer.addDocument(doc);
    DirectoryReader ir = DirectoryReader.open(writer);
    WordScorer wordScorer = testModel.buildWordScorerFactory().newScorer(ir, MultiFields.getTerms(ir, "field"),
            "field", 0.9d, BytesRefs.toBytesRef(" "));
    assertWordScorer(wordScorer, testModel);
}
Example 5: loadAnalyzerFactory
import org.apache.lucene.analysis.core.WhitespaceAnalyzer; // import the required package/class
@Override
protected void loadAnalyzerFactory(Map<String, AnalyzerInfo> analyzerFactoryMap) {
    // extracts the entire input as a single token
    registerAnalyzer(analyzerFactoryMap, "keyword", "Keyword Analyzer", new DefaultAnalyzerFactory(KeywordAnalyzer.class));
    // Lucene's StandardAnalyzer
    registerAnalyzer(analyzerFactoryMap, "standard", "Standard Analyzer", new DefaultAnalyzerFactory(StandardAnalyzer.class));
    registerAnalyzer(analyzerFactoryMap, "ngram", "NGram Analyzer", new DefaultAnalyzerFactory(NGramWordAnalyzer.class));
    registerAnalyzer(analyzerFactoryMap, "primary", "Primary Word Analyzer", new DefaultAnalyzerFactory(PrimaryWordAnalyzer.class));
    registerAnalyzer(analyzerFactoryMap, "whitespace", "Whitespace Analyzer", new DefaultAnalyzerFactory(WhitespaceAnalyzer.class));
    registerAnalyzer(analyzerFactoryMap, "csv", "Comma separated value Analyzer", new DefaultAnalyzerFactory(CSVAnalyzer.class));
    registerAnalyzer(analyzerFactoryMap, "autocomplete", "Autocomplete Analyzer", new DefaultAnalyzerFactory(AutocompleteAnalyzer.class));
}
Example 6: index
import org.apache.lucene.analysis.core.WhitespaceAnalyzer; // import the required package/class
/**
 * Index a picture.
 * @param source the raw image bytes
 * @param picture_id the UUID identifying the picture
 * @param conf an optional IndexWriterConfig; a default one is created if null
 * @throws IOException
 */
public static void index(byte[] source, UUID picture_id, IndexWriterConfig conf) throws IOException {
    ByteArrayInputStream in = new ByteArrayInputStream(source);
    BufferedImage image = ImageIO.read(in);
    // Create a Lucene IndexWriter config if none was supplied
    log.debug("Is Lucene configured? " + (conf != null));
    if (conf == null) {
        conf = new IndexWriterConfig(LuceneUtils.LUCENE_VERSION, new WhitespaceAnalyzer(LuceneUtils.LUCENE_VERSION));
        conf.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    }
    luceneIndexer(image, picture_id, FeatureEnumerate.AutoColorCorrelogram.getText(), DocumentBuilderFactory.getAutoColorCorrelogramDocumentBuilder(), conf);
    luceneIndexer(image, picture_id, FeatureEnumerate.CEDD.getText(), DocumentBuilderFactory.getCEDDDocumentBuilder(), conf);
    luceneIndexer(image, picture_id, FeatureEnumerate.ColorLayout.getText(), DocumentBuilderFactory.getColorLayoutBuilder(), conf);
    luceneIndexer(image, picture_id, FeatureEnumerate.EdgeHistogram.getText(), DocumentBuilderFactory.getEdgeHistogramBuilder(), conf);
    luceneIndexer(image, picture_id, FeatureEnumerate.ColorHistogram.getText(), DocumentBuilderFactory.getColorHistogramDocumentBuilder(), conf);
    luceneIndexer(image, picture_id, FeatureEnumerate.PHOG.getText(), DocumentBuilderFactory.getPHOGDocumentBuilder(), conf);
}
Example 7: deleteFromFeature
import org.apache.lucene.analysis.core.WhitespaceAnalyzer; // import the required package/class
private static void deleteFromFeature(UUID pictureId, Term term, String prefix, IndexWriterConfig conf) throws IOException {
    File file = getPath(prefix);
    // Create a Lucene IndexWriter config if none was supplied
    log.debug("Is Lucene configured? " + (conf != null));
    if (conf == null) {
        conf = new IndexWriterConfig(LuceneUtils.LUCENE_VERSION, new WhitespaceAnalyzer(LuceneUtils.LUCENE_VERSION));
        conf.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    }
    IndexWriter iw = new IndexWriter(FSDirectory.open(file), conf);
    iw.deleteDocuments(term);
    iw.close();
}
Example 8: createTestNormsDocument
import org.apache.lucene.analysis.core.WhitespaceAnalyzer; // import the required package/class
private Document createTestNormsDocument(boolean setNormsProp,
        boolean normsPropVal, boolean setBodyNormsProp, boolean bodyNormsVal)
        throws Exception {
    Properties props = new Properties();
    // Indexing configuration.
    props.setProperty("analyzer", WhitespaceAnalyzer.class.getName());
    props.setProperty("directory", "RAMDirectory");
    if (setNormsProp) {
        props.setProperty("doc.tokenized.norms", Boolean.toString(normsPropVal));
    }
    if (setBodyNormsProp) {
        props.setProperty("doc.body.tokenized.norms", Boolean.toString(bodyNormsVal));
    }
    // Create PerfRunData
    Config config = new Config(props);
    DocMaker dm = new DocMaker();
    dm.setConfig(config, new OneDocSource());
    return dm.makeDocument();
}
Example 9: index
import org.apache.lucene.analysis.core.WhitespaceAnalyzer; // import the required package/class
/** Build the example index. */
private void index() throws IOException {
    IndexWriter indexWriter = new IndexWriter(indexDir, new IndexWriterConfig(FacetExamples.EXAMPLES_VER,
            new WhitespaceAnalyzer()));
    // Writes facet ords to a separate directory from the main index
    DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);

    Document doc = new Document();
    doc.add(new TextField("c", "foo bar", Store.NO));
    doc.add(new NumericDocValuesField("popularity", 5L));
    doc.add(new FacetField("A", "B"));
    indexWriter.addDocument(config.build(taxoWriter, doc));

    doc = new Document();
    doc.add(new TextField("c", "foo foo bar", Store.NO));
    doc.add(new NumericDocValuesField("popularity", 3L));
    doc.add(new FacetField("A", "C"));
    indexWriter.addDocument(config.build(taxoWriter, doc));

    indexWriter.close();
    taxoWriter.close();
}
Example 10: index
import org.apache.lucene.analysis.core.WhitespaceAnalyzer; // import the required package/class
/** Build the example index. */
public void index() throws IOException {
    IndexWriter indexWriter = new IndexWriter(indexDir, new IndexWriterConfig(FacetExamples.EXAMPLES_VER,
            new WhitespaceAnalyzer()));

    // Add documents with a fake timestamp, 1000 sec before
    // "now", 2000 sec before "now", ...:
    for (int i = 0; i < 100; i++) {
        Document doc = new Document();
        long then = nowSec - i * 1000;
        // Add as doc values field, so we can compute range facets:
        doc.add(new NumericDocValuesField("timestamp", then));
        // Add as numeric field so we can drill-down:
        doc.add(new LongField("timestamp", then, Field.Store.NO));
        indexWriter.addDocument(doc);
    }

    // Open near-real-time searcher
    searcher = new IndexSearcher(DirectoryReader.open(indexWriter, true));
    indexWriter.close();
}
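Again, only the indexing half is shown. A sketch of the matching search side, patterned on Lucene's own RangeFacetsExample: dynamic LongRange facets are counted over the "timestamp" doc values field written above, using the searcher and nowSec fields from the example (the concrete range labels and boundaries here are illustrative):

FacetsCollector fc = new FacetsCollector();
FacetsCollector.search(searcher, new MatchAllDocsQuery(), 10, fc);
Facets facets = new LongRangeFacetCounts("timestamp", fc,
        new LongRange("Past hour", nowSec - 3600, true, nowSec, true),
        new LongRange("Past six hours", nowSec - 6 * 3600, true, nowSec, true),
        new LongRange("Past day", nowSec - 24 * 3600, true, nowSec, true));
System.out.println(facets.getTopChildren(10, "timestamp"));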
Example 11: testUnicode
import org.apache.lucene.analysis.core.WhitespaceAnalyzer; // import the required package/class
@Test
public void testUnicode() {
    SpellingQueryConverter converter = new SpellingQueryConverter();
    converter.init(new NamedList());
    converter.setAnalyzer(new WhitespaceAnalyzer());

    // Chinese text with no whitespace: WhitespaceAnalyzer keeps the whole value as a single token
    Collection<Token> tokens = converter.convert("text_field:我购买了道具和服装。");
    assertTrue("tokens is null and it shouldn't be", tokens != null);
    assertEquals("tokens Size: " + tokens.size() + " is not 1", 1, tokens.size());

    tokens = converter.convert("text_购field:我购买了道具和服装。");
    assertTrue("tokens is null and it shouldn't be", tokens != null);
    assertEquals("tokens Size: " + tokens.size() + " is not 1", 1, tokens.size());

    tokens = converter.convert("text_field:我购xyz买了道具和服装。");
    assertTrue("tokens is null and it shouldn't be", tokens != null);
    assertEquals("tokens Size: " + tokens.size() + " is not 1", 1, tokens.size());
}
Example 12: testMultipleClauses
import org.apache.lucene.analysis.core.WhitespaceAnalyzer; // import the required package/class
@Test
public void testMultipleClauses() {
    SpellingQueryConverter converter = new SpellingQueryConverter();
    converter.init(new NamedList());
    converter.setAnalyzer(new WhitespaceAnalyzer());

    // two field:value pairs should give two tokens
    Collection<Token> tokens = converter.convert("买text_field:我购买了道具和服装。 field2:bar");
    assertTrue("tokens is null and it shouldn't be", tokens != null);
    assertEquals("tokens Size: " + tokens.size() + " is not 2", 2, tokens.size());

    // a field:value pair and a search term should give two tokens
    tokens = converter.convert("text_field:我购买了道具和服装。 bar");
    assertTrue("tokens is null and it shouldn't be", tokens != null);
    assertEquals("tokens Size: " + tokens.size() + " is not 2", 2, tokens.size());
}
Example 13: testTermOffsetsTokenStream
import org.apache.lucene.analysis.core.WhitespaceAnalyzer; // import the required package/class
@Test
public void testTermOffsetsTokenStream() throws Exception {
    String[] multivalued = { "a b c d", "e f g", "h", "i j k l m n" };
    Analyzer a1 = new WhitespaceAnalyzer();
    TokenStream tokenStream = a1.tokenStream("", "a b c d e f g h i j k l m n");
    tokenStream.reset();
    TermOffsetsTokenStream tots = new TermOffsetsTokenStream(tokenStream);
    for (String v : multivalued) {
        TokenStream ts1 = tots.getMultiValuedTokenStream(v.length());
        Analyzer a2 = new WhitespaceAnalyzer();
        TokenStream ts2 = a2.tokenStream("", v);
        ts2.reset();
        while (ts1.incrementToken()) {
            assertTrue(ts2.incrementToken());
            assertEquals(ts1, ts2);
        }
        assertFalse(ts2.incrementToken());
    }
}
Example 14: generateIndex
import org.apache.lucene.analysis.core.WhitespaceAnalyzer; // import the required package/class
public void generateIndex(String path, List<AnnotatedEntailmentPair> aps) throws Exception {
    log.info("Rules extraction started.");
    IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_47, new WhitespaceAnalyzer(Version.LUCENE_47));
    conf.setOpenMode(OpenMode.CREATE);
    writer = new IndexWriter(FSDirectory.open(new File(path)), conf);

    Document doc = new Document();
    doc.add(new StringField(IndexRulesSource.TERMDOC_FIELD, "true", Store.YES));
    for (String u : rulesSource.uses())
        doc.add(new StringField(IndexRulesSource.USES_FIELD, u, Store.YES));
    writer.addDocument(doc);

    start(aps.iterator());
    writer.waitForMerges();
    writer.close(true);
    log.info(cache.size() + " rules extracted!");
}
Example 15: KeywordFinder
import org.apache.lucene.analysis.core.WhitespaceAnalyzer; // import the required package/class
public KeywordFinder(File inputFile) throws IOException {
    RAMDirectory ramdir = new RAMDirectory();
    IndexWriterConfig conf = new IndexWriterConfig(Version.LATEST, new WhitespaceAnalyzer());
    IndexWriter writer = new IndexWriter(ramdir, conf);
    BufferedReader reader = new BufferedReader(new FileReader(inputFile));
    while (reader.ready()) {
        String keyword = reader.readLine().toLowerCase().trim();
        if (keyword.length() > 0) {
            Document doc = new Document();
            // normalize separators so each keyword part becomes a whitespace-delimited token
            doc.add(new TextField("keyword", keyword.replace("-", " ").replace("_", " ").replace("\\", " ").replace("/", " "), Field.Store.YES));
            writer.addDocument(doc);
        }
    }
    reader.close();
    writer.close();
    searcher = new IndexSearcher(DirectoryReader.open(ramdir));
}
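The constructor leaves the searcher field ready to use, but no query method is shown on this page. A hypothetical lookup against the "keyword" field built above might look like the sketch below; note that WhitespaceAnalyzer splits a multi-word keyword into several tokens, so an exact multi-word match would need a PhraseQuery instead of the single-term query shown here:

// hypothetical usage of the searcher built in the constructor above
public boolean contains(String keyword) throws IOException {
    TermQuery query = new TermQuery(new Term("keyword", keyword.toLowerCase().trim()));
    return searcher.search(query, 1).totalHits > 0;
}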