

Java WhitespaceAnalyzer Class Code Examples

This article collects typical usage examples of the Java class org.apache.lucene.analysis.core.WhitespaceAnalyzer. If you have been wondering what exactly the WhitespaceAnalyzer class is for, how to use it, or where to find usage examples, the curated class examples below should help.


The WhitespaceAnalyzer class belongs to the org.apache.lucene.analysis.core package. Fifteen code examples of the class are presented below, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Java code samples.
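Before the collected examples, here is a minimal, self-contained sketch (not taken from any of the projects below) of what WhitespaceAnalyzer actually does: it splits text on whitespace only, with no lowercasing, stemming, or stop-word removal. The sketch assumes the no-argument constructor available since Lucene 5.x, which most of the examples below also use:

import java.io.IOException;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class WhitespaceAnalyzerDemo {
    public static void main(String[] args) throws IOException {
        try (WhitespaceAnalyzer analyzer = new WhitespaceAnalyzer();
             TokenStream stream = analyzer.tokenStream("field", "Hello  Lucene-World FOO")) {
            CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
            stream.reset();
            while (stream.incrementToken()) {
                // Prints [Hello], [Lucene-World], [FOO]: case and punctuation are preserved
                System.out.println("[" + term + "]");
            }
            stream.end();
        }
    }
}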

Example 1: testRamDirectory

import org.apache.lucene.analysis.core.WhitespaceAnalyzer; // import the required package/class
public void testRamDirectory() throws IOException {
    long start = System.currentTimeMillis();
    IndexWriterConfig indexWriterConfig = new IndexWriterConfig(new WhitespaceAnalyzer())
            .setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    RAMDirectory ramDirectory = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(ramDirectory, indexWriterConfig);
    for (int i = 0; i < 10000000; i++) {
        indexWriter.addDocument(addDocument(i));
    }
    indexWriter.commit();
    indexWriter.close();
    long end = System.currentTimeMillis();
    log.error("RamDirectory consumes {}s!", (end - start) / 1000);
    start = System.currentTimeMillis();
    IndexSearcher indexSearcher = new IndexSearcher(DirectoryReader.open(ramDirectory));
    int total = 0;
    for (int i = 0; i < 10000000; i++) {
        TermQuery key1 = new TermQuery(new Term("key1", "key" + i));
        TopDocs search = indexSearcher.search(key1, 10);
        total += search.totalHits;
    }
    System.out.println(total);
    end = System.currentTimeMillis();
    log.error("RamDirectory search consumes {}ms!", (end - start));
}
 
Developer: shijiebei2009, Project: RedisDirectory, Lines: 26, Source: TestLucene.java
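
Note: Examples 1 and 2 call an addDocument(int) helper that the snippets do not show. Judging from the TermQuery on field "key1", a minimal hypothetical version might look like the following (the real one lives in TestLucene.java and may differ):

private Document addDocument(int i) {
    Document document = new Document();
    // Hypothetical: StringField indexes the exact value "key" + i without tokenization,
    // so each TermQuery in the test matches exactly one document.
    document.add(new StringField("key1", "key" + i, Field.Store.YES));
    return document;
}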

Example 2: testMMapDirectory

import org.apache.lucene.analysis.core.WhitespaceAnalyzer; // import the required package/class
public void testMMapDirectory() throws IOException {
    long start = System.currentTimeMillis();
    IndexWriterConfig indexWriterConfig = new IndexWriterConfig(new WhitespaceAnalyzer())
            .setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    FSDirectory open = FSDirectory.open(Paths.get("E:/testlucene"));
    IndexWriter indexWriter = new IndexWriter(open, indexWriterConfig);
    for (int i = 0; i < 10000000; i++) {
        indexWriter.addDocument(addDocument(i));
    }
    indexWriter.commit();
    indexWriter.close();
    long end = System.currentTimeMillis();
    log.error("MMapDirectory consumes {}s!", (end - start) / 1000);
    start = System.currentTimeMillis();
    IndexSearcher indexSearcher = new IndexSearcher(DirectoryReader.open(open));
    int total = 0;
    for (int i = 0; i < 10000000; i++) {
        TermQuery key1 = new TermQuery(new Term("key1", "key" + i));
        TopDocs search = indexSearcher.search(key1, 10);
        total += search.totalHits;
    }
    System.out.println(total);
    end = System.currentTimeMillis();
    log.error("MMapDirectory search consumes {}ms!", (end - start));
}
 
Developer: shijiebei2009, Project: RedisDirectory, Lines: 26, Source: TestLucene.java

Example 3: testCreateMultiDocumentSearcher

import org.apache.lucene.analysis.core.WhitespaceAnalyzer; // import the required package/class
public void testCreateMultiDocumentSearcher() throws Exception {
    int numDocs = randomIntBetween(2, 8);
    List<ParseContext.Document> docs = new ArrayList<>(numDocs);
    for (int i = 0; i < numDocs; i++) {
        docs.add(new ParseContext.Document());
    }

    Analyzer analyzer = new WhitespaceAnalyzer();
    ParsedDocument parsedDocument = new ParsedDocument(null, null, "_id", "_type", null, docs, null, null, null);
    IndexSearcher indexSearcher = PercolateQueryBuilder.createMultiDocumentSearcher(analyzer, parsedDocument);
    assertThat(indexSearcher.getIndexReader().numDocs(), equalTo(numDocs));

    // Ensure that any query gets modified so that the nested docs are never included as hits:
    Query query = new MatchAllDocsQuery();
    BooleanQuery result = (BooleanQuery) indexSearcher.createNormalizedWeight(query, true).getQuery();
    assertThat(result.clauses().size(), equalTo(2));
    assertThat(result.clauses().get(0).getQuery(), sameInstance(query));
    assertThat(result.clauses().get(0).getOccur(), equalTo(BooleanClause.Occur.MUST));
    assertThat(result.clauses().get(1).getOccur(), equalTo(BooleanClause.Occur.MUST_NOT));
}
 
Developer: justor, Project: elasticsearch_my, Lines: 21, Source: PercolateQueryBuilderTests.java

Example 4: testBuildWordScorer

import org.apache.lucene.analysis.core.WhitespaceAnalyzer; // import the required package/class
/**
 * Test the WordScorer emitted by the smoothing model
 */
public void testBuildWordScorer() throws IOException {
    SmoothingModel testModel = createTestModel();
    Map<String, Analyzer> mapping = new HashMap<>();
    mapping.put("field", new WhitespaceAnalyzer());
    PerFieldAnalyzerWrapper wrapper = new PerFieldAnalyzerWrapper(new WhitespaceAnalyzer(), mapping);
    IndexWriter writer = new IndexWriter(new RAMDirectory(), new IndexWriterConfig(wrapper));
    Document doc = new Document();
    doc.add(new Field("field", "someText", TextField.TYPE_NOT_STORED));
    writer.addDocument(doc);
    DirectoryReader ir = DirectoryReader.open(writer);

    WordScorer wordScorer = testModel.buildWordScorerFactory().newScorer(ir, MultiFields.getTerms(ir, "field"), "field", 0.9d,
            BytesRefs.toBytesRef(" "));
    assertWordScorer(wordScorer, testModel);
}
 
Developer: justor, Project: elasticsearch_my, Lines: 19, Source: SmoothingModelTestCase.java

Example 5: loadAnalyzerFactory

import org.apache.lucene.analysis.core.WhitespaceAnalyzer; // import the required package/class
@Override
protected void loadAnalyzerFactory(Map<String, AnalyzerInfo> analyzerFactoryMap) {
	// Treats the entire input as a single token
	registerAnalyzer(analyzerFactoryMap, "keyword", "Keyword Analyzer", new DefaultAnalyzerFactory(KeywordAnalyzer.class));
	// Lucene's StandardAnalyzer
	registerAnalyzer(analyzerFactoryMap, "standard", "Standard Analyzer", new DefaultAnalyzerFactory(StandardAnalyzer.class));
	registerAnalyzer(analyzerFactoryMap, "ngram", "NGram Analyzer", new DefaultAnalyzerFactory(NGramWordAnalyzer.class));
	registerAnalyzer(analyzerFactoryMap, "primary", "Primary Word Analyzer", new DefaultAnalyzerFactory(PrimaryWordAnalyzer.class));
	// Splits tokens on whitespace only
	registerAnalyzer(analyzerFactoryMap, "whitespace", "Whitespace Analyzer", new DefaultAnalyzerFactory(WhitespaceAnalyzer.class));
	registerAnalyzer(analyzerFactoryMap, "csv", "Comma Separated Value Analyzer", new DefaultAnalyzerFactory(CSVAnalyzer.class));
	registerAnalyzer(analyzerFactoryMap, "autocomplete", "Autocomplete Analyzer", new DefaultAnalyzerFactory(AutocompleteAnalyzer.class));
}
 
Developer: gncloud, Project: fastcatsearch3, Lines: 18, Source: BasicAnalysisPlugin.java

Example 6: index

import org.apache.lucene.analysis.core.WhitespaceAnalyzer; // import the required package/class
/**
 * Index a picture with a set of LIRE global image features.
 * @param source raw image bytes
 * @param picture_id UUID identifying the picture
 * @param conf Lucene IndexWriterConfig; a default one is created when null
 * @throws IOException if the image cannot be read or indexed
 */
public static void index(byte[] source, UUID picture_id, IndexWriterConfig conf) throws IOException
{
    ByteArrayInputStream in = new ByteArrayInputStream(source);
    BufferedImage image = ImageIO.read(in);

    // Create a Lucene IndexWriterConfig if none was supplied
    log.debug("Lucene IndexWriterConfig supplied? " + (conf != null));
    if(conf == null) {
        conf = new IndexWriterConfig(LuceneUtils.LUCENE_VERSION, new WhitespaceAnalyzer(LuceneUtils.LUCENE_VERSION));
        conf.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    }

    luceneIndexer(image, picture_id, FeatureEnumerate.AutoColorCorrelogram.getText(), DocumentBuilderFactory.getAutoColorCorrelogramDocumentBuilder(), conf);
    luceneIndexer(image, picture_id, FeatureEnumerate.CEDD.getText(), DocumentBuilderFactory.getCEDDDocumentBuilder(), conf);
    luceneIndexer(image, picture_id, FeatureEnumerate.ColorLayout.getText(), DocumentBuilderFactory.getColorLayoutBuilder(), conf);
    luceneIndexer(image, picture_id, FeatureEnumerate.EdgeHistogram.getText(), DocumentBuilderFactory.getEdgeHistogramBuilder(), conf);
    luceneIndexer(image, picture_id, FeatureEnumerate.ColorHistogram.getText(), DocumentBuilderFactory.getColorHistogramDocumentBuilder(), conf);
    luceneIndexer(image, picture_id, FeatureEnumerate.PHOG.getText(), DocumentBuilderFactory.getPHOGDocumentBuilder(), conf);

}
 
Developer: dalbelap, Project: flipper-reverse-image-search, Lines: 28, Source: LireBuilder.java

Example 7: deleteFromFeature

import org.apache.lucene.analysis.core.WhitespaceAnalyzer; // import the required package/class
private static void deleteFromFeature(UUID pictureId, Term term, String prefix, IndexWriterConfig conf) throws IOException {

    File file = getPath(prefix);

    // Create a Lucene IndexWriterConfig if none was supplied
    log.debug("Lucene IndexWriterConfig supplied? " + (conf != null));
    if(conf == null) {
        conf = new IndexWriterConfig(LuceneUtils.LUCENE_VERSION, new WhitespaceAnalyzer(LuceneUtils.LUCENE_VERSION));
        conf.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    }
    IndexWriter iw = new IndexWriter(FSDirectory.open(file), conf);

    iw.deleteDocuments(term);

    iw.close();
}
 
Developer: dalbelap, Project: flipper-reverse-image-search, Lines: 17, Source: LireBuilder.java

Example 8: createTestNormsDocument

import org.apache.lucene.analysis.core.WhitespaceAnalyzer; // import the required package/class
private Document createTestNormsDocument(boolean setNormsProp,
    boolean normsPropVal, boolean setBodyNormsProp, boolean bodyNormsVal)
    throws Exception {
  Properties props = new Properties();
  
  // Indexing configuration.
  props.setProperty("analyzer", WhitespaceAnalyzer.class.getName());
  props.setProperty("directory", "RAMDirectory");
  if (setNormsProp) {
    props.setProperty("doc.tokenized.norms", Boolean.toString(normsPropVal));
  }
  if (setBodyNormsProp) {
    props.setProperty("doc.body.tokenized.norms", Boolean.toString(bodyNormsVal));
  }
  
  // Create PerfRunData
  Config config = new Config(props);
  
  DocMaker dm = new DocMaker();
  dm.setConfig(config, new OneDocSource());
  return dm.makeDocument();
}
 
Developer: europeana, Project: search, Lines: 23, Source: DocMakerTest.java

Example 9: index

import org.apache.lucene.analysis.core.WhitespaceAnalyzer; // import the required package/class
/** Build the example index. */
private void index() throws IOException {
  IndexWriter indexWriter = new IndexWriter(indexDir, new IndexWriterConfig(FacetExamples.EXAMPLES_VER, 
      new WhitespaceAnalyzer()));

  // Writes facet ords to a separate directory from the main index
  DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);

  Document doc = new Document();
  doc.add(new TextField("c", "foo bar", Store.NO));
  doc.add(new NumericDocValuesField("popularity", 5L));
  doc.add(new FacetField("A", "B"));
  indexWriter.addDocument(config.build(taxoWriter, doc));

  doc = new Document();
  doc.add(new TextField("c", "foo foo bar", Store.NO));
  doc.add(new NumericDocValuesField("popularity", 3L));
  doc.add(new FacetField("A", "C"));
  indexWriter.addDocument(config.build(taxoWriter, doc));
  
  indexWriter.close();
  taxoWriter.close();
}
 
Developer: europeana, Project: search, Lines: 24, Source: ExpressionAggregationFacetsExample.java

Example 10: index

import org.apache.lucene.analysis.core.WhitespaceAnalyzer; // import the required package/class
/** Build the example index. */
public void index() throws IOException {
  IndexWriter indexWriter = new IndexWriter(indexDir, new IndexWriterConfig(FacetExamples.EXAMPLES_VER, 
      new WhitespaceAnalyzer()));

  // Add documents with a fake timestamp, 1000 sec before
  // "now", 2000 sec before "now", ...:
  for(int i=0;i<100;i++) {
    Document doc = new Document();
    long then = nowSec - i * 1000;
    // Add as doc values field, so we can compute range facets:
    doc.add(new NumericDocValuesField("timestamp", then));
    // Add as numeric field so we can drill-down:
    doc.add(new LongField("timestamp", then, Field.Store.NO));
    indexWriter.addDocument(doc);
  }

  // Open near-real-time searcher
  searcher = new IndexSearcher(DirectoryReader.open(indexWriter, true));
  indexWriter.close();
}
 
Developer: europeana, Project: search, Lines: 22, Source: RangeFacetsExample.java

Example 11: testUnicode

import org.apache.lucene.analysis.core.WhitespaceAnalyzer; // import the required package/class
@Test
public void testUnicode() {
  SpellingQueryConverter converter = new SpellingQueryConverter();
  converter.init(new NamedList());
  converter.setAnalyzer(new WhitespaceAnalyzer());
  
  // Chinese text value
  Collection<Token> tokens = converter.convert("text_field:我购买了道具和服装。");
  assertTrue("tokens is null and it shouldn't be", tokens != null);
  assertEquals("tokens Size: " + tokens.size() + " is not 1", 1, tokens.size());

  tokens = converter.convert("text_购field:我购买了道具和服装。");
  assertTrue("tokens is null and it shouldn't be", tokens != null);
  assertEquals("tokens Size: " + tokens.size() + " is not 1", 1, tokens.size());

  tokens = converter.convert("text_field:我购xyz买了道具和服装。");
  assertTrue("tokens is null and it shouldn't be", tokens != null);
  assertEquals("tokens Size: " + tokens.size() + " is not 1", 1, tokens.size());
}
 
Developer: europeana, Project: search, Lines: 20, Source: SpellingQueryConverterTest.java

Example 12: testMultipleClauses

import org.apache.lucene.analysis.core.WhitespaceAnalyzer; // import the required package/class
@Test
public void testMultipleClauses() {
  SpellingQueryConverter converter = new SpellingQueryConverter();
  converter.init(new NamedList());
  converter.setAnalyzer(new WhitespaceAnalyzer());

  // two field:value pairs should give two tokens
  Collection<Token> tokens = converter.convert("买text_field:我购买了道具和服装。 field2:bar");
  assertTrue("tokens is null and it shouldn't be", tokens != null);
  assertEquals("tokens Size: " + tokens.size() + " is not 2", 2, tokens.size());

  // a field:value pair and a search term should give two tokens
  tokens = converter.convert("text_field:我购买了道具和服装。 bar");
  assertTrue("tokens is null and it shouldn't be", tokens != null);
  assertEquals("tokens Size: " + tokens.size() + " is not 2", 2, tokens.size());
}
 
Developer: europeana, Project: search, Lines: 17, Source: SpellingQueryConverterTest.java

Example 13: testTermOffsetsTokenStream

import org.apache.lucene.analysis.core.WhitespaceAnalyzer; // import the required package/class
@Test
public void testTermOffsetsTokenStream() throws Exception {
  String[] multivalued = { "a b c d", "e f g", "h", "i j k l m n" };
  Analyzer a1 = new WhitespaceAnalyzer();
  TokenStream tokenStream = a1.tokenStream("", "a b c d e f g h i j k l m n");
  tokenStream.reset();

  TermOffsetsTokenStream tots = new TermOffsetsTokenStream(
      tokenStream);
  for( String v : multivalued ){
    TokenStream ts1 = tots.getMultiValuedTokenStream( v.length() );
    Analyzer a2 = new WhitespaceAnalyzer();
    TokenStream ts2 = a2.tokenStream("", v);
    ts2.reset();

    while (ts1.incrementToken()) {
      assertTrue(ts2.incrementToken());
      assertEquals(ts1, ts2);
    }
    assertFalse(ts2.incrementToken());
  }
}
 
Developer: europeana, Project: search, Lines: 23, Source: HighlighterTest.java

Example 14: generateIndex

import org.apache.lucene.analysis.core.WhitespaceAnalyzer; // import the required package/class
public void generateIndex(String path, List<AnnotatedEntailmentPair> aps) throws Exception {
    log.info("Rules extraction started.");
    IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_47, new WhitespaceAnalyzer(Version.LUCENE_47));
    conf.setOpenMode(OpenMode.CREATE);
    writer = new IndexWriter(FSDirectory.open(new File(path)), conf);
    Document doc = new Document();
    doc.add(new StringField(IndexRulesSource.TERMDOC_FIELD, "true", Store.YES));
    for (String u : rulesSource.uses())
        doc.add(new StringField(IndexRulesSource.USES_FIELD, u, Store.YES));
    writer.addDocument(doc);
    start(aps.iterator());
    writer.waitForMerges();
    writer.close(true);
    log.info(cache.size() + " rules extracted!");
}
 
Developer: kouylekov, Project: edits, Lines: 18, Source: RulesIndexGenerator.java

Example 15: KeywordFinder

import org.apache.lucene.analysis.core.WhitespaceAnalyzer; // import the required package/class
public KeywordFinder(File inputFile) throws IOException {
    RAMDirectory ramdir = new RAMDirectory();
    IndexWriterConfig conf = new IndexWriterConfig(Version.LATEST, new WhitespaceAnalyzer());
    IndexWriter writer = new IndexWriter(ramdir, conf);
    BufferedReader reader = new BufferedReader(new FileReader(inputFile));
    while (reader.ready()) {
        String keyword = reader.readLine().toLowerCase().trim();
        if (keyword.length() > 0) {
            Document doc = new Document();
            doc.add(new TextField("keyword", keyword.replace("-", " ").replace("_", " ").replace("\\", " ").replace("/", " "), Field.Store.YES));
            writer.addDocument(doc);
        }
    }
    // Close the input file as well as the writer once all keywords are indexed
    reader.close();
    writer.close();
    searcher = new IndexSearcher(DirectoryReader.open(ramdir));
}
 
Developer: pippokill, Project: tri, Lines: 17, Source: KeywordFinder.java


Note: The org.apache.lucene.analysis.core.WhitespaceAnalyzer class examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other open-source code and documentation platforms. The snippets were selected from open-source projects contributed by various developers; the source code copyright belongs to the original authors, and distribution and use should comply with each project's license. Please do not republish without permission.