本文整理汇总了Java中org.apache.lucene.analysis.core.SimpleAnalyzer类的典型用法代码示例。如果您正苦于以下问题:Java SimpleAnalyzer类的具体用法?Java SimpleAnalyzer怎么用?Java SimpleAnalyzer使用的例子?那么, 这里精选的类代码示例或许可以为您提供帮助。
SimpleAnalyzer类属于org.apache.lucene.analysis.core包,在下文中一共展示了SimpleAnalyzer类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: IndexManager
import org.apache.lucene.analysis.core.SimpleAnalyzer; //导入依赖的package包/类
/**
 * Constructs an IndexManager and builds the index eagerly: opens an
 * IndexWriter over the directory at PATH, loads every tweet from the
 * Oracle database, indexes them, and closes the writer.
 * Failures are reported to stderr rather than propagated, since the
 * constructor signature declares no checked exceptions.
 */
public IndexManager(){
    SimpleAnalyzer analyzer = new SimpleAnalyzer(Version.LUCENE_47);
    IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_47, analyzer);
    try {
        System.out.println("Building the Index...");
        this.indexWriter = new IndexWriter(FSDirectory.open(new File(PATH)), indexWriterConfig);
        // First ask the database for all of the tweets...
        OracleDAL db = new OracleDAL();
        ArrayList<Tweet> list = (ArrayList<Tweet>) db.getAllTweets();
        // ...then build the index from them and report how many went in.
        int indexedDocumentCount = this.indexDocsFromList(indexWriter, list);
        System.out.println("Indexed " + indexedDocumentCount + " documents.");
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        // Always release the writer (and its directory lock), even when
        // indexing failed part-way through.
        try {
            if (this.indexWriter != null) {
                this.indexWriter.close();
            }
        } catch (Exception closeFailure) {
            closeFailure.printStackTrace();
        }
    }
}
示例2: createIndexSpellchecker
import org.apache.lucene.analysis.core.SimpleAnalyzer; //导入依赖的package包/类
/**
 * Builds an in-memory SpellChecker from the terms of the given index.
 * The dictionary is sourced from DRUG_TERMS_FIELD with a 0.0 frequency
 * threshold, i.e. every term in the field is admitted.
 *
 * @param index source index whose terms seed the spellchecker dictionary
 * @return a ready-to-use SpellChecker backed by a RAMDirectory
 * @throws IOException if reading the source index or writing the
 *         spellchecker index fails
 */
@NotNull
private static SpellChecker createIndexSpellchecker(@NotNull final Directory index) throws IOException {
    final Directory spellCheckerDirectory = new RAMDirectory();
    final Analyzer analyzer = new SimpleAnalyzer();
    final SpellChecker spellChecker = new SpellChecker(spellCheckerDirectory);
    // The reader is only needed while the dictionary is being indexed;
    // try-with-resources closes it even if indexDictionary throws
    // (the original leaked it).
    try (final IndexReader indexReader = DirectoryReader.open(index)) {
        final IndexWriterConfig config = new IndexWriterConfig(analyzer);
        final Dictionary dictionary = new HighFrequencyDictionary(indexReader, DRUG_TERMS_FIELD, 0.0f);
        spellChecker.indexDictionary(dictionary, config, false);
    }
    spellChecker.setAccuracy(SPELLCHECK_ACCURACY);
    return spellChecker;
}
示例3: main
import org.apache.lucene.analysis.core.SimpleAnalyzer; //导入依赖的package包/类
/**
 * Reads a document from stdin, strips it to plain text with boilerpipe's
 * KeepEverythingExtractor, and prints every shingle (up to 9-grams over a
 * SimpleAnalyzer token stream), one per line.
 */
public static void main(String[] args) throws Exception {
    Analyzer analyzer = new ShingleAnalyzerWrapper(new SimpleAnalyzer(), 9);
    String content = KeepEverythingExtractor.INSTANCE.getText(new InputStreamReader(System.in));
    // try-with-resources closes the stream even if iteration throws
    // (the original leaked it on any exception).
    try (TokenStream ts = analyzer.tokenStream("extracted_text", content)) {
        CharTermAttribute cattr = ts.addAttribute(CharTermAttribute.class);
        ts.reset();
        while (ts.incrementToken()) {
            System.out.println(cattr.toString());
        }
        // TokenStream contract: end() after the last incrementToken(),
        // before close().
        ts.end();
    }
}
示例4: createRegistry
import org.apache.lucene.analysis.core.SimpleAnalyzer; //导入依赖的package包/类
/**
 * Populates the JNDI registry for the test: one on-disk index location and
 * one analyzer per analysis flavour (standard, simple, whitespace), plus
 * the directory of source documents to load.
 */
@Override
protected JndiRegistry createRegistry() throws Exception {
    final JndiRegistry jndi = new JndiRegistry(createJndiContext());
    // Source documents to be indexed.
    jndi.bind("load_dir", new File("src/test/resources/sources"));
    // Each index directory is paired with the analyzer that populates it.
    jndi.bind("std", new File("target/stdindexDir"));
    jndi.bind("stdAnalyzer", new StandardAnalyzer());
    jndi.bind("simple", new File("target/simpleindexDir"));
    jndi.bind("simpleAnalyzer", new SimpleAnalyzer());
    jndi.bind("whitespace", new File("target/whitespaceindexDir"));
    jndi.bind("whitespaceAnalyzer", new WhitespaceAnalyzer());
    return jndi;
}
示例5: LuceneService
import org.apache.lucene.analysis.core.SimpleAnalyzer; //导入依赖的package包/类
/**
 * Creates a LuceneService whose index lives under the given path.
 * An I/O failure while opening the directory is printed and swallowed,
 * leaving the service fields unset (original behavior preserved).
 *
 * @param directoryPath filesystem location of the index files
 */
public LuceneService(String directoryPath) {
    try {
        // Where the index files are stored on disk.
        dir = FSDirectory.open(new File(directoryPath));
        // Analyzer used for tokenizing content.
        analyzer = new SimpleAnalyzer(DEFAULT_VERSION);
    } catch (IOException e) {
        e.printStackTrace();
    }
}
示例6: testIndexing
import org.apache.lucene.analysis.core.SimpleAnalyzer; //导入依赖的package包/类
/**
 * Indexes every image under testdata/ferrari with a LIRE FCTH document
 * builder and enriches each document with the tags parsed from the
 * image's sibling .xml metadata file.
 */
public void testIndexing() throws IOException, ParserConfigurationException, SAXException {
    IndexWriterConfig iwConf = new IndexWriterConfig(Version.LUCENE_42, new SimpleAnalyzer(Version.LUCENE_42));
    // To append to a pre-existing index instead, uncomment:
    // iwConf.setOpenMode(IndexWriterConfig.OpenMode.APPEND);
    IndexWriter iw = new IndexWriter(FSDirectory.open(testIndex), iwConf);
    // LIRE DocumentBuilder extracting FCTH (just an example; any other feature will do).
    DocumentBuilder builder = DocumentBuilderFactory.getFCTHDocumentBuilder();
    ArrayList<File> files = FileUtils.getAllImageFiles(new File("testdata/ferrari"), true);
    // SAX machinery for the per-image XML metadata of the test data set.
    SAXParserFactory spf = SAXParserFactory.newInstance();
    spf.setNamespaceAware(true);
    SAXParser saxParser = spf.newSAXParser();
    XMLReader xmlReader = saxParser.getXMLReader();
    for (File img : files) {
        String path = img.getCanonicalPath();
        // Image features first; the builder adds them to the document.
        // NOTE(review): the FileInputStream is never explicitly closed here —
        // presumably LIRE's createDocument consumes and closes it; confirm.
        Document d = builder.createDocument(new FileInputStream(img), path);
        // Then the tags from the matching .xml file next to the image.
        path = path.substring(0,path.lastIndexOf('.')) + ".xml";
        TagHandler handler = new TagHandler();
        xmlReader.setContentHandler(handler);
        xmlReader.parse(new InputSource(new File(path).toURI().toString()));
        d.add(new TextField("tags", handler.getTags(), Field.Store.YES));
        // Don't forget to add the document to the index.
        iw.addDocument(d);
    }
    iw.close();
}
示例7: ContextSuggestDemo
import org.apache.lucene.analysis.core.SimpleAnalyzer; //导入依赖的package包/类
/**
 * Sets up the demo entirely in memory: one RAMDirectory for the search
 * index and one for the suggester, sharing a single SimpleAnalyzer, then
 * builds both indexes immediately.
 *
 * @throws IOException if building either index fails
 */
public ContextSuggestDemo() throws IOException {
indexDir = new RAMDirectory();
suggestDir = new RAMDirectory();
analyzer = new SimpleAnalyzer();
// Same analyzer for index- and query-time; trailing args are presumably
// minPrefixChars=1 and commitOnBuild=true — confirm against the Lucene
// version in use.
suggester = new AnalyzingInfixSuggester(suggestDir, analyzer, analyzer, 1, true);
buildSearchIndex();
buildSuggesterIndex();
}
示例8: getQuery
import org.apache.lucene.analysis.core.SimpleAnalyzer; //导入依赖的package包/类
/**
 * Creates a query for the given term, field and query type.
 *
 * @param term  search term for the query; split on single spaces for the
 *              boolean variant
 * @param field document field the term is matched against
 * @param type  either FileSearcher.QUERY_BOOLEAN (wildcard-per-word
 *              BooleanQuery) or FileSearcher.QUERY_STANDARD (QueryParser
 *              with a SimpleAnalyzer)
 * @return the built query, or {@code null} if the type matches neither
 *         constant or the standard parse fails (the ParseException is
 *         only printed, not rethrown)
 */
private Query getQuery(String term, String field, int type) {
Query qry = null;
if(type == FileSearcher.QUERY_BOOLEAN) {
qry = new BooleanQuery();
// NOTE(review): split(" ") yields empty tokens on consecutive spaces —
// presumably input is pre-normalized; confirm at the call sites.
String[] words = term.split(" ");
// First word: leading wildcard, so it may match the tail of a token.
((BooleanQuery) qry).add(new WildcardQuery(new Term(field, "*" + words[0])),
BooleanClause.Occur.MUST);
if(words.length > 1) {
// Middle words must match tokens exactly (no wildcard).
for(int i = 1; i < words.length - 1; i++) {
((BooleanQuery) qry).add(new WildcardQuery(new Term(field, words[i])),
BooleanClause.Occur.MUST);
}
// Last word: trailing wildcard, so it may match the head of a token.
((BooleanQuery) qry).add(new WildcardQuery(new Term(field,
words[words.length - 1] + "*")),
BooleanClause.Occur.MUST);
}
} else if(type == FileSearcher.QUERY_STANDARD) {
try {
qry = new QueryParser(Version.LUCENE_47, field,
new SimpleAnalyzer(Version.LUCENE_47)).parse(term);
} catch(ParseException e) {
// Swallowed on purpose: caller receives null on parse failure.
e.printStackTrace();
}
}
return qry;
}
示例9: testPerField
import org.apache.lucene.analysis.core.SimpleAnalyzer; //导入依赖的package包/类
/**
 * Verifies PerFieldAnalyzerWrapper routing: the default WhitespaceAnalyzer
 * (no lowercasing) handles "field", while the per-field SimpleAnalyzer
 * (lowercases) handles "special".
 */
public void testPerField() throws Exception {
    String text = "Qwerty";
    Map<String, Analyzer> analyzerPerField = new HashMap<String, Analyzer>();
    analyzerPerField.put("special", new SimpleAnalyzer(TEST_VERSION_CURRENT));
    PerFieldAnalyzerWrapper analyzer =
        new PerFieldAnalyzerWrapper(new WhitespaceAnalyzer(TEST_VERSION_CURRENT), analyzerPerField);
    // Default analyzer path: whitespace tokenization keeps the original case.
    TokenStream tokenStream = analyzer.tokenStream("field",
                                                   new StringReader(text));
    try {
        CharTermAttribute termAtt = tokenStream.getAttribute(CharTermAttribute.class);
        tokenStream.reset();
        assertTrue(tokenStream.incrementToken());
        assertEquals("WhitespaceAnalyzer does not lowercase",
                     "Qwerty",
                     termAtt.toString());
        // TokenStream contract: end() then close() (the original skipped both,
        // which can break reuse of the shared analyzer).
        tokenStream.end();
    } finally {
        tokenStream.close();
    }
    // Per-field analyzer path: SimpleAnalyzer lowercases its tokens.
    tokenStream = analyzer.tokenStream("special",
                                       new StringReader(text));
    try {
        CharTermAttribute termAtt = tokenStream.getAttribute(CharTermAttribute.class);
        tokenStream.reset();
        assertTrue(tokenStream.incrementToken());
        assertEquals("SimpleAnalyzer lowercases",
                     "qwerty",
                     termAtt.toString());
        tokenStream.end();
    } finally {
        tokenStream.close();
    }
}
示例10: createIndex
import org.apache.lucene.analysis.core.SimpleAnalyzer; //导入依赖的package包/类
/**
 * Builds the triple index from the given files: URI-valued fields
 * (subject, predicate, object-URI) use a SimpleAnalyzer, literal objects
 * use the LiteralAnalyzer. TTL files are indexed with the base URI,
 * TSV files directly; the writer commits after each file, is closed at
 * the end, and a reader is opened over the finished index.
 * Errors are logged, not rethrown (method signature preserved).
 *
 * @param files        input files (.ttl / .tsv; other extensions skipped)
 * @param idxDirectory directory the index is written to (created if absent)
 * @param baseURI      base URI used when parsing TTL files
 */
public void createIndex(List<File> files, String idxDirectory, String baseURI) {
    try {
        urlAnalyzer = new SimpleAnalyzer(LUCENE_VERSION);
        literalAnalyzer = new LiteralAnalyzer(LUCENE_VERSION);
        Map<String, Analyzer> mapping = new HashMap<String, Analyzer>();
        mapping.put(TripleIndex.FIELD_NAME_SUBJECT, urlAnalyzer);
        mapping.put(TripleIndex.FIELD_NAME_PREDICATE, urlAnalyzer);
        mapping.put(TripleIndex.FIELD_NAME_OBJECT_URI, urlAnalyzer);
        mapping.put(TripleIndex.FIELD_NAME_OBJECT_LITERAL, literalAnalyzer);
        PerFieldAnalyzerWrapper perFieldAnalyzer = new PerFieldAnalyzerWrapper(urlAnalyzer, mapping);
        File indexDirectory = new File(idxDirectory);
        // mkdirs (not mkdir) so nested index paths are created as well;
        // mkdir silently fails when the parent does not exist.
        indexDirectory.mkdirs();
        directory = new MMapDirectory(indexDirectory);
        IndexWriterConfig config = new IndexWriterConfig(LUCENE_VERSION, perFieldAnalyzer);
        iwriter = new IndexWriter(directory, config);
        iwriter.commit();
        for (File file : files) {
            String type = FileUtil.getFileExtension(file.getName());
            if (type.equals(TTL))
                indexTTLFile(file, baseURI);
            if (type.equals(TSV))
                indexTSVFile(file);
            // Commit per file so a failure mid-run keeps earlier files indexed.
            iwriter.commit();
        }
        iwriter.close();
        ireader = DirectoryReader.open(directory);
    } catch (Exception e) {
        log.error("Error while creating TripleIndex.", e);
    }
}
示例11: createIndex
import org.apache.lucene.analysis.core.SimpleAnalyzer; //导入依赖的package包/类
/**
 * Builds the index from TTL files: the URI field uses a SimpleAnalyzer,
 * while surface form, count, and context fields use the LiteralAnalyzer.
 * Errors are logged, not rethrown.
 *
 * NOTE(review): unlike the sibling createIndex variant, iwriter is never
 * closed here and no DirectoryReader is opened — presumably callers close
 * the writer and open readers elsewhere; confirm before relying on this.
 *
 * @param files        input files; only .ttl files are indexed
 * @param idxDirectory directory the index is written to (created if absent)
 * @param baseURI      base URI used when parsing TTL files
 */
public void createIndex(List<File> files, String idxDirectory, String baseURI) {
try {
urlAnalyzer = new SimpleAnalyzer(LUCENE_VERSION);
literalAnalyzer = new LiteralAnalyzer(LUCENE_VERSION);
Map<String, Analyzer> mapping = new HashMap<String, Analyzer>();
mapping.put(FIELD_NAME_URI, urlAnalyzer);
mapping.put(FIELD_NAME_SURFACE_FORM, literalAnalyzer);
mapping.put(FIELD_NAME_URI_COUNT, literalAnalyzer);
mapping.put(FIELD_NAME_CONTEXT, literalAnalyzer);
PerFieldAnalyzerWrapper perFieldAnalyzer = new PerFieldAnalyzerWrapper(urlAnalyzer, mapping);
File indexDirectory = new File(idxDirectory);
// NOTE(review): mkdir fails silently for nested paths (parent missing);
// result is ignored here.
indexDirectory.mkdir();
directory = new MMapDirectory(indexDirectory);
IndexWriterConfig config = new IndexWriterConfig(LUCENE_VERSION, perFieldAnalyzer);
iwriter = new IndexWriter(directory, config);
iwriter.commit();
for (File file : files) {
String type = FileUtil.getFileExtension(file.getName());
if (type.equals(TTL))
indexTTLFile(file, baseURI);
// Commit after each file so earlier files survive a later failure.
iwriter.commit();
}
} catch (Exception e) {
log.error("Error while creating TripleIndex.", e);
}
}
示例12: addIndexes
import org.apache.lucene.analysis.core.SimpleAnalyzer; //导入依赖的package包/类
/**
 * Demonstrates merging one index into another via IndexWriter.addIndexes.
 *
 * NOTE(review): both Directory locals are null, so this method throws a
 * NullPointerException as written — the directories must be initialized
 * (e.g. opened FSDirectory/RAMDirectory instances) before this can run.
 * The writer is also never closed, which would leak even if the nulls
 * were fixed.
 */
public void addIndexes() throws Exception {
Directory otherDir = null;
Directory ramDir = null;
IndexWriter writer = new IndexWriter(otherDir, new IndexWriterConfig(Version.LUCENE_41, new SimpleAnalyzer(
Version.LUCENE_41)));
writer.addIndexes(new Directory[] { ramDir });
}
示例13: SimpleAnalyzerProvider
import org.apache.lucene.analysis.core.SimpleAnalyzer; //导入依赖的package包/类
/**
 * Analyzer provider exposing a Lucene SimpleAnalyzer to the index
 * analysis registry.
 */
public SimpleAnalyzerProvider(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
super(indexSettings, name, settings);
this.simpleAnalyzer = new SimpleAnalyzer();
// Pin analysis behavior to 'version' — presumably the index's Lucene
// version inherited from the superclass; confirm its origin.
this.simpleAnalyzer.setVersion(version);
}
示例14: get
import org.apache.lucene.analysis.core.SimpleAnalyzer; //导入依赖的package包/类
/**
 * Returns the single SimpleAnalyzer instance created by the provider's
 * constructor (shared, not a new instance per call).
 */
@Override
public SimpleAnalyzer get() {
return this.simpleAnalyzer;
}
示例15: SimpleAnalyzerProvider
import org.apache.lucene.analysis.core.SimpleAnalyzer; //导入依赖的package包/类
/**
 * Guice-injected analyzer provider exposing a Lucene SimpleAnalyzer for
 * the given index; settings come from the index's settings service.
 */
@Inject
public SimpleAnalyzerProvider(Index index, IndexSettingsService indexSettingsService, @Assisted String name, @Assisted Settings settings) {
super(index, indexSettingsService.getSettings(), name, settings);
this.simpleAnalyzer = new SimpleAnalyzer();
// Pin analysis behavior to 'version' — presumably the index's Lucene
// version inherited from the superclass; confirm its origin.
this.simpleAnalyzer.setVersion(version);
}