本文整理汇总了Java中org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer类的典型用法代码示例。如果您正苦于以下问题:Java SmartChineseAnalyzer类的具体用法?Java SmartChineseAnalyzer怎么用?Java SmartChineseAnalyzer使用的例子?那么, 这里精选的类代码示例或许可以为您提供帮助。
SmartChineseAnalyzer类属于org.apache.lucene.analysis.cn.smart包,在下文中一共展示了SmartChineseAnalyzer类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: ajaxbuild
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer; //导入依赖的package包/类
/**
 * Rebuild the ajax autocomplete index from scratch using all blogs.
 * Deletes the existing suggestion index directory, then feeds every blog
 * into an AnalyzingInfixSuggester backed by the SmartChineseAnalyzer.
 */
@Override
public void ajaxbuild() {
try {
// Wipe the previous suggestion index so it is rebuilt cleanly.
FileUtils.deleteDirectory(new File(AUTOCOMPLETEPATH));
logger.info("delete autocomplete file success");
Directory dir = FSDirectory.open(Paths.get(AUTOCOMPLETEPATH));
SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer();
AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(dir, analyzer);
try {
// Feed every blog into the suggester's lookup dictionary.
List<Blog> blogs = blogMapper.getAllBlog();
suggester.build(new BlogIterator(blogs.iterator()));
} finally {
// FIX: the suggester holds an index lock and file handles; the
// original never closed it, leaking them on every rebuild.
suggester.close();
}
} catch (IOException e) {
// FIX: log the cause with its stack trace instead of swallowing it
// into a bare "Error!" on stderr (logger is already used above).
logger.error("failed to build autocomplete index", e);
}
}
示例2: ajaxsearch
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer; //导入依赖的package包/类
/**
 * Look up autocomplete suggestions for the given keyword.
 *
 * @param keyword user-typed keyword/prefix to match against the suggester
 * @return up to 7 suggestions ordered by ascending length; an empty set
 *         (never null) when the index cannot be read
 */
@Override
public Set<String> ajaxsearch(String keyword) {
try {
Directory dir = FSDirectory.open(Paths.get(AUTOCOMPLETEPATH));
SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer();
AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(dir, analyzer);
List<String> list = lookup(suggester, keyword);
// FIX: the original comparator never returned 0 for equal lengths,
// violating the Comparator contract — TimSort can then throw
// "Comparison method violates its general contract!".
list.sort((o1, o2) -> Integer.compare(o1.length(), o2.length()));
Set<String> set = new LinkedHashSet<>(list);
// Trim the ordered set to at most 7 entries.
ssubSet(set, 7);
return set;
} catch (IOException e) {
// FIX: log the failure and return an empty set instead of null so
// callers can iterate without a null check.
logger.error("ajaxsearch failed for keyword: " + keyword, e);
return new LinkedHashSet<>();
}
}
示例3: init
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer; //导入依赖的package包/类
/**
 * Lazily initialize the shared Lucene components: the on-disk directory,
 * the IndexWriter, the SearcherManager, and a near-real-time reopen thread.
 * Idempotent once both static fields are set.
 * NOTE(review): the null check is not synchronized — two concurrent callers
 * could both pass it; presumably callers serialize init() externally — confirm.
 */
private static void init() {
if (indexWriter==null || searcherManager==null) {
try {
// load directory path from the default configuration file
Properties properties = PropertiesUtil.loadProperties(PropertiesUtil.DEFAULT_CONFIG);
String luceneDirectory = PropertiesUtil.getString(properties, "lucene.directory");
// directory backing the index on the local filesystem
directory = new SimpleFSDirectory(Paths.get(luceneDirectory));
// IndexWriter configured with the smart Chinese analyzer
Analyzer analyzer = new SmartChineseAnalyzer();
IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer);
indexWriter = new IndexWriter(directory, indexWriterConfig);
// SearcherManager plus an NRT reopen thread: refresh searchers at most
// every 5.0s, at least every 0.025s when a reopen is requested
searcherManager = new SearcherManager(indexWriter, false, new SearcherFactory());
TrackingIndexWriter trackingIndexWriter = new TrackingIndexWriter(indexWriter);
ControlledRealTimeReopenThread controlledRealTimeReopenThread = new ControlledRealTimeReopenThread<IndexSearcher>(trackingIndexWriter, searcherManager, 5.0, 0.025);
controlledRealTimeReopenThread.setDaemon(true);// run as a daemon thread so it never blocks JVM shutdown
controlledRealTimeReopenThread.start();
} catch (IOException e) {
logger.error("", e);
}
}
}
示例4: index
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer; //导入依赖的package包/类
/**
 * Build an in-memory (RAMDirectory) index of all fetched products using
 * the Lucene 4.6 API. Each product becomes one document with a tokenized
 * "description" field and untokenized "username"/"url" fields.
 *
 * @throws Exception if fetching the products or writing the index fails
 */
private void index() throws Exception {
directory = new RAMDirectory(); // keep the whole index in memory
Analyzer analyzer = new SmartChineseAnalyzer(Version.LUCENE_46);
IndexWriterConfig iwConfig = new IndexWriterConfig(Version.LUCENE_46,
analyzer);
// FIX: try-with-resources guarantees the writer (and its write lock) is
// released even when fetch() or addDocument() throws — the original
// leaked the writer on any exception before close().
try (IndexWriter writer = new IndexWriter(directory, iwConfig)) {
List<Product> products = htmlFetcher.fetch();
for (Product product : products) {
Document doc = new Document();
doc.add(new TextField("description", product.getDescription(),
Store.YES));
doc.add(new StringField("username", product.getUserName(),
Store.YES));
doc.add(new StringField("url", product.getUrl(), Store.YES));
writer.addDocument(doc);
}
writer.commit();
}
}
示例5: main
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer; //导入依赖的package包/类
/**
 * Ad-hoc demo entry point: opens the autocomplete suggester index and
 * looks up suggestions for the prefix "jav".
 */
public static void main(String[] args) {
try {
Directory dir = FSDirectory.open(Paths.get(AUTOCOMPLETEPATH));
SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer();
AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(dir, analyzer);
// FIX: removed an unused RAMDirectory that was allocated and never read.
// blogService is unused here but kept — NOTE(review): the constructor
// may perform setup; confirm before deleting.
IBlogService blogService = new BlogServiceImpl();
lookup(suggester, "jav");
// new BlogServiceImpl().ajaxsearch("北京");
} catch (Exception e) {
e.printStackTrace();
}
}
示例6: getWriter
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer; //导入依赖的package包/类
/**
 * Open an IndexWriter over the index located at BASE_PATH, tokenizing
 * content with the smart Chinese analyzer.
 *
 * @return a fresh IndexWriter; the caller is responsible for closing it
 * @throws Exception if the directory or writer cannot be opened
 */
private IndexWriter getWriter() throws Exception {
dir = FSDirectory.open(Paths.get(BASE_PATH));
IndexWriterConfig configuration = new IndexWriterConfig(new SmartChineseAnalyzer());
return new IndexWriter(dir, configuration);
}
示例7: getIndexWriter
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer; //导入依赖的package包/类
/**
 * Create an IndexWriter for the given index path.
 *
 * @param indexPath filesystem path of the Lucene index directory
 * @param create    true to overwrite any existing index (CREATE),
 *                  false to append or create as needed (CREATE_OR_APPEND)
 * @return a writer using SmartChineseAnalyzer and a tuned byte-size merge policy
 * @throws IOException if the directory cannot be opened or the writer created
 */
public static IndexWriter getIndexWriter(String indexPath, boolean create) throws IOException {
Directory dir = FSDirectory.open(Paths.get(indexPath));
Analyzer analyzer = new SmartChineseAnalyzer();
IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
LogMergePolicy mergePolicy = new LogByteSizeMergePolicy();
mergePolicy.setMergeFactor(50);
mergePolicy.setMaxMergeDocs(5000);
// FIX: the policy was built and tuned but never attached to the config,
// so the two setter calls above silently had no effect.
iwc.setMergePolicy(mergePolicy);
if (create){
iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
} else {
iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
}
return new IndexWriter(dir, iwc);
}
示例8: getWriter
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer; //导入依赖的package包/类
/**
 * Build an IndexWriter over the shared directory field.
 *
 * @return a new IndexWriter instance; the caller must close it
 * @throws Exception if the writer cannot be created
 */
private IndexWriter getWriter() throws Exception {
// A StandardAnalyzer would also work, but SmartChineseAnalyzer segments
// Chinese text into words instead of single characters.
IndexWriterConfig config = new IndexWriterConfig(new SmartChineseAnalyzer());
return new IndexWriter(dir, config);
}
示例9: search
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer; //导入依赖的package包/类
/**
 * Search the "desc" field of the index at indexDir for query string q and
 * print the top 10 hits, highlighting matched terms with red HTML markup.
 *
 * @param indexDir filesystem path of the Lucene index
 * @param q        raw query string, parsed by QueryParser
 * @throws Exception on index I/O errors or query parse failure
 */
public static void search(String indexDir, String q) throws Exception {
Directory dir = FSDirectory.open(Paths.get(indexDir));
// FIX: try-with-resources closes the reader even when parse()/search()
// throws — the original leaked it on any exception before reader.close().
try (IndexReader reader = DirectoryReader.open(dir)) {
IndexSearcher is = new IndexSearcher(reader);
SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer();
QueryParser parser = new QueryParser("desc", analyzer);
Query query = parser.parse(q);
long start = System.currentTimeMillis();
TopDocs hits = is.search(query, 10);
long end = System.currentTimeMillis();
System.out.println("匹配 " + q + " ,总共花费" + (end - start) + "毫秒" + "查询到" + hits.totalHits + "个记录");
// Highlighter wraps matched terms in <b><font color='red'> tags.
QueryScorer scorer = new QueryScorer(query);
Fragmenter fragmenter = new SimpleSpanFragmenter(scorer);
SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter("<b><font color='red'>", "</font></b>");
Highlighter highlighter = new Highlighter(simpleHTMLFormatter, scorer);
highlighter.setTextFragmenter(fragmenter);
for (ScoreDoc scoreDoc : hits.scoreDocs) {
Document doc = is.doc(scoreDoc.doc);
System.out.println(doc.get("city"));
System.out.println(doc.get("desc"));
String desc = doc.get("desc");
if (desc != null) {
TokenStream tokenStream = analyzer.tokenStream("desc", new StringReader(desc));
System.out.println(highlighter.getBestFragment(tokenStream, desc));
}
}
}
}
示例10: getWriter
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer; //导入依赖的package包/类
/**
 * Create an IndexWriter whose index location is read from
 * env-config.properties via the EnvEnum.LUCENE_INDEX_PATH key.
 *
 * @return a new IndexWriter; the caller is responsible for closing it
 * @throws Exception if the configured path cannot be opened
 */
private IndexWriter getWriter() throws Exception {
// Index location is configured in env-config.properties.
String indexPath = PropertiesUtil.getValue(EnvEnum.LUCENE_INDEX_PATH.val());
dir = FSDirectory.open(Paths.get(indexPath));
return new IndexWriter(dir, new IndexWriterConfig(new SmartChineseAnalyzer()));
}
示例11: getWriter
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer; //导入依赖的package包/类
/**
 * Open an IndexWriter over the hard-coded index directory "C://lucene".
 *
 * @return a new IndexWriter instance (caller closes it)
 * @throws Exception if the directory or writer cannot be opened
 */
private IndexWriter getWriter() throws Exception {
// Index lives at a fixed location on the C drive.
dir = FSDirectory.open(Paths.get("C://lucene"));
IndexWriterConfig writerConfig = new IndexWriterConfig(new SmartChineseAnalyzer());
return new IndexWriter(dir, writerConfig);
}
示例12: stopWords
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer; //导入依赖的package包/类
/**
 * Convert a user-supplied stop-word array into a CharArraySet.
 *
 * @param array stop words; null falls back to the analyzer's default set,
 *              an empty array means "no stop words at all"
 * @return the corresponding case-insensitive CharArraySet
 * @throws UDFArgumentException declared for caller compatibility
 */
@Nonnull
private static CharArraySet stopWords(@Nonnull final String[] array)
throws UDFArgumentException {
// Defensive null check despite @Nonnull: use the analyzer defaults.
if (array == null) {
return SmartChineseAnalyzer.getDefaultStopSet();
}
// Explicitly empty input disables stop-word filtering entirely.
if (array.length == 0) {
return CharArraySet.EMPTY_SET;
}
return new CharArraySet(Arrays.asList(array), /* ignoreCase */ true);
}
示例13: getWriter
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer; //导入依赖的package包/类
/**
 * Obtain an IndexWriter for the shared directory, analyzing content with
 * the smart Chinese analyzer (rather than the plain StandardAnalyzer).
 *
 * @return a fresh IndexWriter; closing it is the caller's duty
 * @throws Exception when writer construction fails
 */
private IndexWriter getWriter() throws Exception {
SmartChineseAnalyzer chineseAnalyzer = new SmartChineseAnalyzer();
return new IndexWriter(dir, new IndexWriterConfig(chineseAnalyzer));
}
示例14: search
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer; //导入依赖的package包/类
/**
 * Query the "desc" field of the index at indexDir and print the top 10
 * results, highlighting matched terms with red HTML markup.
 *
 * @param indexDir path to the Lucene index directory
 * @param q        raw query string
 * @throws Exception on I/O or query-parse errors
 */
public static void search(String indexDir, String q) throws Exception {
Directory directory = FSDirectory.open(Paths.get(indexDir));
IndexReader indexReader = DirectoryReader.open(directory);
IndexSearcher searcher = new IndexSearcher(indexReader);
// SmartChineseAnalyzer segments Chinese text into words.
SmartChineseAnalyzer smartAnalyzer = new SmartChineseAnalyzer();
QueryParser queryParser = new QueryParser("desc", smartAnalyzer);
Query parsedQuery = queryParser.parse(q);
long startMillis = System.currentTimeMillis();
TopDocs topDocs = searcher.search(parsedQuery, 10);
long endMillis = System.currentTimeMillis();
System.out.println("匹配 "+q+" ,总共花费"+(endMillis-startMillis)+"毫秒"+"查询到"+topDocs.totalHits+"个记录");
// Build a highlighter that wraps matched terms in red bold tags.
QueryScorer queryScorer = new QueryScorer(parsedQuery);
Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter("<b><font color='red'>", "</font></b>"), queryScorer);
highlighter.setTextFragmenter(new SimpleSpanFragmenter(queryScorer));
for (ScoreDoc hit : topDocs.scoreDocs) {
Document document = searcher.doc(hit.doc);
System.out.println(document.get("city"));
System.out.println(document.get("desc"));
String description = document.get("desc");
if (description != null) {
TokenStream stream = smartAnalyzer.tokenStream("desc", new StringReader(description));
System.out.println(highlighter.getBestFragment(stream, description));
}
}
indexReader.close();
}
示例15: main
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer; //导入依赖的package包/类
/**
 * Demo (Xtend-generated Java): tokenize a Chinese text with
 * SmartChineseAnalyzer using a custom stop-word set merged with the
 * analyzer's default stop words, printing each token and its type.
 */
public static void main(final String[] args) {
try {
// Sample text (runtime data — kept verbatim) describing how Lucene analyzers work.
final String text = "lucene分析器使用分词器和过滤器构成一个“管道”,文本在流经这个管道后成为可以进入索引的最小单位,因此,一个标准的分析器有两个部分组成,一个是分词器tokenizer,它用于将文本按照规则切分为一个个可以进入索引的最小单位。另外一个是TokenFilter,它主要作用是对切出来的词进行进一步的处理(如去掉敏感词、英文大小写转换、单复数处理)等。lucene中的Tokenstram方法首先创建一个tokenizer对象处理Reader对象中的流式文本,然后利用TokenFilter对输出流进行过滤处理";
// User-defined stop words (case-insensitive set) added on top of the defaults.
final ArrayList<String> myStopWords = CollectionLiterals.<String>newArrayList("的", "在", "了", "呢", ",", "0", ":", ",", "是", "流");
final CharArraySet stopWords = new CharArraySet(0, true);
for (final String word : myStopWords) {
stopWords.add(word);
}
// Merge in the analyzer's built-in default stop set.
CharArraySet _defaultStopSet = SmartChineseAnalyzer.getDefaultStopSet();
final Iterator<Object> itor = _defaultStopSet.iterator();
while (itor.hasNext()) {
Object _next = itor.next();
stopWords.add(_next);
}
final SmartChineseAnalyzer sca = new SmartChineseAnalyzer(stopWords);
final TokenStream ts = sca.tokenStream("field", text);
// Attributes must be registered before reset()/incrementToken().
CharTermAttribute ch = ts.<CharTermAttribute>addAttribute(CharTermAttribute.class);
TypeAttribute type = ts.<TypeAttribute>addAttribute(TypeAttribute.class);
ts.reset();
// Print "term | type" for every token the analyzer produces.
while (ts.incrementToken()) {
String _string = ch.toString();
String _plus = (_string + " | ");
String _type = type.type();
String _plus_1 = (_plus + _type);
InputOutput.<String>println(_plus_1);
}
ts.end();
ts.close();
} catch (Throwable _e) {
// Xtend-generated idiom: rethrow checked exceptions unchecked.
throw Exceptions.sneakyThrow(_e);
}
}