This article collects typical usage examples of the Java class org.apache.lucene.analysis.SimpleAnalyzer. If you are wondering what SimpleAnalyzer does and how to use it, the selected class code examples below may help.
The SimpleAnalyzer class belongs to the org.apache.lucene.analysis package. Eleven code examples of the class are shown below, ordered by popularity by default.
Example 1: buildIndex
import org.apache.lucene.analysis.SimpleAnalyzer; // import the required package/class
/**
* Adds a corpus to the index.
*
* @param source The corpus side ({source, target}) that should be added.
* @param filePath The path of the corpus file to index.
*/
public void buildIndex(String source, String filePath) {
Date start = new Date();
try {
IndexWriter writer = new IndexWriter(source + "Index", new SimpleAnalyzer(), true);
indexDocs(writer, new File(filePath));
writer.optimize();
writer.close();
Date end = new Date();
System.out.print(end.getTime() - start.getTime());
System.out.println(" total milliseconds");
} catch (IOException e) {
System.out.println(" caught a " + e.getClass() +
"\n with message: " + e.getMessage());
}
}
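A minimal usage sketch for the method above; the class name CorpusIndexer and the corpus path are assumptions made for illustration, not part of the original example.
// Hypothetical driver for buildIndex: the class name and file path are made up.
CorpusIndexer indexer = new CorpusIndexer();
// Writes a fresh index into the directory "sourceIndex" in the working directory.
indexer.buildIndex("source", "corpus/source.txt");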
Example 2: getSentencesContaining
import org.apache.lucene.analysis.SimpleAnalyzer; // import the required package/class
/**
* getSentencesContaining returns a Vector of Strings holding the numbers
* of the sentences that contain the given words in the source language
* corpus.
*
* @param words_ The words to be found
* @return A Vector of the sentence numbers
*/
public Vector<String> getSentencesContaining(String words_) {
Vector<String> sentenceNums = new Vector<>();
words_ = requireAll(words_);
try {
Analyzer analyzer = new SimpleAnalyzer();
Query query = QueryParser.parse(words_, "contents", analyzer);
Hits hits = sourceSearcher.search(query);
// Add the numbers of all the hits to the Vector
for (int i = 0; i < hits.length(); i++) {
Document sentence = hits.doc(i);
sentenceNums.add(sentence.get("snum"));
//DEBUG System.out.println(sentence.get("snum") + ": " + sentence.get("contents"));
}
} catch (Exception e) {
System.out.println(" caught a " + e.getClass()
+ "\n with message: " + e.getMessage());
}
return sentenceNums;
}
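A short usage sketch, assuming an instance of the enclosing class is in scope; the query words are invented for illustration.
// Hypothetical call: find every source sentence containing both words.
Vector<String> nums = getSentencesContaining("house garden");
for (String snum : nums) {
    System.out.println("matched sentence " + snum);
}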
Example 3: numSentencesContaining
import org.apache.lucene.analysis.SimpleAnalyzer; // import the required package/class
/**
* numSentencesContaining returns the number of sentences containing the
* given words.
*
* @param words_ The words to be found
* @param searcher The searcher to run the query against
* @return The number of sentences containing the words
*/
public int numSentencesContaining(String words_, Searcher searcher) {
int num = 0;
words_ = requireAll(words_);
//DEBUG System.out.println("Finding hits for " + words_);
try {
Analyzer analyzer = new SimpleAnalyzer();
Query query = QueryParser.parse(words_, "contents", analyzer);
Hits hits = searcher.search(query);
num = hits.length();
} catch (Exception e) {
System.out.println(" caught a " + e.getClass()
+ "\n with message: " + e.getMessage());
}
return num;
}
Example 4: testDefaultOperator
import org.apache.lucene.analysis.SimpleAnalyzer; // import the required package/class
public void testDefaultOperator() throws Exception {
Query query = new MultiFieldQueryParser(Version.LUCENE_41,
new String[]{"title", "subject"},
new SimpleAnalyzer()).parse("development");
Directory dir = TestUtil.getBookIndexDirectory();
IndexSearcher searcher = new IndexSearcher(
dir,
true);
TopDocs hits = searcher.search(query, 10);
assertTrue(TestUtil.hitsIncludeTitle(
searcher,
hits,
"Ant in Action"));
assertTrue(TestUtil.hitsIncludeTitle( //A
searcher, //A
hits, //A
"Extreme Programming Explained")); //A
searcher.close();
dir.close();
}
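If the default OR behaviour is not wanted, the same parser instance can be switched to AND before parsing. A sketch that mirrors the constructor arguments used above; QueryParser.AND_OPERATOR comes from the classic QueryParser API, and the two-term query string is made up for illustration.
MultiFieldQueryParser parser = new MultiFieldQueryParser(Version.LUCENE_41,
    new String[]{"title", "subject"}, new SimpleAnalyzer());
parser.setDefaultOperator(QueryParser.AND_OPERATOR); // with AND, every term must now match in at least one field
Query query = parser.parse("development process");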
Example 5: testSpecifiedOperator
import org.apache.lucene.analysis.SimpleAnalyzer; // import the required package/class
public void testSpecifiedOperator() throws Exception {
Query query = MultiFieldQueryParser.parse(Version.LUCENE_41,
"lucene",
new String[]{"title", "subject"},
new BooleanClause.Occur[]{BooleanClause.Occur.MUST,
BooleanClause.Occur.MUST},
new SimpleAnalyzer());
Directory dir = TestUtil.getBookIndexDirectory();
IndexSearcher searcher = new IndexSearcher(
dir,
true);
TopDocs hits = searcher.search(query, 10);
assertTrue(TestUtil.hitsIncludeTitle(
searcher,
hits,
"Lucene in Action, Second Edition"));
assertEquals("one and only one", 1, hits.scoreDocs.length);
searcher.close();
dir.close();
}
Example 6: setUp
import org.apache.lucene.analysis.SimpleAnalyzer; // import the required package/class
public void setUp() throws Exception {
Directory directory = new RAMDirectory();
IndexWriter writer = new IndexWriter(directory,
new SimpleAnalyzer(),
IndexWriter.MaxFieldLength.UNLIMITED);
Document doc = new Document();
doc.add(new Field("partnum",
"Q36",
Field.Store.NO,
Field.Index.NOT_ANALYZED_NO_NORMS)); //A
doc.add(new Field("description",
"Illidium Space Modulator",
Field.Store.YES,
Field.Index.ANALYZED));
writer.addDocument(doc);
writer.close();
searcher = new IndexSearcher(directory);
}
Example 7: getQuery
import org.apache.lucene.analysis.SimpleAnalyzer; // import the required package/class
public Query getQuery() throws ParseException {
Analyzer analyzer = new SimpleAnalyzer();
QueryParser parser = new QueryParser(org.apache.lucene.util.Version.LUCENE_4_0, "title", analyzer);
String querystr = "test*";
Query query = parser.parse(querystr);
return query;
}
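With the return statement in place, the method can feed a search directly. A sketch assuming an IndexSearcher named searcher is already open; the hit count of 10 is arbitrary.
Query query = getQuery();
TopDocs hits = searcher.search(query, 10); // top 10 matches for the wildcard query "test*" on the title field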
Example 8: testBasicQueryParser
import org.apache.lucene.analysis.SimpleAnalyzer; // import the required package/class
public void testBasicQueryParser() throws Exception {
Query query = new QueryParser(Version.LUCENE_41, //1
"description", //1
new SimpleAnalyzer()) //1
.parse("partnum:Q36 AND SPACE"); //1
assertEquals("note Q36 -> q",
"+partnum:q +space", query.toString("description")); //2
assertEquals("doc not found :(", 0, TestUtil.hitCount(searcher, query));
}
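The assertion holds because SimpleAnalyzer's LetterTokenizer drops the digits in "Q36" and its LowerCaseFilter lowercases what is left, so the term collapses to "q". A quick sketch of how one might inspect that output directly with the attribute-based TokenStream API (available from Lucene 3.1 onwards); it assumes the surrounding method declares throws IOException.
Analyzer analyzer = new SimpleAnalyzer();
TokenStream stream = analyzer.tokenStream("partnum", new StringReader("Q36"));
CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
stream.reset();
while (stream.incrementToken()) {
    System.out.println(term.toString()); // prints: q
}
stream.end();
stream.close();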
Example 9: testPerFieldAnalyzer
import org.apache.lucene.analysis.SimpleAnalyzer; // import the required package/class
public void testPerFieldAnalyzer() throws Exception {
PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper(
new SimpleAnalyzer());
analyzer.addAnalyzer("partnum", new KeywordAnalyzer());
Query query = new QueryParser(Version.LUCENE_41,
"description", analyzer).parse(
"partnum:Q36 AND SPACE");
assertEquals("Q36 kept as-is",
"+partnum:Q36 +space", query.toString("description"));
assertEquals("doc found!", 1, TestUtil.hitCount(searcher, query));
}
Example 10: getAnalyzer
import org.apache.lucene.analysis.SimpleAnalyzer; // import the required package/class
public static Analyzer getAnalyzer(Configuration conf) throws IOException {
Class<? extends Analyzer> analyzerClass = conf.getClass(ANALYZER, SimpleAnalyzer.class, Analyzer.class);
return ReflectionUtils.newInstance(analyzerClass, conf);
}
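A sketch of how a job might swap in a different analyzer through the Hadoop Configuration, writing to the same ANALYZER key the method reads; WhitespaceAnalyzer is just an illustrative choice of replacement.
Configuration conf = new Configuration();
conf.setClass(ANALYZER, WhitespaceAnalyzer.class, Analyzer.class);
Analyzer analyzer = getAnalyzer(conf); // falls back to SimpleAnalyzer when the key is unset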
Example 11: countIntersections
import org.apache.lucene.analysis.SimpleAnalyzer; // import the required package/class
/**
* Counts the intersection between the sentences containing S in the source
* corpus and the sentences containing T in the target corpus.
*
* @param S
* The words in the source corpus, separated by spaces.
* @param T
* The words in the target corpus, separated by spaces.
* @return The number of sentences that contain all of the words in S as
*         well as all of the words in T.
*/
public int countIntersections(String S, String T) {
int retNum = 0;
// Require all terms
S = requireAll(S);
T = requireAll(T);
try {
// Get all sentences for the source terms
Analyzer sanalyzer = new SimpleAnalyzer();
Query squery = QueryParser.parse(S, "contents", sanalyzer);
Hits sHits = sourceSearcher.search(squery, new Sort("snum"));
// Get all sentences for the target terms
Analyzer tanalyzer = new SimpleAnalyzer();
Query tquery = QueryParser.parse(T, "contents", tanalyzer);
Hits tHits = targetSearcher.search(tquery, new Sort("snum"));
int sCount = 0;
int tCount = 0;
// Compare the sentences, and count how many match
while (sCount < sHits.length() && tCount < tHits.length()) {
Document sSentence = sHits.doc(sCount);
int sSentNum = Integer.valueOf(sSentence.get("snum"));
Document tSentence = tHits.doc(tCount);
int tSentNum = Integer.valueOf(tSentence.get("snum"));
//DEBUG System.out.println("s " + sSentNum + "\tt " + tSentNum);
if (sSentNum == tSentNum) {
retNum++;
sCount++;
tCount++;
} else if (sSentNum > tSentNum) {
tCount++;
} else if (sSentNum < tSentNum) {
sCount++;
}
}
} catch (Exception e) {
System.out.println(" caught a " + e.getClass()
+ "\n with message: " + e.getMessage());
}
return retNum;
}
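The loop above is a classic sorted-merge intersection over the snum values of the two hit lists. The same counting logic in isolation, run over two made-up sorted sentence-number arrays:
// Sorted sentence numbers from the two searches (illustrative values only).
int[] sourceNums = {2, 5, 7, 9};
int[] targetNums = {1, 5, 9, 12};
int s = 0, t = 0, matches = 0;
while (s < sourceNums.length && t < targetNums.length) {
    if (sourceNums[s] == targetNums[t]) { matches++; s++; t++; }
    else if (sourceNums[s] > targetNums[t]) { t++; }
    else { s++; }
}
// matches == 2 here: sentences 5 and 9 occur in both lists.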