This article collects typical usage examples of the Java class org.apache.lucene.analysis.Token. If you are wondering what the Token class is for, how to use it, or what real-world code using it looks like, the curated examples below may help.
The Token class belongs to the org.apache.lucene.analysis package. A total of 15 code examples of the Token class are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Java code examples.
Example 1: testCountPositions
import org.apache.lucene.analysis.Token; // import the required package/class
public void testCountPositions() throws IOException {
// We're looking to make sure that we:
Token t1 = new Token(); // Don't count tokens without an increment
t1.setPositionIncrement(0);
Token t2 = new Token();
t2.setPositionIncrement(1); // Count normal tokens with one increment
Token t3 = new Token();
t3.setPositionIncrement(2); // Count funny tokens with more than one increment
int finalTokenIncrement = 4; // Count the final token increment on the rare token streams that have them
Token[] tokens = new Token[] {t1, t2, t3};
Collections.shuffle(Arrays.asList(tokens), random());
final TokenStream tokenStream = new CannedTokenStream(finalTokenIncrement, 0, tokens);
// TODO: we have no CannedAnalyzer?
Analyzer analyzer = new Analyzer() {
@Override
public TokenStreamComponents createComponents(String fieldName) {
return new TokenStreamComponents(new MockTokenizer(), tokenStream);
}
};
assertThat(TokenCountFieldMapper.countPositions(analyzer, "", ""), equalTo(7));
}
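For context, the counting logic this test exercises can be sketched as follows: walk the analyzed TokenStream, sum every token's position increment, and add the final increment reported after end(). This is a minimal sketch rather than the actual TokenCountFieldMapper implementation, but with the tokens above (increments 0, 1 and 2) plus a final increment of 4 it yields the expected total of 7.

import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;

static int countPositionsSketch(Analyzer analyzer, String field, String value) throws IOException {
    try (TokenStream stream = analyzer.tokenStream(field, value)) {
        PositionIncrementAttribute posInc = stream.addAttribute(PositionIncrementAttribute.class);
        int count = 0;
        stream.reset();
        while (stream.incrementToken()) {
            count += posInc.getPositionIncrement(); // 0 for stacked tokens, >1 for position gaps
        }
        stream.end(); // exposes the end-of-stream position increment, if any
        count += posInc.getPositionIncrement();
        return count;
    }
}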
Example 2: next
import org.apache.lucene.analysis.Token; // import the required package/class
@Override
public Token next(Token reusableToken) throws IOException {
Token token = reusableToken;
if (tokenizer() != null) {
Token t = tokenizer().next(token);
if (t != null) {
return t;
}
}
char[] sent = new char[] {};
do {
read = input.read(ioBuffer);
if (read > 0)
sent = add(sent, ioBuffer, read);
} while (read != -1);
if (sent.length == 0) {
return null;
}
if (tokenizer() == null) {
tokenizer = new Tknzr(sent);
} else {
tokenizer().reset(sent);
}
return tokenizer().next(token);
}
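The add(...) helper called in the loop above (and again in Example 3) is not shown on this page. Judging from the call site it simply appends the characters just read to the sentence buffer; a minimal sketch under that assumption (only the name and signature are taken from the call, the body is guessed):

// Hypothetical implementation of the add(...) helper: append the first 'length'
// chars of 'buffer' to 'sent' and return the enlarged array.
private static char[] add(char[] sent, char[] buffer, int length) {
    char[] grown = new char[sent.length + length];
    System.arraycopy(sent, 0, grown, 0, sent.length);
    System.arraycopy(buffer, 0, grown, sent.length, length);
    return grown;
}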
Example 3: next
import org.apache.lucene.analysis.Token; // import the required package/class
@Override
public Token next(Token reusableToken) throws IOException {
Token token = reusableToken;
if (elementTokenizer() != null) {
Token t = elementTokenizer().next(token);
if (t != null) {
return t;
}
}
char[] sent = new char[] {};
do {
read = input.read(ioBuffer);
if (read > 0) sent = add(sent, ioBuffer, read);
} while (read != -1);
if (sent.length == 0) {
return null;
}
if (elementTokenizer() == null) {
elementTokenizer = new JsonSentenceParser(compressPayload);
}
elementTokenizer().parse(String.valueOf(sent));
return elementTokenizer().next(token);
}
Example 4: next
import org.apache.lucene.analysis.Token; // import the required package/class
public Token next(Token token) {
if (currentPos == 0) return null;
if (tokenPos <= currentPos) {
token.setTermBuffer(sentence, textPositions[2 * tokenPos],
textPositions[2 * tokenPos + 1]
- textPositions[2 * tokenPos]);
Payload p = new Payload();
byte[] b = new byte[4];
b[0] = (byte) ((payloads[tokenPos] >>> 16) & 255);
b[1] = (byte) ((payloads[tokenPos] >>> 24) & 255);
b[2] = (byte) ((payloads[tokenPos] >>> 8) & 255);
b[3] = (byte) (payloads[tokenPos] & 255);
p.setData(b);
token.setPayload(p);
tokenPos++;
return token;
}
return null;
}
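Note that the four payload bytes are written in an unusual order (bits 16-23, 24-31, 8-15, then 0-7), so a consumer has to mirror that order when reading the payload back. A minimal decoding sketch, assuming the payload always carries exactly these four bytes:

// Rebuild the int that the tokenizer above packed into the payload.
// Byte layout used above: b[0] = bits 16-23, b[1] = bits 24-31, b[2] = bits 8-15, b[3] = bits 0-7.
static int decodePayload(byte[] b) {
    return ((b[1] & 0xFF) << 24)
         | ((b[0] & 0xFF) << 16)
         | ((b[2] & 0xFF) << 8)
         |  (b[3] & 0xFF);
}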
Example 5: printlnToken
import org.apache.lucene.analysis.Token; // import the required package/class
private void printlnToken(String txt, Analyzer analyzer) throws IOException {
System.out.println("---------"+txt.length()+"\n"+txt);
TokenStream ts = analyzer.tokenStream("text", new StringReader(txt));
/* // For Lucene 2.9 and earlier:
for(Token t= new Token(); (t=ts.next(t)) !=null;) {
System.out.println(t);
}*/
/*while(ts.incrementToken()) {
TermAttribute termAtt = (TermAttribute)ts.getAttribute(TermAttribute.class);
OffsetAttribute offsetAtt = (OffsetAttribute)ts.getAttribute(OffsetAttribute.class);
TypeAttribute typeAtt = (TypeAttribute)ts.getAttribute(TypeAttribute.class);
System.out.println("("+termAtt.term()+","+offsetAtt.startOffset()+","+offsetAtt.endOffset()+",type="+typeAtt.type()+")");
}*/
for(Token t= new Token(); (t=TokenUtils.nextToken(ts, t)) !=null;) {
System.out.println(t);
}
}
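TokenUtils.nextToken(ts, t) is a project-specific helper that is not shown here. Presumably it bridges the attribute-based incrementToken() API back to the old reusable-Token style; the following is only a sketch of that idea, assuming a Lucene 4.x Token (which itself implements the term and offset attributes), not the helper's actual code:

// Hypothetical sketch of TokenUtils.nextToken: advance the stream and copy the
// current term text and offsets into the reusable Token; return null at end of stream.
public static Token nextToken(TokenStream ts, Token reusable) throws IOException {
    if (!ts.incrementToken()) {
        return null;
    }
    CharTermAttribute term = ts.getAttribute(CharTermAttribute.class);
    OffsetAttribute offset = ts.getAttribute(OffsetAttribute.class);
    reusable.setEmpty().append(term);
    reusable.setOffset(offset.startOffset(), offset.endOffset());
    return reusable;
}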
Example 6: SetDictionary
import org.apache.lucene.analysis.Token; // import the required package/class
@SuppressWarnings("unused")
SetDictionary(String words, Analyzer analyzer) throws IOException {
wordSet = new HashSet<String>();
if (words != null) {
TokenStream tokenStream = analyzer.tokenStream(NodeDocument.TEXT_FIELD, new StringReader(words));
Token reusableToken = new Token();
Token nextToken = null;
//while ((nextToken = tokenStream.next(reusableToken)) != null) {
//String term = nextToken.term();
//if (term != null) {
//wordSet.add(term);
//}
//}
}
}
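The commented-out loop relies on TokenStream.next(Token), which was removed from Lucene. Rewritten against the attribute-based API, the body of the if block would look roughly like this (a sketch only, reusing the variables from the constructor above; resource handling omitted for brevity):

TokenStream tokenStream = analyzer.tokenStream(NodeDocument.TEXT_FIELD, new StringReader(words));
CharTermAttribute termAtt = tokenStream.addAttribute(CharTermAttribute.class);
tokenStream.reset();
while (tokenStream.incrementToken()) {
    String term = termAtt.toString();
    if (!term.isEmpty()) {
        wordSet.add(term);
    }
}
tokenStream.end();
tokenStream.close();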
Example 7: next
import org.apache.lucene.analysis.Token; // import the required package/class
@Override
public Token next() throws IOException {
if (segbuf == null) {
while (segbuf == null || segbuf.length == 0) {
String line = bufreader.readLine();
if (line == null) {
return null;
}
segbuf = segmentor.segment(line);
}
currentSeg = 0;
}
Token t = new Token(segbuf[currentSeg], currentPos, currentPos + segbuf[currentSeg].length());
currentPos += segbuf[currentSeg].length();
currentSeg++;
if (currentSeg >= segbuf.length)
segbuf = null;
return t;
}
Example 8: testGetToken
import org.apache.lucene.analysis.Token; // import the required package/class
public void testGetToken() throws IOException {
String content = "我们的生活\n很美好";
String[] str = { "我们", "们的", "的生", "生活", "很美", "美好" };
StringReader reader = new StringReader(content);
WordSegment ws = new BigramWordSegment();
WordChineseTokenizer tokenizer = new WordChineseTokenizer(ws, reader);
LinkedList<Token> results = new LinkedList<Token>();
Token t;
while ((t = tokenizer.next()) != null) {
results.add(t);
}
Assert.assertEquals(str.length, results.size());
for (int i = 0; i < results.size(); i++) {
Assert.assertEquals(str[i], results.get(i).termText());
}
}
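The expected strings show that bigrams are generated per line: adjacent character pairs are produced within "我们的生活" and within "很美好", but never across the line break. A minimal sketch of that kind of bigram segmentation (an assumption about what BigramWordSegment does, not its actual code):

import java.util.ArrayList;
import java.util.List;

// Produce character bigrams line by line, never across the newline,
// e.g. "我们的生活\n很美好" -> [我们, 们的, 的生, 生活, 很美, 美好].
static List<String> bigrams(String text) {
    List<String> result = new ArrayList<String>();
    for (String line : text.split("\n")) {
        for (int i = 0; i + 1 < line.length(); i++) {
            result.add(line.substring(i, i + 2));
        }
    }
    return result;
}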
Example 9: getLookupResults
import org.apache.lucene.analysis.Token; // import the required package/class
private List<LookupResult> getLookupResults(SpellingOptions options, Token currentToken) throws IOException {
CharsRef scratch = new CharsRef();
scratch.chars = currentToken.buffer();
scratch.offset = 0;
scratch.length = currentToken.length();
boolean onlyMorePopular = (options.suggestMode == SuggestMode.SUGGEST_MORE_POPULAR) &&
!(lookup instanceof WFSTCompletionLookup) &&
!(lookup instanceof AnalyzingSuggester);
List<LookupResult> suggestions = lookup.lookup(scratch, onlyMorePopular, options.count);
if (suggestions == null || suggestions.size() == 0) {
return null;
}
return suggestions;
}
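For illustration, a hypothetical caller would consume the result like this (a null return signals "no suggestions", as coded above):

List<LookupResult> results = getLookupResults(options, currentToken);
if (results != null) {
    for (LookupResult result : results) {
        // key is the suggested text, value is its weight
        System.out.println(result.key + " (weight=" + result.value + ")");
    }
}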
Example 10: incrementToken
import org.apache.lucene.analysis.Token; // import the required package/class
@Override
public boolean incrementToken() throws IOException {
if (index >= tokens.length)
return false;
else {
clearAttributes();
Token token = tokens[index++];
termAtt.setEmpty().append(token);
offsetAtt.setOffset(token.startOffset(), token.endOffset());
posIncAtt.setPositionIncrement(token.getPositionIncrement());
flagsAtt.setFlags(token.getFlags());
typeAtt.setType(token.type());
payloadAtt.setPayload(token.getPayload());
return true;
}
}
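The attribute fields used above are not part of this snippet. On a token stream like this they would typically be declared once in the class and registered via addAttribute, roughly as follows (an assumption based on standard Lucene usage, mirroring what CannedTokenStream does internally):

private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
private final PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);
private final FlagsAttribute flagsAtt = addAttribute(FlagsAttribute.class);
private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
private final PayloadAttribute payloadAtt = addAttribute(PayloadAttribute.class);
private int index = 0;      // position in the canned token array
private Token[] tokens;     // the tokens to replay, set in the constructor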
Example 11: getSuggestions
import org.apache.lucene.analysis.Token; // import the required package/class
@Override
public SpellingResult getSuggestions(SpellingOptions options) throws IOException {
SpellingResult result = new SpellingResult();
//just spit back out the results
// sort the keys to make ordering predictable
Iterator<String> iterator = options.customParams.getParameterNamesIterator();
List<String> lst = new ArrayList<>();
while (iterator.hasNext()) {
lst.add(iterator.next());
}
Collections.sort(lst);
int i = 0;
for (String name : lst) {
String value = options.customParams.get(name);
result.add(new Token(name, i, i+1), Collections.singletonList(value));
i += 2;
}
return result;
}
Example 12: shingleFilterTestCommon
import org.apache.lucene.analysis.Token; // import the required package/class
protected void shingleFilterTestCommon(ShingleFilter filter,
Token[] tokensToCompare,
int[] positionIncrements,
String[] types)
throws IOException {
String text[] = new String[tokensToCompare.length];
int startOffsets[] = new int[tokensToCompare.length];
int endOffsets[] = new int[tokensToCompare.length];
for (int i = 0; i < tokensToCompare.length; i++) {
text[i] = new String(tokensToCompare[i].buffer(),0, tokensToCompare[i].length());
startOffsets[i] = tokensToCompare[i].startOffset();
endOffsets[i] = tokensToCompare[i].endOffset();
}
assertTokenStreamContents(filter, text, startOffsets, endOffsets, types, positionIncrements);
}
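A hypothetical call site for this helper: feed two canned tokens through a bigram ShingleFilter (with unigrams kept, which is Lucene's default) and compare against the expected shingles. The literal values below are illustrative and not taken from the original test class:

Token[] input = new Token[] {
    new Token("please", 0, 6),
    new Token("divide", 7, 13)
};
ShingleFilter filter = new ShingleFilter(new CannedTokenStream(input), 2);
Token[] expected = new Token[] {
    new Token("please", 0, 6),
    new Token("please divide", 0, 13),
    new Token("divide", 7, 13)
};
int[] positionIncrements = new int[] { 1, 0, 1 };
String[] types = new String[] { "word", "shingle", "word" };
shingleFilterTestCommon(filter, expected, positionIncrements, types);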
Example 13: testUnicode
import org.apache.lucene.analysis.Token; // import the required package/class
@Test
public void testUnicode() {
SpellingQueryConverter converter = new SpellingQueryConverter();
converter.init(new NamedList());
converter.setAnalyzer(new WhitespaceAnalyzer());
// Chinese text value
Collection<Token> tokens = converter.convert("text_field:我购买了道具和服装。");
assertTrue("tokens is null and it shouldn't be", tokens != null);
assertEquals("tokens Size: " + tokens.size() + " is not 1", 1, tokens.size());
tokens = converter.convert("text_购field:我购买了道具和服装。");
assertTrue("tokens is null and it shouldn't be", tokens != null);
assertEquals("tokens Size: " + tokens.size() + " is not 1", 1, tokens.size());
tokens = converter.convert("text_field:我购xyz买了道具和服装。");
assertTrue("tokens is null and it shouldn't be", tokens != null);
assertEquals("tokens Size: " + tokens.size() + " is not 1", 1, tokens.size());
}
Example 14: testLegalbutVeryLargePositions
import org.apache.lucene.analysis.Token; // import the required package/class
public void testLegalbutVeryLargePositions() throws Exception {
Directory dir = newDirectory();
IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));
Document doc = new Document();
Token t1 = new Token("foo", 0, 3);
t1.setPositionIncrement(Integer.MAX_VALUE-500);
if (random().nextBoolean()) {
t1.setPayload(new BytesRef(new byte[] { 0x1 } ));
}
TokenStream overflowingTokenStream = new CannedTokenStream(
new Token[] { t1 }
);
Field field = new TextField("foo", overflowingTokenStream);
doc.add(field);
iw.addDocument(doc);
iw.close();
dir.close();
}
Example 15: testLegalbutVeryLargeOffsets
import org.apache.lucene.analysis.Token; // import the required package/class
public void testLegalbutVeryLargeOffsets() throws Exception {
Directory dir = newDirectory();
IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));
Document doc = new Document();
Token t1 = new Token("foo", 0, Integer.MAX_VALUE-500);
if (random().nextBoolean()) {
t1.setPayload(new BytesRef("test"));
}
Token t2 = new Token("foo", Integer.MAX_VALUE-500, Integer.MAX_VALUE);
TokenStream tokenStream = new CannedTokenStream(
new Token[] { t1, t2 }
);
FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
// store some term vectors for the checkindex cross-check
ft.setStoreTermVectors(true);
ft.setStoreTermVectorPositions(true);
ft.setStoreTermVectorOffsets(true);
Field field = new Field("foo", tokenStream, ft);
doc.add(field);
iw.addDocument(doc);
iw.close();
dir.close();
}