

Java Token Class Code Examples

This article collects typical usage examples of the Java class org.apache.lucene.analysis.Token. If you are unsure what the Token class does or how to use it in practice, the curated examples below may help.


The Token class belongs to the org.apache.lucene.analysis package. Fifteen code examples using it are shown below, ordered by popularity.
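The examples below keep exercising the same small set of Token operations: constructing a Token with its term text and character offsets, adjusting its position increment, and replaying tokens through a TokenStream. As a quick orientation, here is a minimal, self-contained sketch of that pattern. It only uses calls that also appear in the examples (the Token(String, int, int) constructor, setPositionIncrement, and CannedTokenStream from Lucene's test framework); the class name TokenSketch and the sample values are purely illustrative and do not come from any of the projects listed below.

import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.CannedTokenStream;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;

// Illustrative sketch only; not taken from the projects referenced in this article.
public class TokenSketch {
    public static void main(String[] args) throws Exception {
        // Term text "foo" covering character offsets 0..3.
        Token t1 = new Token("foo", 0, 3);
        // Term text "bar" covering offsets 4..7, two positions after the previous token.
        Token t2 = new Token("bar", 4, 7);
        t2.setPositionIncrement(2);

        // CannedTokenStream (Lucene test framework) replays a fixed array of tokens.
        TokenStream ts = new CannedTokenStream(t1, t2);
        CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
        PositionIncrementAttribute posInc = ts.addAttribute(PositionIncrementAttribute.class);

        ts.reset();
        while (ts.incrementToken()) {
            System.out.println(term.toString() + " (posInc=" + posInc.getPositionIncrement() + ")");
        }
        ts.end();
        ts.close();
    }
}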

Example 1: testCountPositions

import org.apache.lucene.analysis.Token; // import the required package/class
public void testCountPositions() throws IOException {
    // We're looking to make sure that we:
    Token t1 = new Token();      // Don't count tokens without an increment
    t1.setPositionIncrement(0);
    Token t2 = new Token();
    t2.setPositionIncrement(1);  // Count normal tokens with one increment
    Token t3 = new Token();
    t3.setPositionIncrement(2);  // Count funny tokens with more than one increment
    int finalTokenIncrement = 4; // Count the final token increment on the rare token streams that have them
    Token[] tokens = new Token[] {t1, t2, t3};
    Collections.shuffle(Arrays.asList(tokens), random());
    final TokenStream tokenStream = new CannedTokenStream(finalTokenIncrement, 0, tokens);
    // TODO: we have no CannedAnalyzer?
    Analyzer analyzer = new Analyzer() {
            @Override
            public TokenStreamComponents createComponents(String fieldName) {
                return new TokenStreamComponents(new MockTokenizer(), tokenStream);
            }
        };
    // Total positions = 0 (t1) + 1 (t2) + 2 (t3) + 4 (final token increment) = 7
    assertThat(TokenCountFieldMapper.countPositions(analyzer, "", ""), equalTo(7));
}
 
Developer: justor, Project: elasticsearch_my, Lines: 22, Source: TokenCountFieldMapperTests.java

Example 2: next

import org.apache.lucene.analysis.Token; // import the required package/class
@Override
public Token next(Token reusableToken) throws IOException {
	Token token = reusableToken;
	if (tokenizer() != null) {
		Token t = tokenizer().next(token);
		if (t != null) {
			return t;
		}
	}
	char[] sent = new char[] {};
	do {
		read = input.read(ioBuffer);
		if (read > 0)
			sent = add(sent, ioBuffer, read);
	} while (read != -1);
	if (sent.length == 0) {
		return null;
	}
	if (tokenizer() == null) {
		tokenizer = new Tknzr(sent);
	} else {
		tokenizer().reset(sent);
	}
	return tokenizer().next(token);

}
 
Developer: arne-cl, Project: fangorn, Lines: 27, Source: TreebankSentenceTokenizer.java

Example 3: next

import org.apache.lucene.analysis.Token; // import the required package/class
@Override
public Token next(Token reusableToken) throws IOException {
	Token token = reusableToken;
	if (elementTokenizer() != null) {
		Token t = elementTokenizer().next(token);
		if (t != null) {
			return t;
		}
	}
	char[] sent = new char[] {};
	do {
		read = input.read(ioBuffer);
		if (read > 0) sent = add(sent, ioBuffer, read);
	} while (read != -1);
	if (sent.length == 0) {
		return null;
	}
	if (elementTokenizer() == null) {
		elementTokenizer = new JsonSentenceParser(compressPayload);
	} 
	elementTokenizer().parse(String.valueOf(sent));
	return elementTokenizer().next(token);

}
 
Developer: arne-cl, Project: fangorn, Lines: 25, Source: NodeTreebankSentenceTokenizer.java

Example 4: next

import org.apache.lucene.analysis.Token; // import the required package/class
public Token next(Token token) {
	if (currentPos == 0) return null;
	if (tokenPos <= currentPos) {
		token.setTermBuffer(sentence, textPositions[2 * tokenPos],
				textPositions[2 * tokenPos + 1]
						- textPositions[2 * tokenPos]);
		Payload p = new Payload();
		// Pack the 32-bit payload int into 4 bytes (project-specific byte order)
		byte[] b = new byte[4];
		b[0] = (byte) ((payloads[tokenPos] >>> 16) & 255);
		b[1] = (byte) ((payloads[tokenPos] >>> 24) & 255);
		b[2] = (byte) ((payloads[tokenPos] >>> 8) & 255);
		b[3] = (byte) (payloads[tokenPos] & 255);
		p.setData(b);
		token.setPayload(p);
		tokenPos++;
		return token;
	}
	return null;
}
 
Developer: arne-cl, Project: fangorn, Lines: 20, Source: FastStringParser.java

Example 5: printlnToken

import org.apache.lucene.analysis.Token; // import the required package/class
private void printlnToken(String txt, Analyzer analyzer) throws IOException {
	System.out.println("---------"+txt.length()+"\n"+txt);
	TokenStream ts = analyzer.tokenStream("text", new StringReader(txt));
	/*// Lucene 2.9 and earlier
	for(Token t= new Token(); (t=ts.next(t)) !=null;) {
		System.out.println(t);
	}*/
	/*while(ts.incrementToken()) {
		TermAttribute termAtt = (TermAttribute)ts.getAttribute(TermAttribute.class);
		OffsetAttribute offsetAtt = (OffsetAttribute)ts.getAttribute(OffsetAttribute.class);
		TypeAttribute typeAtt = (TypeAttribute)ts.getAttribute(TypeAttribute.class);

		System.out.println("("+termAtt.term()+","+offsetAtt.startOffset()+","+offsetAtt.endOffset()+",type="+typeAtt.type()+")");
	}*/
	for(Token t= new Token(); (t=TokenUtils.nextToken(ts, t)) !=null;) {
		System.out.println(t);
	}
}
 
Developer: wanghaile, Project: mmseg4j, Lines: 19, Source: AnalyzerTest.java

Example 6: SetDictionary

import org.apache.lucene.analysis.Token; // import the required package/class
@SuppressWarnings("unused")
SetDictionary(String words, Analyzer analyzer) throws IOException {
	wordSet = new HashSet<String>();
	if (words != null) {
		TokenStream tokenStream = analyzer.tokenStream(NodeDocument.TEXT_FIELD, new StringReader(words));
		Token reusableToken = new Token();
		Token nextToken = null;

		//while ((nextToken = tokenStream.next(reusableToken)) != null) {
		//String term = nextToken.term();
		//if (term != null) {
		//wordSet.add(term);
		//}
		//}
	}
}
 
Developer: openkm, Project: document-management-system, Lines: 17, Source: IndexHelper.java

Example 7: next

import org.apache.lucene.analysis.Token; // import the required package/class
@Override
public Token next() throws IOException {
	if (segbuf == null) {
		while (segbuf == null || segbuf.length == 0) {
			String line = bufreader.readLine();
			if (line == null) {
				return null;
			}
			segbuf = segmentor.segment(line);
		}
		currentSeg = 0;
	}

	Token t = new Token(segbuf[currentSeg], currentPos, currentPos + segbuf[currentSeg].length());
	currentPos += segbuf[currentSeg].length();
	currentSeg++;
	if (currentSeg >= segbuf.length)
		segbuf = null;

	return t;
}
 
Developer: thunlp, Project: THUTag, Lines: 22, Source: WordChineseTokenizer.java

Example 8: testGetToken

import org.apache.lucene.analysis.Token; // import the required package/class
public void testGetToken() throws IOException {
	String content = "我们的生活\n很美好";
	String[] str = { "我们", "们的", "的生", "生活", "很美", "美好" };
	StringReader reader = new StringReader(content);
	WordSegment ws = new BigramWordSegment();
	WordChineseTokenizer tokenizer = new WordChineseTokenizer(ws, reader);
	LinkedList<Token> results = new LinkedList<Token>();
	Token t;
	while ((t = tokenizer.next()) != null) {
		results.add(t);
	}
	Assert.assertEquals(str.length, results.size());
	for (int i = 0; i < results.size(); i++) {
		Assert.assertEquals(str[i], results.get(i).termText());
	}
}
 
Developer: thunlp, Project: THUTag, Lines: 17, Source: WordChineseTokenizerTest.java

Example 9: getLookupResults

import org.apache.lucene.analysis.Token; // import the required package/class
private List<LookupResult> getLookupResults(SpellingOptions options, Token currentToken) throws IOException {
    CharsRef scratch = new CharsRef();
    scratch.chars = currentToken.buffer();
    scratch.offset = 0;
    scratch.length = currentToken.length();
    boolean onlyMorePopular = (options.suggestMode == SuggestMode.SUGGEST_MORE_POPULAR) &&
            !(lookup instanceof WFSTCompletionLookup) &&
            !(lookup instanceof AnalyzingSuggester);

    List<LookupResult> suggestions = lookup.lookup(scratch, onlyMorePopular, options.count);
    if (suggestions == null || suggestions.size() == 0) {
        return null;
    }

    return suggestions;
}
 
Developer: DiceTechJobs, Project: SolrPlugins, Lines: 17, Source: DiceMultipleCaseSuggester.java

Example 10: incrementToken

import org.apache.lucene.analysis.Token; // import the required package/class
@Override
public boolean incrementToken() throws IOException {
  if (index >= tokens.length)
    return false;
  else {
    clearAttributes();
    Token token = tokens[index++];
    termAtt.setEmpty().append(token);
    offsetAtt.setOffset(token.startOffset(), token.endOffset());
    posIncAtt.setPositionIncrement(token.getPositionIncrement());
    flagsAtt.setFlags(token.getFlags());
    typeAtt.setType(token.type());
    payloadAtt.setPayload(token.getPayload());
    return true;
  }
}
 
Developer: europeana, Project: search, Lines: 17, Source: TestSlowSynonymFilter.java

Example 11: getSuggestions

import org.apache.lucene.analysis.Token; // import the required package/class
@Override
public SpellingResult getSuggestions(SpellingOptions options) throws IOException {

  SpellingResult result = new SpellingResult();
  //just spit back out the results

  // sort the keys to make ordering predictable
  Iterator<String> iterator = options.customParams.getParameterNamesIterator();
  List<String> lst = new ArrayList<>();
  while (iterator.hasNext()) {
    lst.add(iterator.next());
  }
  Collections.sort(lst);

  int i = 0;
  for (String name : lst) {
    String value = options.customParams.get(name);
    result.add(new Token(name, i, i+1),  Collections.singletonList(value));
    i += 2;
  }    
  return result;
}
 
Developer: europeana, Project: search, Lines: 23, Source: DummyCustomParamSpellChecker.java

Example 12: shingleFilterTestCommon

import org.apache.lucene.analysis.Token; // import the required package/class
protected void shingleFilterTestCommon(ShingleFilter filter,
                                       Token[] tokensToCompare,
                                       int[] positionIncrements,
                                       String[] types)
  throws IOException {
  String text[] = new String[tokensToCompare.length];
  int startOffsets[] = new int[tokensToCompare.length];
  int endOffsets[] = new int[tokensToCompare.length];
  
  for (int i = 0; i < tokensToCompare.length; i++) {
    text[i] = new String(tokensToCompare[i].buffer(),0, tokensToCompare[i].length());
    startOffsets[i] = tokensToCompare[i].startOffset();
    endOffsets[i] = tokensToCompare[i].endOffset();
  }
  
  assertTokenStreamContents(filter, text, startOffsets, endOffsets, types, positionIncrements);
}
 
Developer: europeana, Project: search, Lines: 18, Source: ShingleFilterTest.java

Example 13: testUnicode

import org.apache.lucene.analysis.Token; // import the required package/class
@Test
public void testUnicode() {
  SpellingQueryConverter converter = new SpellingQueryConverter();
  converter.init(new NamedList());
  converter.setAnalyzer(new WhitespaceAnalyzer());
  
  // chinese text value
  Collection<Token> tokens = converter.convert("text_field:我购买了道具和服装。");
  assertTrue("tokens is null and it shouldn't be", tokens != null);
  assertEquals("tokens Size: " + tokens.size() + " is not 1", 1, tokens.size());

  tokens = converter.convert("text_购field:我购买了道具和服装。");
  assertTrue("tokens is null and it shouldn't be", tokens != null);
  assertEquals("tokens Size: " + tokens.size() + " is not 1", 1, tokens.size());

  tokens = converter.convert("text_field:我购xyz买了道具和服装。");
  assertTrue("tokens is null and it shouldn't be", tokens != null);
  assertEquals("tokens Size: " + tokens.size() + " is not 1", 1, tokens.size());
}
 
Developer: europeana, Project: search, Lines: 20, Source: SpellingQueryConverterTest.java

Example 14: testLegalbutVeryLargePositions

import org.apache.lucene.analysis.Token; // import the required package/class
public void testLegalbutVeryLargePositions() throws Exception {
  Directory dir = newDirectory();
  IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));
  Document doc = new Document();
  Token t1 = new Token("foo", 0, 3);
  t1.setPositionIncrement(Integer.MAX_VALUE-500);
  if (random().nextBoolean()) {
    t1.setPayload(new BytesRef(new byte[] { 0x1 } ));
  }
  TokenStream overflowingTokenStream = new CannedTokenStream(
      new Token[] { t1 }
  );
  Field field = new TextField("foo", overflowingTokenStream);
  doc.add(field);
  iw.addDocument(doc);
  iw.close();
  dir.close();
}
 
Developer: europeana, Project: search, Lines: 19, Source: TestIndexWriterExceptions.java

Example 15: testLegalbutVeryLargeOffsets

import org.apache.lucene.analysis.Token; // import the required package/class
public void testLegalbutVeryLargeOffsets() throws Exception {
  Directory dir = newDirectory();
  IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));
  Document doc = new Document();
  Token t1 = new Token("foo", 0, Integer.MAX_VALUE-500);
  if (random().nextBoolean()) {
    t1.setPayload(new BytesRef("test"));
  }
  Token t2 = new Token("foo", Integer.MAX_VALUE-500, Integer.MAX_VALUE);
  TokenStream tokenStream = new CannedTokenStream(
      new Token[] { t1, t2 }
  );
  FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
  ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
  // store some term vectors for the checkindex cross-check
  ft.setStoreTermVectors(true);
  ft.setStoreTermVectorPositions(true);
  ft.setStoreTermVectorOffsets(true);
  Field field = new Field("foo", tokenStream, ft);
  doc.add(field);
  iw.addDocument(doc);
  iw.close();
  dir.close();
}
 
Developer: europeana, Project: search, Lines: 25, Source: TestPostingsOffsets.java


Note: the org.apache.lucene.analysis.Token examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other open-source code and documentation platforms. The code snippets are drawn from open-source projects contributed by their authors, and copyright remains with those authors; consult each project's License before distributing or using the code. Do not reproduce without permission.