This article collects typical usage examples of the Java method org.apache.lucene.analysis.tokenattributes.CharTermAttribute.length. If you are wondering what CharTermAttribute.length does, how to call it, or what real-world usage looks like, the curated method examples below should help. You can also read more about the containing class, org.apache.lucene.analysis.tokenattributes.CharTermAttribute.
Nine code examples of CharTermAttribute.length are shown below, ordered by popularity.
Example 1: splitByTokenizer
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; // import the package/class the method depends on
private static List<String> splitByTokenizer(String source, TokenizerFactory tokFactory) throws IOException {
  StringReader reader = new StringReader(source);
  TokenStream ts = loadTokenizer(tokFactory, reader);
  List<String> tokList = new ArrayList<>();
  try {
    CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
    ts.reset();
    while (ts.incrementToken()) {
      if (termAtt.length() > 0) { // skip zero-length tokens some filters can emit
        tokList.add(termAtt.toString());
      }
    }
    ts.end();
  } finally {
    ts.close(); // release the stream and its underlying reader
    reader.close();
  }
  return tokList;
}
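The pattern above checks termAtt.length() before materializing the token, which drops zero-length terms. Here is a minimal, self-contained sketch of the same pattern (not from the original project; it assumes Lucene 5.x or later, where Tokenizer has a no-argument constructor plus setReader()):

import java.io.StringReader;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class LengthFilterDemo {
  public static void main(String[] args) throws Exception {
    try (Tokenizer tok = new WhitespaceTokenizer()) {
      tok.setReader(new StringReader("quick  brown fox"));
      CharTermAttribute termAtt = tok.addAttribute(CharTermAttribute.class);
      tok.reset();
      while (tok.incrementToken()) {
        if (termAtt.length() > 0) { // length() = number of valid chars in the term buffer
          System.out.println(termAtt.toString());
        }
      }
      tok.end();
    }
  }
}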
Example 2: analyze
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; // import the package/class the method depends on
private Set<String> analyze(String text) throws IOException {
  Set<String> result = new HashSet<String>();
  Analyzer analyzer = configuration.getAnalyzer();
  try (TokenStream ts = analyzer.tokenStream("", text)) {
    CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
    PositionIncrementAttribute posIncAtt = ts.addAttribute(PositionIncrementAttribute.class);
    ts.reset();
    while (ts.incrementToken()) {
      int length = termAtt.length();
      if (length == 0) {
        throw new IllegalArgumentException("term: " + text + " analyzed to a zero-length token");
      }
      if (posIncAtt.getPositionIncrement() != 1) {
        throw new IllegalArgumentException("term: " + text + " analyzed to a token with posinc != 1");
      }
      result.add(new String(termAtt.buffer(), 0, termAtt.length()));
    }
    ts.end();
    return result;
  }
}
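The posinc != 1 check rejects analysis chains that leave position gaps (e.g. stopword removal, where the next token arrives with increment 2) or stack tokens at one position (increment 0, as injected synonyms do). A minimal sketch of the gap case, assuming Lucene 5.x+ where StopFilter lives in org.apache.lucene.analysis.core:

import java.io.StringReader;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;

public class PosIncDemo {
  public static void main(String[] args) throws Exception {
    Tokenizer src = new WhitespaceTokenizer();
    src.setReader(new StringReader("over the lazy dog"));
    try (TokenStream ts = new StopFilter(src, StopFilter.makeStopSet("the"))) {
      CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
      PositionIncrementAttribute posInc = ts.addAttribute(PositionIncrementAttribute.class);
      ts.reset();
      while (ts.incrementToken()) {
        // "lazy" arrives with increment 2 because "the" was removed,
        // so the analyze() above would throw for this input
        System.out.println(term + " posInc=" + posInc.getPositionIncrement());
      }
      ts.end();
    }
  }
}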
Example 3: tokenize
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; // import the package/class the method depends on
private ArrayList<char[]> tokenize(String input) throws IOException {
  Log.debug("tokenize '" + input + "'");
  ArrayList<char[]> tokens = new ArrayList<char[]>();
  Tokenizer tk = getTokenizerImpl(input);
  try {
    CharTermAttribute term = tk.addAttribute(CharTermAttribute.class);
    tk.reset();
    while (tk.incrementToken()) {
      // snapshot the term: the attribute reuses its buffer for the next token (see the demo below)
      int bufLen = term.length();
      char[] copy = new char[bufLen];
      System.arraycopy(term.buffer(), 0, copy, 0, bufLen);
      tokens.add(copy);
    }
    tk.end();
  } finally {
    tk.close(); // release the tokenizer and its reader
  }
  return tokens;
}
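Copying the term out of the attribute matters because CharTermAttribute reuses one internal char[] across incrementToken() calls. A small illustrative demo (not from the original project; assumes Lucene 5.x+ and that the buffer is not reallocated between the two tokens):

import java.io.StringReader;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class CopyBufferDemo {
  public static void main(String[] args) throws Exception {
    try (Tokenizer tk = new WhitespaceTokenizer()) {
      tk.setReader(new StringReader("one two"));
      CharTermAttribute term = tk.addAttribute(CharTermAttribute.class);
      tk.reset();
      tk.incrementToken();
      char[] alias = term.buffer(); // points at the attribute's reusable array
      tk.incrementToken();          // "two" overwrites the same array
      // alias now holds "two", which is why tokenize() copies each term
      System.out.println(new String(alias, 0, term.length())); // prints "two"
      tk.end();
    }
  }
}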
Example 4: analyze
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; // import the package/class the method depends on
private CharsRef analyze(Analyzer analyzer, String text) throws IOException {
  CharsRefBuilder charsRefBuilder = new CharsRefBuilder();
  try (TokenStream ts = analyzer.tokenStream("", text)) {
    CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
    PositionIncrementAttribute posIncAtt = ts.addAttribute(PositionIncrementAttribute.class);
    ts.reset();
    while (ts.incrementToken()) {
      int length = termAtt.length();
      if (length == 0) {
        throw new IllegalArgumentException("term: " + text + " analyzed to a zero-length token");
      }
      charsRefBuilder.grow(charsRefBuilder.length() + length + 1); /* current + word + separator */
      if (charsRefBuilder.length() > 0) {
        charsRefBuilder.append(CcWordSet.WORD_SEPARATOR);
      }
      charsRefBuilder.append(termAtt);
    }
    ts.end();
  }
  if (charsRefBuilder.length() == 0) {
    return null;
  }
  charsRefBuilder.append(CcWordSet.WORD_END);
  return charsRefBuilder.get();
}
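CharsRefBuilder implements Appendable, which is why it accepts both the CharTermAttribute (a CharSequence) and the separator directly. Here is a standalone sketch of the same concatenation pattern; CcWordSet.WORD_SEPARATOR and WORD_END are project-specific constants, so printable stand-ins are used:

import org.apache.lucene.util.CharsRefBuilder;

public class ConcatDemo {
  public static void main(String[] args) {
    CharsRefBuilder b = new CharsRefBuilder();
    for (String word : new String[] {"big", "apple"}) {
      b.grow(b.length() + word.length() + 1); // reserve current + word + separator
      if (b.length() > 0) {
        b.append('_'); // stand-in for CcWordSet.WORD_SEPARATOR
      }
      b.append(word);
    }
    b.append('#'); // stand-in for CcWordSet.WORD_END
    System.out.println(b.get()); // big_apple#
  }
}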
Example 5: handleTokenStream
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; // import the package/class the method depends on
private void handleTokenStream(Map<Integer, List<Token>> tokenPosMap, TokenStream tokenStream) throws IOException {
  tokenStream.reset();
  int pos = 0;
  CharTermAttribute charTermAttribute = getCharTermAttribute(tokenStream);
  OffsetAttribute offsetAttribute = getOffsetAttribute(tokenStream);
  TypeAttribute typeAttribute = getTypeAttribute(tokenStream);
  PositionIncrementAttribute positionIncrementAttribute = getPositionIncrementAttribute(tokenStream);
  while (tokenStream.incrementToken()) {
    if (null == charTermAttribute || null == offsetAttribute) {
      return;
    }
    Token token = new Token(charTermAttribute.buffer(), 0, charTermAttribute.length(),
        offsetAttribute.startOffset(), offsetAttribute.endOffset());
    if (null != typeAttribute) {
      token.setType(typeAttribute.type());
    }
    pos += null != positionIncrementAttribute ? positionIncrementAttribute.getPositionIncrement() : 1;
    if (!tokenPosMap.containsKey(pos)) {
      tokenPosMap.put(pos, new LinkedList<Token>());
    }
    tokenPosMap.get(pos).add(token);
  }
  tokenStream.close();
}
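The running pos counter buckets tokens by position: a token with position increment 0 (for example an injected synonym) lands in the same list as the token it stacks on. A stripped-down sketch of the same bucketing with plain Strings instead of the project's Token type (assumes Lucene 5.x+):

import java.io.StringReader;
import java.util.*;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;

public class PositionBucketDemo {
  public static void main(String[] args) throws Exception {
    Map<Integer, List<String>> byPos = new HashMap<>();
    try (Tokenizer tok = new WhitespaceTokenizer()) {
      tok.setReader(new StringReader("hello world"));
      CharTermAttribute term = tok.addAttribute(CharTermAttribute.class);
      PositionIncrementAttribute posInc = tok.addAttribute(PositionIncrementAttribute.class);
      tok.reset();
      int pos = 0;
      while (tok.incrementToken()) {
        pos += posInc.getPositionIncrement(); // 0 would stack this token on the previous position
        byPos.computeIfAbsent(pos, p -> new ArrayList<>()).add(term.toString());
      }
      tok.end();
    }
    System.out.println(byPos); // {1=[hello], 2=[world]}
  }
}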
Example 6: analyze
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; // import the package/class the method depends on
/** Sugar: analyzes the text with the analyzer and
 *  separates by {@link SynonymMap#WORD_SEPARATOR}.
 *  reuse and its chars must not be null. */
public CharsRef analyze(String text, CharsRefBuilder reuse) throws IOException {
  try (TokenStream ts = analyzer.tokenStream("", text)) {
    CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
    PositionIncrementAttribute posIncAtt = ts.addAttribute(PositionIncrementAttribute.class);
    ts.reset();
    reuse.clear();
    while (ts.incrementToken()) {
      int length = termAtt.length();
      if (length == 0) {
        throw new IllegalArgumentException("term: " + text + " analyzed to a zero-length token");
      }
      if (posIncAtt.getPositionIncrement() != 1) {
        throw new IllegalArgumentException("term: " + text + " analyzed to a token with posinc != 1");
      }
      reuse.grow(reuse.length() + length + 1); /* current + word + separator */
      int end = reuse.length();
      if (reuse.length() > 0) {
        reuse.setCharAt(end++, SynonymMap.WORD_SEPARATOR);
        reuse.setLength(reuse.length() + 1);
      }
      System.arraycopy(termAtt.buffer(), 0, reuse.chars(), end, length);
      reuse.setLength(reuse.length() + length);
    }
    ts.end();
  }
  if (reuse.length() == 0) {
    throw new IllegalArgumentException("term: " + text + " was completely eliminated by analyzer");
  }
  return reuse.get();
}
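This is the analyze() helper from Lucene's SynonymMap.Parser; synonym parsers feed its output straight into a SynonymMap.Builder. A hedged usage sketch from inside such a parser subclass follows; the literal inputs are made up, and each call gets its own CharsRefBuilder because the returned CharsRef points into the builder's buffer:

// inside a SynonymMap.Parser subclass (sketch, not from the original source)
SynonymMap.Builder builder = new SynonymMap.Builder(true); // dedup = true
CharsRef input = analyze("wi fi", new CharsRefBuilder());  // "wi" + WORD_SEPARATOR + "fi"
CharsRef output = analyze("wifi", new CharsRefBuilder());
builder.add(input, output, true);                          // true = keep the original form too
SynonymMap map = builder.build();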
Example 7: map
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; // import the package/class the method depends on
@Override
protected void map(Text key, Text value, Context context) throws IOException, InterruptedException {
  TokenStream stream = analyzer.reusableTokenStream(key.toString(), new StringReader(value.toString()));
  CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
  StringTuple document = new StringTuple();
  stream.reset();
  while (stream.incrementToken()) {
    if (termAtt.length() > 0) {
      document.add(new String(termAtt.buffer(), 0, termAtt.length()));
    }
  }
  context.write(key, document);
}
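reusableTokenStream() is the pre-Lucene-4 API; it was removed in 4.0 in favor of tokenStream(). Against Lucene 4.x or later, the loop body stays the same but the stream is obtained and released like this (a sketch reusing the mapper's analyzer, key, value, and document variables):

try (TokenStream stream = analyzer.tokenStream(key.toString(), value.toString())) {
  CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
  stream.reset();
  while (stream.incrementToken()) {
    if (termAtt.length() > 0) {
      document.add(new String(termAtt.buffer(), 0, termAtt.length()));
    }
  }
  stream.end(); // finalize offsets before the stream is closed
}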
Example 8: setText
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; // import the package/class the method depends on
void setText(final CharTermAttribute token) {
  this.token = token;
  this.buffer = token.buffer();
  this.length = token.length();
}
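Capturing buffer() and length() as a pair is essential: the array returned by buffer() is the attribute's internal scratch space and is usually longer than the term itself. A tiny standalone demo, using CharTermAttributeImpl (Lucene's concrete implementation):

import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttributeImpl;

public class BufferVsLengthDemo {
  public static void main(String[] args) {
    CharTermAttribute token = new CharTermAttributeImpl();
    token.append("fox");
    System.out.println(token.buffer().length >= token.length()); // true: the buffer over-allocates
    System.out.println(new String(token.buffer(), 0, token.length())); // "fox", bounded by length()
  }
}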
Example 9: toFormattedString
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; // import the package/class the method depends on
@Override
public String toFormattedString(Field f) throws IOException {
  Map<String,Object> map = new LinkedHashMap<>();
  map.put(VERSION_KEY, VERSION);
  if (f.fieldType().stored()) {
    String stringValue = f.stringValue();
    if (stringValue != null) {
      map.put(STRING_KEY, stringValue);
    }
    BytesRef binaryValue = f.binaryValue();
    if (binaryValue != null) {
      map.put(BINARY_KEY, Base64.byteArrayToBase64(binaryValue.bytes, binaryValue.offset, binaryValue.length));
    }
  }
  TokenStream ts = f.tokenStreamValue();
  if (ts != null) {
    List<Map<String,Object>> tokens = new LinkedList<>();
    while (ts.incrementToken()) {
      Iterator<Class<? extends Attribute>> it = ts.getAttributeClassesIterator();
      String cTerm = null;
      String tTerm = null;
      Map<String,Object> tok = new TreeMap<>();
      while (it.hasNext()) {
        Class<? extends Attribute> cl = it.next();
        Attribute att = ts.getAttribute(cl);
        if (att == null) {
          continue;
        }
        if (cl.isAssignableFrom(CharTermAttribute.class)) {
          CharTermAttribute catt = (CharTermAttribute)att;
          cTerm = new String(catt.buffer(), 0, catt.length());
        } else if (cl.isAssignableFrom(TermToBytesRefAttribute.class)) {
          TermToBytesRefAttribute tatt = (TermToBytesRefAttribute)att;
          tTerm = tatt.getBytesRef().utf8ToString();
        } else {
          if (cl.isAssignableFrom(FlagsAttribute.class)) {
            tok.put(FLAGS_KEY, Integer.toHexString(((FlagsAttribute)att).getFlags()));
          } else if (cl.isAssignableFrom(OffsetAttribute.class)) {
            tok.put(OFFSET_START_KEY, ((OffsetAttribute)att).startOffset());
            tok.put(OFFSET_END_KEY, ((OffsetAttribute)att).endOffset());
          } else if (cl.isAssignableFrom(PayloadAttribute.class)) {
            BytesRef p = ((PayloadAttribute)att).getPayload();
            if (p != null && p.length > 0) {
              tok.put(PAYLOAD_KEY, Base64.byteArrayToBase64(p.bytes, p.offset, p.length));
            }
          } else if (cl.isAssignableFrom(PositionIncrementAttribute.class)) {
            tok.put(POSINCR_KEY, ((PositionIncrementAttribute)att).getPositionIncrement());
          } else if (cl.isAssignableFrom(TypeAttribute.class)) {
            tok.put(TYPE_KEY, ((TypeAttribute)att).type());
          } else {
            tok.put(cl.getName(), att.toString());
          }
        }
      }
      String term = null;
      if (cTerm != null) {
        term = cTerm;
      } else {
        term = tTerm;
      }
      if (term != null && term.length() > 0) {
        tok.put(TOKEN_KEY, term);
      }
      tokens.add(tok);
    }
    map.put(TOKENS_KEY, tokens);
  }
  return JSONUtil.toJSON(map, -1);
}