This article collects typical usage examples of the Java method org.apache.lucene.analysis.tokenattributes.CharTermAttribute.buffer. If you are wondering what CharTermAttribute.buffer does and how to use it, the curated code samples below may help; for more background, see the containing class org.apache.lucene.analysis.tokenattributes.CharTermAttribute.
The sections below show 5 code examples of the CharTermAttribute.buffer method, ordered roughly by popularity.
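Before the examples, a minimal sketch of the standard read pattern may help: buffer() exposes the attribute's internal char[], of which only the first length() characters belong to the current term, and the array may be swapped out when the attribute grows, so it should be re-read after every incrementToken(). The analyzer, field name, and sample text below are arbitrary choices for illustration.

import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class BufferUsageSketch {
    public static void main(String[] args) throws IOException {
        try (Analyzer analyzer = new StandardAnalyzer();
             TokenStream ts = analyzer.tokenStream("body", "Lucene stores terms in reusable buffers")) {
            CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
            ts.reset();
            while (ts.incrementToken()) {
                // Only the first length() chars of buffer() are valid for this term.
                String term = new String(termAtt.buffer(), 0, termAtt.length());
                System.out.println(term);
            }
            ts.end();
        }
    }
}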
Example 1: stemHinglish
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; // import the package/class the method depends on
public static void stemHinglish(CharTermAttribute termAtt)
{
    String strInput = termAtt.toString();
    // Apply each (match pattern, search pattern, replacement) rule in turn.
    Iterator itr = lsRegexs.iterator();
    while (itr.hasNext())
    {
        List<Object> lsInputs = (List<Object>) itr.next();
        Matcher matcher = ((Pattern) lsInputs.get(0)).matcher(strInput);
        if (matcher.matches())
        {
            Matcher replMatcher = ((Pattern) lsInputs.get(1)).matcher(strInput);
            strInput = replMatcher.replaceAll((String) lsInputs.get(2));
        }
    }
    // Write the stemmed form back into the attribute's internal buffer.
    char[] buffer = termAtt.buffer();
    if (strInput.length() > buffer.length)
    {
        // buffer() never grows the array; resizeBuffer() does, and returns the new array.
        buffer = termAtt.resizeBuffer(strInput.length());
    }
    for (int iCounter = 0; iCounter < strInput.length(); iCounter++)
    {
        buffer[iCounter] = strInput.charAt(iCounter);
    }
    termAtt.setLength(strInput.length());
}
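The enclosing class of stemHinglish() is not shown here. A minimal sketch of how such a stemmer is typically wired into a custom Lucene TokenFilter follows; the filter class name and the HinglishStemmer owner of the static method are hypothetical, everything else is standard Lucene API.

import java.io.IOException;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

// Hypothetical filter that applies the stemHinglish() rules to every token.
public final class HinglishStemFilter extends TokenFilter {
    private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);

    public HinglishStemFilter(TokenStream in) {
        super(in);
    }

    @Override
    public boolean incrementToken() throws IOException {
        if (!input.incrementToken()) {
            return false;
        }
        HinglishStemmer.stemHinglish(termAtt); // assumed owner of the static method above
        return true;
    }
}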
Example 2: walkTokens
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; // import the package/class the method depends on
private String[] walkTokens() throws IOException {
    List<String> wordList = new ArrayList<>();
    // Attribute instances are bound to the stream, so they can be fetched once up front.
    CharTermAttribute textAtt = input.getAttribute(CharTermAttribute.class);
    while (input.incrementToken()) {
        // Copy the current term out of the shared buffer. Only the first length()
        // characters are valid; offsets refer to the original text and may not
        // match the term length, so they are not used to size the copy.
        char[] buffer = textAtt.buffer();
        String word = new String(buffer, 0, textAtt.length());
        wordList.add(word);
        // Snapshot every attribute of this token (term, offsets, type, ...) for later replay.
        AttributeSource attrs = input.cloneAttributes();
        tokenAttrs.add(attrs);
    }
    return wordList.toArray(new String[0]);
}
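walkTokens() depends on two fields of its enclosing class that the snippet does not show: the wrapped TokenStream (input) and a list collecting per-token attribute snapshots (tokenAttrs). A plausible minimal shape for that context is sketched below; the class name is invented, and per the TokenStream contract the stream must be reset() before walkTokens() is called and end()/close()d afterwards.

import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.util.AttributeSource;

// Hypothetical surrounding state assumed by walkTokens(): the upstream stream
// and the per-token attribute snapshots collected for later processing.
public class TokenWalker {
    private final TokenStream input;
    private final List<AttributeSource> tokenAttrs = new ArrayList<>();

    public TokenWalker(TokenStream input) {
        this.input = input;
    }
}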
Example 3: handleTokenStream
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; // import the package/class the method depends on
private void handleTokenStream(Map<Integer, List<Token>> tokenPosMap, TokenStream tokenStream) throws IOException {
    tokenStream.reset();
    int pos = 0;
    CharTermAttribute charTermAttribute = getCharTermAttribute(tokenStream);
    OffsetAttribute offsetAttribute = getOffsetAttribute(tokenStream);
    TypeAttribute typeAttribute = getTypeAttribute(tokenStream);
    PositionIncrementAttribute positionIncrementAttribute = getPositionIncrementAttribute(tokenStream);
    // Term and offset attributes are required; without them no tokens can be built.
    if (null == charTermAttribute || null == offsetAttribute) {
        tokenStream.close();
        return;
    }
    while (tokenStream.incrementToken()) {
        // Build a Token from the shared term buffer plus the original-text offsets.
        Token token = new Token(charTermAttribute.buffer(), 0, charTermAttribute.length(),
                offsetAttribute.startOffset(), offsetAttribute.endOffset());
        if (null != typeAttribute) {
            token.setType(typeAttribute.type());
        }
        // Advance the position; filters may emit increments other than 1.
        pos += null != positionIncrementAttribute ? positionIncrementAttribute.getPositionIncrement() : 1;
        if (!tokenPosMap.containsKey(pos)) {
            tokenPosMap.put(pos, new LinkedList<Token>());
        }
        tokenPosMap.get(pos).add(token);
    }
    tokenStream.close();
}
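A sketch of a possible call site follows, assuming Token is the project's own token class and that an Analyzer instance is available in the surrounding code; everything here that is not part of the Lucene API is hypothetical.

// Hypothetical call site: analyze one piece of text and group its tokens by position.
Map<Integer, List<Token>> tokenPosMap = new HashMap<>();
TokenStream tokenStream = analyzer.tokenStream("content", "text to analyze");
// handleTokenStream() resets, consumes, and closes the stream itself.
handleTokenStream(tokenPosMap, tokenStream);
// tokenPosMap now maps token positions to the Token objects found at each position.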
Example 4: setText
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; // import the package/class the method depends on
void setText(final CharTermAttribute token) {
    this.token = token;
    // Cache the attribute's backing array and its valid length; both must be
    // refreshed whenever the stream advances, since the buffer may be resized.
    this.buffer = token.buffer();
    this.length = token.length();
}
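setText() assumes three fields (token, buffer, length) on its enclosing class. A minimal sketch of that shape, with the class name invented for illustration, shows why the backing array and its valid length are cached as a pair:

import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

// Hypothetical holder that exposes a token's text as (char[], length) to APIs
// such as java.text.BreakIterator that operate on raw character arrays.
final class TermTextHolder {
    private CharTermAttribute token;
    private char[] buffer;
    private int length;

    void setText(final CharTermAttribute token) {
        this.token = token;
        this.buffer = token.buffer();
        this.length = token.length();
    }

    String currentText() {
        // Only the first `length` characters of `buffer` are valid term text.
        return new String(buffer, 0, length);
    }
}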
Example 5: toFormattedString
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; // import the package/class the method depends on
@Override
public String toFormattedString(Field f) throws IOException {
  Map<String,Object> map = new LinkedHashMap<>();
  map.put(VERSION_KEY, VERSION);
  if (f.fieldType().stored()) {
    // Preserve the stored value (string and/or binary) alongside the analyzed tokens.
    String stringValue = f.stringValue();
    if (stringValue != null) {
      map.put(STRING_KEY, stringValue);
    }
    BytesRef binaryValue = f.binaryValue();
    if (binaryValue != null) {
      map.put(BINARY_KEY, Base64.byteArrayToBase64(binaryValue.bytes, binaryValue.offset, binaryValue.length));
    }
  }
  TokenStream ts = f.tokenStreamValue();
  if (ts != null) {
    List<Map<String,Object>> tokens = new LinkedList<>();
    while (ts.incrementToken()) {
      Iterator<Class<? extends Attribute>> it = ts.getAttributeClassesIterator();
      String cTerm = null;
      String tTerm = null;
      Map<String,Object> tok = new TreeMap<>();
      while (it.hasNext()) {
        Class<? extends Attribute> cl = it.next();
        Attribute att = ts.getAttribute(cl);
        if (att == null) {
          continue;
        }
        if (cl.isAssignableFrom(CharTermAttribute.class)) {
          // Copy the term text out of the attribute's shared char[] buffer.
          CharTermAttribute catt = (CharTermAttribute)att;
          cTerm = new String(catt.buffer(), 0, catt.length());
        } else if (cl.isAssignableFrom(TermToBytesRefAttribute.class)) {
          TermToBytesRefAttribute tatt = (TermToBytesRefAttribute)att;
          tTerm = tatt.getBytesRef().utf8ToString();
        } else {
          if (cl.isAssignableFrom(FlagsAttribute.class)) {
            tok.put(FLAGS_KEY, Integer.toHexString(((FlagsAttribute)att).getFlags()));
          } else if (cl.isAssignableFrom(OffsetAttribute.class)) {
            tok.put(OFFSET_START_KEY, ((OffsetAttribute)att).startOffset());
            tok.put(OFFSET_END_KEY, ((OffsetAttribute)att).endOffset());
          } else if (cl.isAssignableFrom(PayloadAttribute.class)) {
            BytesRef p = ((PayloadAttribute)att).getPayload();
            if (p != null && p.length > 0) {
              tok.put(PAYLOAD_KEY, Base64.byteArrayToBase64(p.bytes, p.offset, p.length));
            }
          } else if (cl.isAssignableFrom(PositionIncrementAttribute.class)) {
            tok.put(POSINCR_KEY, ((PositionIncrementAttribute)att).getPositionIncrement());
          } else if (cl.isAssignableFrom(TypeAttribute.class)) {
            tok.put(TYPE_KEY, ((TypeAttribute)att).type());
          } else {
            tok.put(cl.getName(), att.toString());
          }
        }
      }
      // Prefer the char-based term; fall back to the bytes-based term.
      String term = (cTerm != null) ? cTerm : tTerm;
      if (term != null && term.length() > 0) {
        tok.put(TOKEN_KEY, term);
      }
      tokens.add(tok);
    }
    map.put(TOKENS_KEY, tokens);
  }
  return JSONUtil.toJSON(map, -1);
}
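toFormattedString() only emits a token list when the Field actually carries a token stream, and it consumes that stream without calling reset() itself. A minimal, hedged sketch of preparing such a field and invoking the method follows; the analyzer, field name, and text are arbitrary, and the final call assumes it runs inside the parser class shown above.

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;

Analyzer analyzer = new StandardAnalyzer();
TokenStream ts = analyzer.tokenStream("content", "pre-analyzed text");
ts.reset(); // required before incrementToken(); the method above does not do it
// Field(String, TokenStream, FieldType) keeps the stream, so f.tokenStreamValue() returns it.
Field f = new Field("content", ts, TextField.TYPE_NOT_STORED);
String json = toFormattedString(f); // invoked on the surrounding parser instance
ts.end();
ts.close();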