本文整理汇总了Java中org.apache.lucene.util.Attribute类的典型用法代码示例。如果您正苦于以下问题:Java Attribute类的具体用法?Java Attribute怎么用?Java Attribute使用的例子?那么, 这里精选的类代码示例或许可以为您提供帮助。
Attribute类属于org.apache.lucene.util包,在下文中一共展示了Attribute类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: ComboTokenStream
import org.apache.lucene.util.Attribute; //导入依赖的package包/类
/**
 * Builds a combined stream over the given sub-streams and mirrors every
 * attribute they expose onto this source.
 */
public ComboTokenStream(TokenStream... tokenStreams) {
    // Wrap each non-null sub-stream so its position can be tracked.
    this.positionedTokenStreams = new PositionedTokenStream[tokenStreams.length];
    int idx = tokenStreams.length;
    while (--idx >= 0) {
        TokenStream ts = tokenStreams[idx];
        if (ts == null) {
            continue;
        }
        PositionedTokenStream pts = new PositionedTokenStream(ts);
        this.positionedTokenStreams[idx] = pts;
        // Register every attribute class seen on the wrapped sub-stream.
        for (Iterator<Class<? extends Attribute>> it = pts.getAttributeClassesIterator(); it.hasNext(); ) {
            addAttribute(it.next());
        }
    }
    this.lastPosition = 0;
    // The queue starts empty; the first incrementToken() call fills it,
    // because filling requires calling incrementToken() on each sub-stream.
    this.readQueue = new PriorityQueue<PositionedTokenStream>(tokenStreams.length);
    readQueueResetted = false;
}
示例2: delegatingAttributeFactory
import org.apache.lucene.util.Attribute; //导入依赖的package包/类
/** Make this tokenizer get attributes from the delegate token stream. */
private static final AttributeFactory delegatingAttributeFactory(final AttributeSource source) {
    // Every instance request is forwarded to the source, so this tokenizer
    // and the source end up sharing the exact same attribute impls.
    AttributeFactory sharing = new AttributeFactory() {
        @Override
        public AttributeImpl createAttributeInstance(Class<? extends Attribute> attClass) {
            return (AttributeImpl) source.addAttribute(attClass);
        }
    };
    return sharing;
}
示例3: NumericTokenizer
import org.apache.lucene.util.Attribute; //导入依赖的package包/类
protected NumericTokenizer(NumericTokenStream numericTokenStream, char[] buffer, Object extra) throws IOException {
super(delegatingAttributeFactory(numericTokenStream));
this.numericTokenStream = numericTokenStream;
// Add attributes from the numeric token stream, this works fine because the attribute factory delegates to numericTokenStream
for (Iterator<Class<? extends Attribute>> it = numericTokenStream.getAttributeClassesIterator(); it.hasNext();) {
addAttribute(it.next());
}
this.extra = extra;
this.buffer = buffer;
started = true;
}
示例4: extractExtendedAttributes
import org.apache.lucene.util.Attribute; //导入依赖的package包/类
/**
 * Collects every attribute of the stream except the four core ones
 * (term, position increment, offset, type), keyed by attribute name.
 *
 * @param stream current TokenStream
 * @param includeAttributes lower-cased attribute keys to keep; null or empty keeps all
 * @return sorted map of attribute key to value
 */
private static Map<String, Object> extractExtendedAttributes(TokenStream stream, final Set<String> includeAttributes) {
    final Map<String, Object> extendedAttributes = new TreeMap<>();
    stream.reflectWith(new AttributeReflector() {
        @Override
        public void reflect(Class<? extends Attribute> attClass, String key, Object value) {
            // The core attributes are reported elsewhere; skip them here.
            if (CharTermAttribute.class.isAssignableFrom(attClass)
                    || PositionIncrementAttribute.class.isAssignableFrom(attClass)
                    || OffsetAttribute.class.isAssignableFrom(attClass)
                    || TypeAttribute.class.isAssignableFrom(attClass)) {
                return;
            }
            boolean wanted = includeAttributes == null
                    || includeAttributes.isEmpty()
                    || includeAttributes.contains(key.toLowerCase(Locale.ROOT));
            if (wanted) {
                // Store BytesRef values in their string form.
                Object stored = (value instanceof BytesRef) ? ((BytesRef) value).toString() : value;
                extendedAttributes.put(key, stored);
            }
        }
    });
    return extendedAttributes;
}
示例5: incrementToken
import org.apache.lucene.util.Attribute; //导入依赖的package包/类
/**
 * Emits the next buffered token, lazily tokenizing and tagging the whole
 * input on the first call; later calls replay the buffer one token at a time.
 */
@Override
public final boolean incrementToken() throws IOException {
    clearAttributes();
    if (first) {
        // One-time pass: tokenize everything, then tag it.
        String[] words = walkTokens();
        if (words.length == 0) {
            return false;
        }
        createTags(words);
        first = false;
        indexToken = 0;
    }
    if (indexToken >= tokenAttrs.size()) {
        return false;
    }
    AttributeSource as = tokenAttrs.get(indexToken++);
    // Make sure every attribute class of the buffered token is registered
    // here so copyTo() finds a matching target implementation.
    for (Iterator<? extends Class<? extends Attribute>> it = as.getAttributeClassesIterator(); it.hasNext(); ) {
        Class<? extends Attribute> attrClass = it.next();
        if (!hasAttribute(attrClass)) {
            addAttribute(attrClass);
        }
    }
    as.copyTo(this);
    return true;
}
示例6: transform
import org.apache.lucene.util.Attribute; //导入依赖的package包/类
/**
 * Tokenizes the document text in column 0 of the row into lower-cased,
 * length-filtered, stemmed 2-3 word shingles and pairs them with the
 * label from column 1.
 *
 * @param row input row: document string at index 0, double label at index 1
 * @return tuple of (label, multiset of extracted n-grams)
 * @throws IOException if the token stream fails
 */
public Tuple2<Double, Multiset<String>> transform(Row row) throws IOException {
    Double label = row.getDouble(1);
    // "br2n" is a corpus artifact marker; strip it before analysis.
    StringReader document = new StringReader(row.getString(0).replaceAll("br2n", ""));
    List<String> wordsList = new ArrayList<>();
    try (BulgarianAnalyzer analyzer = new BulgarianAnalyzer(BULGARIAN_STOP_WORDS_SET)) {
        TokenStream stream = analyzer.tokenStream("words", document);
        TokenFilter lowerFilter = new LowerCaseFilter(stream);
        TokenFilter numbers = new NumberFilter(lowerFilter);
        TokenFilter length = new LengthFilter(numbers, 3, 1000);
        TokenFilter stemmer = new BulgarianStemFilter(length);
        TokenFilter ngrams = new ShingleFilter(stemmer, 2, 3);
        try (TokenFilter filter = ngrams) {
            Attribute termAtt = filter.addAttribute(CharTermAttribute.class);
            filter.reset();
            while (filter.incrementToken()) {
                // Escape commas and strip newlines so tokens stay CSV-safe.
                String word = termAtt.toString().replace(",", "(comma)").replaceAll("\n|\r", "");
                // Shingle filler tokens contain '_'; drop them.
                if (word.contains("_")) {
                    continue;
                }
                wordsList.add(word);
            }
            // Lucene consumer contract: end() must follow the consume loop
            // before close() so end-of-stream state is applied.
            filter.end();
        }
    }
    Multiset<String> words = ConcurrentHashMultiset.create(wordsList);
    return new Tuple2<>(label, words);
}
示例7: main
import org.apache.lucene.util.Attribute; //导入依赖的package包/类
/**
 * Demo driver: prints NumberUtils digit/number checks for sample strings,
 * then tokenizes a Bulgarian sentence into stop-filtered, length-filtered,
 * stemmed 2-word shingles and prints each one.
 *
 * @param args unused
 * @throws IOException if the token stream fails
 */
public static void main(String[] args) throws IOException {
    System.out.println(NumberUtils.isDigits("12345"));
    System.out.println(NumberUtils.isDigits("12345.1"));
    System.out.println(NumberUtils.isDigits("12345,2"));
    System.out.println(NumberUtils.isNumber("12345"));
    System.out.println(NumberUtils.isNumber("12345.1"));
    System.out.println(NumberUtils.isNumber("12345,2".replace(",", ".")));
    System.out.println(NumberUtils.isNumber("12345,2"));
    // "br2n" is a corpus artifact marker; strip it before analysis.
    StringReader input = new StringReader(
            "Правя тест на класификатор и после др.Дулитъл, пада.br2n ще се оправя с данните! които,са много зле. Но това е по-добре. Но24"
                    .replaceAll("br2n", ""));
    LetterTokenizer tokenizer = new LetterTokenizer();
    tokenizer.setReader(input);
    TokenFilter stopFilter = new StopFilter(tokenizer, BULGARIAN_STOP_WORDS_SET);
    TokenFilter length = new LengthFilter(stopFilter, 3, 1000);
    TokenFilter stemmer = new BulgarianStemFilter(length);
    TokenFilter ngrams = new ShingleFilter(stemmer, 2, 2);
    try (TokenFilter filter = ngrams) {
        Attribute termAtt = filter.addAttribute(CharTermAttribute.class);
        filter.reset();
        while (filter.incrementToken()) {
            String word = termAtt.toString().replaceAll(",", "\\.").replaceAll("\n|\r", "");
            System.out.println(word);
        }
        // Lucene consumer contract: end() must follow the consume loop
        // before close() so end-of-stream state is applied.
        filter.end();
    }
}
示例8: createAttributeInstance
import org.apache.lucene.util.Attribute; //导入依赖的package包/类
/**
 * Supplies the custom term-to-bytes impl, rejects plain char-term
 * attributes, and delegates everything else.
 */
@Override
public AttributeImpl createAttributeInstance(Class<? extends Attribute> attClass) {
    if (attClass == TermToBytesRefAttribute.class) {
        return new MyTermAttributeImpl();
    }
    if (CharTermAttribute.class.isAssignableFrom(attClass)) {
        throw new IllegalArgumentException("no");
    }
    return delegate.createAttributeInstance(attClass);
}
示例9: incrementToken
import org.apache.lucene.util.Attribute; //导入依赖的package包/类
/** Replays the next pre-built token from the iterator, if any. */
@Override
public boolean incrementToken() {
    if (!tokenIterator.hasNext()) {
        return false;
    }
    clearAttributes();
    AttributeSource next = tokenIterator.next();
    // Register every attribute impl carried by the token so copyTo() below
    // has a matching target on this stream.
    for (Iterator<Class<? extends Attribute>> atts = next.getAttributeClassesIterator(); atts.hasNext(); ) {
        addAttribute(atts.next());
    }
    next.copyTo(this);
    return true;
}
示例10: delegatingAttributeFactory
import org.apache.lucene.util.Attribute; //导入依赖的package包/类
/** Make this Tokenizer get attributes from the delegate token stream. */
private static final AttributeFactory delegatingAttributeFactory(final AttributeSource source) {
    // Forward every instance request to the source so both sides share
    // the exact same attribute implementations.
    AttributeFactory shared = new AttributeFactory() {
        @Override
        public AttributeImpl createAttributeInstance(Class<? extends Attribute> attClass) {
            return (AttributeImpl) source.addAttribute(attClass);
        }
    };
    return shared;
}
示例11: createAttributeInstance
import org.apache.lucene.util.Attribute; //导入依赖的package包/类
/**
 * Supplies the collator-backed term impl for compatible requests and
 * delegates all other attribute classes.
 */
@Override
public AttributeImpl createAttributeInstance(
        Class<? extends Attribute> attClass) {
    if (attClass.isAssignableFrom(ICUCollatedTermAttributeImpl.class)) {
        return new ICUCollatedTermAttributeImpl(collator);
    }
    return delegate.createAttributeInstance(attClass);
}
示例12: createAttributeInstance
import org.apache.lucene.util.Attribute; //导入依赖的package包/类
/**
 * Supplies the collator-backed term impl for compatible requests and
 * delegates all other attribute classes.
 */
@Override
public AttributeImpl createAttributeInstance(
        Class<? extends Attribute> attClass) {
    if (attClass.isAssignableFrom(CollatedTermAttributeImpl.class)) {
        return new CollatedTermAttributeImpl(collator);
    }
    return delegate.createAttributeInstance(attClass);
}
示例13: getAttrIfExists
import org.apache.lucene.util.Attribute; //导入依赖的package包/类
/** Returns the attribute of the given class if registered, otherwise null. */
private <A extends Attribute> A getAttrIfExists(Class<A> att) {
    return hasAttribute(att) ? getAttribute(att) : null;
}
示例14: createAttributeInstance
import org.apache.lucene.util.Attribute; //导入依赖的package包/类
/**
 * Supplies the mock UTF-16 term impl for compatible requests and
 * delegates all other attribute classes.
 */
@Override
public AttributeImpl createAttributeInstance(
        Class<? extends Attribute> attClass) {
    if (attClass.isAssignableFrom(MockUTF16TermAttributeImpl.class)) {
        return new MockUTF16TermAttributeImpl();
    }
    return delegate.createAttributeInstance(attClass);
}
示例15: incrementToken
import org.apache.lucene.util.Attribute; //导入依赖的package包/类
/**
 * Emits the next buffered token, adding its part-of-speech tag to the
 * token's payload metadata.  The first call tokenizes and POS-tags the
 * whole input; later calls replay the buffered tokens one at a time.
 *
 * @return true while buffered tokens remain, false once exhausted
 * @throws IOException if reading the underlying input fails
 */
@Override
public boolean incrementToken() throws IOException {
//clearAttributes();
if (first) {
//gather all tokens from doc
String[] words = walkTokens();
if (words.length == 0) {
return false;
}
//tagging
posTags = createTags(words);
first = false;
tokenIdx = 0;
}
if (tokenIdx == tokenAttrs.size()) {
// All buffered tokens consumed: reset state for potential reuse.
resetParams();
return false;
}
AttributeSource as = tokenAttrs.get(tokenIdx);
// Register every attribute class of the buffered token so copyTo() below
// has a matching target implementation on this stream.
Iterator<? extends Class<? extends Attribute>> it = as.getAttributeClassesIterator();
while (it.hasNext()) {
Class<? extends Attribute> attrClass = it.next();
if (!hasAttribute(attrClass)) {
addAttribute(attrClass);
}
}
as.copyTo(this);
// Merge this token's POS tag into any payload already present, preserving
// metadata written by earlier filters in the chain.
MWEMetadata metadata = exitingPayload.getPayload() == null ? new MWEMetadata() :
MWEMetadata.deserialize(exitingPayload.getPayload().utf8ToString());
metadata.addMetaData(MWEMetadataType.POS, posTags[tokenIdx]);
exitingPayload.setPayload(new BytesRef(MWEMetadata.serialize(metadata)));
tokenIdx++;
return true;
}