当前位置: 首页>>代码示例>>Java>>正文


Java PositionLengthAttribute类代码示例

本文整理汇总了Java中org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute的典型用法代码示例。如果您正苦于以下问题:Java PositionLengthAttribute类的具体用法?Java PositionLengthAttribute怎么用?Java PositionLengthAttribute使用的例子?那么,这里精选的类代码示例或许可以为您提供帮助。


PositionLengthAttribute类属于org.apache.lucene.analysis.tokenattributes包,在下文中一共展示了PositionLengthAttribute类的11个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: main

import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute; //导入依赖的package包/类
/**
 * Runs a fixed sample text through a whitespace tokenizer wrapped in a {@code SynonymFilter}
 * and prints, for every emitted token, its term, its absolute position and its position length.
 *
 * @param args ignored
 * @throws Exception if building the synonym map or consuming the token stream fails
 */
@SuppressWarnings("resource") // the tokenizer is only reachable (and released) through the filter below
public static void main(String[] args) throws Exception {
    final Tokenizer tok = new WhitespaceTokenizer();
    tok.setReader(new StringReader("dark sea green sea green"));

    final SynonymMap.Builder builder = new SynonymMap.Builder(true);
    addSynonym("dark sea green", "color", builder);
    addSynonym("green", "color", builder);
    addSynonym("dark sea", "color", builder);
    addSynonym("sea green", "color", builder);
    final SynonymMap synMap = builder.build();

    // try-with-resources closes the stream even when reset(), incrementToken() or end() throws;
    // the previous straight-line close() was skipped on any failure.
    try (TokenStream ts = new SynonymFilter(tok, synMap, true)) {
        final CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
        final PositionIncrementAttribute posIncrAtt = ts.addAttribute(PositionIncrementAttribute.class);
        final PositionLengthAttribute posLengthAtt = ts.addAttribute(PositionLengthAttribute.class);

        ts.reset();
        // Start at -1 so that a first token with increment 1 is reported at position 0.
        int pos = -1;
        while (ts.incrementToken()) {
            pos += posIncrAtt.getPositionIncrement();
            System.out.println("term=" + termAtt + ", pos=" + pos + ", posLen=" + posLengthAtt.getPositionLength());
        }
        ts.end();
    }
}
 
开发者ID:shaie,项目名称:lucenelab,代码行数:27,代码来源:SynonymFilterExample.java

示例2: simpleAnalyze

import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute; //导入依赖的package包/类
/**
 * Analyzes every text of the request with the given analyzer and returns the emitted tokens
 * as one flat list. Positions and offsets keep growing across texts: after each text the
 * running position/offset is advanced by the stream's end-of-stream values plus the
 * analyzer's position-increment gap and offset gap for the field.
 *
 * @param request  request supplying the texts to analyze
 * @param analyzer analyzer used to create a token stream per text
 * @param field    field name passed to the analyzer
 * @return the collected tokens, in emission order
 * @throws ElasticsearchException wrapping any {@link IOException} raised while analyzing
 */
private static List<AnalyzeResponse.AnalyzeToken> simpleAnalyze(AnalyzeRequest request, Analyzer analyzer, String field) {
    final List<AnalyzeResponse.AnalyzeToken> collected = new ArrayList<>();
    int position = -1;
    int offsetBase = 0;
    for (String text : request.text()) {
        try (TokenStream ts = analyzer.tokenStream(field, text)) {
            ts.reset();
            final CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
            final PositionIncrementAttribute posIncAtt = ts.addAttribute(PositionIncrementAttribute.class);
            final OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class);
            final TypeAttribute typeAtt = ts.addAttribute(TypeAttribute.class);
            final PositionLengthAttribute posLenAtt = ts.addAttribute(PositionLengthAttribute.class);

            while (ts.incrementToken()) {
                // Only a positive increment moves the running position.
                final int step = posIncAtt.getPositionIncrement();
                if (step > 0) {
                    position += step;
                }
                final String termText = termAtt.toString();
                final int start = offsetBase + offsetAtt.startOffset();
                final int end = offsetBase + offsetAtt.endOffset();
                collected.add(new AnalyzeResponse.AnalyzeToken(termText, position, start, end,
                    posLenAtt.getPositionLength(), typeAtt.type(), null));
            }
            ts.end();

            // Fold in whatever the attributes report after end(), then the per-field gaps.
            offsetBase += offsetAtt.endOffset();
            position += posIncAtt.getPositionIncrement();

            position += analyzer.getPositionIncrementGap(field);
            offsetBase += analyzer.getOffsetGap(field);
        } catch (IOException ioe) {
            throw new ElasticsearchException("failed to analyze", ioe);
        }
    }
    return collected;
}
 
开发者ID:justor,项目名称:elasticsearch_my,代码行数:35,代码来源:TransportAnalyzeAction.java

示例3: analyze

import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute; //导入依赖的package包/类
/**
 * Consumes the given stream and appends one {@code AnalyzeToken} per emitted token to
 * {@code tokens}, advancing the running {@code lastPosition} and {@code lastOffset}.
 * After the stream ends, both counters are moved forward by the stream's end-of-stream
 * values plus the analyzer's position-increment gap and offset gap for the field.
 * The stream is always closed before returning.
 *
 * @param stream            token stream to consume
 * @param analyzer          analyzer queried for the per-field gaps
 * @param field             field name passed to the gap lookups
 * @param includeAttributes attribute selection forwarded to {@code extractExtendedAttributes}
 * @throws ElasticsearchException wrapping any {@link IOException} raised while analyzing
 */
private void analyze(TokenStream stream, Analyzer analyzer, String field, Set<String> includeAttributes) {
    try {
        stream.reset();
        final CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
        final PositionIncrementAttribute posIncAtt = stream.addAttribute(PositionIncrementAttribute.class);
        final OffsetAttribute offsetAtt = stream.addAttribute(OffsetAttribute.class);
        final TypeAttribute typeAtt = stream.addAttribute(TypeAttribute.class);
        final PositionLengthAttribute posLenAtt = stream.addAttribute(PositionLengthAttribute.class);

        while (stream.incrementToken()) {
            // Only a positive increment moves the running position.
            final int step = posIncAtt.getPositionIncrement();
            if (step > 0) {
                lastPosition += step;
            }
            final String termText = termAtt.toString();
            final int start = lastOffset + offsetAtt.startOffset();
            final int end = lastOffset + offsetAtt.endOffset();
            tokens.add(new AnalyzeResponse.AnalyzeToken(termText, lastPosition, start, end,
                posLenAtt.getPositionLength(), typeAtt.type(), extractExtendedAttributes(stream, includeAttributes)));
        }
        stream.end();

        // Fold in whatever the attributes report after end(), then the per-field gaps.
        lastOffset += offsetAtt.endOffset();
        lastPosition += posIncAtt.getPositionIncrement();

        lastPosition += analyzer.getPositionIncrementGap(field);
        lastOffset += analyzer.getOffsetGap(field);
    } catch (IOException ioe) {
        throw new ElasticsearchException("failed to analyze", ioe);
    } finally {
        IOUtils.closeWhileHandlingException(stream);
    }
}
 
开发者ID:justor,项目名称:elasticsearch_my,代码行数:32,代码来源:TransportAnalyzeAction.java

示例4: Lucene43NGramTokenFilter

import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute; //导入依赖的package包/类
/**
 * Builds a Lucene43NGramTokenFilter for the given n-gram size bounds.
 * The input is first wrapped in a {@code CodepointCountFilter} using {@code minGram}
 * as its lower bound and {@link Integer#MAX_VALUE} as its upper bound.
 *
 * @param input {@link org.apache.lucene.analysis.TokenStream} holding the input to be tokenized
 * @param minGram the smallest n-gram to generate; must be at least 1
 * @param maxGram the largest n-gram to generate; must not be smaller than {@code minGram}
 * @throws IllegalArgumentException if {@code minGram} is below 1 or exceeds {@code maxGram}
 */
public Lucene43NGramTokenFilter(TokenStream input, int minGram, int maxGram) {
  super(new CodepointCountFilter(input, minGram, Integer.MAX_VALUE));
  this.charUtils = CharacterUtils.getJava4Instance();

  // Argument checks can only run after the mandatory super(...) call.
  if (minGram <= 0) {
    throw new IllegalArgumentException("minGram must be greater than zero");
  }
  if (maxGram < minGram) {
    throw new IllegalArgumentException("minGram must not be greater than maxGram");
  }
  this.minGram = minGram;
  this.maxGram = maxGram;

  // Stand-in attributes: writes are discarded and reads always yield 0.
  posIncAtt = new PositionIncrementAttribute() {
    @Override
    public int getPositionIncrement() {
      return 0;
    }

    @Override
    public void setPositionIncrement(int positionIncrement) {
      // intentionally ignored
    }
  };
  posLenAtt = new PositionLengthAttribute() {
    @Override
    public int getPositionLength() {
      return 0;
    }

    @Override
    public void setPositionLength(int positionLength) {
      // intentionally ignored
    }
  };
}
 
开发者ID:lamsfoundation,项目名称:lams,代码行数:36,代码来源:Lucene43NGramTokenFilter.java

示例5: setAttributes

import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute; //导入依赖的package包/类
/**
 * Obtains every attribute this class works with via {@code addAttribute} and stores the
 * returned instances in the corresponding fields.
 *
 * NOTE(review): the statement order also fixes the order in which the attributes are
 * requested from the attribute source; keep it stable unless that order is known not to matter.
 */
private void setAttributes() {
  // Standard token attributes: term text, position increment/length, offsets and type.
  charTermAtt = addAttribute(CharTermAttribute.class);
  posIncrAtt = addAttribute(PositionIncrementAttribute.class);
  posLenAtt = addAttribute(PositionLengthAttribute.class);
  offsetAtt = addAttribute(OffsetAttribute.class);
  typeAtt = addAttribute(TypeAttribute.class);
  // Project-specific attributes (presumably part-of-speech and semantic-class tags — confirm
  // against the attribute definitions).
  posAtt = addAttribute(PartOfSpeechAttribute.class);
  semanticClassAtt = addAttribute(SemanticClassAttribute.class);
}
 
开发者ID:jaepil,项目名称:mecab-ko-lucene-analyzer,代码行数:10,代码来源:MeCabKoTokenizer.java

示例6: positionLength

import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute; //导入依赖的package包/类
/**
 * Creates a matcher that accepts a token stream whose {@code PositionLengthAttribute}
 * currently reports exactly the expected position length.
 *
 * @param expectedLength the position length the stream must report
 * @return a matcher comparing the stream's position length against {@code expectedLength}
 */
protected Matcher<TokenStream> positionLength(final int expectedLength) {
   return new TypeSafeMatcher<TokenStream>() {
      @Override
      protected boolean matchesSafely(TokenStream stream) {
         final PositionLengthAttribute lengthAtt = stream.addAttribute(PositionLengthAttribute.class);
         final int actualLength = lengthAtt.getPositionLength();
         return actualLength == expectedLength;
      }

      @Override
      public void describeTo(Description description) {
         description.appendText("positionLength=").appendValue(expectedLength);
      }
   };
}
 
开发者ID:shopping24,项目名称:solr-analyzers,代码行数:15,代码来源:AbstractTokenFilterTest.java

示例7: TokenStreamToDot

import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute; //导入依赖的package包/类
/**
 * Captures the stream, the output writer and the optional input text, and looks up the
 * attributes read while rendering.
 *
 * <p>When {@code inputText} is non-null and the TokenStream has offsets, the surface
 * form is included in each arc's label. The offset attribute is only picked up if the
 * stream already has one; otherwise it is left {@code null}.
 */
public TokenStreamToDot(String inputText, TokenStream in, PrintWriter out) {
  this.inputText = inputText;
  this.in = in;
  this.out = out;

  termAtt = in.addAttribute(CharTermAttribute.class);
  posIncAtt = in.addAttribute(PositionIncrementAttribute.class);
  posLengthAtt = in.addAttribute(PositionLengthAttribute.class);
  // Do not force an OffsetAttribute onto a stream that does not provide one.
  offsetAtt = in.hasAttribute(OffsetAttribute.class)
      ? in.addAttribute(OffsetAttribute.class)
      : null;
}
 
开发者ID:europeana,项目名称:search,代码行数:17,代码来源:TokenStreamToDot.java

示例8: printResultOfTokenStream

import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute; //导入依赖的package包/类
/**
 * Resets and consumes the token stream, collecting one table row per token (term, type,
 * start/end offset, position increment, position length and accumulated position), and
 * hands the table to {@code printTable}.
 *
 * <p>The attributes are looked up with {@code getAttribute}, so the stream is expected to
 * already carry all five of them. This method calls neither {@code end()} nor
 * {@code close()} on the stream.
 *
 * @param out destination passed on to {@code printTable}
 * @param ts  token stream to consume
 * @throws IOException if the stream fails while being reset or advanced
 */
public static void printResultOfTokenStream(PrintStream out, TokenStream ts) throws IOException {
	final CharTermAttribute term = ts.getAttribute(CharTermAttribute.class);
	final TypeAttribute type = ts.getAttribute(TypeAttribute.class);
	final OffsetAttribute offsets = ts.getAttribute(OffsetAttribute.class);
	final PositionIncrementAttribute posInc = ts.getAttribute(PositionIncrementAttribute.class);
	final PositionLengthAttribute posLen = ts.getAttribute(PositionLengthAttribute.class);
	ts.reset();

	// Insertion-ordered rows and columns so the printed table follows emission order.
	final Supplier<Map<String, String>> rowFactory = new Supplier<Map<String, String>>() {
		@Override
		public Map<String, String> get() {
			return Maps.newLinkedHashMap();
		}
	};
	final Table<String, String, String> rows =
			Tables.newCustomTable(new LinkedHashMap<String, Map<String, String>>(), rowFactory);

	int position = 0;
	for (int rowNo = 1; ts.incrementToken(); rowNo++) {
		final String rowId = rowNo + ".";
		rows.put(rowId, "term", term.toString());
		rows.put(rowId, "type", type.type());
		rows.put(rowId, "startOffset", String.valueOf(offsets.startOffset()));
		rows.put(rowId, "endOffset", String.valueOf(offsets.endOffset()));
		rows.put(rowId, "posInc", String.valueOf(posInc.getPositionIncrement()));
		rows.put(rowId, "posLen", String.valueOf(posLen.getPositionLength()));
		position += posInc.getPositionIncrement();
		rows.put(rowId, "pos", String.valueOf(position));
	}
	printTable(out, rows);
}
 
开发者ID:thihy,项目名称:cc-analysis,代码行数:32,代码来源:AnalysisTestHelper.java

示例9: testNGrams

import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute; //导入依赖的package包/类
/**
 * Verifies an {@code NGramTokenizer} against a brute-force enumeration: for every start
 * code point and every allowed gram length, the tokenizer must emit the expected gram with
 * position increment 1, position length 1 and matching char offsets, and nothing else.
 *
 * @param minGram       smallest gram size, in code points
 * @param maxGram       largest gram size, in code points
 * @param s             input text
 * @param nonTokenChars characters treated as non-token characters
 * @param edgesOnly     when true, only grams whose start is not preceded by a token char are expected
 * @throws IOException if the tokenizer fails
 */
static void testNGrams(int minGram, int maxGram, String s, final String nonTokenChars, boolean edgesOnly) throws IOException {
  // Code points of the input and, per code point index, the UTF-16 char offset where it starts.
  final int[] codePoints = toCodePoints(s);
  final int[] offsets = new int[codePoints.length + 1];
  int charOffset = 0;
  for (int i = 0; i < codePoints.length; ++i) {
    charOffset += Character.charCount(codePoints[i]);
    offsets[i + 1] = charOffset;
  }

  final TokenStream grams = new NGramTokenizer(Version.LATEST, new StringReader(s), minGram, maxGram, edgesOnly) {
    @Override
    protected boolean isTokenChar(int chr) {
      return nonTokenChars.indexOf(chr) == -1;
    }
  };
  final CharTermAttribute termAtt = grams.addAttribute(CharTermAttribute.class);
  final PositionIncrementAttribute posIncAtt = grams.addAttribute(PositionIncrementAttribute.class);
  final PositionLengthAttribute posLenAtt = grams.addAttribute(PositionLengthAttribute.class);
  final OffsetAttribute offsetAtt = grams.addAttribute(OffsetAttribute.class);
  grams.reset();

  for (int start = 0; start < codePoints.length; ++start) {
    final int lastEnd = Math.min(start + maxGram, codePoints.length);
    for (int end = start + minGram; end <= lastEnd; ++end) {
      if (edgesOnly && start > 0 && isTokenChar(nonTokenChars, codePoints[start - 1])) {
        // not on an edge
        continue;
      }
      // A gram is only expected when every code point in [start, end) is a token char.
      boolean onlyTokenChars = true;
      for (int j = start; j < end; ++j) {
        if (!isTokenChar(nonTokenChars, codePoints[j])) {
          onlyTokenChars = false;
          break;
        }
      }
      if (!onlyTokenChars) {
        continue;
      }
      assertTrue(grams.incrementToken());
      assertArrayEquals(Arrays.copyOfRange(codePoints, start, end), toCodePoints(termAtt));
      assertEquals(1, posIncAtt.getPositionIncrement());
      assertEquals(1, posLenAtt.getPositionLength());
      assertEquals(offsets[start], offsetAtt.startOffset());
      assertEquals(offsets[end], offsetAtt.endOffset());
    }
  }

  // The tokenizer must be exhausted, and end() must report the full input length as offset.
  assertFalse(grams.incrementToken());
  grams.end();
  assertEquals(s.length(), offsetAtt.startOffset());
  assertEquals(s.length(), offsetAtt.endOffset());
}
 
开发者ID:europeana,项目名称:search,代码行数:44,代码来源:NGramTokenizerTest.java

示例10: getGraph

import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute; //导入依赖的package包/类
/**
 * Renders the token graph that a whitespace tokenizer followed by a
 * {@code SynonymGraphFilter} produces for {@code input}, as a Graphviz {@code digraph}.
 *
 * <p>For each token (arc), the PositionIncrementAttribute tells us how many positions
 * (nodes) ahead this arc starts from, while the PositionLengthAttribute tells us how many
 * positions (nodes) ahead the arc arrives to. A node line is written whenever the position
 * increment is non-zero; an arc line is written for every token.
 *
 * @param input whitespace-separated text to analyze
 * @return the graph description in DOT syntax
 * @throws IOException if the token stream fails while being consumed
 */
private static String getGraph(String input) throws IOException {
  final Tokenizer inputStream = new WhitespaceTokenizer();
  inputStream.setReader(new StringReader(input));

  final StringBuilder b = new StringBuilder();
  b.append("digraph Automaton {\n");
  b.append("  initial [shape=plaintext,label=\"\"]\n");
  b.append("  initial -> 0\n");

  // try-with-resources closes the filter even when reset(), incrementToken() or end()
  // throws; the previous straight-line close() was skipped on any failure.
  try (TokenStream tokenStream = new SynonymGraphFilter(inputStream, builder.build(), false)) {
    PositionIncrementAttribute posIncAtt =
        tokenStream.addAttribute(PositionIncrementAttribute.class);
    PositionLengthAttribute posLenAtt = tokenStream.addAttribute(PositionLengthAttribute.class);
    CharTermAttribute termAtt = tokenStream.addAttribute(CharTermAttribute.class);
    tokenStream.reset();

    // Start at -1 so that a first token with increment 1 starts from node 0.
    int srcNode = -1;
    while (tokenStream.incrementToken()) {
      int posInc = posIncAtt.getPositionIncrement();
      if (posInc != 0) {
        // A new source position: declare its node before drawing arcs from it.
        srcNode += posInc;
        b.append("  ").append(srcNode);
        b.append(" [shape=circle,label=\"").append(srcNode).append("\"]\n");
      }
      int destNode = srcNode + posLenAtt.getPositionLength();
      b.append("  ").append(srcNode).append(" -> ").append(destNode);
      b.append(" [label=\"");
      b.append(termAtt);
      b.append("\"");
      b.append("]\n");
    }
    tokenStream.end();
  }

  b.append('}');
  return b.toString();
}
 
开发者ID:MysterionRise,项目名称:information-retrieval-adventure,代码行数:49,代码来源:EntradaSalida.java

示例11: testNGrams

import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute; //导入依赖的package包/类
/**
 * Cross-checks an {@code NGramTokenizer} with an independent enumeration of the grams it
 * should produce: every expected gram must come out in order with position increment 1,
 * position length 1 and the right char offsets, after which the tokenizer must be exhausted.
 *
 * @param minGram       smallest gram size, in code points
 * @param maxGram       largest gram size, in code points
 * @param s             input text
 * @param nonTokenChars characters treated as non-token characters
 * @param edgesOnly     when true, only grams whose start is not preceded by a token char are expected
 * @throws IOException if the tokenizer fails
 */
static void testNGrams(int minGram, int maxGram, String s, final String nonTokenChars, boolean edgesOnly) throws IOException {
  // Code points of the input, plus the UTF-16 char offset at which each one begins.
  final int[] codePoints = toCodePoints(s);
  final int[] offsets = new int[codePoints.length + 1];
  for (int cp = 1; cp <= codePoints.length; ++cp) {
    offsets[cp] = offsets[cp - 1] + Character.charCount(codePoints[cp - 1]);
  }

  final TokenStream grams = new NGramTokenizer(TEST_VERSION_CURRENT, new StringReader(s), minGram, maxGram, edgesOnly) {
    @Override
    protected boolean isTokenChar(int chr) {
      return nonTokenChars.indexOf(chr) == -1;
    }
  };
  final CharTermAttribute termAtt = grams.addAttribute(CharTermAttribute.class);
  final PositionIncrementAttribute posIncAtt = grams.addAttribute(PositionIncrementAttribute.class);
  final PositionLengthAttribute posLenAtt = grams.addAttribute(PositionLengthAttribute.class);
  final OffsetAttribute offsetAtt = grams.addAttribute(OffsetAttribute.class);
  grams.reset();

  for (int start = 0; start < codePoints.length; ++start) {
    for (int end = start + minGram; end <= start + maxGram && end <= codePoints.length; ++end) {
      // In edges-only mode a gram preceded by a token char is not on an edge and is skipped.
      boolean expectGram = !(edgesOnly && start > 0 && isTokenChar(nonTokenChars, codePoints[start - 1]));
      // The gram is also dropped as soon as one of its code points is not a token char.
      for (int j = start; expectGram && j < end; ++j) {
        expectGram = isTokenChar(nonTokenChars, codePoints[j]);
      }
      if (expectGram) {
        assertTrue(grams.incrementToken());
        assertArrayEquals(Arrays.copyOfRange(codePoints, start, end), toCodePoints(termAtt));
        assertEquals(1, posIncAtt.getPositionIncrement());
        assertEquals(1, posLenAtt.getPositionLength());
        assertEquals(offsets[start], offsetAtt.startOffset());
        assertEquals(offsets[end], offsetAtt.endOffset());
      }
    }
  }

  // Nothing may follow the expected grams; end() must report the full input length as offset.
  assertFalse(grams.incrementToken());
  grams.end();
  assertEquals(s.length(), offsetAtt.startOffset());
  assertEquals(s.length(), offsetAtt.endOffset());
}
 
开发者ID:jimaguere,项目名称:Maskana-Gestor-de-Conocimiento,代码行数:44,代码来源:NGramTokenizerTest.java


注:本文中的org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。