当前位置: 首页>>代码示例>>Java>>正文


Java FST.BytesReader类代码示例

本文整理汇总了Java中org.apache.lucene.util.fst.FST.BytesReader类的典型用法代码示例。如果您正苦于以下问题:Java FST.BytesReader类的具体用法?Java FST.BytesReader怎么用?Java FST.BytesReader使用的例子?那么,这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该类所在的外部类org.apache.lucene.util.fst.FST的用法示例。


在下文中一共展示了FST.BytesReader类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: walk

import org.apache.lucene.util.fst.FST; //导入方法依赖的package包/类
/**
 * Visits every arc reachable from the first arc of {@code fst} in
 * breadth-first order.
 *
 * <p>Each target node is expanded at most once; a {@link BitSet} keyed by the
 * node address records which nodes have already been expanded. The method
 * produces no result of its own: it only drives the FST reading calls.
 *
 * @param fst the transducer to traverse
 * @throws IOException if reading an arc from the FST fails
 */
static<T> void walk(FST<T> fst) throws IOException {
  // FIFO work list. ArrayDeque polls from the head in O(1); the previous
  // ArrayList.remove(0) shifted every remaining element on each poll.
  final java.util.ArrayDeque<FST.Arc<T>> queue = new java.util.ArrayDeque<>();
  final BitSet seen = new BitSet();
  final FST.BytesReader reader = fst.getBytesReader();
  final FST.Arc<T> startArc = fst.getFirstArc(new FST.Arc<T>());
  queue.add(startArc);
  while (!queue.isEmpty()) {
    final FST.Arc<T> arc = queue.poll();
    final long node = arc.target;
    // NOTE(review): node is a long but BitSet is int-indexed, so the cast
    // truncates node addresses above Integer.MAX_VALUE -- confirm FST size bounds.
    if (FST.targetHasArcs(arc) && !seen.get((int) node)) {
      seen.set((int) node);
      fst.readFirstRealTargetArc(node, arc, reader);
      while (true) {
        // arc is reused as the read cursor, so enqueue a private copy.
        queue.add(new FST.Arc<T>().copyFrom(arc));
        if (arc.isLast()) {
          break;
        } else {
          fst.readNextRealArc(arc, reader);
        }
      }
    }
  }
}
 
开发者ID:europeana,项目名称:search,代码行数:25,代码来源:FSTTermsReader.java

示例2: lookupPrefix

import org.apache.lucene.util.fst.FST; //导入方法依赖的package包/类
/**
 * Follows the bytes of {@code scratch} from the first arc of {@code fst} and
 * sums the outputs met along the way.
 *
 * @param fst the transducer to search
 * @param bytesReader reader used for the FST lookups
 * @param scratch the byte sequence to follow ({@code offset}/{@code length} are honoured)
 * @param arc scratch arc; overwritten in place while descending
 * @return the accumulated output, or {@code null} if some byte has no matching arc
 */
private Long lookupPrefix(FST<Long> fst, FST.BytesReader bytesReader,
                          BytesRef scratch, Arc<Long> arc) throws /*Bogus*/IOException {

  Long total = fst.outputs.getNoOutput();

  fst.getFirstArc(arc);

  final byte[] data = scratch.bytes;
  final int limit = scratch.offset + scratch.length;
  for (int i = scratch.offset; i < limit; i++) {
    // Bytes are signed in Java; mask to get the unsigned label.
    final int label = data[i] & 0xff;
    if (fst.findTargetArc(label, arc, arc, bytesReader) == null) {
      return null;
    }
    total = fst.outputs.add(total, arc.output);
  }

  return total;
}
 
开发者ID:europeana,项目名称:search,代码行数:21,代码来源:FreeTextSuggester.java

示例3: cacheRootArcs

import org.apache.lucene.util.fst.FST; //导入方法依赖的package包/类
/**
 * Collects a copy of every arc leaving the root node of {@code automaton}.
 * The arcs are read in FST order and the list is then reversed so that
 * completions with the highest weights come first.
 *
 * @param automaton the completion automaton
 * @return the root arcs in reversed FST order
 * @throws RuntimeException wrapping any {@link IOException} raised while reading
 */
@SuppressWarnings({"unchecked","rawtypes"})
private static Arc<Object>[] cacheRootArcs(FST<Object> automaton) {
  final List<Arc<Object>> collected = new ArrayList<>();
  try {
    final Arc<Object> cursor = automaton.getFirstArc(new Arc<>());
    final FST.BytesReader reader = automaton.getBytesReader();
    automaton.readFirstTargetArc(cursor, cursor, reader);
    // cursor is reused for every read, so store an independent copy each time.
    collected.add(new Arc<>().copyFrom(cursor));
    while (!cursor.isLast()) {
      automaton.readNextArc(cursor, reader);
      collected.add(new Arc<>().copyFrom(cursor));
    }
  } catch (IOException e) {
    throw new RuntimeException(e);
  }

  // Highest weights are wanted first.
  Collections.reverse(collected);
  return collected.toArray(new Arc[collected.size()]);
}
 
开发者ID:europeana,项目名称:search,代码行数:24,代码来源:FSTCompletion.java

示例4: cacheRootArcs

import org.apache.lucene.util.fst.FST; //导入方法依赖的package包/类
/**
 * Collects a copy of every arc leaving the root node of {@code automaton}.
 * The arcs are read in FST order and the list is then reversed so that
 * completions with the highest weights come first.
 *
 * @param automaton the completion automaton
 * @return the root arcs in reversed FST order
 * @throws RuntimeException wrapping any {@link IOException} raised while reading
 */
@SuppressWarnings({"unchecked","rawtypes"})
private static Arc<Object>[] cacheRootArcs(FST<Object> automaton) {
  final List<Arc<Object>> collected = new ArrayList<Arc<Object>>();
  try {
    final Arc<Object> cursor = automaton.getFirstArc(new Arc<Object>());
    final FST.BytesReader reader = automaton.getBytesReader();
    automaton.readFirstTargetArc(cursor, cursor, reader);
    // cursor is reused for every read, so store an independent copy each time.
    collected.add(new Arc<Object>().copyFrom(cursor));
    while (!cursor.isLast()) {
      automaton.readNextArc(cursor, reader);
      collected.add(new Arc<Object>().copyFrom(cursor));
    }
  } catch (IOException e) {
    throw new RuntimeException(e);
  }

  // Highest weights are wanted first.
  Collections.reverse(collected);
  return collected.toArray(new Arc[collected.size()]);
}
 
开发者ID:yintaoxue,项目名称:read-open-source-code,代码行数:24,代码来源:FSTCompletion.java

示例5: walk

import org.apache.lucene.util.fst.FST; //导入方法依赖的package包/类
/**
 * Visits every arc reachable from the first arc of {@code fst} in
 * breadth-first order.
 *
 * <p>Each target node is expanded at most once; a {@link BitSet} keyed by the
 * node address records which nodes have already been expanded. The method
 * produces no result of its own: it only drives the FST reading calls.
 *
 * @param fst the transducer to traverse
 * @throws IOException if reading an arc from the FST fails
 */
static<T> void walk(FST<T> fst) throws IOException {
  // FIFO work list. ArrayDeque polls from the head in O(1); the previous
  // ArrayList.remove(0) shifted every remaining element on each poll.
  final java.util.ArrayDeque<FST.Arc<T>> queue = new java.util.ArrayDeque<FST.Arc<T>>();
  final BitSet seen = new BitSet();
  final FST.BytesReader reader = fst.getBytesReader();
  final FST.Arc<T> startArc = fst.getFirstArc(new FST.Arc<T>());
  queue.add(startArc);
  while (!queue.isEmpty()) {
    final FST.Arc<T> arc = queue.poll();
    final long node = arc.target;
    // NOTE(review): node is a long but BitSet is int-indexed, so the cast
    // truncates node addresses above Integer.MAX_VALUE -- confirm FST size bounds.
    if (FST.targetHasArcs(arc) && !seen.get((int) node)) {
      seen.set((int) node);
      fst.readFirstRealTargetArc(node, arc, reader);
      while (true) {
        // arc is reused as the read cursor, so enqueue a private copy.
        queue.add(new FST.Arc<T>().copyFrom(arc));
        if (arc.isLast()) {
          break;
        } else {
          fst.readNextRealArc(arc, reader);
        }
      }
    }
  }
}
 
开发者ID:yintaoxue,项目名称:read-open-source-code,代码行数:25,代码来源:FSTOrdTermsReader.java

示例6: lookup

import org.apache.lucene.util.fst.FST; //导入方法依赖的package包/类
/**
 * Looks up the characters {@code word[offset .. offset+length)} in {@code fst},
 * one Unicode code point at a time, and then requires an {@code FST.END_LABEL}
 * arc after the last code point.
 *
 * @param fst the transducer to search; may be {@code null}
 * @param word character buffer holding the word
 * @param offset index of the first character of the word
 * @param length number of characters in the word
 * @return the accumulated output, or {@code null} when {@code fst} is
 *         {@code null} or no complete path exists
 * @throws RuntimeException wrapping any {@link IOException} raised by the FST
 */
IntsRef lookup(FST<IntsRef> fst, char word[], int offset, int length) {
  if (fst == null) {
    return null;
  }
  final FST.BytesReader bytesReader = fst.getBytesReader();
  final FST.Arc<IntsRef> arc = fst.getFirstArc(new FST.Arc<IntsRef>());
  final IntsRef NO_OUTPUT = fst.outputs.getNoOutput();
  // Running sum of the outputs seen so far.
  IntsRef collected = NO_OUTPUT;

  final int limit = offset + length;
  try {
    int index = offset;
    while (index < limit) {
      final int codePoint = Character.codePointAt(word, index, limit);
      if (fst.findTargetArc(codePoint, arc, arc, bytesReader) == null) {
        return null;
      }
      // Identity comparison against the NO_OUTPUT instance, as in the lookup contract.
      if (arc.output != NO_OUTPUT) {
        collected = fst.outputs.add(collected, arc.output);
      }
      index += Character.charCount(codePoint);
    }

    if (fst.findTargetArc(FST.END_LABEL, arc, arc, bytesReader) == null) {
      return null;
    }
    if (arc.output != NO_OUTPUT) {
      return fst.outputs.add(collected, arc.output);
    }
    return collected;
  } catch (IOException bogus) {
    throw new RuntimeException(bogus);
  }
}
 
开发者ID:lamsfoundation,项目名称:lams,代码行数:32,代码来源:Dictionary.java

示例7: applyMappings

import org.apache.lucene.util.fst.FST; //导入方法依赖的package包/类
/**
 * Rewrites {@code sb} in place using the mappings stored in {@code fst}.
 *
 * <p>At each position the longest input sequence accepted by the FST is
 * located and replaced with its output; scanning then resumes right after
 * the inserted replacement. Positions with no match are left untouched.
 *
 * @param fst transducer mapping character sequences to replacements
 * @param sb the buffer to rewrite
 * @throws IOException if reading from the FST fails
 */
static void applyMappings(FST<CharsRef> fst, StringBuilder sb) throws IOException {
  final FST.BytesReader bytesReader = fst.getBytesReader();
  final FST.Arc<CharsRef> rootArc = fst.getFirstArc(new FST.Arc<CharsRef>());
  final CharsRef NO_OUTPUT = fst.outputs.getNoOutput();

  // Reused cursor arc, reset to the root for every start position.
  final FST.Arc<CharsRef> cursor = new FST.Arc<>();

  int start = 0;
  while (start < sb.length()) {
    cursor.copyFrom(rootArc);
    CharsRef accumulated = NO_OUTPUT;
    int matchEnd = -1;
    CharsRef replacement = null;

    int pos = start;
    while (pos < sb.length()
        && fst.findTargetArc(sb.charAt(pos), cursor, cursor, bytesReader) != null) {
      accumulated = fst.outputs.add(accumulated, cursor.output);
      if (cursor.isFinal()) {
        // Remember the longest match seen so far and keep extending.
        replacement = fst.outputs.add(accumulated, cursor.nextFinalOutput);
        matchEnd = pos;
      }
      pos++;
    }

    if (matchEnd >= 0) {
      sb.delete(start, matchEnd+1);
      sb.insert(start, replacement);
      // Land on the last inserted char; the increment below steps past it.
      start += (replacement.length - 1);
    }
    start++;
  }
}
 
开发者ID:lamsfoundation,项目名称:lams,代码行数:37,代码来源:Dictionary.java

示例8: getExactMatchStartingFromRootArc

import org.apache.lucene.util.fst.FST; //导入方法依赖的package包/类
/**
 * Searches the cached root arcs, beginning at index <code>rootArcIndex</code>,
 * for the first one under which <code>utf8</code> is an exact match.
 *
 * @param rootArcIndex
 *          Index in {@link #rootArcs} of the first root arc to try.
 *
 * @param utf8
 *          The utf8 byte sequence to follow.
 *
 * @return The bucket number of the match (the label of the matching root
 *         arc), or <code>-1</code> when nothing matches.
 */
private int getExactMatchStartingFromRootArc(
    int rootArcIndex, BytesRef utf8) {
  try {
    final FST.Arc<Object> work = new FST.Arc<>();
    final FST.BytesReader reader = automaton.getBytesReader();
    for (int idx = rootArcIndex; idx < rootArcs.length; idx++) {
      final FST.Arc<Object> candidate = rootArcs[idx];
      // Descend on a scratch copy so the cached root arc stays intact.
      final FST.Arc<Object> arc = work.copyFrom(candidate);

      if (!descendWithPrefix(arc, utf8)) {
        continue;
      }
      // The key is an exact match only if the first arc after it is the end marker.
      automaton.readFirstTargetArc(arc, arc, reader);
      if (arc.label == FST.END_LABEL) {
        return candidate.label;
      }
    }
    // Ran out of root arcs without a match.
    return -1;
  } catch (IOException e) {
    // Not expected to happen; surface it unchecked.
    throw new RuntimeException(e);
  }
}
 
开发者ID:europeana,项目名称:search,代码行数:42,代码来源:FSTCompletion.java

示例9: descendWithPrefix

import org.apache.lucene.util.fst.FST; //导入方法依赖的package包/类
/**
 * Walks the automaton from <code>arc</code>, consuming the bytes of
 * <code>utf8</code> one by one.
 *
 * @param arc
 *          The arc to start from; it is overwritten in place at every step.
 * @param utf8
 *          The byte sequence to follow.
 * @return <code>true</code> if every byte had a matching arc, in which case
 *         <code>arc</code> is the arc matching the last byte;
 *         <code>false</code> as soon as a byte has no matching arc.
 */
private boolean descendWithPrefix(Arc<Object> arc, BytesRef utf8)
    throws IOException {
  final int end = utf8.offset + utf8.length;
  // A fresh reader per call: it cannot live in an instance field because
  // several threads may use FSTCompletion at the same time.
  final FST.BytesReader reader = automaton.getBytesReader();
  int pos = utf8.offset;
  while (pos < end) {
    final int label = utf8.bytes[pos] & 0xff;
    if (automaton.findTargetArc(label, arc, arc, reader) == null) {
      // Dead end: the prefix is not in the automaton.
      return false;
    }
    pos++;
  }
  return true;
}
 
开发者ID:europeana,项目名称:search,代码行数:27,代码来源:FSTCompletion.java

示例10: cacheRootArcs

import org.apache.lucene.util.fst.FST; //导入方法依赖的package包/类
/**
 * Builds a lookup table of root-level arcs for the labels
 * {@code 0x3040 .. cacheCeiling}. Slot {@code i} holds a copy of the arc for
 * label {@code 0x3040 + i}, or stays {@code null} when the first arc has no
 * target arc with that label.
 *
 * @return the table of cached arcs
 * @throws IOException if reading from the FST fails
 */
@SuppressWarnings({"rawtypes","unchecked"})
private FST.Arc<Long>[] cacheRootArcs() throws IOException {
  final FST.Arc<Long>[] cache = new FST.Arc[1+(cacheCeiling-0x3040)];
  final FST.Arc<Long> root = new FST.Arc<>();
  fst.getFirstArc(root);
  final FST.Arc<Long> probe = new FST.Arc<>();
  final FST.BytesReader reader = fst.getBytesReader();
  // TODO: jump to 3040, readNextRealArc to ceiling? (just be careful we don't add bugs)
  int label = 0x3040;
  for (int slot = 0; slot < cache.length; slot++, label++) {
    if (fst.findTargetArc(label, root, probe, reader) == null) {
      continue;
    }
    // probe is overwritten by the next lookup, so cache a copy.
    cache[slot] = new FST.Arc<Long>().copyFrom(probe);
  }
  return cache;
}
 
开发者ID:europeana,项目名称:search,代码行数:16,代码来源:TokenInfoFST.java

示例11: findTargetArc

import org.apache.lucene.util.fst.FST; //导入方法依赖的package包/类
/**
 * Finds the arc labelled {@code ch}, answering from {@code rootCache} when
 * {@code useCache} is set and {@code ch} lies in {@code 0x3040 .. cacheCeiling},
 * and delegating to the underlying FST otherwise.
 *
 * @param ch the label to look for
 * @param follow the arc to search from; only used when the cache is bypassed
 * @param arc receives the matching arc
 * @param useCache whether the root cache may be consulted
 * @param fstReader reader used for the uncached lookup
 * @return {@code arc} when a match is found in the cache, {@code null} when the
 *         cache has no entry, otherwise whatever the underlying FST lookup returns
 * @throws IOException if reading from the FST fails
 */
public FST.Arc<Long> findTargetArc(int ch, FST.Arc<Long> follow, FST.Arc<Long> arc, boolean useCache, FST.BytesReader fstReader) throws IOException {
  final boolean cacheable = useCache && ch >= 0x3040 && ch <= cacheCeiling;
  if (!cacheable) {
    return fst.findTargetArc(ch, follow, arc, fstReader);
  }

  assert ch != FST.END_LABEL;
  final Arc<Long> cached = rootCache[ch - 0x3040];
  if (cached == null) {
    return null;
  }
  // Hand back a copy in the caller's arc so the cached entry is never exposed.
  arc.copyFrom(cached);
  return arc;
}
 
开发者ID:europeana,项目名称:search,代码行数:15,代码来源:TokenInfoFST.java

示例12: lookup

import org.apache.lucene.util.fst.FST; //导入方法依赖的package包/类
/**
 * Finds dictionary words in a span of text by trying every start position
 * and following the FST as far as it matches.
 * @param chars text
 * @param off offset into text
 * @param len length of text
 * @return array of {wordId, position, length}, or {@code EMPTY_RESULT} when
 *         nothing matched
 */
public int[][] lookup(char[] chars, int off, int len) throws IOException {
  // TODO: can we avoid this treemap/toIndexArray?
  final TreeMap<Integer, int[]> matches = new TreeMap<>(); // index, [length, length...]

  final FST.BytesReader reader = fst.getBytesReader();

  FST.Arc<Long> arc = new FST.Arc<>();
  final int end = off + len;
  for (int start = off; start < end; start++) {
    arc = fst.getFirstArc(arc);
    int sum = 0;
    for (int pos = start; pos < end; pos++) {
      final int ch = chars[pos];
      // Only the first character of a candidate may use the root cache.
      final boolean atStart = pos == start;
      if (fst.findTargetArc(ch, arc, arc, atStart, reader) == null) {
        break; // no longer match from this start; try the next one
      }
      sum += arc.output.intValue();
      if (arc.isFinal()) {
        final int segmentIndex = sum + arc.nextFinalOutput.intValue();
        matches.put(start-off, segmentations[segmentIndex]);
      }
    }
  }

  // An entry exists exactly when at least one match was recorded.
  return matches.isEmpty() ? EMPTY_RESULT : toIndexArray(matches);
}
 
开发者ID:europeana,项目名称:search,代码行数:37,代码来源:UserDictionary.java

示例13: getExactMatchStartingFromRootArc

import org.apache.lucene.util.fst.FST; //导入方法依赖的package包/类
/**
 * Searches the cached root arcs, beginning at index <code>rootArcIndex</code>,
 * for the first one under which <code>utf8</code> is an exact match.
 *
 * @param rootArcIndex
 *          Index in {@link #rootArcs} of the first root arc to try.
 *
 * @param utf8
 *          The utf8 byte sequence to follow.
 *
 * @return The bucket number of the match (the label of the matching root
 *         arc), or <code>-1</code> when nothing matches.
 */
private int getExactMatchStartingFromRootArc(
    int rootArcIndex, BytesRef utf8) {
  try {
    final FST.Arc<Object> work = new FST.Arc<Object>();
    final FST.BytesReader reader = automaton.getBytesReader();
    for (int idx = rootArcIndex; idx < rootArcs.length; idx++) {
      final FST.Arc<Object> candidate = rootArcs[idx];
      // Descend on a scratch copy so the cached root arc stays intact.
      final FST.Arc<Object> arc = work.copyFrom(candidate);

      if (!descendWithPrefix(arc, utf8)) {
        continue;
      }
      // The key is an exact match only if the first arc after it is the end marker.
      automaton.readFirstTargetArc(arc, arc, reader);
      if (arc.label == FST.END_LABEL) {
        return candidate.label;
      }
    }
    // Ran out of root arcs without a match.
    return -1;
  } catch (IOException e) {
    // Not expected to happen; surface it unchecked.
    throw new RuntimeException(e);
  }
}
 
开发者ID:pkarmstr,项目名称:NYBC,代码行数:42,代码来源:FSTCompletion.java

示例14: cacheRootArcs

import org.apache.lucene.util.fst.FST; //导入方法依赖的package包/类
/**
 * Builds a lookup table of root-level arcs for the labels
 * {@code 0x3040 .. cacheCeiling}. Slot {@code i} holds a copy of the arc for
 * label {@code 0x3040 + i}, or stays {@code null} when the first arc has no
 * target arc with that label.
 *
 * @return the table of cached arcs
 * @throws IOException if reading from the FST fails
 */
@SuppressWarnings({"rawtypes","unchecked"})
private FST.Arc<Long>[] cacheRootArcs() throws IOException {
  final FST.Arc<Long>[] cache = new FST.Arc[1+(cacheCeiling-0x3040)];
  final FST.Arc<Long> root = new FST.Arc<Long>();
  fst.getFirstArc(root);
  final FST.Arc<Long> probe = new FST.Arc<Long>();
  final FST.BytesReader reader = fst.getBytesReader();
  // TODO: jump to 3040, readNextRealArc to ceiling? (just be careful we don't add bugs)
  int label = 0x3040;
  for (int slot = 0; slot < cache.length; slot++, label++) {
    if (fst.findTargetArc(label, root, probe, reader) == null) {
      continue;
    }
    // probe is overwritten by the next lookup, so cache a copy.
    cache[slot] = new FST.Arc<Long>().copyFrom(probe);
  }
  return cache;
}
 
开发者ID:pkarmstr,项目名称:NYBC,代码行数:16,代码来源:TokenInfoFST.java

示例15: lookup

import org.apache.lucene.util.fst.FST; //导入方法依赖的package包/类
/**
 * Finds dictionary words in a span of text by trying every start position
 * and following the FST as far as it matches.
 * @param chars text
 * @param off offset into text
 * @param len length of text
 * @return array of {wordId, position, length}, or {@code EMPTY_RESULT} when
 *         nothing matched
 */
public int[][] lookup(char[] chars, int off, int len) throws IOException {
  // TODO: can we avoid this treemap/toIndexArray?
  final TreeMap<Integer, int[]> matches = new TreeMap<Integer, int[]>(); // index, [length, length...]

  final FST.BytesReader reader = fst.getBytesReader();

  FST.Arc<Long> arc = new FST.Arc<Long>();
  final int end = off + len;
  for (int start = off; start < end; start++) {
    arc = fst.getFirstArc(arc);
    int sum = 0;
    for (int pos = start; pos < end; pos++) {
      final int ch = chars[pos];
      // Only the first character of a candidate may use the root cache.
      final boolean atStart = pos == start;
      if (fst.findTargetArc(ch, arc, arc, atStart, reader) == null) {
        break; // no longer match from this start; try the next one
      }
      sum += arc.output.intValue();
      if (arc.isFinal()) {
        final int segmentIndex = sum + arc.nextFinalOutput.intValue();
        matches.put(start-off, segmentations[segmentIndex]);
      }
    }
  }

  // An entry exists exactly when at least one match was recorded.
  return matches.isEmpty() ? EMPTY_RESULT : toIndexArray(matches);
}
 
开发者ID:yintaoxue,项目名称:read-open-source-code,代码行数:37,代码来源:UserDictionary.java


注:本文中的org.apache.lucene.util.fst.FST.BytesReader类的用法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。