本文整理汇总了Java中org.apache.lucene.util.fst.FST.BytesReader类的典型用法代码示例。如果您正苦于以下问题:Java FST.BytesReader的具体用法?Java FST.BytesReader怎么用?Java FST.BytesReader使用的例子?那么,这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解其所在类org.apache.lucene.util.fst.FST的用法示例。
在下文中一共展示了FST.BytesReader方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: walk
import org.apache.lucene.util.fst.FST; //导入方法依赖的package包/类
/**
 * Visits every arc reachable from the first arc of {@code fst} in
 * breadth-first (FIFO) order, expanding each target node at most once.
 *
 * <p>The traversal produces no result of its own; it only drives the
 * arc-reading calls on {@code fst}.
 *
 * @param fst the FST to traverse
 * @throws IOException if reading arcs from the FST fails
 */
static<T> void walk(FST<T> fst) throws IOException {
  // A deque removes from the head in constant time. Using ArrayList.remove(0)
  // as the dequeue shifts every remaining element on each pop, which makes the
  // whole traversal quadratic in the number of queued arcs.
  final java.util.ArrayDeque<FST.Arc<T>> queue = new java.util.ArrayDeque<>();
  final BitSet seen = new BitSet();
  final FST.BytesReader reader = fst.getBytesReader();
  final FST.Arc<T> startArc = fst.getFirstArc(new FST.Arc<T>());
  queue.add(startArc);
  while (!queue.isEmpty()) {
    final FST.Arc<T> arc = queue.poll();
    final long node = arc.target;
    // NOTE(review): the node address is narrowed to int for the BitSet index;
    // this assumes node addresses fit in an int -- confirm for very large FSTs.
    if (FST.targetHasArcs(arc) && !seen.get((int) node)) {
      seen.set((int) node);
      // The dequeued arc is reused as the cursor over the node's outgoing
      // arcs; a copy of each arc is queued because the cursor is overwritten
      // by the next read.
      fst.readFirstRealTargetArc(node, arc, reader);
      while (true) {
        queue.add(new FST.Arc<T>().copyFrom(arc));
        if (arc.isLast()) {
          break;
        }
        fst.readNextRealArc(arc, reader);
      }
    }
  }
}
示例2: lookupPrefix
import org.apache.lucene.util.fst.FST; //导入方法依赖的package包/类
/**
 * Follows the bytes of {@code scratch} through {@code fst}, starting at the
 * FST's first arc, and combines the outputs of the arcs taken using
 * {@code fst.outputs.add}.
 *
 * @param fst the FST to search
 * @param bytesReader reader passed to each arc lookup on {@code fst}
 * @param scratch the byte sequence to follow; only the range
 *        {@code [offset, offset + length)} of its backing array is read
 * @param arc scratch arc; overwritten as the traversal advances
 * @return the accumulated output, or {@code null} as soon as a byte has no
 *         matching arc
 */
private Long lookupPrefix(FST<Long> fst, FST.BytesReader bytesReader,
    BytesRef scratch, Arc<Long> arc) throws /*Bogus*/IOException {
  Long accumulated = fst.outputs.getNoOutput();
  fst.getFirstArc(arc);
  final byte[] data = scratch.bytes;
  final int limit = scratch.offset + scratch.length;
  for (int i = scratch.offset; i < limit; i++) {
    // Bytes are looked up as unsigned labels.
    final int label = data[i] & 0xff;
    if (fst.findTargetArc(label, arc, arc, bytesReader) == null) {
      return null;
    }
    accumulated = fst.outputs.add(accumulated, arc.output);
  }
  return accumulated;
}
示例3: cacheRootArcs
import org.apache.lucene.util.fst.FST; //导入方法依赖的package包/类
/**
 * Reads every arc leaving the target of the automaton's first arc and returns
 * copies of them in the reverse of the order in which they were read. The
 * reversal is intended to put completions with the highest weights first.
 *
 * @param automaton the automaton whose root arcs are cached
 * @return the copied root arcs, in reversed read order
 * @throws RuntimeException wrapping any {@link IOException} raised while
 *         reading arcs
 */
@SuppressWarnings({"unchecked","rawtypes"})
private static Arc<Object>[] cacheRootArcs(FST<Object> automaton) {
  final List<Arc<Object>> collected = new ArrayList<>();
  try {
    final Arc<Object> cursor = automaton.getFirstArc(new Arc<>());
    final FST.BytesReader in = automaton.getBytesReader();
    automaton.readFirstTargetArc(cursor, cursor, in);
    // The cursor is overwritten by every read, so a copy is stored each time.
    collected.add(new Arc<>().copyFrom(cursor));
    while (!cursor.isLast()) {
      automaton.readNextArc(cursor, in);
      collected.add(new Arc<>().copyFrom(cursor));
    }
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
  // We want highest weights first.
  Collections.reverse(collected);
  return collected.toArray(new Arc[collected.size()]);
}
示例4: cacheRootArcs
import org.apache.lucene.util.fst.FST; //导入方法依赖的package包/类
/**
 * Reads every arc leaving the target of the automaton's first arc and returns
 * copies of them in the reverse of the order in which they were read. The
 * reversal is intended to put completions with the highest weights first.
 *
 * @param automaton the automaton whose root arcs are cached
 * @return the copied root arcs, in reversed read order
 * @throws RuntimeException wrapping any {@link IOException} raised while
 *         reading arcs
 */
@SuppressWarnings({"unchecked","rawtypes"})
private static Arc<Object>[] cacheRootArcs(FST<Object> automaton) {
  final List<Arc<Object>> collected = new ArrayList<Arc<Object>>();
  try {
    final Arc<Object> cursor = automaton.getFirstArc(new Arc<Object>());
    final FST.BytesReader in = automaton.getBytesReader();
    automaton.readFirstTargetArc(cursor, cursor, in);
    // The cursor is overwritten by every read, so a copy is stored each time.
    collected.add(new Arc<Object>().copyFrom(cursor));
    while (!cursor.isLast()) {
      automaton.readNextArc(cursor, in);
      collected.add(new Arc<Object>().copyFrom(cursor));
    }
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
  // We want highest weights first.
  Collections.reverse(collected);
  return collected.toArray(new Arc[collected.size()]);
}
示例5: walk
import org.apache.lucene.util.fst.FST; //导入方法依赖的package包/类
/**
 * Visits every arc reachable from the first arc of {@code fst} in
 * breadth-first (FIFO) order, expanding each target node at most once.
 *
 * <p>The traversal produces no result of its own; it only drives the
 * arc-reading calls on {@code fst}.
 *
 * @param fst the FST to traverse
 * @throws IOException if reading arcs from the FST fails
 */
static<T> void walk(FST<T> fst) throws IOException {
  // A deque removes from the head in constant time. Using ArrayList.remove(0)
  // as the dequeue shifts every remaining element on each pop, which makes the
  // whole traversal quadratic in the number of queued arcs.
  final java.util.ArrayDeque<FST.Arc<T>> queue = new java.util.ArrayDeque<FST.Arc<T>>();
  final BitSet seen = new BitSet();
  final FST.BytesReader reader = fst.getBytesReader();
  final FST.Arc<T> startArc = fst.getFirstArc(new FST.Arc<T>());
  queue.add(startArc);
  while (!queue.isEmpty()) {
    final FST.Arc<T> arc = queue.poll();
    final long node = arc.target;
    // NOTE(review): the node address is narrowed to int for the BitSet index;
    // this assumes node addresses fit in an int -- confirm for very large FSTs.
    if (FST.targetHasArcs(arc) && !seen.get((int) node)) {
      seen.set((int) node);
      // The dequeued arc is reused as the cursor over the node's outgoing
      // arcs; a copy of each arc is queued because the cursor is overwritten
      // by the next read.
      fst.readFirstRealTargetArc(node, arc, reader);
      while (true) {
        queue.add(new FST.Arc<T>().copyFrom(arc));
        if (arc.isLast()) {
          break;
        }
        fst.readNextRealArc(arc, reader);
      }
    }
  }
}
示例6: lookup
import org.apache.lucene.util.fst.FST; //导入方法依赖的package包/类
/**
 * Looks up the code points of {@code word[offset .. offset+length)} in
 * {@code fst}, followed by {@code FST.END_LABEL}, and returns the combined
 * output of the arcs taken.
 *
 * @param fst the FST to search; may be {@code null}
 * @param word the characters holding the key
 * @param offset index of the first character of the key
 * @param length number of characters in the key
 * @return the accumulated output, or {@code null} if {@code fst} is
 *         {@code null} or the key (including the end label) is not accepted
 * @throws RuntimeException wrapping any {@link IOException} raised by the
 *         arc lookups
 */
IntsRef lookup(FST<IntsRef> fst, char word[], int offset, int length) {
  if (fst == null) {
    return null;
  }
  final FST.BytesReader reader = fst.getBytesReader();
  final FST.Arc<IntsRef> cursor = fst.getFirstArc(new FST.Arc<IntsRef>());
  final IntsRef noOutput = fst.outputs.getNoOutput();
  final int limit = offset + length;
  // Outputs are accumulated along the path; arcs carrying the shared
  // "no output" instance (compared by identity) are skipped.
  IntsRef accumulated = noOutput;
  try {
    int pos = offset;
    while (pos < limit) {
      final int codePoint = Character.codePointAt(word, pos, limit);
      if (fst.findTargetArc(codePoint, cursor, cursor, reader) == null) {
        return null;
      }
      if (cursor.output != noOutput) {
        accumulated = fst.outputs.add(accumulated, cursor.output);
      }
      pos += Character.charCount(codePoint);
    }
    // The key only matches if an end-label arc follows the last code point.
    if (fst.findTargetArc(FST.END_LABEL, cursor, cursor, reader) == null) {
      return null;
    }
    return cursor.output != noOutput
        ? fst.outputs.add(accumulated, cursor.output)
        : accumulated;
  } catch (IOException bogus) {
    throw new RuntimeException(bogus);
  }
}
示例7: applyMappings
import org.apache.lucene.util.fst.FST; //导入方法依赖的package包/类
/**
 * Rewrites {@code sb} in place: at each position, the longest run of
 * characters accepted by {@code fst} (ending on a final arc) is replaced by
 * the output accumulated for that run. Scanning then resumes after the
 * inserted replacement.
 *
 * @param fst the mapping FST, keyed by {@code char} labels
 * @param sb the text to rewrite; modified in place
 * @throws IOException if an arc lookup on {@code fst} fails
 */
static void applyMappings(FST<CharsRef> fst, StringBuilder sb) throws IOException {
  final FST.BytesReader reader = fst.getBytesReader();
  final FST.Arc<CharsRef> root = fst.getFirstArc(new FST.Arc<CharsRef>());
  final CharsRef noOutput = fst.outputs.getNoOutput();
  // Reused cursor; reset to the root arc for every start position.
  final FST.Arc<CharsRef> cursor = new FST.Arc<>();

  for (int start = 0; start < sb.length(); start++) {
    cursor.copyFrom(root);
    CharsRef pending = noOutput;
    int bestEnd = -1;
    CharsRef bestOutput = null;

    int probe = start;
    while (probe < sb.length()
        && fst.findTargetArc(sb.charAt(probe), cursor, cursor, reader) != null) {
      pending = fst.outputs.add(pending, cursor.output);
      if (cursor.isFinal()) {
        // Remember the longest match seen so far, including its final output.
        bestOutput = fst.outputs.add(pending, cursor.nextFinalOutput);
        bestEnd = probe;
      }
      probe++;
    }

    if (bestEnd < 0) {
      continue;
    }
    sb.delete(start, bestEnd + 1);
    sb.insert(start, bestOutput);
    // Skip past the inserted text; the loop increment supplies the final +1.
    start += (bestOutput.length - 1);
  }
}
示例8: getExactMatchStartingFromRootArc
import org.apache.lucene.util.fst.FST; //导入方法依赖的package包/类
/**
 * Searches the cached root arcs, beginning at index <code>rootArcIndex</code>,
 * for the first one under which <code>utf8</code> is an exact match: the
 * prefix descent succeeds and the first arc after it carries
 * {@link FST#END_LABEL}.
 *
 * @param rootArcIndex
 *          The first root arc index in {@link #rootArcs} to consider when
 *          matching.
 *
 * @param utf8
 *          The sequence of utf8 bytes to follow.
 *
 * @return Returns the label of the matching root arc (the bucket number of
 *         the match) or <code>-1</code> if no match was found.
 */
private int getExactMatchStartingFromRootArc(
    int rootArcIndex, BytesRef utf8) {
  try {
    final FST.Arc<Object> scratch = new FST.Arc<>();
    final FST.BytesReader reader = automaton.getBytesReader();
    for (int idx = rootArcIndex; idx < rootArcs.length; idx++) {
      final FST.Arc<Object> candidate = rootArcs[idx];
      // Work on a copy so the cached root arc itself is never modified.
      final FST.Arc<Object> cursor = scratch.copyFrom(candidate);
      if (!descendWithPrefix(cursor, utf8)) {
        continue;
      }
      automaton.readFirstTargetArc(cursor, cursor, reader);
      if (cursor.label == FST.END_LABEL) {
        // Normalize prefix-encoded weight.
        return candidate.label;
      }
    }
    // No match.
    return -1;
  } catch (IOException e) {
    // Should never happen, but anyway.
    throw new RuntimeException(e);
  }
}
示例9: descendWithPrefix
import org.apache.lucene.util.fst.FST; //导入方法依赖的package包/类
/**
 * Advances <code>arc</code> through the automaton along the bytes of
 * <code>utf8</code>, one arc per byte.
 *
 * @param arc
 *          The starting arc. This argument is modified in-place.
 * @param utf8
 *          The bytes to descend along; only the range
 *          <code>[offset, offset + length)</code> is read.
 * @return If <code>true</code>, <code>arc</code> will be set to the arc
 *         matching the last byte of <code>utf8</code>. <code>false</code> is
 *         returned if no such prefix exists.
 * @throws IOException if an arc lookup on the automaton fails
 */
private boolean descendWithPrefix(Arc<Object> arc, BytesRef utf8)
    throws IOException {
  final int stop = utf8.offset + utf8.length;
  // A reader is obtained per call rather than kept in a field, since
  // multiple threads can use FSTCompletion at once.
  final FST.BytesReader reader = automaton.getBytesReader();
  int pos = utf8.offset;
  while (pos < stop) {
    final int label = utf8.bytes[pos++] & 0xff;
    if (automaton.findTargetArc(label, arc, arc, reader) == null) {
      // No matching prefix.
      return false;
    }
  }
  return true;
}
示例10: cacheRootArcs
import org.apache.lucene.util.fst.FST; //导入方法依赖的package包/类
/**
 * Builds a lookup table of the arcs leaving the FST's first arc for every
 * label from {@code 0x3040} up to and including {@code cacheCeiling}.
 *
 * @return an array indexed by {@code label - 0x3040}; an entry is
 *         {@code null} when no arc exists for that label
 * @throws IOException if an arc lookup on the FST fails
 */
@SuppressWarnings({"rawtypes","unchecked"})
private FST.Arc<Long>[] cacheRootArcs() throws IOException {
  FST.Arc<Long>[] table = new FST.Arc[1+(cacheCeiling-0x3040)];
  final FST.Arc<Long> root = new FST.Arc<>();
  fst.getFirstArc(root);
  final FST.Arc<Long> scratch = new FST.Arc<>();
  final FST.BytesReader reader = fst.getBytesReader();
  // TODO: jump to 3040, readNextRealArc to ceiling? (just be careful we don't add bugs)
  for (int slot = 0; slot < table.length; slot++) {
    final int label = 0x3040 + slot;
    if (fst.findTargetArc(label, root, scratch, reader) == null) {
      continue;
    }
    // The scratch arc is overwritten by the next lookup, so store a copy.
    table[slot] = new FST.Arc<Long>().copyFrom(scratch);
  }
  return table;
}
示例11: findTargetArc
import org.apache.lucene.util.fst.FST; //导入方法依赖的package包/类
/**
 * Finds the arc labelled {@code ch}, answering from {@code rootCache} when
 * the caller allows it and {@code ch} lies in {@code [0x3040, cacheCeiling]},
 * and delegating to the underlying FST otherwise.
 *
 * <p>On a cache lookup {@code follow} is not consulted. NOTE(review):
 * presumably callers only pass {@code useCache == true} when {@code follow}
 * is the root arc -- confirm against call sites.
 *
 * @param ch the label to look up
 * @param follow the arc whose target is searched when the cache is not used
 * @param arc receives the arc that was found
 * @param useCache whether the cache may be consulted
 * @param fstReader reader passed to the underlying FST lookup
 * @return {@code arc} when a cached arc was copied into it, {@code null} when
 *         the cache has no entry for {@code ch}, otherwise the result of the
 *         underlying FST lookup
 * @throws IOException if the underlying FST lookup fails
 */
public FST.Arc<Long> findTargetArc(int ch, FST.Arc<Long> follow, FST.Arc<Long> arc, boolean useCache, FST.BytesReader fstReader) throws IOException {
  final boolean cacheable = useCache && ch >= 0x3040 && ch <= cacheCeiling;
  if (!cacheable) {
    return fst.findTargetArc(ch, follow, arc, fstReader);
  }
  assert ch != FST.END_LABEL;
  final Arc<Long> cached = rootCache[ch - 0x3040];
  if (cached == null) {
    return null;
  }
  arc.copyFrom(cached);
  return arc;
}
示例12: lookup
import org.apache.lucene.util.fst.FST; //导入方法依赖的package包/类
/**
 * Lookup words in text.
 *
 * <p>For every start position in the given range, the FST is walked
 * character by character; whenever a final arc is reached, the segmentation
 * selected by the accumulated output is recorded for that start position,
 * replacing any earlier (shorter) match recorded for the same position.
 *
 * @param chars text
 * @param off offset into text
 * @param len length of text
 * @return array of {wordId, position, length}, or {@code EMPTY_RESULT} when
 *         nothing matched
 * @throws IOException if an arc lookup fails
 */
public int[][] lookup(char[] chars, int off, int len) throws IOException {
  // TODO: can we avoid this treemap/toIndexArray?
  TreeMap<Integer, int[]> matches = new TreeMap<>(); // index, [length, length...]
  final FST.BytesReader reader = fst.getBytesReader();
  FST.Arc<Long> cursor = new FST.Arc<>();
  final int limit = off + len;
  for (int start = off; start < limit; start++) {
    cursor = fst.getFirstArc(cursor);
    int sum = 0;
    for (int pos = start; pos < limit; pos++) {
      final int ch = chars[pos];
      // The cache flag is passed only for the first character of each attempt.
      if (fst.findTargetArc(ch, cursor, cursor, pos == start, reader) == null) {
        break; // continue to next position
      }
      sum += cursor.output.intValue();
      if (cursor.isFinal()) {
        final int segmentationIndex = sum + cursor.nextFinalOutput.intValue();
        matches.put(start - off, segmentations[segmentationIndex]);
      }
    }
  }
  // An entry exists exactly when at least one match was found.
  return matches.isEmpty() ? EMPTY_RESULT : toIndexArray(matches);
}
示例13: getExactMatchStartingFromRootArc
import org.apache.lucene.util.fst.FST; //导入方法依赖的package包/类
/**
 * Searches the cached root arcs, beginning at index <code>rootArcIndex</code>,
 * for the first one under which <code>utf8</code> is an exact match: the
 * prefix descent succeeds and the first arc after it carries
 * {@link FST#END_LABEL}.
 *
 * @param rootArcIndex
 *          The first root arc index in {@link #rootArcs} to consider when
 *          matching.
 *
 * @param utf8
 *          The sequence of utf8 bytes to follow.
 *
 * @return Returns the label of the matching root arc (the bucket number of
 *         the match) or <code>-1</code> if no match was found.
 */
private int getExactMatchStartingFromRootArc(
    int rootArcIndex, BytesRef utf8) {
  try {
    final FST.Arc<Object> scratch = new FST.Arc<Object>();
    final FST.BytesReader reader = automaton.getBytesReader();
    for (int idx = rootArcIndex; idx < rootArcs.length; idx++) {
      final FST.Arc<Object> candidate = rootArcs[idx];
      // Work on a copy so the cached root arc itself is never modified.
      final FST.Arc<Object> cursor = scratch.copyFrom(candidate);
      if (!descendWithPrefix(cursor, utf8)) {
        continue;
      }
      automaton.readFirstTargetArc(cursor, cursor, reader);
      if (cursor.label == FST.END_LABEL) {
        // Normalize prefix-encoded weight.
        return candidate.label;
      }
    }
    // No match.
    return -1;
  } catch (IOException e) {
    // Should never happen, but anyway.
    throw new RuntimeException(e);
  }
}
示例14: cacheRootArcs
import org.apache.lucene.util.fst.FST; //导入方法依赖的package包/类
/**
 * Builds a lookup table of the arcs leaving the FST's first arc for every
 * label from {@code 0x3040} up to and including {@code cacheCeiling}.
 *
 * @return an array indexed by {@code label - 0x3040}; an entry is
 *         {@code null} when no arc exists for that label
 * @throws IOException if an arc lookup on the FST fails
 */
@SuppressWarnings({"rawtypes","unchecked"})
private FST.Arc<Long>[] cacheRootArcs() throws IOException {
  FST.Arc<Long>[] table = new FST.Arc[1+(cacheCeiling-0x3040)];
  final FST.Arc<Long> root = new FST.Arc<Long>();
  fst.getFirstArc(root);
  final FST.Arc<Long> scratch = new FST.Arc<Long>();
  final FST.BytesReader reader = fst.getBytesReader();
  // TODO: jump to 3040, readNextRealArc to ceiling? (just be careful we don't add bugs)
  for (int slot = 0; slot < table.length; slot++) {
    final int label = 0x3040 + slot;
    if (fst.findTargetArc(label, root, scratch, reader) == null) {
      continue;
    }
    // The scratch arc is overwritten by the next lookup, so store a copy.
    table[slot] = new FST.Arc<Long>().copyFrom(scratch);
  }
  return table;
}
示例15: lookup
import org.apache.lucene.util.fst.FST; //导入方法依赖的package包/类
/**
 * Lookup words in text.
 *
 * <p>For every start position in the given range, the FST is walked
 * character by character; whenever a final arc is reached, the segmentation
 * selected by the accumulated output is recorded for that start position,
 * replacing any earlier (shorter) match recorded for the same position.
 *
 * @param chars text
 * @param off offset into text
 * @param len length of text
 * @return array of {wordId, position, length}, or {@code EMPTY_RESULT} when
 *         nothing matched
 * @throws IOException if an arc lookup fails
 */
public int[][] lookup(char[] chars, int off, int len) throws IOException {
  // TODO: can we avoid this treemap/toIndexArray?
  TreeMap<Integer, int[]> matches = new TreeMap<Integer, int[]>(); // index, [length, length...]
  final FST.BytesReader reader = fst.getBytesReader();
  FST.Arc<Long> cursor = new FST.Arc<Long>();
  final int limit = off + len;
  for (int start = off; start < limit; start++) {
    cursor = fst.getFirstArc(cursor);
    int sum = 0;
    for (int pos = start; pos < limit; pos++) {
      final int ch = chars[pos];
      // The cache flag is passed only for the first character of each attempt.
      if (fst.findTargetArc(ch, cursor, cursor, pos == start, reader) == null) {
        break; // continue to next position
      }
      sum += cursor.output.intValue();
      if (cursor.isFinal()) {
        final int segmentationIndex = sum + cursor.nextFinalOutput.intValue();
        matches.put(start - off, segmentations[segmentationIndex]);
      }
    }
  }
  // An entry exists exactly when at least one match was found.
  return matches.isEmpty() ? EMPTY_RESULT : toIndexArray(matches);
}