本文整理汇总了Java中org.apache.lucene.util.fst.FST类的典型用法代码示例。如果您正苦于以下问题:Java FST类的具体用法?Java FST怎么用?Java FST使用的例子?那么, 这里精选的类代码示例或许可以为您提供帮助。
FST类属于org.apache.lucene.util.fst包,在下文中一共展示了FST类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: getFullPrefixPaths
import org.apache.lucene.util.fst.FST; //导入依赖的package包/类
@Override
protected List<FSTUtil.Path<PairOutputs.Pair<Long,BytesRef>>> getFullPrefixPaths(
    List<FSTUtil.Path<PairOutputs.Pair<Long,BytesRef>>> prefixPaths, Automaton lookupAutomaton,
    FST<PairOutputs.Pair<Long,BytesRef>> fst)
    throws IOException {
  // Widen the typed prefix into a Levenshtein automaton so that prefixes
  // within the allowed edit distance also intersect the suggestion FST.
  //
  // TODO: right now there's no penalty for fuzzy/edits, ie a completion whose
  // prefix matched exactly what the user typed gets no boost over completions
  // that required an edit, which get no boost over completions requiring two
  // edits. I suspect a multiplicative factor is appropriate (eg, say a fuzzy
  // match must be at least 2X better weight than the non-fuzzy match to
  // "compete") ... in which case I think the wFST needs to be log weights or
  // something ...
  final Automaton fuzzyPrefixes = convertAutomaton(toLevenshteinAutomata(lookupAutomaton));
  return FSTUtil.intersectPrefixPaths(fuzzyPrefixes, fst);
}
示例2: NormalizeCharMap
import org.apache.lucene.util.fst.FST; //导入依赖的package包/类
/**
 * Wraps the given normalization FST and eagerly caches every arc leaving the
 * root node, keyed by its label (cast to {@code char}), so per-character
 * lookups can skip re-reading the root. {@code map} may be null, meaning an
 * empty mapping.
 */
private NormalizeCharMap(FST<CharsRef> map) {
  this.map = map;
  if (map != null) {
    try {
      // Pre-cache root arcs:
      final FST.Arc<CharsRef> scratchArc = new FST.Arc<>();
      final FST.BytesReader fstReader = map.getBytesReader();
      map.getFirstArc(scratchArc);
      if (FST.targetHasArcs(scratchArc)) {
        map.readFirstRealTargetArc(scratchArc.target, scratchArc, fstReader);
        while(true) {
          assert scratchArc.label != FST.END_LABEL;
          // Must copy: scratchArc is reused by readNextRealArc on the next
          // iteration. NOTE(review): the (char) cast assumes root labels fit
          // in 16 bits — presumably the FST was built with BYTE2 input; confirm.
          cachedRootArcs.put(Character.valueOf((char) scratchArc.label), new FST.Arc<CharsRef>().copyFrom(scratchArc));
          if (scratchArc.isLast()) {
            break;
          }
          map.readNextRealArc(scratchArc, fstReader);
        }
      }
      //System.out.println("cached " + cachedRootArcs.size() + " root arcs");
    } catch (IOException ioe) {
      // Bogus FST IOExceptions!! (will never happen)
      throw new RuntimeException(ioe);
    }
  }
}
示例3: build
import org.apache.lucene.util.fst.FST; //导入依赖的package包/类
/**
 * Freezes all pending mappings into a {@code NormalizeCharMap}; call this
 * once you are done calling {@link #add}. The pending pairs are consumed
 * (cleared) by this call.
 */
public NormalizeCharMap build() {
  final FST<CharsRef> normalizeFst;
  try {
    final Outputs<CharsRef> charOutputs = CharSequenceOutputs.getSingleton();
    final org.apache.lucene.util.fst.Builder<CharsRef> fstBuilder =
        new org.apache.lucene.util.fst.Builder<>(FST.INPUT_TYPE.BYTE2, charOutputs);
    final IntsRefBuilder utf16Scratch = new IntsRefBuilder();
    for (final Map.Entry<String,String> pair : pendingPairs.entrySet()) {
      fstBuilder.add(Util.toUTF16(pair.getKey(), utf16Scratch),
                     new CharsRef(pair.getValue()));
    }
    normalizeFst = fstBuilder.finish();
    pendingPairs.clear();
  } catch (IOException ioe) {
    // The builder only writes to memory, so this cannot actually happen.
    throw new RuntimeException(ioe);
  }
  return new NormalizeCharMap(normalizeFst);
}
示例4: Stemmer
import org.apache.lucene.util.fst.FST; //导入依赖的package包/类
/**
 * Constructs a new Stemmer which will use the provided Dictionary to create its stems.
 *
 * @param dictionary Dictionary that will be used to create the stems
 */
public Stemmer(Dictionary dictionary) {
  this.dictionary = dictionary;
  this.affixReader = new ByteArrayDataInput(dictionary.affixData);
  // Pre-allocate a scratch arc and a bytes reader per level (3 levels,
  // presumably the max depth of affix stripping — confirm against callers)
  // so stemming itself does not allocate.
  for (int level = 0; level < 3; level++) {
    if (dictionary.prefixes != null) {
      prefixArcs[level] = new FST.Arc<>();
      prefixReaders[level] = dictionary.prefixes.getBytesReader();
    }
    if (dictionary.suffixes != null) {
      suffixArcs[level] = new FST.Arc<>();
      suffixReaders[level] = dictionary.suffixes.getBytesReader();
    }
  }
  // Two enumeration steps per form when the dictionary declares stem exceptions.
  formStep = dictionary.hasStemExceptions ? 2 : 1;
}
示例5: parseConversions
import org.apache.lucene.util.fst.FST; //导入依赖的package包/类
/**
 * Parses {@code num} conversion lines of the form "&lt;tag&gt; &lt;from&gt; &lt;to&gt;"
 * from {@code reader} and compiles them into a UTF-16 keyed FST mapping each
 * input string to its replacement.
 *
 * @param reader source, positioned at the first conversion line
 * @param num number of conversion entries declared in the header
 * @return FST mapping input strings to replacement strings
 * @throws IOException if reading fails
 * @throws ParseException if a line is malformed or the input ends early
 * @throws IllegalStateException if the same input string is mapped twice
 */
private FST<CharsRef> parseConversions(LineNumberReader reader, int num) throws IOException, ParseException {
  Map<String,String> mappings = new TreeMap<>();
  for (int i = 0; i < num; i++) {
    String line = reader.readLine();
    // Guard against truncated input: readLine() returns null at EOF, which
    // previously surfaced as a NullPointerException from split() below.
    if (line == null) {
      throw new ParseException("unexpected end of input: expected " + num + " conversions but found " + i, reader.getLineNumber());
    }
    String parts[] = line.split("\\s+");
    if (parts.length != 3) {
      throw new ParseException("invalid syntax: " + line, reader.getLineNumber());
    }
    if (mappings.put(parts[1], parts[2]) != null) {
      throw new IllegalStateException("duplicate mapping specified for: " + parts[1]);
    }
  }
  // TreeMap iterates keys in sorted order — presumably why it is used here,
  // since the FST builder requires inputs to be added in order.
  Outputs<CharsRef> outputs = CharSequenceOutputs.getSingleton();
  Builder<CharsRef> builder = new Builder<>(FST.INPUT_TYPE.BYTE2, outputs);
  IntsRefBuilder scratchInts = new IntsRefBuilder();
  for (Map.Entry<String,String> entry : mappings.entrySet()) {
    Util.toUTF16(entry.getKey(), scratchInts);
    builder.add(scratchInts.get(), new CharsRef(entry.getValue()));
  }
  return builder.finish();
}
示例6: build
import org.apache.lucene.util.fst.FST; //导入依赖的package包/类
/**
 * Returns an {@link StemmerOverrideMap} to be used with the {@link StemmerOverrideFilter},
 * built from every key/value pair added so far.
 *
 * @return an {@link StemmerOverrideMap} to be used with the {@link StemmerOverrideFilter}
 * @throws IOException if an {@link IOException} occurs;
 */
public StemmerOverrideMap build() throws IOException {
  final ByteSequenceOutputs fstOutputs = ByteSequenceOutputs.getSingleton();
  final org.apache.lucene.util.fst.Builder<BytesRef> fstBuilder =
      new org.apache.lucene.util.fst.Builder<>(FST.INPUT_TYPE.BYTE4, fstOutputs);
  // Keys must be fed to the builder in unicode order.
  final int[] sortedIds = hash.sort(BytesRef.getUTF8SortedAsUnicodeComparator());
  final IntsRefBuilder codePointScratch = new IntsRefBuilder();
  final BytesRef keyScratch = new BytesRef();
  final int entryCount = hash.size();
  for (int i = 0; i < entryCount; i++) {
    final int id = sortedIds[i];
    final BytesRef key = hash.get(id, keyScratch);
    codePointScratch.copyUTF8Bytes(key);
    fstBuilder.add(codePointScratch.get(), new BytesRef(outputValues.get(id)));
  }
  return new StemmerOverrideMap(fstBuilder.finish(), ignoreCase);
}
示例7: getFullPrefixPaths
import org.apache.lucene.util.fst.FST; //导入依赖的package包/类
@Override
protected List<FSTUtil.Path<PairOutputs.Pair<Long,BytesRef>>> getFullPrefixPaths(List<FSTUtil.Path<PairOutputs.Pair<Long,BytesRef>>> prefixPaths,
                                                                                 Automaton lookupAutomaton,
                                                                                 FST<PairOutputs.Pair<Long,BytesRef>> fst)
    throws IOException {
  // Expand the exact prefix automaton into a Levenshtein automaton so that
  // near-miss prefixes also reach the FST intersection below.
  //
  // TODO: right now there's no penalty for fuzzy/edits, ie a completion whose
  // prefix matched exactly what the user typed gets no boost over completions
  // that required an edit, which get no boost over completions requiring two
  // edits. I suspect a multiplicative factor is appropriate (eg, say a fuzzy
  // match must be at least 2X better weight than the non-fuzzy match to
  // "compete") ... in which case I think the wFST needs to be log weights or
  // something ...
  final Automaton fuzzyAutomaton = convertAutomaton(toLevenshteinAutomata(lookupAutomaton));
  return FSTUtil.intersectPrefixPaths(fuzzyAutomaton, fst);
}
示例8: update
import org.apache.lucene.util.fst.FST; //导入依赖的package包/类
/**
 * Installs a new {@link SynonymMap} and rebuilds all state derived from its
 * FST. Every field assigned here must be treated as effectively final
 * elsewhere: this method is the only place they may be (re)assigned,
 * otherwise readers may observe inconsistent state.
 *
 * @param synonymMap the replacement synonym map; its {@code fst} must be non-null
 * @throws IllegalArgumentException if the map's {@code fst} is null
 */
@Override
public void update(SynonymMap synonymMap) {
  this.synonyms = synonymMap;
  this.fst = synonyms.fst;
  // Guard clause instead of else-after-throw; fail fast before touching any
  // other derived state.
  if (this.fst == null) {
    throw new IllegalArgumentException("fst must be non-null");
  }
  this.fstReader = this.fst.getBytesReader();
  // Rolling buffers must hold the longest horizontal match plus the current token.
  this.rollBufferSize = 1 + synonyms.maxHorizontalContext;
  this.futureInputs = new DynamicSynonymFilter.PendingInput[this.rollBufferSize];
  this.futureOutputs = new DynamicSynonymFilter.PendingOutputs[this.rollBufferSize];
  for (int pos = 0; pos < this.rollBufferSize; ++pos) {
    this.futureInputs[pos] = new DynamicSynonymFilter.PendingInput();
    this.futureOutputs[pos] = new DynamicSynonymFilter.PendingOutputs();
  }
  this.scratchArc = new FST.Arc<>();  // diamond instead of raw FST.Arc
}
示例9: loadMetaData
import org.apache.lucene.util.fst.FST; //导入依赖的package包/类
/** Lazily accumulate meta data, when we got a accepted term */
void loadMetaData() throws IOException {
FST.Arc<FSTTermOutputs.TermData> last, next;
last = stack[metaUpto].fstArc;
while (metaUpto != level) {
metaUpto++;
next = stack[metaUpto].fstArc;
next.output = fstOutputs.add(next.output, last.output);
last = next;
}
if (last.isFinal()) {
meta = fstOutputs.add(last.output, last.nextFinalOutput);
} else {
meta = last.output;
}
state.docFreq = meta.docFreq;
state.totalTermFreq = meta.totalTermFreq;
}
示例10: walk
import org.apache.lucene.util.fst.FST; //导入依赖的package包/类
/**
 * Breadth-first traversal over the FST, visiting each reachable node exactly
 * once (visited targets are tracked in a BitSet, so targets must fit in int).
 */
static<T> void walk(FST<T> fst) throws IOException {
  final FST.BytesReader bytesReader = fst.getBytesReader();
  final BitSet visited = new BitSet();
  final ArrayList<FST.Arc<T>> pending = new ArrayList<>();
  pending.add(fst.getFirstArc(new FST.Arc<T>()));
  while (!pending.isEmpty()) {
    // FIFO removal keeps the traversal breadth-first. NOTE(review):
    // remove(0) on ArrayList is O(n); an ArrayDeque would be cheaper.
    final FST.Arc<T> arc = pending.remove(0);
    final long target = arc.target;
    //System.out.println(arc);
    if (FST.targetHasArcs(arc) && !visited.get((int) target)) {
      visited.set((int) target);
      // Enqueue a copy of each outgoing arc: `arc` is reused as scratch by
      // readNextRealArc, so it must be copied before advancing.
      fst.readFirstRealTargetArc(target, arc, bytesReader);
      for (;;) {
        pending.add(new FST.Arc<T>().copyFrom(arc));
        if (arc.isLast()) {
          break;
        }
        fst.readNextRealArc(arc, bytesReader);
      }
    }
  }
}
示例11: writeFST
import org.apache.lucene.util.fst.FST; //导入依赖的package包/类
private void writeFST(FieldInfo field, Iterable<BytesRef> values) throws IOException {
meta.writeVInt(field.number);
meta.writeByte(FST);
meta.writeLong(data.getFilePointer());
PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
Builder<Long> builder = new Builder<>(INPUT_TYPE.BYTE1, outputs);
IntsRefBuilder scratch = new IntsRefBuilder();
long ord = 0;
for (BytesRef v : values) {
builder.add(Util.toIntsRef(v, scratch), ord);
ord++;
}
FST<Long> fst = builder.finish();
if (fst != null) {
fst.save(data);
}
meta.writeVLong(ord);
}
示例12: pushFrame
import org.apache.lucene.util.fst.FST; //导入依赖的package包/类
OrdsSegmentTermsEnumFrame pushFrame(FST.Arc<Output> arc, Output frameData, int length) throws IOException {
scratchReader.reset(frameData.bytes.bytes, frameData.bytes.offset, frameData.bytes.length);
final long code = scratchReader.readVLong();
final long fpSeek = code >>> OrdsBlockTreeTermsWriter.OUTPUT_FLAGS_NUM_BITS;
// System.out.println(" fpSeek=" + fpSeek);
final OrdsSegmentTermsEnumFrame f = getFrame(1+currentFrame.ord);
f.hasTerms = (code & OrdsBlockTreeTermsWriter.OUTPUT_FLAG_HAS_TERMS) != 0;
f.hasTermsOrig = f.hasTerms;
f.isFloor = (code & OrdsBlockTreeTermsWriter.OUTPUT_FLAG_IS_FLOOR) != 0;
// Must setFloorData before pushFrame in case pushFrame tries to rewind:
if (f.isFloor) {
f.termOrdOrig = frameData.startOrd;
f.setFloorData(scratchReader, frameData.bytes);
}
pushFrame(arc, fpSeek, length, frameData.startOrd);
return f;
}
示例13: getFullPrefixPaths
import org.apache.lucene.util.fst.FST; //导入依赖的package包/类
@Override
protected List<FSTUtil.Path<Pair<Long,BytesRef>>> getFullPrefixPaths(List<FSTUtil.Path<Pair<Long,BytesRef>>> prefixPaths,
                                                                     Automaton lookupAutomaton,
                                                                     FST<Pair<Long,BytesRef>> fst)
    throws IOException {
  // Replace the exact-prefix automaton with its Levenshtein expansion so the
  // intersection also finds completions reachable within the edit distance.
  //
  // TODO: right now there's no penalty for fuzzy/edits, ie a completion whose
  // prefix matched exactly what the user typed gets no boost over completions
  // that required an edit, which get no boost over completions requiring two
  // edits. I suspect a multiplicative factor is appropriate (eg, say a fuzzy
  // match must be at least 2X better weight than the non-fuzzy match to
  // "compete") ... in which case I think the wFST needs to be log weights or
  // something ...
  final Automaton fuzzyLookup = convertAutomaton(toLevenshteinAutomata(lookupAutomaton));
  return FSTUtil.intersectPrefixPaths(fuzzyLookup, fst);
}
示例14: load
import org.apache.lucene.util.fst.FST; //导入依赖的package包/类
/**
 * Restores this model's state (counts and FST) from {@code input}, after
 * validating the codec header and that the stored separator and gram size
 * match the values this instance was configured with.
 */
@Override
public boolean load(DataInput input) throws IOException {
  CodecUtil.checkHeader(input, CODEC_NAME, VERSION_START, VERSION_START);
  count = input.readVLong();
  final byte storedSeparator = input.readByte();
  if (storedSeparator != separator) {
    throw new IllegalStateException("separator=" + separator + " is incorrect: original model was built with separator=" + storedSeparator);
  }
  final int storedGrams = input.readVInt();
  if (storedGrams != grams) {
    throw new IllegalStateException("grams=" + grams + " is incorrect: original model was built with grams=" + storedGrams);
  }
  totTokens = input.readVLong();
  fst = new FST<>(input, PositiveIntOutputs.getSingleton());
  return true;
}
示例15: lookupPrefix
import org.apache.lucene.util.fst.FST; //导入依赖的package包/类
/**
 * Walks {@code fst} along the bytes of {@code scratch}, accumulating outputs.
 * Returns the summed output for the prefix, or null if the byte sequence is
 * not a prefix of any entry in the FST. {@code arc} is used as scratch.
 */
private Long lookupPrefix(FST<Long> fst, FST.BytesReader bytesReader,
    BytesRef scratch, Arc<Long> arc) throws /*Bogus*/IOException {
  fst.getFirstArc(arc);
  Long output = fst.outputs.getNoOutput();
  final byte[] buf = scratch.bytes;
  final int limit = scratch.offset + scratch.length;
  for (int i = scratch.offset; i < limit; i++) {
    // Labels are unsigned bytes; mask before following the arc.
    if (fst.findTargetArc(buf[i] & 0xff, arc, arc, bytesReader) == null) {
      return null;
    }
    output = fst.outputs.add(output, arc.output);
  }
  return output;
}