当前位置: 首页>>代码示例>>Java>>正文


Java FST类代码示例

本文整理汇总了Java中org.apache.lucene.util.fst.FST的典型用法代码示例。如果您正苦于以下问题:Java FST类的具体用法?Java FST怎么用?Java FST使用的例子?那么, 这里精选的类代码示例或许可以为您提供帮助。


FST类属于org.apache.lucene.util.fst包,在下文中一共展示了FST类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: getFullPrefixPaths

import org.apache.lucene.util.fst.FST; //导入依赖的package包/类
/**
 * Computes the prefix paths for a fuzzy lookup.
 *
 * <p>{@code lookupAutomaton} is passed through {@code toLevenshteinAutomata} and then
 * {@code convertAutomaton}; the resulting automaton is intersected with {@code fst} via
 * {@code FSTUtil.intersectPrefixPaths}. The {@code prefixPaths} argument is not consulted
 * by this implementation.
 *
 * @param prefixPaths     paths supplied by the caller (unused here)
 * @param lookupAutomaton automaton built for the lookup key
 * @param fst             FST to intersect the fuzzy automaton with
 * @return whatever {@code FSTUtil.intersectPrefixPaths} yields for the fuzzy automaton
 * @throws IOException propagated from the intersection
 */
@Override
protected List<FSTUtil.Path<PairOutputs.Pair<Long,BytesRef>>> getFullPrefixPaths(
    List<FSTUtil.Path<PairOutputs.Pair<Long,BytesRef>>> prefixPaths, Automaton lookupAutomaton,
    FST<PairOutputs.Pair<Long,BytesRef>> fst)
        throws IOException {

    // TODO: fuzzy matches are currently not penalised: a completion whose
    // prefix equals the typed text is weighted the same as one that needed
    // one edit, which in turn is weighted the same as one that needed two.
    // A multiplicative factor is probably right (eg, a fuzzy match must
    // have at least 2X the weight of the exact match to "compete"), in
    // which case the wFST likely needs log weights or something similar.

    final Automaton levenshtein = toLevenshteinAutomata(lookupAutomaton);
    final Automaton fuzzyAutomaton = convertAutomaton(levenshtein);

    // Debugging aid: fuzzyAutomaton.toDot() can be written to a .dot file to inspect it.
    return FSTUtil.intersectPrefixPaths(fuzzyAutomaton, fst);
}
 
开发者ID:justor,项目名称:elasticsearch_my,代码行数:26,代码来源:XFuzzySuggester.java

示例2: NormalizeCharMap

import org.apache.lucene.util.fst.FST; //导入依赖的package包/类
/**
 * Stores the given FST and pre-caches its root arcs.
 *
 * <p>When {@code map} is non-null and the target of its first arc has arcs, each of those
 * arcs is copied into {@code cachedRootArcs}, keyed by the arc label cast to {@code char}.
 *
 * @param map FST to wrap; may be {@code null}, in which case nothing is cached
 */
private NormalizeCharMap(FST<CharsRef> map) {
  this.map = map;
  if (map == null) {
    return;
  }
  try {
    final FST.Arc<CharsRef> arc = new FST.Arc<>();
    final FST.BytesReader reader = map.getBytesReader();
    map.getFirstArc(arc);
    if (!FST.targetHasArcs(arc)) {
      return;
    }
    map.readFirstRealTargetArc(arc.target, arc, reader);
    for (;;) {
      assert arc.label != FST.END_LABEL;
      // Cache a copy: the scratch arc is overwritten by the next read.
      final Character key = Character.valueOf((char) arc.label);
      cachedRootArcs.put(key, new FST.Arc<CharsRef>().copyFrom(arc));
      if (arc.isLast()) {
        return;
      }
      map.readNextRealArc(arc, reader);
    }
  } catch (IOException ioe) {
    // The FST API declares IOException; it is rethrown unchecked here.
    throw new RuntimeException(ioe);
  }
}
 
开发者ID:lamsfoundation,项目名称:lams,代码行数:27,代码来源:NormalizeCharMap.java

示例3: build

import org.apache.lucene.util.fst.FST; //导入依赖的package包/类
/**
 * Builds the NormalizeCharMap; call this once you are done calling {@link #add}.
 *
 * <p>Every entry of {@code pendingPairs} is added to an FST builder (key converted with
 * {@code Util.toUTF16}, value wrapped in a {@code CharsRef}); {@code pendingPairs} is
 * cleared once the FST has been finished.
 *
 * @return a new NormalizeCharMap wrapping the finished FST
 * @throws RuntimeException wrapping any {@link IOException} raised while building
 */
public NormalizeCharMap build() {
  try {
    final Outputs<CharsRef> fstOutputs = CharSequenceOutputs.getSingleton();
    final org.apache.lucene.util.fst.Builder<CharsRef> fstBuilder =
        new org.apache.lucene.util.fst.Builder<>(FST.INPUT_TYPE.BYTE2, fstOutputs);
    final IntsRefBuilder keyScratch = new IntsRefBuilder();
    for (Map.Entry<String,String> pair : pendingPairs.entrySet()) {
      final String match = pair.getKey();
      final String replacement = pair.getValue();
      fstBuilder.add(Util.toUTF16(match, keyScratch), new CharsRef(replacement));
    }
    final FST<CharsRef> finished = fstBuilder.finish();
    pendingPairs.clear();
    return new NormalizeCharMap(finished);
  } catch (IOException ioe) {
    // The FST API declares IOException; it is rethrown unchecked here.
    throw new RuntimeException(ioe);
  }
}
 
开发者ID:lamsfoundation,项目名称:lams,代码行数:23,代码来源:NormalizeCharMap.java

示例4: Stemmer

import org.apache.lucene.util.fst.FST; //导入依赖的package包/类
/**
 * Constructs a new Stemmer which will use the provided Dictionary to create its stems.
 *
 * @param dictionary Dictionary that will be used to create the stems
 */
public Stemmer(Dictionary dictionary) {
  this.dictionary = dictionary;
  this.affixReader = new ByteArrayDataInput(dictionary.affixData);
  for (int level = 0; level < 3; level++) {
    if (dictionary.prefixes != null) {
      prefixArcs[level] = new FST.Arc<>();
      prefixReaders[level] = dictionary.prefixes.getBytesReader();
    }
    if (dictionary.suffixes != null) {
      suffixArcs[level] = new FST.Arc<>();
      suffixReaders[level] = dictionary.suffixes.getBytesReader();
    }
  }
  formStep = dictionary.hasStemExceptions ? 2 : 1;
}
 
开发者ID:lamsfoundation,项目名称:lams,代码行数:21,代码来源:Stemmer.java

示例5: parseConversions

import org.apache.lucene.util.fst.FST; //导入依赖的package包/类
/**
 * Reads {@code num} conversion lines from {@code reader} and compiles them into an FST.
 *
 * <p>Each line is split on runs of whitespace and must yield exactly three tokens; the
 * second token is mapped to the third (the first token is not inspected). The mappings
 * are collected in a {@link TreeMap} and added to the FST builder in that map's key order
 * (presumably because the builder expects sorted input; confirm against the Builder docs).
 *
 * @param reader source of the conversion lines
 * @param num    number of lines to consume
 * @return the FST produced by the builder for the collected mappings
 * @throws IOException           if reading from {@code reader} or building the FST fails
 * @throws ParseException        if the input ends before {@code num} lines were read, or a
 *                               line does not consist of exactly three tokens
 * @throws IllegalStateException if the same source string is mapped more than once
 */
private FST<CharsRef> parseConversions(LineNumberReader reader, int num) throws IOException, ParseException {
  Map<String,String> mappings = new TreeMap<>();

  for (int i = 0; i < num; i++) {
    String line = reader.readLine();
    if (line == null) {
      // readLine() returns null at end of input; report the truncated file as a parse
      // error instead of failing with a NullPointerException in line.split().
      throw new ParseException("unexpected end of input: expected " + num
          + " conversion lines but found " + i, reader.getLineNumber());
    }
    String[] parts = line.split("\\s+");
    if (parts.length != 3) {
      throw new ParseException("invalid syntax: " + line, reader.getLineNumber());
    }
    if (mappings.put(parts[1], parts[2]) != null) {
      throw new IllegalStateException("duplicate mapping specified for: " + parts[1]);
    }
  }

  Outputs<CharsRef> outputs = CharSequenceOutputs.getSingleton();
  Builder<CharsRef> builder = new Builder<>(FST.INPUT_TYPE.BYTE2, outputs);
  IntsRefBuilder scratchInts = new IntsRefBuilder();
  for (Map.Entry<String,String> entry : mappings.entrySet()) {
    Util.toUTF16(entry.getKey(), scratchInts);
    builder.add(scratchInts.get(), new CharsRef(entry.getValue()));
  }

  return builder.finish();
}
 
开发者ID:lamsfoundation,项目名称:lams,代码行数:25,代码来源:Dictionary.java

示例6: build

import org.apache.lucene.util.fst.FST; //导入依赖的package包/类
/**
 * Returns a {@link StemmerOverrideMap} to be used with the {@link StemmerOverrideFilter}.
 *
 * <p>The ids of {@code hash} are sorted with
 * {@code BytesRef.getUTF8SortedAsUnicodeComparator()}; in that order each key is converted
 * with {@code IntsRefBuilder.copyUTF8Bytes} and added to a {@code BYTE4} FST builder
 * together with the matching entry of {@code outputValues}.
 *
 * @return a {@link StemmerOverrideMap} built from the finished FST and {@code ignoreCase}
 * @throws IOException if an {@link IOException} occurs while building the FST
 */
public StemmerOverrideMap build() throws IOException {
  final ByteSequenceOutputs fstOutputs = ByteSequenceOutputs.getSingleton();
  final org.apache.lucene.util.fst.Builder<BytesRef> fstBuilder =
      new org.apache.lucene.util.fst.Builder<>(FST.INPUT_TYPE.BYTE4, fstOutputs);

  final int[] sortedIds = hash.sort(BytesRef.getUTF8SortedAsUnicodeComparator());
  final int entryCount = hash.size();

  final IntsRefBuilder keyInts = new IntsRefBuilder();
  final BytesRef keyBytes = new BytesRef();
  // Only the first entryCount slots of sortedIds are read.
  for (int rank = 0; rank < entryCount; rank++) {
    final int id = sortedIds[rank];
    keyInts.copyUTF8Bytes(hash.get(id, keyBytes));
    fstBuilder.add(keyInts.get(), new BytesRef(outputValues.get(id)));
  }

  return new StemmerOverrideMap(fstBuilder.finish(), ignoreCase);
}
 
开发者ID:lamsfoundation,项目名称:lams,代码行数:22,代码来源:StemmerOverrideFilter.java

示例7: getFullPrefixPaths

import org.apache.lucene.util.fst.FST; //导入依赖的package包/类
/**
 * Produces the prefix paths for a fuzzy lookup by intersecting a Levenshtein-expanded
 * automaton with the suggester FST.
 *
 * <p>The automaton is obtained by applying {@code toLevenshteinAutomata} and then
 * {@code convertAutomaton} to {@code lookupAutomaton}. {@code prefixPaths} is not read.
 *
 * @param prefixPaths     paths supplied by the caller (ignored by this override)
 * @param lookupAutomaton automaton built for the lookup key
 * @param fst             FST the expanded automaton is intersected with
 * @return the result of {@code FSTUtil.intersectPrefixPaths}
 * @throws IOException propagated from the intersection
 */
@Override
protected List<FSTUtil.Path<PairOutputs.Pair<Long,BytesRef>>> getFullPrefixPaths(List<FSTUtil.Path<PairOutputs.Pair<Long,BytesRef>>> prefixPaths,
                                                                                 Automaton lookupAutomaton,
                                                                                 FST<PairOutputs.Pair<Long,BytesRef>> fst)
        throws IOException {

    // TODO: edits carry no penalty yet. An exact prefix match gets no boost
    // over a completion that required one edit, and that one gets no boost
    // over a completion requiring two edits. A multiplicative factor seems
    // appropriate (eg, a fuzzy match must be at least 2X better weight than
    // the non-fuzzy match to "compete"), in which case the wFST probably
    // needs to hold log weights or something similar.

    // Debugging aid: the automaton's toDot() output can be dumped to a file for inspection.
    return FSTUtil.intersectPrefixPaths(
        convertAutomaton(toLevenshteinAutomata(lookupAutomaton)),
        fst);
}
 
开发者ID:baidu,项目名称:Elasticsearch,代码行数:26,代码来源:XFuzzySuggester.java

示例8: update

import org.apache.lucene.util.fst.FST; //导入依赖的package包/类
/**
 * Swaps in a new synonym map and rebuilds the state derived from it: the FST reader, the
 * rolling input/output buffers (sized {@code 1 + maxHorizontalContext}) and the scratch arc.
 *
 * <p>Every field assigned here was changed from {@code final} to non-final to make this
 * update possible. Reassign those fields only inside this method; doing so elsewhere may
 * introduce bugs.
 *
 * @param synonymMap the new synonym map; its {@code fst} must be non-null
 * @throws NullPointerException     if {@code synonymMap} is {@code null}
 * @throws IllegalArgumentException if {@code synonymMap.fst} is {@code null}
 */
@Override
public void update(SynonymMap synonymMap) {
    if (synonymMap == null) {
        throw new NullPointerException("synonymMap must be non-null");
    }
    // Validate before touching any field, so that a rejected map leaves the filter
    // exactly as it was instead of half-updated.
    if (synonymMap.fst == null) {
        throw new IllegalArgumentException("fst must be non-null");
    }

    this.synonyms = synonymMap;
    this.fst = synonymMap.fst;
    this.fstReader = this.fst.getBytesReader();
    this.rollBufferSize = 1 + synonymMap.maxHorizontalContext;
    this.futureInputs = new DynamicSynonymFilter.PendingInput[this.rollBufferSize];
    this.futureOutputs = new DynamicSynonymFilter.PendingOutputs[this.rollBufferSize];

    for (int pos = 0; pos < this.rollBufferSize; ++pos) {
        this.futureInputs[pos] = new DynamicSynonymFilter.PendingInput();
        this.futureOutputs[pos] = new DynamicSynonymFilter.PendingOutputs();
    }

    // Diamond instead of the raw type, so the arc's type argument is inferred from the field.
    this.scratchArc = new FST.Arc<>();
}
 
开发者ID:hailin0,项目名称:elasticsearch-analysis-dynamic-synonym,代码行数:26,代码来源:DynamicSynonymFilter.java

示例9: loadMetaData

import org.apache.lucene.util.fst.FST; //导入依赖的package包/类
/**
 * Lazily accumulates the term metadata once a term has been accepted.
 *
 * <p>Advances {@code metaUpto} until it equals {@code level}, folding the previous arc's
 * output into each following arc's output with {@code fstOutputs.add}. {@code meta} is then
 * the last arc's output, combined with its {@code nextFinalOutput} when that arc is final.
 * Finally {@code docFreq} and {@code totalTermFreq} are copied from {@code meta} into
 * {@code state}.
 *
 * @throws IOException declared by this method's signature
 */
void loadMetaData() throws IOException {
  FST.Arc<FSTTermOutputs.TermData> prev = stack[metaUpto].fstArc;
  while (metaUpto != level) {
    final FST.Arc<FSTTermOutputs.TermData> cur = stack[++metaUpto].fstArc;
    // The accumulated output is written back into the stacked arc itself.
    cur.output = fstOutputs.add(cur.output, prev.output);
    prev = cur;
  }

  meta = prev.isFinal()
      ? fstOutputs.add(prev.output, prev.nextFinalOutput)
      : prev.output;

  state.docFreq = meta.docFreq;
  state.totalTermFreq = meta.totalTermFreq;
}
 
开发者ID:europeana,项目名称:search,代码行数:19,代码来源:FSTTermsReader.java

示例10: walk

import org.apache.lucene.util.fst.FST; //导入依赖的package包/类
/**
 * Traverses the FST breadth-first, starting from its first arc and reading every arc of
 * every node that has not been visited yet.
 *
 * <p>Nothing is returned or recorded beyond the local visited set, so the method is only
 * useful for its reads (presumably to touch or validate the whole FST; confirm with callers).
 *
 * @param fst the FST to traverse
 * @param <T> output type of the FST
 * @throws IOException if reading arcs from the FST fails
 */
static<T> void walk(FST<T> fst) throws IOException {
  // ArrayDeque removes from the head in O(1); ArrayList.remove(0) shifts the whole backing
  // array on every dequeue, which made this traversal quadratic in the queue length.
  final java.util.Deque<FST.Arc<T>> queue = new java.util.ArrayDeque<>();
  final BitSet seen = new BitSet();
  final FST.BytesReader reader = fst.getBytesReader();
  final FST.Arc<T> startArc = fst.getFirstArc(new FST.Arc<T>());
  queue.add(startArc);
  while (!queue.isEmpty()) {
    final FST.Arc<T> arc = queue.remove();
    final long node = arc.target;
    // NOTE(review): node is a long but is narrowed to int for the BitSet; confirm that
    // node addresses always fit in an int for the FSTs passed here.
    if (FST.targetHasArcs(arc) && !seen.get((int) node)) {
      seen.set((int) node);
      // The dequeued arc is reused as the scratch arc for reading this node's arcs.
      fst.readFirstRealTargetArc(node, arc, reader);
      while (true) {
        // Enqueue a copy, since arc is overwritten by the next read.
        queue.add(new FST.Arc<T>().copyFrom(arc));
        if (arc.isLast()) {
          break;
        } else {
          fst.readNextRealArc(arc, reader);
        }
      }
    }
  }
}
 
开发者ID:europeana,项目名称:search,代码行数:25,代码来源:FSTTermsReader.java

示例11: writeFST

import org.apache.lucene.util.fst.FST; //导入依赖的package包/类
/**
 * Writes the given values as an FST that maps each value to its position in iteration order.
 *
 * <p>To {@code meta} it writes the field number, the {@code FST} marker byte and the current
 * file pointer of {@code data}. The FST, if the builder produced one, is then saved to
 * {@code data}, and the number of values is written to {@code meta} as a VLong.
 *
 * @param field  field whose number is recorded in {@code meta}
 * @param values values to add to the FST, in iteration order
 * @throws IOException if writing to {@code meta} or {@code data}, or building the FST, fails
 */
private void writeFST(FieldInfo field, Iterable<BytesRef> values) throws IOException {
  meta.writeVInt(field.number);
  meta.writeByte(FST);
  meta.writeLong(data.getFilePointer());

  final PositiveIntOutputs ordOutputs = PositiveIntOutputs.getSingleton();
  final Builder<Long> fstBuilder = new Builder<>(INPUT_TYPE.BYTE1, ordOutputs);
  final IntsRefBuilder keyScratch = new IntsRefBuilder();

  long count = 0;
  for (BytesRef value : values) {
    // Each value is stored with the number of values seen before it.
    fstBuilder.add(Util.toIntsRef(value, keyScratch), count++);
  }

  final FST<Long> built = fstBuilder.finish();
  if (built != null) {
    built.save(data);
  }
  meta.writeVLong(count);
}
 
开发者ID:europeana,项目名称:search,代码行数:19,代码来源:MemoryDocValuesConsumer.java

示例12: pushFrame

import org.apache.lucene.util.fst.FST; //导入依赖的package包/类
/**
 * Decodes the frame header stored in {@code frameData} and pushes the matching frame.
 *
 * <p>A VLong is read from {@code frameData.bytes}: shifted right by
 * {@code OUTPUT_FLAGS_NUM_BITS} it gives the file pointer to seek to, and its
 * {@code OUTPUT_FLAG_HAS_TERMS} / {@code OUTPUT_FLAG_IS_FLOOR} bits set the frame's flags.
 * The frame at {@code 1 + currentFrame.ord} is configured and then the four-argument
 * {@code pushFrame} overload is called.
 *
 * @param arc       arc forwarded to the four-argument overload
 * @param frameData output holding the encoded header bytes and the start ord
 * @param length    length forwarded to the four-argument overload
 * @return the frame obtained from {@code getFrame(1 + currentFrame.ord)}
 * @throws IOException if the delegated {@code pushFrame} call fails
 */
OrdsSegmentTermsEnumFrame pushFrame(FST.Arc<Output> arc, Output frameData, int length) throws IOException {
  scratchReader.reset(frameData.bytes.bytes, frameData.bytes.offset, frameData.bytes.length);
  final long header = scratchReader.readVLong();
  final long filePointer = header >>> OrdsBlockTreeTermsWriter.OUTPUT_FLAGS_NUM_BITS;
  final boolean hasTerms = (header & OrdsBlockTreeTermsWriter.OUTPUT_FLAG_HAS_TERMS) != 0;
  final boolean isFloor = (header & OrdsBlockTreeTermsWriter.OUTPUT_FLAG_IS_FLOOR) != 0;

  final OrdsSegmentTermsEnumFrame frame = getFrame(1+currentFrame.ord);
  frame.hasTerms = hasTerms;
  frame.hasTermsOrig = hasTerms;
  frame.isFloor = isFloor;

  // Floor data must be set before the delegated pushFrame, in case that call tries to rewind:
  if (isFloor) {
    frame.termOrdOrig = frameData.startOrd;
    frame.setFloorData(scratchReader, frameData.bytes);
  }

  pushFrame(arc, filePointer, length, frameData.startOrd);

  return frame;
}
 
开发者ID:europeana,项目名称:search,代码行数:21,代码来源:OrdsSegmentTermsEnum.java

示例13: getFullPrefixPaths

import org.apache.lucene.util.fst.FST; //导入依赖的package包/类
@Override
protected List<FSTUtil.Path<Pair<Long,BytesRef>>> getFullPrefixPaths(List<FSTUtil.Path<Pair<Long,BytesRef>>> prefixPaths,
                                                                     Automaton lookupAutomaton,
                                                                     FST<Pair<Long,BytesRef>> fst)
  throws IOException {

  // TODO: right now there's no penalty for fuzzy/edits,
  // ie a completion whose prefix matched exactly what the
  // user typed gets no boost over completions that
  // required an edit, which get no boost over completions
  // requiring two edits.  I suspect a multiplicative
  // factor is appropriate (eg, say a fuzzy match must be at
  // least 2X better weight than the non-fuzzy match to
  // "compete") ... in which case I think the wFST needs
  // to be log weights or something ...

  Automaton levA = convertAutomaton(toLevenshteinAutomata(lookupAutomaton));
  /*
    Writer w = new OutputStreamWriter(new FileOutputStream("out.dot"), StandardCharsets.UTF_8);
    w.write(levA.toDot());
    w.close();
    System.out.println("Wrote LevA to out.dot");
  */
  return FSTUtil.intersectPrefixPaths(levA, fst);
}
 
开发者ID:europeana,项目名称:search,代码行数:26,代码来源:FuzzySuggester.java

示例14: load

import org.apache.lucene.util.fst.FST; //导入依赖的package包/类
@Override
public boolean load(DataInput input) throws IOException {
  CodecUtil.checkHeader(input, CODEC_NAME, VERSION_START, VERSION_START);
  count = input.readVLong();
  byte separatorOrig = input.readByte();
  if (separatorOrig != separator) {
    throw new IllegalStateException("separator=" + separator + " is incorrect: original model was built with separator=" + separatorOrig);
  }
  int gramsOrig = input.readVInt();
  if (gramsOrig != grams) {
    throw new IllegalStateException("grams=" + grams + " is incorrect: original model was built with grams=" + gramsOrig);
  }
  totTokens = input.readVLong();

  fst = new FST<>(input, PositiveIntOutputs.getSingleton());

  return true;
}
 
开发者ID:europeana,项目名称:search,代码行数:19,代码来源:FreeTextSuggester.java

示例15: lookupPrefix

import org.apache.lucene.util.fst.FST; //导入依赖的package包/类
/**
 * Follows the bytes of {@code scratch} through {@code fst} and sums the outputs on the way.
 *
 * <p>Starting from the FST's first arc, each byte (as an unsigned value) is looked up with
 * {@code findTargetArc}, reusing {@code arc} as both the source and the destination arc.
 *
 * @param fst         FST to search
 * @param bytesReader reader passed to {@code findTargetArc}
 * @param scratch     bytes to follow, taken from {@code offset} for {@code length} bytes
 * @param arc         scratch arc; overwritten during the lookup
 * @return the accumulated output, or {@code null} as soon as a byte has no matching arc
 * @throws IOException declared by the FST API
 */
private Long lookupPrefix(FST<Long> fst, FST.BytesReader bytesReader,
                          BytesRef scratch, Arc<Long> arc) throws /*Bogus*/IOException {

  fst.getFirstArc(arc);

  Long total = fst.outputs.getNoOutput();
  final byte[] data = scratch.bytes;
  final int limit = scratch.offset + scratch.length;
  for (int i = scratch.offset; i < limit; i++) {
    final int label = data[i] & 0xff;
    if (fst.findTargetArc(label, arc, arc, bytesReader) == null) {
      return null;
    }
    total = fst.outputs.add(total, arc.output);
  }

  return total;
}
 
开发者ID:europeana,项目名称:search,代码行数:21,代码来源:FreeTextSuggester.java


注:本文中的org.apache.lucene.util.fst.FST类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。