当前位置: 首页>>代码示例>>Java>>正文


Java Util类代码示例

本文整理汇总了Java中org.apache.lucene.util.fst.Util的典型用法代码示例。如果您正苦于以下问题：Java Util类的具体用法？Java Util怎么用？Java Util使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。


Util类属于org.apache.lucene.util.fst包,在下文中一共展示了Util类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: finishTerm

import org.apache.lucene.util.fst.Util; //导入依赖的package包/类
/**
 * Adds every surface form buffered for the current analyzed form to the FST builder,
 * then resets the per-term buffers.
 *
 * <p>The first {@code count} buffered entries are sorted, a zero byte plus one extra
 * trailing byte are appended to {@code analyzed}, and for each entry the trailing byte
 * is overwritten with the entry's index so that every entry is added under its own key.
 *
 * @param defaultWeight weight used (capped at {@code Integer.MAX_VALUE} and passed
 *        through {@code encodeWeight}) for entries whose own weight is {@code -1}
 * @throws IOException if adding to the builder fails
 */
public void finishTerm(long defaultWeight) throws IOException {
    ArrayUtil.timSort(surfaceFormsAndPayload, 0, count);

    // Reserve two trailing bytes: a zero byte followed by a slot for the per-entry index.
    analyzed.append((byte) 0);
    analyzed.setLength(analyzed.length() + 1);
    analyzed.grow(analyzed.length());

    for (int ordinal = 0; ordinal < count; ordinal++) {
        // The loop index doubles as the de-duplicating suffix byte.
        analyzed.setByteAt(analyzed.length() - 1, (byte) ordinal);
        Util.toIntsRef(analyzed.get(), scratchInts);

        SurfaceFormAndPayload entry = surfaceFormsAndPayload[ordinal];
        long cost;
        if (entry.weight == -1) {
            // No explicit weight on this entry: fall back to the (capped) default.
            cost = encodeWeight(Math.min(Integer.MAX_VALUE, defaultWeight));
        } else {
            cost = entry.weight;
        }
        builder.add(scratchInts.get(), outputs.newPair(cost, entry.payload));
    }

    seenSurfaceForms.clear();
    count = 0;
}
 
开发者ID:justor,项目名称:elasticsearch_my,代码行数:17,代码来源:XAnalyzingSuggester.java

示例2: build

import org.apache.lucene.util.fst.Util; //导入依赖的package包/类
/**
 * Builds the NormalizeCharMap; call this once you
 * are done calling {@link #add}.
 *
 * <p>Every pending (match, replacement) pair is added to an FST keyed by the UTF-16
 * code units of the match string; the pending pairs are cleared once the FST is finished.
 *
 * @return a new NormalizeCharMap wrapping the compiled FST
 */
public NormalizeCharMap build() {

  final FST<CharsRef> compiled;
  try {
    final Outputs<CharsRef> charOutputs = CharSequenceOutputs.getSingleton();
    final org.apache.lucene.util.fst.Builder<CharsRef> fstBuilder =
        new org.apache.lucene.util.fst.Builder<>(FST.INPUT_TYPE.BYTE2, charOutputs);
    final IntsRefBuilder keyScratch = new IntsRefBuilder();

    for (Map.Entry<String,String> pair : pendingPairs.entrySet()) {
      final String match = pair.getKey();
      final String replacement = pair.getValue();
      fstBuilder.add(Util.toUTF16(match, keyScratch), new CharsRef(replacement));
    }

    compiled = fstBuilder.finish();
    pendingPairs.clear();
  } catch (IOException ioe) {
    // The original author notes FST IOExceptions are bogus here; rethrow unchecked, keeping the cause.
    throw new RuntimeException(ioe);
  }

  return new NormalizeCharMap(compiled);
}
 
开发者ID:lamsfoundation,项目名称:lams,代码行数:23,代码来源:NormalizeCharMap.java

示例3: parseConversions

import org.apache.lucene.util.fst.Util; //导入依赖的package包/类
/**
 * Reads {@code num} conversion lines from {@code reader} and compiles them into an FST
 * that maps each line's second field to its third field.
 *
 * <p>Each line must consist of exactly three whitespace-separated fields; the first
 * field is not used by this method.
 *
 * @param reader source of the conversion lines; its line number is reported in errors
 * @param num number of lines to consume
 * @return the FST built from the collected mappings
 * @throws IOException if reading fails or the FST cannot be built
 * @throws ParseException if the input ends before {@code num} lines were read, or a
 *         line does not have exactly three fields
 * @throws IllegalStateException if the same source string is mapped more than once
 */
private FST<CharsRef> parseConversions(LineNumberReader reader, int num) throws IOException, ParseException {
  // TreeMap iterates its keys in sorted order.
  // NOTE(review): presumably required because the FST Builder expects sorted input - confirm.
  Map<String,String> mappings = new TreeMap<>();

  for (int i = 0; i < num; i++) {
    String line = reader.readLine();
    if (line == null) {
      // readLine() returns null at end of stream; report a parse error instead of
      // failing with a NullPointerException on line.split below.
      throw new ParseException("unexpected end of input: expected " + num
          + " conversion lines but found " + i, reader.getLineNumber());
    }
    String[] parts = line.split("\\s+");
    if (parts.length != 3) {
      throw new ParseException("invalid syntax: " + line, reader.getLineNumber());
    }
    if (mappings.put(parts[1], parts[2]) != null) {
      throw new IllegalStateException("duplicate mapping specified for: " + parts[1]);
    }
  }

  Outputs<CharsRef> outputs = CharSequenceOutputs.getSingleton();
  Builder<CharsRef> builder = new Builder<>(FST.INPUT_TYPE.BYTE2, outputs);
  IntsRefBuilder scratchInts = new IntsRefBuilder();
  for (Map.Entry<String,String> entry : mappings.entrySet()) {
    // Keys are added as UTF-16 code units, matching the BYTE2 input type.
    Util.toUTF16(entry.getKey(), scratchInts);
    builder.add(scratchInts.get(), new CharsRef(entry.getValue()));
  }

  return builder.finish();
}
 
开发者ID:lamsfoundation,项目名称:lams,代码行数:25,代码来源:Dictionary.java

示例4: finishTerm

import org.apache.lucene.util.fst.Util; //导入依赖的package包/类
/**
 * Finishes a term: lets the postings writer finalize and encode its state, then adds
 * the term together with the resulting metadata to the FST builder.
 *
 * @param text the term bytes
 * @param stats document frequency and total term frequency of the term
 * @throws IOException if the postings writer or the FST builder fails
 */
@Override
public void finishTerm(BytesRef text, TermStats stats) throws IOException {
  // Copy the statistics into the postings state first; the metadata mirrors them.
  final BlockTermState termState = postingsWriter.newTermState();
  termState.docFreq = stats.docFreq;
  termState.totalTermFreq = stats.totalTermFreq;

  final FSTTermOutputs.TermData termData = new FSTTermOutputs.TermData();
  termData.longs = new long[longsSize];
  termData.bytes = null;
  termData.docFreq = termState.docFreq;
  termData.totalTermFreq = termState.totalTermFreq;

  postingsWriter.finishTerm(termState);
  postingsWriter.encodeTerm(termData.longs, metaWriter, fieldInfo, termState, true);

  // Whatever encodeTerm wrote to metaWriter becomes the byte part of the metadata;
  // the writer is reset so the next term starts from an empty buffer.
  final int encodedLength = (int) metaWriter.getFilePointer();
  if (encodedLength > 0) {
    termData.bytes = new byte[encodedLength];
    metaWriter.writeTo(termData.bytes, 0);
    metaWriter.reset();
  }

  builder.add(Util.toIntsRef(text, scratchTerm), termData);
  numTerms++;
}
 
开发者ID:europeana,项目名称:search,代码行数:21,代码来源:FSTTermsWriter.java

示例5: writeFST

import org.apache.lucene.util.fst.Util; //导入依赖的package包/类
/**
 * Writes the metadata header for {@code field}, builds an FST that maps each value to
 * its position in the iteration order, saves the FST to {@code data} (if one was
 * produced), and finally records the number of values in {@code meta}.
 *
 * @param field the field whose number is written to the metadata
 * @param values the values to add, in the order they should be numbered
 * @throws IOException if writing or FST construction fails
 */
private void writeFST(FieldInfo field, Iterable<BytesRef> values) throws IOException {
  // Header: field number, format marker, and where the FST starts in the data file.
  meta.writeVInt(field.number);
  meta.writeByte(FST);
  meta.writeLong(data.getFilePointer());

  PositiveIntOutputs ordOutputs = PositiveIntOutputs.getSingleton();
  Builder<Long> fstBuilder = new Builder<>(INPUT_TYPE.BYTE1, ordOutputs);
  IntsRefBuilder intsScratch = new IntsRefBuilder();

  long valueCount = 0;
  for (BytesRef value : values) {
    // The running count before the increment is the ordinal of this value.
    fstBuilder.add(Util.toIntsRef(value, intsScratch), valueCount++);
  }

  FST<Long> compiled = fstBuilder.finish();
  if (compiled != null) {
    compiled.save(data);
  }
  meta.writeVLong(valueCount);
}
 
开发者ID:europeana,项目名称:search,代码行数:19,代码来源:MemoryDocValuesConsumer.java

示例6: build

import org.apache.lucene.util.fst.Util; //导入依赖的package包/类
/**
 * Builds the suggester's FST from the given terms and their weights.
 *
 * <p>The iterator is wrapped in a {@code WFSTTermFreqIteratorWrapper}; each distinct
 * term it yields is added with the wrapper's weight, and a term equal to the previously
 * added one is skipped.
 *
 * @param iterator source of terms and weights
 * @throws IOException if iteration or FST construction fails
 */
@Override
public void build(TermFreqIterator iterator) throws IOException {
  TermFreqIterator wrapped = new WFSTTermFreqIteratorWrapper(iterator);
  IntsRef intsScratch = new IntsRef();
  PositiveIntOutputs weightOutputs = PositiveIntOutputs.getSingleton(true);
  Builder<Long> fstBuilder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, weightOutputs);

  BytesRef lastAdded = null;
  for (BytesRef term = wrapped.next(); term != null; term = wrapped.next()) {
    long cost = wrapped.weight();

    if (lastAdded != null && term.equals(lastAdded)) {
      // Duplicate suggestion: per the original author, the best weight is the one
      // that was actually added, so later copies are dropped.
      continue;
    }
    if (lastAdded == null) {
      lastAdded = new BytesRef();
    }

    Util.toIntsRef(term, intsScratch);
    fstBuilder.add(intsScratch, cost);
    lastAdded.copyBytes(term);
  }
  fst = fstBuilder.finish();
}
 
开发者ID:pkarmstr,项目名称:NYBC,代码行数:24,代码来源:WFSTCompletionLookup.java

示例7: build

import org.apache.lucene.util.fst.Util; //导入依赖的package包/类
/**
 * Builds the NormalizeCharMap; call this once you
 * are done calling {@link #add}.
 *
 * <p>Every pending (match, replacement) pair is added to an FST keyed by the UTF-16
 * code units of the match string; the pending pairs are cleared once the FST is finished.
 *
 * @return a new NormalizeCharMap wrapping the compiled FST
 */
public NormalizeCharMap build() {

  final FST<CharsRef> compiled;
  try {
    final Outputs<CharsRef> charOutputs = CharSequenceOutputs.getSingleton();
    final org.apache.lucene.util.fst.Builder<CharsRef> fstBuilder =
        new org.apache.lucene.util.fst.Builder<CharsRef>(FST.INPUT_TYPE.BYTE2, charOutputs);
    final IntsRef keyScratch = new IntsRef();

    for (Map.Entry<String,String> pair : pendingPairs.entrySet()) {
      final IntsRef key = Util.toUTF16(pair.getKey(), keyScratch);
      fstBuilder.add(key, new CharsRef(pair.getValue()));
    }

    compiled = fstBuilder.finish();
    pendingPairs.clear();
  } catch (IOException ioe) {
    // The original author notes FST IOExceptions are bogus here; rethrow unchecked, keeping the cause.
    throw new RuntimeException(ioe);
  }

  return new NormalizeCharMap(compiled);
}
 
开发者ID:pkarmstr,项目名称:NYBC,代码行数:23,代码来源:NormalizeCharMap.java

示例8: writeFST

import org.apache.lucene.util.fst.Util; //导入依赖的package包/类
/**
 * Writes the metadata header for {@code field}, builds an FST that maps each value to
 * its position in the iteration order, saves the FST to {@code data} (if one was
 * produced), and finally records the number of values in {@code meta}.
 *
 * @param field the field whose number is written to the metadata
 * @param values the values to add, in the order they should be numbered
 * @throws IOException if writing or FST construction fails
 */
private void writeFST(FieldInfo field, Iterable<BytesRef> values) throws IOException {
  // Header: field number, format marker, and where the FST starts in the data file.
  meta.writeVInt(field.number);
  meta.writeByte(FST);
  meta.writeLong(data.getFilePointer());

  PositiveIntOutputs ordOutputs = PositiveIntOutputs.getSingleton(true);
  Builder<Long> fstBuilder = new Builder<Long>(INPUT_TYPE.BYTE1, ordOutputs);
  IntsRef intsScratch = new IntsRef();

  long valueCount = 0;
  for (BytesRef value : values) {
    // The running count before the increment is the ordinal of this value.
    fstBuilder.add(Util.toIntsRef(value, intsScratch), valueCount++);
  }

  FST<Long> compiled = fstBuilder.finish();
  if (compiled != null) {
    compiled.save(data);
  }
  meta.writeVLong(valueCount);
}
 
开发者ID:pkarmstr,项目名称:NYBC,代码行数:19,代码来源:Lucene42DocValuesConsumer.java

示例9: writeFST

import org.apache.lucene.util.fst.Util; //导入依赖的package包/类
/**
 * Writes the metadata header for {@code field}, builds an FST that maps each value to
 * its position in the iteration order, saves the FST to {@code data} (if one was
 * produced), and finally records the number of values in {@code meta}.
 *
 * @param field the field whose number is written to the metadata
 * @param values the values to add, in the order they should be numbered
 * @throws IOException if writing or FST construction fails
 */
private void writeFST(FieldInfo field, Iterable<BytesRef> values) throws IOException {
  // Header: field number, format marker, and where the FST starts in the data file.
  meta.writeVInt(field.number);
  meta.writeByte(FST);
  meta.writeLong(data.getFilePointer());

  PositiveIntOutputs ordOutputs = PositiveIntOutputs.getSingleton();
  Builder<Long> fstBuilder = new Builder<Long>(INPUT_TYPE.BYTE1, ordOutputs);
  IntsRef intsScratch = new IntsRef();

  long valueCount = 0;
  for (BytesRef value : values) {
    // The running count before the increment is the ordinal of this value.
    fstBuilder.add(Util.toIntsRef(value, intsScratch), valueCount++);
  }

  FST<Long> compiled = fstBuilder.finish();
  if (compiled != null) {
    compiled.save(data);
  }
  meta.writeVLong(valueCount);
}
 
开发者ID:yintaoxue,项目名称:read-open-source-code,代码行数:19,代码来源:MemoryDocValuesConsumer.java

示例10: build

import org.apache.lucene.util.fst.Util; //导入依赖的package包/类
/**
 * Builds the suggester's FST from the given inputs and their weights.
 *
 * <p>The iterator is wrapped in a {@code WFSTInputIterator}; each distinct term it
 * yields is added with the wrapper's weight, and a term equal to the previously added
 * one is skipped.
 *
 * @param iterator source of terms and weights; must not carry payloads
 * @throws IllegalArgumentException if {@code iterator} reports that it has payloads
 * @throws IOException if iteration or FST construction fails
 */
@Override
public void build(InputIterator iterator) throws IOException {
  if (iterator.hasPayloads()) {
    throw new IllegalArgumentException("this suggester doesn't support payloads");
  }

  InputIterator wrapped = new WFSTInputIterator(iterator);
  IntsRef intsScratch = new IntsRef();
  PositiveIntOutputs weightOutputs = PositiveIntOutputs.getSingleton();
  Builder<Long> fstBuilder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, weightOutputs);

  BytesRef lastAdded = null;
  for (BytesRef term = wrapped.next(); term != null; term = wrapped.next()) {
    long cost = wrapped.weight();

    if (lastAdded != null && term.equals(lastAdded)) {
      // Duplicate suggestion: per the original author, the best weight is the one
      // that was actually added, so later copies are dropped.
      continue;
    }
    if (lastAdded == null) {
      lastAdded = new BytesRef();
    }

    Util.toIntsRef(term, intsScratch);
    fstBuilder.add(intsScratch, cost);
    lastAdded.copyBytes(term);
  }
  fst = fstBuilder.finish();
}
 
开发者ID:jimaguere,项目名称:Maskana-Gestor-de-Conocimiento,代码行数:27,代码来源:WFSTCompletionLookup.java

示例11: append

import org.apache.lucene.util.fst.Util; //导入依赖的package包/类
/**
 * Copies every (input, output) entry of {@code subIndex} into {@code builder}.
 *
 * @param builder destination FST builder
 * @param subIndex FST whose entries are enumerated and re-added
 * @param scratchIntsRef reusable buffer for converting each input to ints
 * @throws IOException if enumeration or adding fails
 */
private void append(Builder<BytesRef> builder, FST<BytesRef> subIndex, IntsRefBuilder scratchIntsRef) throws IOException {
  final BytesRefFSTEnum<BytesRef> entries = new BytesRefFSTEnum<>(subIndex);
  for (BytesRefFSTEnum.InputOutput<BytesRef> entry = entries.next();
       entry != null;
       entry = entries.next()) {
    builder.add(Util.toIntsRef(entry.input, scratchIntsRef), entry.output);
  }
}
 
开发者ID:lamsfoundation,项目名称:lams,代码行数:11,代码来源:BlockTreeTermsWriter.java

示例12: seekExact

import org.apache.lucene.util.fst.Util; //导入依赖的package包/类
/**
 * Seeks the wrapped enum {@code in} to the term that {@code Util.getByOutput} returns
 * for {@code ord}.
 *
 * @param ord the output value to look up in the FST
 * @throws IOException if the FST lookup or the seek fails
 */
@Override
public void seekExact(long ord) throws IOException {
  // TODO (from the original author): make this simpler and faster, without risking
  // a bug that corrupts the enum state.
  final BytesRefBuilder termBytes = new BytesRefBuilder();
  termBytes.clear();

  // Restart the lookup from the beginning of the FST.
  bytesReader.setPosition(0);
  fst.getFirstArc(firstArc);
  final IntsRef termInts = Util.getByOutput(fst, ord, bytesReader, firstArc, scratchArc, scratchInts);

  Util.toBytesRef(termInts, termBytes);
  // TODO (from the original author): this could be done lazily, or pushed into FSTEnum.
  in.seekExact(termBytes.get());
}
 
开发者ID:lamsfoundation,项目名称:lams,代码行数:14,代码来源:Lucene42DocValuesProducer.java

示例13: affixFST

import org.apache.lucene.util.fst.Util; //导入依赖的package包/类
/**
 * Builds an FST that maps each key of {@code affixes} (as UTF-32 code points) to the
 * integers listed for that key.
 *
 * @param affixes keys with their associated integer lists; iterated in the map's order
 * @return the finished FST
 * @throws IOException if FST construction fails
 */
private FST<IntsRef> affixFST(TreeMap<String,List<Integer>> affixes) throws IOException {
  IntSequenceOutputs seqOutputs = IntSequenceOutputs.getSingleton();
  Builder<IntsRef> fstBuilder = new Builder<>(FST.INPUT_TYPE.BYTE4, seqOutputs);
  IntsRefBuilder keyScratch = new IntsRefBuilder();

  for (Map.Entry<String,List<Integer>> affix : affixes.entrySet()) {
    Util.toUTF32(affix.getKey(), keyScratch);

    // Copy the boxed list into an IntsRef sized exactly for it.
    List<Integer> ids = affix.getValue();
    IntsRef packed = new IntsRef(ids.size());
    for (int id : ids) {
      packed.ints[packed.length] = id;
      packed.length++;
    }

    fstBuilder.add(keyScratch.get(), packed);
  }
  return fstBuilder.finish();
}
 
开发者ID:lamsfoundation,项目名称:lams,代码行数:16,代码来源:Dictionary.java

示例14: incrementToken

import org.apache.lucene.util.fst.Util; //导入依赖的package包/类
/**
 * Emits the next finite string of the wrapped input as a token.
 *
 * <p>On the first call the input is expanded into its set of finite strings; the size
 * of that set becomes the position increment of the first emitted token, and every
 * later token gets an increment of zero.
 *
 * @return {@code true} if a token was produced, {@code false} once all finite strings
 *         have been emitted
 * @throws IllegalArgumentException if the input expands to more than {@code MAX_PATHS}
 *         finite strings
 * @throws IOException if expanding the input fails
 */
@Override
public boolean incrementToken() throws IOException {
    clearAttributes();

    if (finiteStrings == null) {
        // First call: expand the input once and remember the iterator.
        Set<IntsRef> paths = toFiniteStrings.toFiniteStrings(input);
        final int pathCount = paths.size();

        if (pathCount > MAX_PATHS) {
            throw new IllegalArgumentException("TokenStream expanded to " + pathCount + " finite strings. Only <= " + MAX_PATHS
                    + " finite strings are supported");
        }
        posInc = pathCount;
        finiteStrings = paths.iterator();
    }

    if (!finiteStrings.hasNext()) {
        return false;
    }

    posAttr.setPositionIncrement(posInc);
    /*
     * posInc carries the number of paths this surface form produced. Per the original
     * author, multi fields share the same surface form and therefore sum up.
     */
    posInc = 0;
    Util.toBytesRef(finiteStrings.next(), bytesAtt.builder()); // per the original author: now we have UTF-8
    if (charTermAttribute != null) {
        charTermAttribute.setLength(0);
        charTermAttribute.append(bytesAtt.toUTF16());
    }
    if (payload != null) {
        payloadAttr.setPayload(this.payload);
    }
    return true;
}
 
开发者ID:baidu,项目名称:Elasticsearch,代码行数:34,代码来源:CompletionTokenStream.java

示例15: seekExact

import org.apache.lucene.util.fst.Util; //导入依赖的package包/类
/**
 * Seeks the wrapped enum {@code in} to the term that {@code Util.getByOutput} returns
 * for {@code ord}.
 *
 * @param ord the output value to look up in the FST
 * @throws IOException if the FST lookup or the seek fails
 */
@Override
public void seekExact(long ord) throws IOException {
  // TODO (from the original author): make this simpler and faster, without risking
  // a bug that corrupts the enum state.

  // Restart the lookup from the beginning of the FST.
  bytesReader.setPosition(0);
  fst.getFirstArc(firstArc);
  final IntsRef termInts = Util.getByOutput(fst, ord, bytesReader, firstArc, scratchArc, scratchInts);

  // TODO (from the original author): this could be done lazily, or pushed into FSTEnum.
  final BytesRefBuilder termBytes = new BytesRefBuilder();
  in.seekExact(Util.toBytesRef(termInts, termBytes));
}
 
开发者ID:europeana,项目名称:search,代码行数:11,代码来源:MemoryDocValuesProducer.java


注:本文中的org.apache.lucene.util.fst.Util类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。