当前位置: 首页>>代码示例>>Java>>正文


Java PairOutputs.Pair方法代码示例

本文整理汇总了Java中org.apache.lucene.util.fst.PairOutputs.Pair方法的典型用法代码示例。如果您正苦于以下问题:Java PairOutputs.Pair方法的具体用法?Java PairOutputs.Pair怎么用?Java PairOutputs.Pair使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在org.apache.lucene.util.fst.PairOutputs的用法示例。


在下文中一共展示了PairOutputs.Pair方法的14个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: getFullPrefixPaths

import org.apache.lucene.util.fst.PairOutputs; //导入方法依赖的package包/类
/**
 * Computes the FST prefix paths that match the lookup automaton once edits are allowed.
 *
 * <p>The lookup automaton is expanded with {@code toLevenshteinAutomata}, passed through
 * {@code convertAutomaton}, and the result is intersected with {@code fst}.
 *
 * <p>TODO: fuzzy matches currently carry no penalty: an exact prefix match gets no boost
 * over a completion that needed one edit, which in turn gets no boost over one that needed
 * two. A multiplicative factor is probably appropriate (e.g. a fuzzy match must have at
 * least 2X the weight of the non-fuzzy match to "compete"), in which case the wFST likely
 * needs log weights or similar.
 *
 * @param prefixPaths not read by this implementation
 * @param lookupAutomaton automaton built from the lookup key
 * @param fst the suggester FST to intersect with
 * @return the paths returned by {@code FSTUtil.intersectPrefixPaths}
 * @throws IOException declared by the overridden method
 */
@Override
protected List<FSTUtil.Path<PairOutputs.Pair<Long,BytesRef>>> getFullPrefixPaths(
    List<FSTUtil.Path<PairOutputs.Pair<Long,BytesRef>>> prefixPaths, Automaton lookupAutomaton,
    FST<PairOutputs.Pair<Long,BytesRef>> fst)
        throws IOException {
    // Step 1: widen the lookup automaton so it also accepts edited variants.
    final Automaton fuzzyLookup = toLevenshteinAutomata(lookupAutomaton);
    // Step 2: run the conversion hook on the widened automaton.
    final Automaton levA = convertAutomaton(fuzzyLookup);
    // Step 3: keep only the FST paths accepted by the automaton.
    return FSTUtil.intersectPrefixPaths(levA, fst);
}
 
开发者ID:justor,项目名称:elasticsearch_my,代码行数:26,代码来源:XFuzzySuggester.java

示例2: getFullPrefixPaths

import org.apache.lucene.util.fst.PairOutputs; //导入方法依赖的package包/类
/**
 * Computes the FST prefix paths that match the lookup automaton once edits are allowed.
 *
 * <p>The lookup automaton is expanded with {@code toLevenshteinAutomata}, passed through
 * {@code convertAutomaton}, and the result is intersected with {@code fst}.
 *
 * <p>TODO: fuzzy matches currently carry no penalty: an exact prefix match gets no boost
 * over a completion that needed one edit, which in turn gets no boost over one that needed
 * two. A multiplicative factor is probably appropriate (e.g. a fuzzy match must have at
 * least 2X the weight of the non-fuzzy match to "compete"), in which case the wFST likely
 * needs log weights or similar.
 *
 * @param prefixPaths not read by this implementation
 * @param lookupAutomaton automaton built from the lookup key
 * @param fst the suggester FST to intersect with
 * @return the paths returned by {@code FSTUtil.intersectPrefixPaths}
 * @throws IOException declared by the overridden method
 */
@Override
protected List<FSTUtil.Path<PairOutputs.Pair<Long,BytesRef>>> getFullPrefixPaths(List<FSTUtil.Path<PairOutputs.Pair<Long,BytesRef>>> prefixPaths,
                                                                                 Automaton lookupAutomaton,
                                                                                 FST<PairOutputs.Pair<Long,BytesRef>> fst)
        throws IOException {
    // Widen the lookup automaton so it also accepts edited variants.
    final Automaton editTolerant = toLevenshteinAutomata(lookupAutomaton);
    // Run the conversion hook, then keep only the FST paths the automaton accepts.
    final Automaton levA = convertAutomaton(editTolerant);
    return FSTUtil.intersectPrefixPaths(levA, fst);
}
 
开发者ID:baidu,项目名称:Elasticsearch,代码行数:26,代码来源:XFuzzySuggester.java

示例3: seekCeil

import org.apache.lucene.util.fst.PairOutputs; //导入方法依赖的package包/类
/**
 * Positions the enum via {@code fstEnum.seekCeil(text)} and loads that entry's statistics.
 *
 * <p>On a hit, the FST output is unpacked into {@code docsStart} (outer first value),
 * {@code docFreq} (inner first value, narrowed to int) and {@code totalTermFreq}
 * (inner second value).
 *
 * @param text the term to seek to
 * @return {@code END} if the FST enum returned nothing, {@code FOUND} if the entry's
 *         input equals {@code text}, otherwise {@code NOT_FOUND}
 * @throws IOException if the underlying FST enum fails
 */
@Override
public SeekStatus seekCeil(BytesRef text) throws IOException {
  final BytesRefFSTEnum.InputOutput<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>> hit = fstEnum.seekCeil(text);
  if (hit == null) {
    // Nothing at or after the requested term.
    return SeekStatus.END;
  }

  final PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>> outer = hit.output;
  final PairOutputs.Pair<Long,Long> stats = outer.output2;
  docsStart = outer.output1;
  docFreq = stats.output1.intValue();
  totalTermFreq = stats.output2;

  return hit.input.equals(text) ? SeekStatus.FOUND : SeekStatus.NOT_FOUND;
}
 
开发者ID:europeana,项目名称:search,代码行数:26,代码来源:SimpleTextFieldsReader.java

示例4: seekCeil

import org.apache.lucene.util.fst.PairOutputs; //导入方法依赖的package包/类
/**
 * Positions the enum via {@code fstEnum.seekCeil(text)} and loads that entry's statistics.
 *
 * <p>On a hit, the FST output is unpacked into {@code docsStart} (outer first value),
 * {@code docFreq} (inner first value, narrowed to int) and {@code totalTermFreq}
 * (inner second value).
 *
 * @param text the term to seek to
 * @param useCache ignored by this implementation
 * @return {@code END} if the FST enum returned nothing, {@code FOUND} if the entry's
 *         input equals {@code text}, otherwise {@code NOT_FOUND}
 * @throws IOException if the underlying FST enum fails
 */
@Override
public SeekStatus seekCeil(BytesRef text, boolean useCache /* ignored */) throws IOException {
  final BytesRefFSTEnum.InputOutput<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>> hit = fstEnum.seekCeil(text);
  if (hit == null) {
    // Nothing at or after the requested term.
    return SeekStatus.END;
  }

  final PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>> outer = hit.output;
  final PairOutputs.Pair<Long,Long> stats = outer.output2;
  docsStart = outer.output1;
  docFreq = stats.output1.intValue();
  totalTermFreq = stats.output2;

  return hit.input.equals(text) ? SeekStatus.FOUND : SeekStatus.NOT_FOUND;
}
 
开发者ID:pkarmstr,项目名称:NYBC,代码行数:26,代码来源:SimpleTextFieldsReader.java

示例5: seekExact

import org.apache.lucene.util.fst.PairOutputs; //导入方法依赖的package包/类
/**
 * Looks up {@code text} via {@code fstEnum.seekExact(text)} and, when present, loads its
 * statistics into {@code docsStart}, {@code docFreq} and {@code totalTermFreq}.
 *
 * @param text the term to look up
 * @return {@code true} if the FST enum returned an entry, {@code false} otherwise
 *         (in which case the statistics fields are left untouched)
 * @throws IOException if the underlying FST enum fails
 */
@Override
public boolean seekExact(BytesRef text) throws IOException {
  final BytesRefFSTEnum.InputOutput<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>> hit = fstEnum.seekExact(text);
  if (hit == null) {
    return false;
  }

  final PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>> outer = hit.output;
  final PairOutputs.Pair<Long,Long> stats = outer.output2;
  docsStart = outer.output1;
  docFreq = stats.output1.intValue();
  totalTermFreq = stats.output2;
  return true;
}
 
开发者ID:europeana,项目名称:search,代码行数:16,代码来源:SimpleTextFieldsReader.java

示例6: seekExact

import org.apache.lucene.util.fst.PairOutputs; //导入方法依赖的package包/类
/**
 * Looks up {@code text} via {@code fstEnum.seekExact(text)} and, when present, loads its
 * statistics into {@code docsStart}, {@code docFreq} and {@code totalTermFreq}.
 *
 * @param text the term to look up
 * @param useCache ignored by this implementation
 * @return {@code true} if the FST enum returned an entry, {@code false} otherwise
 *         (in which case the statistics fields are left untouched)
 * @throws IOException if the underlying FST enum fails
 */
@Override
public boolean seekExact(BytesRef text, boolean useCache /* ignored */) throws IOException {
  final BytesRefFSTEnum.InputOutput<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>> hit = fstEnum.seekExact(text);
  if (hit == null) {
    return false;
  }

  final PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>> outer = hit.output;
  final PairOutputs.Pair<Long,Long> stats = outer.output2;
  docsStart = outer.output1;
  docFreq = stats.output1.intValue();
  totalTermFreq = stats.output2;
  return true;
}
 
开发者ID:pkarmstr,项目名称:NYBC,代码行数:16,代码来源:SimpleTextFieldsReader.java

示例7: XFuzzySuggester

import org.apache.lucene.util.fst.PairOutputs; //导入方法依赖的package包/类
/**
 * Creates a {@link XFuzzySuggester} instance.
 *
 * @param indexAnalyzer Analyzer that will be used for
 *        analyzing suggestions while building the index.
 * @param queryPrefix automaton forwarded unchanged to the superclass constructor
 * @param queryAnalyzer Analyzer that will be used for
 *        analyzing query text during lookup
 * @param options see {@link #EXACT_FIRST}, {@link #PRESERVE_SEP}
 * @param maxSurfaceFormsPerAnalyzedForm Maximum number of
 *        surface forms to keep for a single analyzed form.
 *        When there are too many surface forms we discard the
 *        lowest weighted ones.
 * @param maxGraphExpansions Maximum number of graph paths
 *        to expand from the analyzed form.  Set this to -1 for
 *        no limit.
 * @param maxEdits must be &gt;= 0 and &lt;= {@link org.apache.lucene.util.automaton.LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE} .
 * @param transpositions <code>true</code> if transpositions should be treated as a primitive
 *        edit operation. If this is false, comparisons will implement the classic
 *        Levenshtein algorithm.
 * @param nonFuzzyPrefix length of common (non-fuzzy) prefix, must be &gt;= 0
 *        (see default {@link #DEFAULT_NON_FUZZY_PREFIX})
 * @param minFuzzyLength minimum length of lookup key before any edits are allowed, must be
 *        &gt;= 0 (see default {@link #DEFAULT_MIN_FUZZY_LENGTH})
 * @param unicodeAware stored on this instance; NOTE(review): presumably switches edit
 *        counting from bytes to Unicode code points -- confirm against its use in the class
 * @param fst prebuilt FST forwarded to the superclass constructor
 * @param hasPayloads forwarded to the superclass constructor
 * @param maxAnalyzedPathsForOneInput forwarded to the superclass constructor
 * @param sepLabel separation label
 * @param payloadSep payload separator byte
 * @param endByte end byte marker byte
 * @param holeCharacter forwarded to the superclass constructor
 * @throws IllegalArgumentException if {@code maxEdits} is outside
 *         {@code [0, LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE]}, or if
 *         {@code nonFuzzyPrefix} or {@code minFuzzyLength} is negative
 */
public XFuzzySuggester(Analyzer indexAnalyzer, Automaton queryPrefix, Analyzer queryAnalyzer,
                       int options, int maxSurfaceFormsPerAnalyzedForm, int maxGraphExpansions,
                       int maxEdits, boolean transpositions, int nonFuzzyPrefix, int minFuzzyLength,
                       boolean unicodeAware, FST<PairOutputs.Pair<Long, BytesRef>> fst, boolean hasPayloads,
                       int maxAnalyzedPathsForOneInput, int sepLabel, int payloadSep, int endByte, int holeCharacter) {
    // The literal 'true' fills a superclass parameter this constructor does not expose.
    // NOTE(review): confirm which superclass flag that is.
    super(indexAnalyzer, queryPrefix, queryAnalyzer, options, maxSurfaceFormsPerAnalyzedForm, maxGraphExpansions,
        true, fst, hasPayloads, maxAnalyzedPathsForOneInput, sepLabel, payloadSep, endByte, holeCharacter);
    if (maxEdits < 0 || maxEdits > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) {
        throw new IllegalArgumentException(
            "maxEdits must be between 0 and " + LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE);
    }
    // The messages below previously read "must not be >= 0", the opposite of what is enforced.
    if (nonFuzzyPrefix < 0) {
        throw new IllegalArgumentException("nonFuzzyPrefix must be >= 0 (got " + nonFuzzyPrefix + ")");
    }
    if (minFuzzyLength < 0) {
        throw new IllegalArgumentException("minFuzzyLength must be >= 0 (got " + minFuzzyLength + ")");
    }

    this.maxEdits = maxEdits;
    this.transpositions = transpositions;
    this.nonFuzzyPrefix = nonFuzzyPrefix;
    this.minFuzzyLength = minFuzzyLength;
    this.unicodeAware = unicodeAware;
}
 
开发者ID:justor,项目名称:elasticsearch_my,代码行数:50,代码来源:XFuzzySuggester.java

示例8: XFuzzySuggester

import org.apache.lucene.util.fst.PairOutputs; //导入方法依赖的package包/类
/**
 * Creates a {@link XFuzzySuggester} instance.
 *
 * @param indexAnalyzer Analyzer that will be used for
 *        analyzing suggestions while building the index.
 * @param queryPrefix automaton forwarded unchanged to the superclass constructor
 * @param queryAnalyzer Analyzer that will be used for
 *        analyzing query text during lookup
 * @param options see {@link #EXACT_FIRST}, {@link #PRESERVE_SEP}
 * @param maxSurfaceFormsPerAnalyzedForm Maximum number of
 *        surface forms to keep for a single analyzed form.
 *        When there are too many surface forms we discard the
 *        lowest weighted ones.
 * @param maxGraphExpansions Maximum number of graph paths
 *        to expand from the analyzed form.  Set this to -1 for
 *        no limit.
 * @param maxEdits must be &gt;= 0 and &lt;= {@link org.apache.lucene.util.automaton.LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE} .
 * @param transpositions <code>true</code> if transpositions should be treated as a primitive
 *        edit operation. If this is false, comparisons will implement the classic
 *        Levenshtein algorithm.
 * @param nonFuzzyPrefix length of common (non-fuzzy) prefix, must be &gt;= 0
 *        (see default {@link #DEFAULT_NON_FUZZY_PREFIX})
 * @param minFuzzyLength minimum length of lookup key before any edits are allowed, must be
 *        &gt;= 0 (see default {@link #DEFAULT_MIN_FUZZY_LENGTH})
 * @param unicodeAware stored on this instance; NOTE(review): presumably switches edit
 *        counting from bytes to Unicode code points -- confirm against its use in the class
 * @param fst prebuilt FST forwarded to the superclass constructor
 * @param hasPayloads forwarded to the superclass constructor
 * @param maxAnalyzedPathsForOneInput forwarded to the superclass constructor
 * @param sepLabel separation label
 * @param payloadSep payload separator byte
 * @param endByte end byte marker byte
 * @param holeCharacter forwarded to the superclass constructor
 * @throws IllegalArgumentException if {@code maxEdits} is outside
 *         {@code [0, LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE]}, or if
 *         {@code nonFuzzyPrefix} or {@code minFuzzyLength} is negative
 */
public XFuzzySuggester(Analyzer indexAnalyzer, Automaton queryPrefix, Analyzer queryAnalyzer, int options, int maxSurfaceFormsPerAnalyzedForm, int maxGraphExpansions,
                       int maxEdits, boolean transpositions, int nonFuzzyPrefix, int minFuzzyLength, boolean unicodeAware,
                       FST<PairOutputs.Pair<Long, BytesRef>> fst, boolean hasPayloads, int maxAnalyzedPathsForOneInput,
                       int sepLabel, int payloadSep, int endByte, int holeCharacter) {
    // The literal 'true' fills a superclass parameter this constructor does not expose.
    // NOTE(review): confirm which superclass flag that is.
    super(indexAnalyzer, queryPrefix, queryAnalyzer, options, maxSurfaceFormsPerAnalyzedForm, maxGraphExpansions, true, fst, hasPayloads, maxAnalyzedPathsForOneInput, sepLabel, payloadSep, endByte, holeCharacter);
    if (maxEdits < 0 || maxEdits > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) {
        throw new IllegalArgumentException("maxEdits must be between 0 and " + LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE);
    }
    // The messages below previously read "must not be >= 0", the opposite of what is enforced.
    if (nonFuzzyPrefix < 0) {
        throw new IllegalArgumentException("nonFuzzyPrefix must be >= 0 (got " + nonFuzzyPrefix + ")");
    }
    if (minFuzzyLength < 0) {
        throw new IllegalArgumentException("minFuzzyLength must be >= 0 (got " + minFuzzyLength + ")");
    }

    this.maxEdits = maxEdits;
    this.transpositions = transpositions;
    this.nonFuzzyPrefix = nonFuzzyPrefix;
    this.minFuzzyLength = minFuzzyLength;
    this.unicodeAware = unicodeAware;
}
 
开发者ID:baidu,项目名称:Elasticsearch,代码行数:47,代码来源:XFuzzySuggester.java

示例9: SimpleTextTermsEnum

import org.apache.lucene.util.fst.PairOutputs; //导入方法依赖的package包/类
/**
 * Creates a terms enum that iterates the given FST.
 *
 * @param fst FST whose outputs are (first long, (second long, third long)) pairs; the seek
 *        methods of this class read them as docs start, doc frequency and total term
 *        frequency -- NOTE(review): confirm against the code that builds the FST
 * @param indexOptions index options stored on this enum
 */
public SimpleTextTermsEnum(FST<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>> fst, IndexOptions indexOptions) {
  fstEnum = new BytesRefFSTEnum<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>>(fst);
  this.indexOptions = indexOptions;
}
 
开发者ID:europeana,项目名称:search,代码行数:5,代码来源:SimpleTextFieldsReader.java

示例10: loadTerms

import org.apache.lucene.util.fst.PairOutputs; //导入方法依赖的package包/类
/**
 * Scans this field's section of the input and builds the terms FST.
 *
 * <p>Reading starts at {@code termsStart} on a clone of the reader's input and proceeds
 * line by line until an {@code END} line or a line starting with {@code FIELD}:
 * <ul>
 *   <li>{@code TERM} lines flush the previous term (if any) into the FST, remember the file
 *       pointer after the line as the term's docs start, and reset the per-term counters;</li>
 *   <li>{@code DOC} lines bump the per-term doc frequency and {@code sumDocFreq}, and mark
 *       the parsed doc id as visited;</li>
 *   <li>{@code FREQ} lines add the parsed value to the per-term total term frequency.</li>
 * </ul>
 * Each FST entry maps the term bytes to (docsStart, (docFreq, totalTermFreq)). On exit
 * {@code docCount} is the number of distinct visited docs and {@code fst} holds the result.
 *
 * @throws IOException if reading from the input fails
 */
private void loadTerms() throws IOException {
  final PositiveIntOutputs longOutputs = PositiveIntOutputs.getSingleton();
  final PairOutputs<Long,Long> statsOutputs = new PairOutputs<>(longOutputs, longOutputs);
  final PairOutputs<Long,PairOutputs.Pair<Long,Long>> termOutputs =
      new PairOutputs<>(longOutputs, statsOutputs);
  final Builder<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>> fstBuilder =
      new Builder<>(FST.INPUT_TYPE.BYTE1, termOutputs);

  final IndexInput input = SimpleTextFieldsReader.this.in.clone();
  input.seek(termsStart);

  final BytesRefBuilder pendingTerm = new BytesRefBuilder();
  long pendingDocsStart = -1;   // -1 means no term has been seen yet
  int termDocFreq = 0;
  long termTotalFreq = 0;
  final FixedBitSet seenDocs = new FixedBitSet(maxDoc);
  final IntsRefBuilder intsScratch = new IntsRefBuilder();

  for (;;) {
    SimpleTextUtil.readLine(input, scratch);

    // End of this field's section: flush the last term and stop.
    if (scratch.get().equals(END) || StringHelper.startsWith(scratch.get(), FIELD)) {
      if (pendingDocsStart != -1) {
        fstBuilder.add(
            Util.toIntsRef(pendingTerm.get(), intsScratch),
            termOutputs.newPair(pendingDocsStart, statsOutputs.newPair((long) termDocFreq, termTotalFreq)));
        sumTotalTermFreq += termTotalFreq;
      }
      break;
    }

    if (StringHelper.startsWith(scratch.get(), DOC)) {
      ++termDocFreq;
      ++sumDocFreq;
      scratchUTF16.copyUTF8Bytes(scratch.bytes(), DOC.length, scratch.length()-DOC.length);
      final int docID = ArrayUtil.parseInt(scratchUTF16.chars(), 0, scratchUTF16.length());
      seenDocs.set(docID);
      continue;
    }

    if (StringHelper.startsWith(scratch.get(), FREQ)) {
      scratchUTF16.copyUTF8Bytes(scratch.bytes(), FREQ.length, scratch.length()-FREQ.length);
      termTotalFreq += ArrayUtil.parseInt(scratchUTF16.chars(), 0, scratchUTF16.length());
      continue;
    }

    if (StringHelper.startsWith(scratch.get(), TERM)) {
      // A new term begins: emit the one collected so far, then start over.
      if (pendingDocsStart != -1) {
        fstBuilder.add(
            Util.toIntsRef(pendingTerm.get(), intsScratch),
            termOutputs.newPair(pendingDocsStart, statsOutputs.newPair((long) termDocFreq, termTotalFreq)));
      }
      pendingDocsStart = input.getFilePointer();
      final int termLength = scratch.length() - TERM.length;
      pendingTerm.grow(termLength);
      System.arraycopy(scratch.bytes(), TERM.length, pendingTerm.bytes(), 0, termLength);
      pendingTerm.setLength(termLength);
      termDocFreq = 0;
      sumTotalTermFreq += termTotalFreq;
      termTotalFreq = 0;
      termCount++;
    }
  }

  docCount = seenDocs.cardinality();
  fst = fstBuilder.finish();
}
 
开发者ID:europeana,项目名称:search,代码行数:61,代码来源:SimpleTextFieldsReader.java

示例11: SimpleTextTermsEnum

import org.apache.lucene.util.fst.PairOutputs; //导入方法依赖的package包/类
/**
 * Creates a terms enum that iterates the given FST.
 *
 * @param fst FST whose outputs are (first long, (second long, third long)) pairs; the seek
 *        methods of this class read them as docs start, doc frequency and total term
 *        frequency -- NOTE(review): confirm against the code that builds the FST
 * @param indexOptions index options stored on this enum
 */
public SimpleTextTermsEnum(FST<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>> fst, IndexOptions indexOptions) {
  // Wrap the FST first; the two assignments are independent of each other.
  fstEnum =
      new BytesRefFSTEnum<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>>(fst);
  this.indexOptions = indexOptions;
}
 
开发者ID:pkarmstr,项目名称:NYBC,代码行数:5,代码来源:SimpleTextFieldsReader.java

示例12: loadTerms

import org.apache.lucene.util.fst.PairOutputs; //导入方法依赖的package包/类
/**
 * Scans this field's section of the input and builds the terms FST.
 *
 * <p>Reading starts at {@code termsStart} on a clone of the reader's input and proceeds
 * line by line until an {@code END} line or a line starting with {@code FIELD}:
 * <ul>
 *   <li>{@code TERM} lines flush the previous term (if any) into the FST, remember the file
 *       pointer after the line as the term's docs start, and reset the per-term counters;</li>
 *   <li>{@code DOC} lines bump the per-term doc frequency and {@code sumDocFreq}, and mark
 *       the parsed doc id as visited;</li>
 *   <li>{@code FREQ} lines add the parsed value to the per-term total term frequency.</li>
 * </ul>
 * Each FST entry maps the term bytes to (docsStart, (docFreq, totalTermFreq)). On exit
 * {@code docCount} is the number of distinct visited docs and {@code fst} holds the result.
 *
 * @throws IOException if reading from the input fails
 */
private void loadTerms() throws IOException {
  PositiveIntOutputs posIntOutputs = PositiveIntOutputs.getSingleton(false);
  final Builder<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>> b;
  final PairOutputs<Long,Long> outputsInner = new PairOutputs<Long,Long>(posIntOutputs, posIntOutputs);
  final PairOutputs<Long,PairOutputs.Pair<Long,Long>> outputs = new PairOutputs<Long,PairOutputs.Pair<Long,Long>>(posIntOutputs,
                                                                                                                  outputsInner);
  b = new Builder<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>>(FST.INPUT_TYPE.BYTE1, outputs);
  IndexInput in = SimpleTextFieldsReader.this.in.clone();
  in.seek(termsStart);
  final BytesRef lastTerm = new BytesRef(10);
  long lastDocsStart = -1;  // -1 means no term has been seen yet
  int docFreq = 0;
  long totalTermFreq = 0;
  OpenBitSet visitedDocs = new OpenBitSet();
  final IntsRef scratchIntsRef = new IntsRef();
  while(true) {
    SimpleTextUtil.readLine(in, scratch);
    if (scratch.equals(END) || StringHelper.startsWith(scratch, FIELD)) {
      // End of this field's section: flush the last term and stop.
      if (lastDocsStart != -1) {
        b.add(Util.toIntsRef(lastTerm, scratchIntsRef),
              outputs.newPair(lastDocsStart,
                              outputsInner.newPair((long) docFreq, totalTermFreq)));
        sumTotalTermFreq += totalTermFreq;
      }
      break;
    } else if (StringHelper.startsWith(scratch, DOC)) {
      docFreq++;
      sumDocFreq++;
      UnicodeUtil.UTF8toUTF16(scratch.bytes, scratch.offset+DOC.length, scratch.length-DOC.length, scratchUTF16);
      int docID = ArrayUtil.parseInt(scratchUTF16.chars, 0, scratchUTF16.length);
      visitedDocs.set(docID);
    } else if (StringHelper.startsWith(scratch, FREQ)) {
      UnicodeUtil.UTF8toUTF16(scratch.bytes, scratch.offset+FREQ.length, scratch.length-FREQ.length, scratchUTF16);
      totalTermFreq += ArrayUtil.parseInt(scratchUTF16.chars, 0, scratchUTF16.length);
    } else if (StringHelper.startsWith(scratch, TERM)) {
      // A new term begins: emit the one collected so far, then start over.
      if (lastDocsStart != -1) {
        b.add(Util.toIntsRef(lastTerm, scratchIntsRef), outputs.newPair(lastDocsStart,
                                                                        outputsInner.newPair((long) docFreq, totalTermFreq)));
      }
      lastDocsStart = in.getFilePointer();
      final int len = scratch.length - TERM.length;
      if (len > lastTerm.length) {
        lastTerm.grow(len);
      }
      // Honor scratch.offset like the DOC and FREQ branches do; the copy used to start at
      // TERM.length, which is only right when the offset is 0.
      System.arraycopy(scratch.bytes, scratch.offset + TERM.length, lastTerm.bytes, 0, len);
      lastTerm.length = len;
      docFreq = 0;
      sumTotalTermFreq += totalTermFreq;
      totalTermFreq = 0;
      termCount++;
    }
  }
  docCount = (int) visitedDocs.cardinality();
  fst = b.finish();
}
 
开发者ID:pkarmstr,项目名称:NYBC,代码行数:63,代码来源:SimpleTextFieldsReader.java

示例13: loadTerms

import org.apache.lucene.util.fst.PairOutputs; //导入方法依赖的package包/类
/**
 * Scans this field's section of the input and builds the terms FST.
 *
 * <p>Reading starts at {@code termsStart} on a clone of the reader's input and proceeds
 * line by line until an {@code END} line or a line starting with {@code FIELD}:
 * <ul>
 *   <li>{@code TERM} lines flush the previous term (if any) into the FST, remember the file
 *       pointer after the line as the term's docs start, and reset the per-term counters;</li>
 *   <li>{@code DOC} lines bump the per-term doc frequency and {@code sumDocFreq}, and mark
 *       the parsed doc id as visited;</li>
 *   <li>{@code FREQ} lines add the parsed value to the per-term total term frequency.</li>
 * </ul>
 * Each FST entry maps the term bytes to (docsStart, (docFreq, totalTermFreq)). On exit
 * {@code docCount} is the number of distinct visited docs and {@code fst} holds the result.
 *
 * @throws IOException if reading from the input fails
 */
private void loadTerms() throws IOException {
  PositiveIntOutputs posIntOutputs = PositiveIntOutputs.getSingleton();
  final Builder<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>> b;
  final PairOutputs<Long,Long> outputsInner = new PairOutputs<Long,Long>(posIntOutputs, posIntOutputs);
  final PairOutputs<Long,PairOutputs.Pair<Long,Long>> outputs = new PairOutputs<Long,PairOutputs.Pair<Long,Long>>(posIntOutputs,
                                                                                                                  outputsInner);
  b = new Builder<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>>(FST.INPUT_TYPE.BYTE1, outputs);
  IndexInput in = SimpleTextFieldsReader.this.in.clone();
  in.seek(termsStart);
  final BytesRef lastTerm = new BytesRef(10);
  long lastDocsStart = -1;  // -1 means no term has been seen yet
  int docFreq = 0;
  long totalTermFreq = 0;
  FixedBitSet visitedDocs = new FixedBitSet(maxDoc);
  final IntsRef scratchIntsRef = new IntsRef();
  while(true) {
    SimpleTextUtil.readLine(in, scratch);
    if (scratch.equals(END) || StringHelper.startsWith(scratch, FIELD)) {
      // End of this field's section: flush the last term and stop.
      if (lastDocsStart != -1) {
        b.add(Util.toIntsRef(lastTerm, scratchIntsRef),
              outputs.newPair(lastDocsStart,
                              outputsInner.newPair((long) docFreq, totalTermFreq)));
        sumTotalTermFreq += totalTermFreq;
      }
      break;
    } else if (StringHelper.startsWith(scratch, DOC)) {
      docFreq++;
      sumDocFreq++;
      UnicodeUtil.UTF8toUTF16(scratch.bytes, scratch.offset+DOC.length, scratch.length-DOC.length, scratchUTF16);
      int docID = ArrayUtil.parseInt(scratchUTF16.chars, 0, scratchUTF16.length);
      visitedDocs.set(docID);
    } else if (StringHelper.startsWith(scratch, FREQ)) {
      UnicodeUtil.UTF8toUTF16(scratch.bytes, scratch.offset+FREQ.length, scratch.length-FREQ.length, scratchUTF16);
      totalTermFreq += ArrayUtil.parseInt(scratchUTF16.chars, 0, scratchUTF16.length);
    } else if (StringHelper.startsWith(scratch, TERM)) {
      // A new term begins: emit the one collected so far, then start over.
      if (lastDocsStart != -1) {
        b.add(Util.toIntsRef(lastTerm, scratchIntsRef), outputs.newPair(lastDocsStart,
                                                                        outputsInner.newPair((long) docFreq, totalTermFreq)));
      }
      lastDocsStart = in.getFilePointer();
      final int len = scratch.length - TERM.length;
      if (len > lastTerm.length) {
        lastTerm.grow(len);
      }
      // Honor scratch.offset like the DOC and FREQ branches do; the copy used to start at
      // TERM.length, which is only right when the offset is 0.
      System.arraycopy(scratch.bytes, scratch.offset + TERM.length, lastTerm.bytes, 0, len);
      lastTerm.length = len;
      docFreq = 0;
      sumTotalTermFreq += totalTermFreq;
      totalTermFreq = 0;
      termCount++;
    }
  }
  docCount = (int) visitedDocs.cardinality();
  fst = b.finish();
}
 
开发者ID:yintaoxue,项目名称:read-open-source-code,代码行数:63,代码来源:SimpleTextFieldsReader.java

示例14: loadTerms

import org.apache.lucene.util.fst.PairOutputs; //导入方法依赖的package包/类
/**
 * Scans this field's section of the input and builds the terms FST.
 *
 * <p>Reading starts at {@code termsStart} on a clone of the reader's input and proceeds
 * line by line until an {@code END} line or a line starting with {@code FIELD}:
 * <ul>
 *   <li>{@code TERM} lines flush the previous term (if any) into the FST, remember the file
 *       pointer after the line as the term's docs start, and reset the per-term counters;</li>
 *   <li>{@code DOC} lines bump the per-term doc frequency and {@code sumDocFreq}, and mark
 *       the parsed doc id as visited;</li>
 *   <li>{@code FREQ} lines add the parsed value to the per-term total term frequency.</li>
 * </ul>
 * Each FST entry maps the term bytes to (docsStart, (docFreq, totalTermFreq)). On exit
 * {@code docCount} is the number of distinct visited docs and {@code fst} holds the result.
 *
 * @throws IOException if reading from the input fails
 */
private void loadTerms() throws IOException {
  PositiveIntOutputs posIntOutputs = PositiveIntOutputs.getSingleton();
  final Builder<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>> b;
  final PairOutputs<Long,Long> outputsInner = new PairOutputs<Long,Long>(posIntOutputs, posIntOutputs);
  final PairOutputs<Long,PairOutputs.Pair<Long,Long>> outputs = new PairOutputs<Long,PairOutputs.Pair<Long,Long>>(posIntOutputs,
                                                                                                                  outputsInner);
  b = new Builder<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>>(FST.INPUT_TYPE.BYTE1, outputs);
  IndexInput in = SimpleTextFieldsReader.this.in.clone();
  in.seek(termsStart);
  final BytesRef lastTerm = new BytesRef(10);
  long lastDocsStart = -1;  // -1 means no term has been seen yet
  int docFreq = 0;
  long totalTermFreq = 0;
  OpenBitSet visitedDocs = new OpenBitSet();
  final IntsRef scratchIntsRef = new IntsRef();
  while(true) {
    SimpleTextUtil.readLine(in, scratch);
    if (scratch.equals(END) || StringHelper.startsWith(scratch, FIELD)) {
      // End of this field's section: flush the last term and stop.
      if (lastDocsStart != -1) {
        b.add(Util.toIntsRef(lastTerm, scratchIntsRef),
              outputs.newPair(lastDocsStart,
                              outputsInner.newPair((long) docFreq, totalTermFreq)));
        sumTotalTermFreq += totalTermFreq;
      }
      break;
    } else if (StringHelper.startsWith(scratch, DOC)) {
      docFreq++;
      sumDocFreq++;
      UnicodeUtil.UTF8toUTF16(scratch.bytes, scratch.offset+DOC.length, scratch.length-DOC.length, scratchUTF16);
      int docID = ArrayUtil.parseInt(scratchUTF16.chars, 0, scratchUTF16.length);
      visitedDocs.set(docID);
    } else if (StringHelper.startsWith(scratch, FREQ)) {
      UnicodeUtil.UTF8toUTF16(scratch.bytes, scratch.offset+FREQ.length, scratch.length-FREQ.length, scratchUTF16);
      totalTermFreq += ArrayUtil.parseInt(scratchUTF16.chars, 0, scratchUTF16.length);
    } else if (StringHelper.startsWith(scratch, TERM)) {
      // A new term begins: emit the one collected so far, then start over.
      if (lastDocsStart != -1) {
        b.add(Util.toIntsRef(lastTerm, scratchIntsRef), outputs.newPair(lastDocsStart,
                                                                        outputsInner.newPair((long) docFreq, totalTermFreq)));
      }
      lastDocsStart = in.getFilePointer();
      final int len = scratch.length - TERM.length;
      if (len > lastTerm.length) {
        lastTerm.grow(len);
      }
      // Honor scratch.offset like the DOC and FREQ branches do; the copy used to start at
      // TERM.length, which is only right when the offset is 0.
      System.arraycopy(scratch.bytes, scratch.offset + TERM.length, lastTerm.bytes, 0, len);
      lastTerm.length = len;
      docFreq = 0;
      sumTotalTermFreq += totalTermFreq;
      totalTermFreq = 0;
      termCount++;
    }
  }
  docCount = (int) visitedDocs.cardinality();
  fst = b.finish();
}
 
开发者ID:jimaguere,项目名称:Maskana-Gestor-de-Conocimiento,代码行数:63,代码来源:SimpleTextFieldsReader.java


注:本文中的org.apache.lucene.util.fst.PairOutputs.Pair方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。