This article collects typical usage examples of the Java class org.apache.lucene.util.CharsRefBuilder. If you are unsure what CharsRefBuilder is for or how to use it, the curated class examples below should help.
CharsRefBuilder lives in the org.apache.lucene.util package. Fifteen code examples of the class are shown below, ordered by popularity.
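
Before the examples, a minimal self-contained sketch (illustrative names only, not taken from the examples below) of what the class does: it decodes UTF-8 bytes into a reusable, growable char buffer.

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.CharsRefBuilder;

public class CharsRefBuilderDemo {
    public static void main(String[] args) {
        CharsRefBuilder builder = new CharsRefBuilder();

        // Decode UTF-8 bytes into the builder's reusable char buffer.
        builder.copyUTF8Bytes(new BytesRef("hello"));
        System.out.println(builder.toString()); // hello

        // The builder implements Appendable, so it can accumulate more chars.
        builder.append(" world");
        CharsRef snapshot = builder.toCharsRef(); // independent copy of the current chars
        System.out.println(snapshot); // hello world
    }
}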

Example 1: testMaxPosition3WithSynomyms

import org.apache.lucene.util.CharsRefBuilder; // import the required package/class

public void testMaxPosition3WithSynomyms() throws IOException {
    for (final boolean consumeAll : new boolean[]{true, false}) {
        MockTokenizer tokenizer = new MockTokenizer(new StringReader("one two three four five"),
                MockTokenizer.WHITESPACE, false);
        // if we are consuming all tokens, we can use the checks, otherwise we can't
        tokenizer.setEnableChecks(consumeAll);
        SynonymMap.Builder builder = new SynonymMap.Builder(true);
        builder.add(new CharsRef("one"), new CharsRef("first"), true);
        builder.add(new CharsRef("one"), new CharsRef("alpha"), true);
        builder.add(new CharsRef("one"), new CharsRef("beguine"), true);
        CharsRefBuilder multiWordCharsRef = new CharsRefBuilder();
        SynonymMap.Builder.join(new String[]{"and", "indubitably", "single", "only"}, multiWordCharsRef);
        builder.add(new CharsRef("one"), multiWordCharsRef.get(), true);
        SynonymMap.Builder.join(new String[]{"dopple", "ganger"}, multiWordCharsRef);
        builder.add(new CharsRef("two"), multiWordCharsRef.get(), true);
        SynonymMap synonymMap = builder.build();
        TokenStream stream = new SynonymFilter(tokenizer, synonymMap, true);
        stream = new LimitTokenPositionFilter(stream, 3, consumeAll);
        // "only", the 4th word of the multi-word synonym "and indubitably single only",
        // is not emitted, since its position is greater than 3
        assertTokenStreamContents(stream,
                new String[]{"one", "first", "alpha", "beguine", "and", "two", "indubitably", "dopple",
                        "three", "single", "ganger"},
                new int[]{1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0});
    }
}

Example 2: getLookupResult

import org.apache.lucene.util.CharsRefBuilder; // import the required package/class

private LookupResult getLookupResult(Long output1, BytesRef output2, CharsRefBuilder spare) {
    LookupResult result;
    if (hasPayloads) {
        int sepIndex = -1;
        for (int i = 0; i < output2.length; i++) {
            if (output2.bytes[output2.offset + i] == payloadSep) {
                sepIndex = i;
                break;
            }
        }
        assert sepIndex != -1;
        final int payloadLen = output2.length - sepIndex - 1;
        spare.copyUTF8Bytes(output2.bytes, output2.offset, sepIndex);
        BytesRef payload = new BytesRef(payloadLen);
        System.arraycopy(output2.bytes, sepIndex + 1, payload.bytes, 0, payloadLen);
        payload.length = payloadLen;
        result = new LookupResult(spare.toString(), decodeWeight(output1), payload);
    } else {
        spare.copyUTF8Bytes(output2);
        result = new LookupResult(spare.toString(), decodeWeight(output1));
    }
    return result;
}
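
The method above assumes the FST output packs the surface form and the payload into a single BytesRef, separated by one payloadSep byte that can never occur inside the UTF-8 surface form; copyUTF8Bytes then decodes only the prefix before the separator. A minimal sketch of the matching encoding side (hypothetical names, not part of the original source):

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;

class PayloadPacking {
    // Pack [UTF-8 surface form][payloadSep][payload bytes] into one BytesRef,
    // i.e. the format that getLookupResult() above takes apart.
    static BytesRef pack(BytesRef surfaceForm, BytesRef payload, byte payloadSep) {
        BytesRefBuilder packed = new BytesRefBuilder();
        packed.copyBytes(surfaceForm);                                  // suggestion text as UTF-8
        packed.append(payloadSep);                                      // single separator byte
        packed.append(payload.bytes, payload.offset, payload.length);   // opaque payload bytes
        return packed.toBytesRef();
    }
}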

Example 3: toContextQuery

import org.apache.lucene.util.CharsRefBuilder; // import the required package/class

/**
 * Wraps a {@link CompletionQuery} with context queries
 *
 * @param query base completion query to wrap
 * @param queryContexts a map of context mapping name and collected query contexts
 * @return a context-enabled query
 */
public ContextQuery toContextQuery(CompletionQuery query,
        Map<String, List<ContextMapping.InternalQueryContext>> queryContexts) {
    ContextQuery typedContextQuery = new ContextQuery(query);
    if (queryContexts.isEmpty() == false) {
        CharsRefBuilder scratch = new CharsRefBuilder();
        scratch.grow(1);
        for (int typeId = 0; typeId < contextMappings.size(); typeId++) {
            scratch.setCharAt(0, (char) typeId);
            scratch.setLength(1);
            ContextMapping mapping = contextMappings.get(typeId);
            List<ContextMapping.InternalQueryContext> internalQueryContext = queryContexts.get(mapping.name());
            if (internalQueryContext != null) {
                for (ContextMapping.InternalQueryContext context : internalQueryContext) {
                    scratch.append(context.context);
                    typedContextQuery.addContext(scratch.toCharsRef(), context.boost, !context.isPrefix);
                    scratch.setLength(1);
                }
            }
        }
    }
    return typedContextQuery;
}

Example 4: innerExecute

import org.apache.lucene.util.CharsRefBuilder; // import the required package/class

@Override
public TermSuggestion innerExecute(String name, TermSuggestionContext suggestion, IndexSearcher searcher,
        CharsRefBuilder spare) throws IOException {
    DirectSpellChecker directSpellChecker = suggestion.getDirectSpellCheckerSettings().createDirectSpellChecker();
    final IndexReader indexReader = searcher.getIndexReader();
    TermSuggestion response = new TermSuggestion(
            name, suggestion.getSize(), suggestion.getDirectSpellCheckerSettings().sort()
    );
    List<Token> tokens = queryTerms(suggestion, spare);
    for (Token token : tokens) {
        // TODO: Extend DirectSpellChecker in 4.1, to get the raw suggested words as BytesRef
        SuggestWord[] suggestedWords = directSpellChecker.suggestSimilar(
                token.term, suggestion.getShardSize(), indexReader,
                suggestion.getDirectSpellCheckerSettings().suggestMode()
        );
        Text key = new Text(new BytesArray(token.term.bytes()));
        TermSuggestion.Entry resultEntry = new TermSuggestion.Entry(key, token.startOffset,
                token.endOffset - token.startOffset);
        for (SuggestWord suggestWord : suggestedWords) {
            Text word = new Text(suggestWord.string);
            resultEntry.addOption(new TermSuggestion.Entry.Option(word, suggestWord.freq, suggestWord.score));
        }
        response.addTerm(resultEntry);
    }
    return response;
}

Example 5: buildTerm

import org.apache.lucene.util.CharsRefBuilder; // import the required package/class

private void buildTerm(XContentBuilder builder, final CharsRefBuilder spare, Terms curTerms, TermsEnum termIter,
        BoostAttribute boostAtt) throws IOException {
    // start term, optimized writing
    BytesRef term = termIter.next();
    spare.copyUTF8Bytes(term);
    builder.startObject(spare.toString());
    buildTermStatistics(builder, termIter);
    // finally write the term vectors
    PostingsEnum posEnum = termIter.postings(null, PostingsEnum.ALL);
    int termFreq = posEnum.freq();
    builder.field(FieldStrings.TERM_FREQ, termFreq);
    initMemory(curTerms, termFreq);
    initValues(curTerms, posEnum, termFreq);
    buildValues(builder, curTerms, termFreq);
    buildScore(builder, boostAtt);
    builder.endObject();
}

Example 6: innerExecute

import org.apache.lucene.util.CharsRefBuilder; // import the required package/class

@Override
public Suggest.Suggestion<? extends Suggest.Suggestion.Entry<? extends Suggest.Suggestion.Entry.Option>> innerExecute(
        String name, CustomSuggestionsContext suggestion, IndexSearcher searcher, CharsRefBuilder spare)
        throws IOException {
    // Get the suggestion context
    String text = suggestion.getText().utf8ToString();
    // create two suggestions with 12 and 123 appended
    Suggest.Suggestion<Suggest.Suggestion.Entry<Suggest.Suggestion.Entry.Option>> response =
            new Suggest.Suggestion<>(name, suggestion.getSize());
    String firstSuggestion = String.format(Locale.ROOT, "%s-%s-%s-%s", text, suggestion.getField(),
            suggestion.options.get("suffix"), "12");
    Suggest.Suggestion.Entry<Suggest.Suggestion.Entry.Option> resultEntry12 =
            new Suggest.Suggestion.Entry<>(new Text(firstSuggestion), 0, text.length() + 2);
    response.addTerm(resultEntry12);
    String secondSuggestion = String.format(Locale.ROOT, "%s-%s-%s-%s", text, suggestion.getField(),
            suggestion.options.get("suffix"), "123");
    Suggest.Suggestion.Entry<Suggest.Suggestion.Entry.Option> resultEntry123 =
            new Suggest.Suggestion.Entry<>(new Text(secondSuggestion), 0, text.length() + 3);
    response.addTerm(resultEntry123);
    return response;
}

Example 7: join

import org.apache.lucene.util.CharsRefBuilder; // import the required package/class

/** Sugar: just joins the provided terms with {@link
 *  SynonymMap#WORD_SEPARATOR}. {@code reuse} and its chars
 *  must not be null. */
public static CharsRef join(String[] words, CharsRefBuilder reuse) {
    int upto = 0;
    char[] buffer = reuse.chars();
    for (String word : words) {
        final int wordLen = word.length();
        final int needed = (0 == upto ? wordLen : 1 + upto + wordLen); // add 1 for WORD_SEPARATOR
        if (needed > buffer.length) {
            reuse.grow(needed);
            buffer = reuse.chars();
        }
        if (upto > 0) {
            buffer[upto++] = SynonymMap.WORD_SEPARATOR;
        }
        word.getChars(0, wordLen, buffer, upto);
        upto += wordLen;
    }
    reuse.setLength(upto);
    return reuse.get();
}
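
Example 1 already exercises this helper when it builds its multi-word synonyms; for clarity, here is a tiny standalone sketch of the same call (illustrative words, not from the original):

import org.apache.lucene.analysis.synonym.SynonymMap;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.CharsRefBuilder;

class JoinDemo {
    static CharsRef multiWord() {
        // Reuse a single builder across join() calls, as Example 1 does.
        CharsRefBuilder reuse = new CharsRefBuilder();
        // The returned chars are "new" and "york" separated by
        // SynonymMap.WORD_SEPARATOR (the '\u0000' char).
        return SynonymMap.Builder.join(new String[]{"new", "york"}, reuse);
    }
}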

Example 8: QueryAutoStopWordAnalyzer

import org.apache.lucene.util.CharsRefBuilder; // import the required package/class

/**
 * Creates a new QueryAutoStopWordAnalyzer with stopwords calculated for the
 * given selection of fields from terms with a document frequency greater than
 * the given maxDocFreq
 *
 * @param delegate Analyzer whose TokenStream will be filtered
 * @param indexReader IndexReader to identify the stopwords from
 * @param fields Selection of fields to calculate stopwords for
 * @param maxDocFreq Document frequency terms should be above in order to be stopwords
 * @throws IOException Can be thrown while reading from the IndexReader
 */
public QueryAutoStopWordAnalyzer(
        Analyzer delegate,
        IndexReader indexReader,
        Collection<String> fields,
        int maxDocFreq) throws IOException {
    super(delegate.getReuseStrategy());
    this.delegate = delegate;
    for (String field : fields) {
        Set<String> stopWords = new HashSet<>();
        Terms terms = MultiFields.getTerms(indexReader, field);
        CharsRefBuilder spare = new CharsRefBuilder();
        if (terms != null) {
            TermsEnum te = terms.iterator(null);
            BytesRef text;
            while ((text = te.next()) != null) {
                if (te.docFreq() > maxDocFreq) {
                    spare.copyUTF8Bytes(text);
                    stopWords.add(spare.toString());
                }
            }
        }
        stopWordsPerField.put(field, stopWords);
    }
}
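
A short usage sketch for the constructor above (assuming an already-open IndexReader; the field name and threshold are illustrative): any "body" term that appears in more than 100 documents is treated as a stop word at query time.

import java.io.IOException;
import java.util.Collections;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.query.QueryAutoStopWordAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexReader;

class StopWordAnalyzerDemo {
    static Analyzer build(IndexReader reader) throws IOException {
        // Wrap a StandardAnalyzer; the stop-word sets are computed once, here at construction.
        return new QueryAutoStopWordAnalyzer(
                new StandardAnalyzer(), reader, Collections.singleton("body"), 100);
    }
}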

Example 9: innerExecute

import org.apache.lucene.util.CharsRefBuilder; // import the required package/class

@Override
public TermSuggestion innerExecute(String name, TermSuggestionContext suggestion, IndexSearcher searcher,
        CharsRefBuilder spare) throws IOException {
    DirectSpellChecker directSpellChecker = SuggestUtils.getDirectSpellChecker(suggestion.getDirectSpellCheckerSettings());
    final IndexReader indexReader = searcher.getIndexReader();
    TermSuggestion response = new TermSuggestion(
            name, suggestion.getSize(), suggestion.getDirectSpellCheckerSettings().sort()
    );
    List<Token> tokens = queryTerms(suggestion, spare);
    for (Token token : tokens) {
        // TODO: Extend DirectSpellChecker in 4.1, to get the raw suggested words as BytesRef
        SuggestWord[] suggestedWords = directSpellChecker.suggestSimilar(
                token.term, suggestion.getShardSize(), indexReader,
                suggestion.getDirectSpellCheckerSettings().suggestMode()
        );
        Text key = new Text(new BytesArray(token.term.bytes()));
        TermSuggestion.Entry resultEntry = new TermSuggestion.Entry(key, token.startOffset,
                token.endOffset - token.startOffset);
        for (SuggestWord suggestWord : suggestedWords) {
            Text word = new Text(suggestWord.string);
            resultEntry.addOption(new TermSuggestion.Entry.Option(word, suggestWord.freq, suggestWord.score));
        }
        response.addTerm(resultEntry);
    }
    return response;
}

Example 10: LocalEnv

import org.apache.lucene.util.CharsRefBuilder; // import the required package/class

public LocalEnv(int offset, int limit, int startTermIndex, int adjust, int targetIdx, int nTerms,
        Predicate<BytesRef> termFilter, int mincount, int[] counts, CharsRefBuilder charsRef, boolean extend,
        SortedSetDocValues si, SolrIndexSearcher searcher, List<Entry<LeafReader, Bits>> leaves,
        String fieldName, T ft, NamedList res) {
    super(offset, limit, targetIdx, mincount, fieldName, ft, res);
    if (startTermIndex == -1) {
        // weird case where missing is counted at counts[0]
        this.startTermOrd = 0;
        this.endTermOrd = nTerms - 1;
    } else if (startTermIndex >= 0) {
        this.startTermOrd = startTermIndex;
        this.endTermOrd = startTermIndex + nTerms;
    } else {
        throw new IllegalStateException();
    }
    this.startTermIndex = startTermIndex;
    this.adjust = adjust;
    this.nTerms = nTerms;
    this.termFilter = termFilter;
    this.counts = counts;
    this.charsRef = charsRef;
    this.extend = extend;
    this.si = si;
    this.searcher = searcher;
    this.leaves = leaves;
}

Example 11: addTermFrequencies

import org.apache.lucene.util.CharsRefBuilder; // import the required package/class

/**
 * Adds terms and frequencies found in vector into the Map termFreqMap
 *
 * @param termFreqMap a Map of terms and their frequencies
 * @param vector List of terms and their frequencies for a doc/field
 */
private void addTermFrequencies(Map<String, Int> termFreqMap, Terms vector) throws IOException {
    final TermsEnum termsEnum = vector.iterator();
    final CharsRefBuilder spare = new CharsRefBuilder();
    BytesRef text;
    while ((text = termsEnum.next()) != null) {
        spare.copyUTF8Bytes(text);
        final String term = spare.toString();
        if (isNoiseWord(term)) {
            continue;
        }
        final int freq = (int) termsEnum.totalTermFreq();
        // increment frequency
        Int cnt = termFreqMap.get(term);
        if (cnt == null) {
            cnt = new Int();
            termFreqMap.put(term, cnt);
            cnt.x = freq;
        } else {
            cnt.x += freq;
        }
    }
}
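
A hedged caller sketch for the method above (hypothetical reader, docId and field name; Int is the MoreLikeThis helper type used in the example). Note that getTermVector only returns non-null when term vectors were stored at index time.

// Inside the same class as addTermFrequencies():
Map<String, Int> termFreqMap = new HashMap<>();
Terms vector = reader.getTermVector(docId, "body"); // null if no term vector was stored
if (vector != null) {
    addTermFrequencies(termFreqMap, vector);
}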

Example 12: getTerms

import org.apache.lucene.util.CharsRefBuilder; // import the required package/class

protected static void getTerms(AllTermsShardRequest request, List<String> terms, List<LeafReaderContext> leaves) {
    List<TermsEnum> termIters = getTermsEnums(request, leaves);
    CharsRefBuilder spare = new CharsRefBuilder();
    BytesRef lastTerm = null;
    int[] exhausted = new int[termIters.size()];
    for (int i = 0; i < exhausted.length; i++) {
        exhausted[i] = 0;
    }
    try {
        lastTerm = findSmallestTermAfter(request, termIters, lastTerm, exhausted);
        if (lastTerm == null) {
            return;
        }
        findNMoreTerms(request, terms, termIters, spare, lastTerm, exhausted);
    } catch (IOException e) {
        // swallowed: the terms collected so far are returned as-is
    }
}

Example 13: findNMoreTerms

import org.apache.lucene.util.CharsRefBuilder; // import the required package/class

protected static void findNMoreTerms(AllTermsShardRequest request, List<String> terms, List<TermsEnum> termIters,
        CharsRefBuilder spare, BytesRef lastTerm, int[] exhausted) {
    if (getDocFreq(termIters, lastTerm, exhausted) >= request.minDocFreq()) {
        spare.copyUTF8Bytes(lastTerm);
        terms.add(spare.toString());
    }
    // deep-copy lastTerm so that advancing the iterators cannot mutate it
    BytesRef bytesRef = new BytesRef(lastTerm.utf8ToString());
    lastTerm = bytesRef;
    while (terms.size() < request.size() && lastTerm != null) {
        moveIterators(exhausted, termIters, lastTerm);
        lastTerm = findMinimum(exhausted, termIters);
        if (lastTerm != null) {
            if (getDocFreq(termIters, lastTerm, exhausted) >= request.minDocFreq()) {
                spare.copyUTF8Bytes(lastTerm);
                terms.add(spare.toString());
            }
        }
    }
}

Example 14: addTermFrequencies

import org.apache.lucene.util.CharsRefBuilder; // import the required package/class

/**
 * Adds terms and frequencies found in vector into the Map termFreqMap
 *
 * @param termFreqMap a Map of terms and their frequencies
 * @param vector List of terms and their frequencies for a doc/field
 */
private void addTermFrequencies(Map<String, Int> termFreqMap, Terms vector) throws IOException {
    final TermsEnum termsEnum = vector.iterator(null);
    final CharsRefBuilder spare = new CharsRefBuilder();
    BytesRef text;
    while ((text = termsEnum.next()) != null) {
        spare.copyUTF8Bytes(text);
        final String term = spare.toString();
        if (isNoiseWord(term)) {
            continue;
        }
        final int freq = (int) termsEnum.totalTermFreq();
        // increment frequency
        Int cnt = termFreqMap.get(term);
        if (cnt == null) {
            cnt = new Int();
            termFreqMap.put(term, cnt);
            cnt.x = freq;
        } else {
            cnt.x += freq;
        }
    }
}

Example 15: getLookupResult

import org.apache.lucene.util.CharsRefBuilder; // import the required package/class

private LookupResult getLookupResult(Long output1, BytesRef output2, CharsRefBuilder spare) {
    LookupResult result;
    if (hasPayloads) {
        int sepIndex = -1;
        for (int i = 0; i < output2.length; i++) {
            if (output2.bytes[output2.offset + i] == PAYLOAD_SEP) {
                sepIndex = i;
                break;
            }
        }
        assert sepIndex != -1;
        spare.grow(sepIndex);
        final int payloadLen = output2.length - sepIndex - 1;
        spare.copyUTF8Bytes(output2.bytes, output2.offset, sepIndex);
        BytesRef payload = new BytesRef(payloadLen);
        System.arraycopy(output2.bytes, sepIndex + 1, payload.bytes, 0, payloadLen);
        payload.length = payloadLen;
        result = new LookupResult(spare.toString(), decodeWeight(output1), payload);
    } else {
        spare.grow(output2.length);
        spare.copyUTF8Bytes(output2);
        result = new LookupResult(spare.toString(), decodeWeight(output1));
    }
    return result;
}
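
A small design note on this variant versus Example 2: the explicit spare.grow() calls pre-size the char buffer before decoding, but as far as the Lucene sources go, CharsRefBuilder.copyUTF8Bytes already grows the buffer itself, so the extra calls are redundant yet harmless.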