本文整理汇总了Java中org.apache.lucene.util.StringHelper.startsWith方法的典型用法代码示例。如果您正苦于以下问题:Java StringHelper.startsWith方法的具体用法?Java StringHelper.startsWith怎么用?Java StringHelper.startsWith使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.apache.lucene.util.StringHelper
的用法示例。
在下文中一共展示了StringHelper.startsWith方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: readFields
import org.apache.lucene.util.StringHelper; //导入方法依赖的package包/类
/**
 * Reads the field table from the given input: one FIELD line per entry,
 * terminated by an END marker whose checksum footer is then verified.
 *
 * @param in raw index input; wrapped here for checksum verification
 * @return map from field name to the file pointer just after its FIELD line
 * @throws IOException on read error or checksum mismatch
 */
private TreeMap<String,Long> readFields(IndexInput in) throws IOException {
  ChecksumIndexInput checksummed = new BufferedChecksumIndexInput(in);
  BytesRefBuilder line = new BytesRefBuilder();
  TreeMap<String,Long> result = new TreeMap<>();
  for (;;) {
    SimpleTextUtil.readLine(checksummed, line);
    if (line.get().equals(END)) {
      // Last line reached: validate the checksum footer before returning.
      SimpleTextUtil.checkFooter(checksummed);
      return result;
    }
    if (StringHelper.startsWith(line.get(), FIELD)) {
      int nameLen = line.length() - FIELD.length;
      String fieldName = new String(line.bytes(), FIELD.length, nameLen, StandardCharsets.UTF_8);
      // Value is the position immediately after the FIELD line.
      result.put(fieldName, checksummed.getFilePointer());
    }
  }
}
示例2: scan
import org.apache.lucene.util.StringHelper; //导入方法依赖的package包/类
/**
 * Scans ({@code termsEnum.next()}) terms until a term is found that does
 * not start with curVNode's cell. If it finds a leaf cell or a cell at
 * level {@code scanDetailLevel} then it calls {@link
 * #visitScanned(org.apache.lucene.spatial.prefix.tree.Cell)}.
 */
protected void scan(int scanDetailLevel) throws IOException {
  //TODO refactor to use method on curVNode.cell
  while (thisTerm != null && StringHelper.startsWith(thisTerm, curVNodeTerm)) {
    scanCell = grid.getCell(thisTerm.bytes, thisTerm.offset, thisTerm.length, scanCell);
    final int level = scanCell.getLevel();
    if (level < scanDetailLevel) {
      // Shallower than the scan depth: only leaf cells are complete matches.
      if (scanCell.isLeaf()) {
        visitScanned(scanCell);
      }
    } else if (level == scanDetailLevel && !scanCell.isLeaf()) {
      // At the scan depth, leaves are skipped (LUCENE-5529).
      visitScanned(scanCell);
    }
    thisTerm = termsEnum.next();
  }
}
示例3: getPrefixTerms
import org.apache.lucene.util.StringHelper; //导入方法依赖的package包/类
/**
 * Collects terms starting with {@code prefix} into {@code terms}, stopping
 * once {@code maxExpansions} terms have been gathered.
 *
 * @param terms  output set of matching terms (deep-copied)
 * @param prefix prefix term to expand
 * @param reader index to scan
 * @throws IOException on read error
 */
private void getPrefixTerms(ObjectHashSet<Term> terms, final Term prefix, final IndexReader reader) throws IOException {
  // Iterating per leaf avoids merging all segments' terms into one view
  // (as SlowCompositeReaderWrapper would), which is very expensive.
  final BytesRef prefixBytes = prefix.bytes();
  for (LeafReaderContext leaf : reader.leaves()) {
    Terms leafTerms = leaf.reader().terms(field);
    if (leafTerms == null) {
      continue;
    }
    TermsEnum te = leafTerms.iterator();
    if (te.seekCeil(prefixBytes) == TermsEnum.SeekStatus.END) {
      // No term at or after the prefix in this leaf.
      continue;
    }
    BytesRef term = te.term();
    // Terms are sorted, so stop at the first one without the prefix.
    while (term != null && StringHelper.startsWith(term, prefixBytes)) {
      terms.add(new Term(field, BytesRef.deepCopyOf(term)));
      if (terms.size() >= maxExpansions) {
        return;
      }
      term = te.next();
    }
  }
}
示例4: accept
import org.apache.lucene.util.StringHelper; //导入方法依赖的package包/类
/**
 * Accepts terms while they still carry the required prefix; since terms are
 * sorted, the first mismatch ends the enumeration.
 */
@Override
protected AcceptStatus accept(BytesRef term) {
  return StringHelper.startsWith(term, prefixRef) ? AcceptStatus.YES : AcceptStatus.END;
}
示例5: setTerm
import org.apache.lucene.util.StringHelper; //导入方法依赖的package包/类
/**
 * Caches the enumerator's current term, clearing it to {@code null} when a
 * prefix is configured and the term falls outside it.
 *
 * @return the cached term, or {@code null} once past the prefix range
 */
private BytesRef setTerm() throws IOException {
  BytesRef current = termsEnum.term();
  boolean inRange = prefix == null || StringHelper.startsWith(current, prefix);
  term = inRange ? current : null;
  return term;
}
示例6: visitMatchingTerms
import org.apache.lucene.util.StringHelper; //导入方法依赖的package包/类
/**
 * Visits every indexed term that starts with the configured prefix and whose
 * remainder (after the prefix) matches the configured regex pattern.
 *
 * @param reader    index to scan
 * @param fieldName field whose terms are enumerated
 * @param mtv       callback invoked for each matching term
 * @throws IOException on read error
 */
@Override
public void visitMatchingTerms(
    IndexReader reader,
    String fieldName,
    MatchingTermVisitor mtv) throws IOException
{
  int prefixLength = prefix.length();
  Terms terms = MultiFields.getTerms(reader, fieldName);
  if (terms == null) {
    // Field has no terms in this reader: nothing to visit.
    return;
  }
  Matcher matcher = pattern.matcher("");
  try {
    TermsEnum termsEnum = terms.iterator(null);
    // Position the enumerator at the first term >= the prefix.
    TermsEnum.SeekStatus status = termsEnum.seekCeil(prefixRef);
    BytesRef text;
    if (status == TermsEnum.SeekStatus.FOUND) {
      text = prefixRef;
    } else if (status == TermsEnum.SeekStatus.NOT_FOUND) {
      text = termsEnum.term();
    } else {
      text = null; // END: no term at or after the prefix
    }
    // Terms are sorted: stop at the first term that loses the prefix.
    // (The original re-tested text != null inside the loop body even though
    // the loop condition already guarantees it; that dead check is removed.)
    while (text != null && StringHelper.startsWith(text, prefixRef)) {
      String textString = text.utf8ToString();
      // Match the regex against the portion after the prefix only.
      matcher.reset(textString.substring(prefixLength));
      if (matcher.matches()) {
        mtv.visitMatchingTerm(new Term(fieldName, textString));
      }
      text = termsEnum.next();
    }
  } finally {
    matcher.reset();
  }
}
示例7: visitMatchingTerms
import org.apache.lucene.util.StringHelper; //导入方法依赖的package包/类
/**
 * Visits every indexed term that starts with the configured prefix.
 *
 * @param reader    index to scan
 * @param fieldName field whose terms are enumerated
 * @param mtv       callback invoked for each matching term
 * @throws IOException on read error
 */
@Override
public void visitMatchingTerms(
    IndexReader reader,
    String fieldName,
    MatchingTermVisitor mtv) throws IOException
{
  /* inspired by PrefixQuery.rewrite(): */
  Terms terms = MultiFields.getTerms(reader, fieldName);
  if (terms == null) {
    return;
  }
  TermsEnum termsEnum = terms.iterator(null);
  TermsEnum.SeekStatus status = termsEnum.seekCeil(new BytesRef(getPrefix()));
  boolean scanRemaining;
  if (status == TermsEnum.SeekStatus.FOUND) {
    // The prefix itself is an indexed term.
    mtv.visitMatchingTerm(getLucenePrefixTerm(fieldName));
    scanRemaining = true;
  } else if (status == TermsEnum.SeekStatus.NOT_FOUND
      && StringHelper.startsWith(termsEnum.term(), prefixRef)) {
    mtv.visitMatchingTerm(new Term(fieldName, termsEnum.term().utf8ToString()));
    scanRemaining = true;
  } else {
    // EOF, or the ceiling term does not carry the prefix: no matches.
    scanRemaining = false;
  }
  while (scanRemaining) {
    BytesRef text = termsEnum.next();
    if (text == null || !StringHelper.startsWith(text, prefixRef)) {
      break;
    }
    mtv.visitMatchingTerm(new Term(fieldName, text.utf8ToString()));
  }
}
示例8: checkFooter
import org.apache.lucene.util.StringHelper; //导入方法依赖的package包/类
/**
 * Verifies the SimpleText checksum footer: the final line must be the
 * CHECKSUM marker followed by the 20-digit checksum of everything before it,
 * with nothing trailing afterwards.
 *
 * @param input checksum-tracking input positioned just before the footer line
 * @throws CorruptIndexException if the footer is missing, wrong, or followed by extra bytes
 * @throws IOException on read error
 */
public static void checkFooter(ChecksumIndexInput input) throws IOException {
  BytesRefBuilder line = new BytesRefBuilder();
  // Render the running checksum the same way it was written.
  String expectedChecksum = String.format(Locale.ROOT, "%020d", input.getChecksum());
  SimpleTextUtil.readLine(input, line);
  if (!StringHelper.startsWith(line.get(), CHECKSUM)) {
    throw new CorruptIndexException("SimpleText failure: expected checksum line but got " + line.get().utf8ToString() + " (resource=" + input + ")");
  }
  String actualChecksum = new BytesRef(line.bytes(), CHECKSUM.length, line.length() - CHECKSUM.length).utf8ToString();
  if (!expectedChecksum.equals(actualChecksum)) {
    throw new CorruptIndexException("SimpleText checksum failure: " + actualChecksum + " != " + expectedChecksum + " (resource=" + input + ")");
  }
  if (input.getFilePointer() != input.length()) {
    throw new CorruptIndexException("Unexpected stuff at the end of file, please be careful with your text editor! (resource=" + input + ")");
  }
}
示例9: accept
import org.apache.lucene.util.StringHelper; //导入方法依赖的package包/类
/**
 * Accepts a term only if it both carries the required prefix and matches the
 * regex; non-matching terms are skipped with NO (never ending enumeration).
 */
@Override
protected AcceptStatus accept(BytesRef term) {
  if (!StringHelper.startsWith(term, prefixRef)) {
    return AcceptStatus.NO;
  }
  // TODO: set BoostAttr based on distance of
  // searchTerm.text() and term().text()
  return regexImpl.match(term) ? AcceptStatus.YES : AcceptStatus.NO;
}
示例10: accept
import org.apache.lucene.util.StringHelper; //导入方法依赖的package包/类
/**
 * <p>Accepts or rejects a candidate term by Levenshtein distance, mirroring
 * FuzzyTermEnum's termCompare.</p>
 * <p>If the minSimilarity is >= 1.0, this uses the maxEdits as the comparison.
 * Otherwise similarity is derived as
 * <pre>
 * similarity = 1 - ((float)distance / (float) (prefixLength + Math.min(textlen, targetlen)));
 * </pre>
 * where distance is the Levenshtein distance between the two words.
 * </p>
 */
@Override
protected final AcceptStatus accept(BytesRef term) {
  if (!StringHelper.startsWith(term, prefixBytesRef)) {
    // Terms arrive sorted: once the prefix is lost, no later term can match.
    return AcceptStatus.END;
  }
  utf32.copyUTF8Bytes(term);
  final int distance = calcDistance(utf32.ints(), realPrefixLength, utf32.length() - realPrefixLength);
  if (distance == Integer.MIN_VALUE) {
    // Sentinel meaning the Levenshtein computation stopped early.
    return AcceptStatus.NO;
  }
  // In raw mode, a distance beyond maxEdits rejects without computing similarity.
  if (raw && distance > maxEdits) {
    return AcceptStatus.NO;
  }
  final float similarity = calcSimilarity(distance, utf32.length() - realPrefixLength, text.length);
  // In raw mode distance <= maxEdits (checked above) already suffices;
  // otherwise similarity must exceed the threshold.
  if (raw || similarity > minSimilarity) {
    boostAtt.setBoost((similarity - minSimilarity) * scale_factor);
    return AcceptStatus.YES;
  }
  return AcceptStatus.NO;
}
示例11: readFields
import org.apache.lucene.util.StringHelper; //导入方法依赖的package包/类
/**
 * Reads the field table from {@code in}: collects one entry per FIELD line
 * (field name -> file pointer just after the line) until the END marker.
 *
 * @param in input positioned at the start of the field table
 * @return map from field name to its data start offset
 * @throws IOException on read error
 */
private TreeMap<String,Long> readFields(IndexInput in) throws IOException {
  BytesRef scratch = new BytesRef(10);
  TreeMap<String,Long> fields = new TreeMap<String,Long>();
  while (true) {
    SimpleTextUtil.readLine(in, scratch);
    if (scratch.equals(END)) {
      return fields;
    } else if (StringHelper.startsWith(scratch, FIELD)) {
      // Pass the charset constant instead of the name "UTF-8": no charset
      // lookup and no (impossible) UnsupportedEncodingException path.
      String fieldName = new String(scratch.bytes, scratch.offset + FIELD.length, scratch.length - FIELD.length,
          java.nio.charset.StandardCharsets.UTF_8);
      fields.put(fieldName, in.getFilePointer());
    }
  }
}
示例12: accept
import org.apache.lucene.util.StringHelper; //导入方法依赖的package包/类
/**
 * <p>Accepts or rejects a candidate term by Levenshtein distance, mirroring
 * FuzzyTermEnum's termCompare.</p>
 * <p>If the minSimilarity is >= 1.0, this uses the maxEdits as the comparison.
 * Otherwise similarity is derived as
 * <pre>
 * similarity = 1 - ((float)distance / (float) (prefixLength + Math.min(textlen, targetlen)));
 * </pre>
 * where distance is the Levenshtein distance between the two words.
 * </p>
 */
@Override
protected final AcceptStatus accept(BytesRef term) {
  if (!StringHelper.startsWith(term, prefixBytesRef)) {
    // Terms arrive sorted: once the prefix is lost, no later term can match.
    return AcceptStatus.END;
  }
  UnicodeUtil.UTF8toUTF32(term, utf32);
  final int distance = calcDistance(utf32.ints, realPrefixLength, utf32.length - realPrefixLength);
  if (distance == Integer.MIN_VALUE) {
    // Sentinel meaning the Levenshtein computation stopped early.
    return AcceptStatus.NO;
  }
  // In raw mode, a distance beyond maxEdits rejects without computing similarity.
  if (raw && distance > maxEdits) {
    return AcceptStatus.NO;
  }
  final float similarity = calcSimilarity(distance, utf32.length - realPrefixLength, text.length);
  // In raw mode distance <= maxEdits (checked above) already suffices;
  // otherwise similarity must exceed the threshold.
  if (raw || similarity > minSimilarity) {
    boostAtt.setBoost((similarity - minSimilarity) * scale_factor);
    return AcceptStatus.YES;
  }
  return AcceptStatus.NO;
}
示例13: scan
import org.apache.lucene.util.StringHelper; //导入方法依赖的package包/类
/**
 * Scans ({@code termsEnum.next()}) terms until a term is found that does
 * not start with curVNode's cell. If it finds a leaf cell or a cell at
 * level {@code scanDetailLevel} then it calls {@link
 * #visitScanned(org.apache.lucene.spatial.prefix.tree.Cell)}.
 */
protected void scan(int scanDetailLevel) throws IOException {
  while (thisTerm != null && StringHelper.startsWith(thisTerm, curVNodeTerm)) {
    scanCell = grid.getCell(thisTerm.bytes, thisTerm.offset, thisTerm.length, scanCell);
    final int level = scanCell.getLevel();
    // Visit cells at the scan depth, and leaf cells above it; anything
    // deeper than scanDetailLevel is skipped.
    if (level <= scanDetailLevel && (level == scanDetailLevel || scanCell.isLeaf())) {
      visitScanned(scanCell);
    }
    thisTerm = termsEnum.next();
  }
}
示例14: startsWith
import org.apache.lucene.util.StringHelper; //导入方法依赖的package包/类
/** Used only in ctor: does the current scratch line begin with {@code prefix}? */
private boolean startsWith(BytesRef prefix) {
  final BytesRef current = scratch.get();
  return StringHelper.startsWith(current, prefix);
}
示例15: loadTerms
import org.apache.lucene.util.StringHelper; //导入方法依赖的package包/类
/**
 * Builds the in-memory term index (an FST) for one field by scanning the
 * SimpleText postings lines from termsStart until the END marker or the
 * next FIELD line. For each TERM line the FST maps the term bytes to a
 * triple of (docs-start file pointer, docFreq, totalTermFreq); DOC and
 * FREQ lines accumulate per-term statistics along the way.
 */
private void loadTerms() throws IOException {
PositiveIntOutputs posIntOutputs = PositiveIntOutputs.getSingleton();
// FST output = pair(docsStart, pair(docFreq, totalTermFreq)).
final Builder<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>> b;
final PairOutputs<Long,Long> outputsInner = new PairOutputs<>(posIntOutputs, posIntOutputs);
final PairOutputs<Long,PairOutputs.Pair<Long,Long>> outputs = new PairOutputs<>(posIntOutputs,
outputsInner);
b = new Builder<>(FST.INPUT_TYPE.BYTE1, outputs);
// Clone the reader's input so this scan does not disturb its position.
IndexInput in = SimpleTextFieldsReader.this.in.clone();
in.seek(termsStart);
final BytesRefBuilder lastTerm = new BytesRefBuilder();
// File pointer of the current term's first posting line; -1 = no term pending.
long lastDocsStart = -1;
int docFreq = 0;
long totalTermFreq = 0;
FixedBitSet visitedDocs = new FixedBitSet(maxDoc);
final IntsRefBuilder scratchIntsRef = new IntsRefBuilder();
while(true) {
SimpleTextUtil.readLine(in, scratch);
if (scratch.get().equals(END) || StringHelper.startsWith(scratch.get(), FIELD)) {
// End of this field's postings: flush the last pending term, if any.
if (lastDocsStart != -1) {
b.add(Util.toIntsRef(lastTerm.get(), scratchIntsRef),
outputs.newPair(lastDocsStart,
outputsInner.newPair((long) docFreq, totalTermFreq)));
sumTotalTermFreq += totalTermFreq;
}
break;
} else if (StringHelper.startsWith(scratch.get(), DOC)) {
// DOC line: one more document for the current term.
docFreq++;
sumDocFreq++;
scratchUTF16.copyUTF8Bytes(scratch.bytes(), DOC.length, scratch.length()-DOC.length);
int docID = ArrayUtil.parseInt(scratchUTF16.chars(), 0, scratchUTF16.length());
visitedDocs.set(docID);
} else if (StringHelper.startsWith(scratch.get(), FREQ)) {
// FREQ line: add this document's frequency to the term's total.
scratchUTF16.copyUTF8Bytes(scratch.bytes(), FREQ.length, scratch.length()-FREQ.length);
totalTermFreq += ArrayUtil.parseInt(scratchUTF16.chars(), 0, scratchUTF16.length());
} else if (StringHelper.startsWith(scratch.get(), TERM)) {
// TERM line: flush the previous term into the FST before starting the new one.
if (lastDocsStart != -1) {
b.add(Util.toIntsRef(lastTerm.get(), scratchIntsRef), outputs.newPair(lastDocsStart,
outputsInner.newPair((long) docFreq, totalTermFreq)));
}
// The new term's postings begin right after this line.
lastDocsStart = in.getFilePointer();
final int len = scratch.length() - TERM.length;
lastTerm.grow(len);
System.arraycopy(scratch.bytes(), TERM.length, lastTerm.bytes(), 0, len);
lastTerm.setLength(len);
// Reset per-term counters for the new term.
docFreq = 0;
sumTotalTermFreq += totalTermFreq;
totalTermFreq = 0;
termCount++;
}
}
docCount = visitedDocs.cardinality();
fst = b.finish();
/*
PrintStream ps = new PrintStream("out.dot");
fst.toDot(ps);
ps.close();
System.out.println("SAVED out.dot");
*/
//System.out.println("FST " + fst.sizeInBytes());
}