本文整理汇总了Java中org.lemurproject.galago.utility.CmpUtil类的典型用法代码示例。如果您正苦于以下问题:Java CmpUtil类的具体用法?Java CmpUtil怎么用?Java CmpUtil使用的例子?那么,这里精选的类代码示例或许可以为您提供帮助。
CmpUtil类属于org.lemurproject.galago.utility包,在下文中一共展示了CmpUtil类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: getIterator
import org.lemurproject.galago.utility.CmpUtil; //导入依赖的package包/类
/**
 * Looks up an iterator positioned exactly on the given key.
 *
 * @param key the key to locate
 * @return an iterator whose current key equals {@code key}, or {@code null}
 *         when the vocabulary yields no block for the key or the positioned
 *         iterator does not sit on an equal key
 * @throws IOException propagated from creating or positioning the iterator
 */
@Override
public DiskBTreeIterator getIterator(byte[] key) throws IOException {
  // the vocabulary decides which block could hold the key
  VocabularyReader.IndexBlockInfo block = vocabulary.get(key);
  if (block == null) {
    return null;
  }

  DiskBTreeIterator candidate = new DiskBTreeIterator(this, block);
  candidate.find(key);

  // only an exact key match counts as a hit
  boolean exactMatch = CmpUtil.equals(key, candidate.getKey());
  return exactMatch ? candidate : null;
}
示例2: skipTo
import org.lemurproject.galago.utility.CmpUtil; //导入依赖的package包/类
/**
 * Moves this iterator forward towards {@code key}. The scan stops on the
 * first key in the block that compares greater than or equal to {@code key};
 * if the block is exhausted first, control is handed to {@code nextKey()}.
 *
 * @param key the key to skip to
 * @throws IOException propagated from block loading, key caching or nextKey()
 */
@Override
public void skipTo(byte[] key) throws IOException {
  // the key compares at or after blockInfo.nextSlotKey, so it is not in this block
  boolean beyondThisBlock = CmpUtil.compare(key, this.blockInfo.nextSlotKey) >= 0;
  if (beyondThisBlock) {
    // look the block up in the vocabulary, passing the current slot id as the lower bound
    VocabularyReader.IndexBlockInfo target = vocabulary.get(key, this.blockInfo.slotId);
    this.loadBlockHeader(target);
  }

  // walk the block's keys one position at a time
  for (; keyIndex < keyCount; keyIndex++) {
    // keep caching until the key at keyIndex is available
    while (keyIndex >= cacheKeyCount) {
      this.cacheKeys();
    }
    if (CmpUtil.compare(keyCache[keyIndex], key) >= 0) {
      // sitting on the requested key, or on the first key past it
      return;
    }
  }

  // ran off the end of the block without reaching the key
  nextKey();
}
示例3: process
import org.lemurproject.galago.utility.CmpUtil; //导入依赖的package包/类
/**
 * Buffers text features that share the same feature bytes and offers the
 * buffer to {@code emitExtents()} as it grows. When the feature changes, the
 * finished buffer is offered one last time, whatever is still buffered is
 * counted in {@code notPassing}, and a new buffer is started.
 *
 * @param tf the next text feature; instances with equal {@code feature}
 *           bytes are presumably delivered consecutively (TODO confirm
 *           against the upstream sort order)
 * @throws IOException propagated from {@code emitExtents()}
 */
@Override
public void process(TextFeature tf) throws IOException {
  debug_total_count++;

  if (currentFeature == null) {
    // first feature - record the feature + store the tf in the buffer
    currentFeature = tf.feature;
    currentBuffer.offerLast(tf);
    // no point emitting here - threshold should be > 1
  } else if (CmpUtil.equals(tf.feature, currentFeature)) {
    currentBuffer.offerLast(tf);
    emitExtents();
  } else {
    // The feature changed. Give the finished buffer a final chance to be
    // emitted before its remaining entries are counted as not passing;
    // without this, a buffer that was only ever filled by the "first
    // feature" or "feature changed" paths is dropped unchecked.
    emitExtents();
    notPassing.incrementBy(currentBuffer.size());
    currentBuffer.clear();

    // now prepare for the next feature
    currentFeature = tf.feature;
    currentBuffer.offerLast(tf);
    currentPassesThreshold = false;
  }
}
示例4: process
import org.lemurproject.galago.utility.CmpUtil; //导入依赖的package包/类
/**
 * Buffers numbered extents that share the same extent name, offering the
 * buffer to {@code emitExtents()} after each addition to an existing run and
 * once more when the name changes, before the leftovers are counted in
 * {@code discards} and the buffer is restarted.
 *
 * @param ne the next numbered extent
 * @throws IOException propagated from {@code emitExtents()}
 */
@Override
public void process(NumberedExtent ne) throws IOException {
  debug_total_count++;

  // very first extent: just start the buffer (threshold should be > 1, so nothing to emit yet)
  if (currentFeature == null) {
    currentFeature = ne.extentName;
    currentBuffer.offerLast(ne);
    return;
  }

  // same extent name as the running buffer: append and try to emit
  if (CmpUtil.equals(ne.extentName, currentFeature)) {
    currentBuffer.offerLast(ne);
    emitExtents();
    return;
  }

  // extent name changed: final emit attempt, then count and drop what is left
  emitExtents();
  discards.incrementBy(currentBuffer.size());
  currentBuffer.clear();

  // start over with the new extent name
  currentFeature = ne.extentName;
  currentBuffer.offerLast(ne);
  currentPassesThreshold = false;
}
示例5: process
import org.lemurproject.galago.utility.CmpUtil; //导入依赖的package包/类
/**
 * Resolves a link's destination against the stream of document urls and
 * forwards the link when it ends up with a non-empty destination name.
 *
 * @param link the link to resolve; its destination fields are filled in place
 * @throws IOException propagated from reading urls or from the downstream processor
 */
@Override
public void process(ExtractedLinkIndri link) throws IOException {
  // read urls until the current one no longer precedes the link's destination
  while (current != null && CmpUtil.compare(current.url, link.destUrl) < 0) {
    current = documentUrls.read();
  }

  // exact url match: copy the document's identity onto the link
  boolean destinationKnown = current != null && current.url.equals(link.destUrl);
  if (destinationKnown) {
    link.destName = current.identifier;
    link.filePath = current.filePath;
    link.fileLocation = current.fileLocation;
  }

  // still unnamed and externals are accepted: synthesize a name from the url
  boolean treatAsExternal = acceptExternalUrls && link.destName.isEmpty();
  if (treatAsExternal) {
    link.destName = EXTERNAL_PREFIX + link.destUrl;
    externalLinks.increment();
  } else {
    internalLinks.increment();
  }

  // only named destinations can be emitted
  if (link.destName.isEmpty()) {
    return;
  }
  processor.process(link);
}
示例6: process
import org.lemurproject.galago.utility.CmpUtil; //导入依赖的package包/类
/**
 * Builds the new score for one document: starts from {@code rndJump}, adds
 * every partial score read for the same document name, updates the running
 * totals and forwards the result.
 *
 * @param docScore the document whose partial scores are being merged
 * @throws IOException propagated from reading partial scores or from the downstream processor
 */
@Override
public void process(PageRankScore docScore) throws IOException {
  PageRankScore merged = new PageRankScore(docScore.docName, rndJump);

  // Partial scores whose document name compares before this document are
  // logged and skipped. This should never happen -- but it is checked anyway.
  for (; curr != null && CmpUtil.compare(docScore.docName, curr.docName) > 0;
      curr = partialScores.read()) {
    logger.log(Level.INFO, "Processing : {0}, IGNORED PARTIAL SCORE!!: {1}-{2}", new Object[]{docScore.docName, curr.docName, curr.score});
  }

  // fold in every partial score that belongs to this document
  for (; curr != null && docScore.docName.equals(curr.docName);
      curr = partialScores.read()) {
    totalWalk += curr.score;
    merged.score += curr.score;
  }

  // curr is now null or refers to a later document's partial score
  pageCount += 1.0;
  totalScore += merged.score;
  processor.process(merged);
}
示例7: nextDocument
import org.lemurproject.galago.utility.CmpUtil; //导入依赖的package包/类
/**
 * Returns the document under the iterator and advances to the next key.
 *
 * @return the next document, or {@code null} when the iterator is done (and
 *         {@code reader} is non-null) or the current key has reached the
 *         split's non-empty end key
 * @throws IOException propagated from the underlying iterator
 */
@Override
public Document nextDocument() throws IOException {
  boolean exhausted = reader != null && iterator.isDone();
  if (exhausted) {
    return null;
  }

  byte[] currentKey = iterator.getKey();

  // a null or empty end key means the split has no upper bound
  boolean splitHasEnd = split.endKey != null && split.endKey.length > 0;
  if (splitHasEnd && CmpUtil.compare(currentKey, split.endKey) >= 0) {
    // don't go past the end of the split
    return null;
  }

  Document result = iterator.getDocument(extractionParameters);
  iterator.nextKey();
  return result;
}
示例8: testCountUnigrams
import org.lemurproject.galago.utility.CmpUtil; //导入依赖的package包/类
/**
 * Feeds a three-term document ("one", "two", "one") through a WordCounter and
 * checks that two distinct words are reported with the expected collection
 * frequencies: 2 for "one" and 1 for "two".
 */
@Test
public void testCountUnigrams() throws IOException, IncompatibleProcessorException {
  WordCounter counter = new WordCounter(new FakeParameters(Parameters.create()));
  Document document = new Document();
  PostStep post = new PostStep();

  counter.setProcessor(post);

  document.terms = new ArrayList<>();
  document.terms.add("one");
  document.terms.add("two");
  document.terms.add("one");

  counter.process(document);

  assertEquals(2, post.results.size());

  for (int i = 0; i < post.results.size(); ++i) {
    WordCount wc = post.results.get(i);
    if (CmpUtil.equals(wc.word, ByteUtil.fromString("one"))) {
      assertEquals(2, wc.collectionFrequency);
    } else if (CmpUtil.equals(wc.word, ByteUtil.fromString("two"))) {
      // previously compared against "one" again, so this branch could never
      // run and the count for "two" was never verified
      assertEquals(1, wc.collectionFrequency);
    }
  }
}
示例9: get
import org.lemurproject.galago.utility.CmpUtil; //导入依赖的package包/类
/**
 * Binary search for the block that should contain a key, never looking at
 * slots below {@code minBlock}.
 *
 * The search narrows the range to at most two neighbouring slots; the higher
 * one is returned when its first key compares less than or equal to
 * {@code key}, otherwise the lower one. Presumably {@code slots} is ordered
 * by {@code firstKey} -- verify against the code that fills it.
 *
 * @param key      the key to locate
 * @param minBlock index of the lowest slot to consider
 * @return the chosen block, or {@code null} when there are no slots
 */
public IndexBlockInfo get(byte[] key, int minBlock) {
  if (slots.isEmpty()) {
    return null;
  }

  int lo = minBlock;
  int hi = slots.size() - 1;

  // shrink [lo, hi] until the two bounds are adjacent (or equal)
  while (hi - lo > 1) {
    int mid = lo + (hi - lo) / 2;
    if (CmpUtil.compare(slots.get(mid).firstKey, key) <= 0) {
      lo = mid;
    } else {
      hi = mid;
    }
  }

  IndexBlockInfo lower = slots.get(lo);
  IndexBlockInfo upper = slots.get(hi);

  // prefer the upper candidate whenever it starts at or before the key
  return (CmpUtil.compare(upper.firstKey, key) <= 0) ? upper : lower;
}
示例10: find
import org.lemurproject.galago.utility.CmpUtil; //导入依赖的package包/类
/**
 * Positions this iterator for {@code key}. If the key falls outside the
 * current block, the block chosen by the vocabulary is loaded first. The
 * block is then scanned linearly and the scan stops on the first key that
 * compares greater than or equal to {@code key}; if the block is exhausted
 * without such a key, {@code nextKey()} is called.
 *
 * @param key the key to find
 * @throws IOException declared by this method; presumably raised by block
 *         loading, key caching or nextKey() -- their bodies are not visible here
 */
@Override
public void find(byte[] key) throws IOException {
// if the key is not in this block: it sorts before the block's first key,
// or at/after the key recorded as nextSlotKey
if ((CmpUtil.compare(this.blockInfo.firstKey, key) > 0)
|| (CmpUtil.compare(key, this.blockInfo.nextSlotKey) >= 0)) {
// unrestricted vocabulary lookup (no minimum block), then switch to that block
VocabularyReader.IndexBlockInfo newBlock = vocabulary.get(key);
this.loadBlockHeader(newBlock);
}
// since we are 'finding' the key we can move backwards in the current block:
// when the key sorts before the key at the current position, restart at index 0
// NOTE(review): this reads keyCache[keyIndex] without first checking
// cacheKeyCount -- presumably a valid cached key is always present at
// keyIndex at this point; confirm against loadBlockHeader/cacheKeys
if (CmpUtil.compare(key, keyCache[keyIndex]) < 0) {
this.keyIndex = 0;
}
// now linearly scan the block to find the desired key
while (keyIndex < keyCount) {
// keep caching until the key at keyIndex is available in keyCache
while (keyIndex >= cacheKeyCount) {
this.cacheKeys();
}
if (CmpUtil.compare(keyCache[keyIndex], key) >= 0) {
// we have found or passed the desired key
return;
}
keyIndex++;
}
// if we got here - we have not yet found the correct key
// this function will ensure we are consistent
nextKey();
}
示例11: maxMinNormalizeTest
import org.lemurproject.galago.utility.CmpUtil; //导入依赖的package包/类
/**
 * Checks that max-min normalization of the scores 3.0, 2.0 and 1.0 yields
 * 1.0, 0.5 and 0.0 in the same positions.
 */
@Test
public void maxMinNormalizeTest() {
  List<ScoredTerm> input = Arrays.asList(
      new ScoredTerm("foo", 3.0),
      new ScoredTerm("bar", 2.0),
      new ScoredTerm("baz", 1.0));

  List<ScoredTerm> normalized = Scored.maxMinNormalize(input);

  // expected score at each position of the result
  double[] expected = {1.0, 0.5, 0.0};
  for (int i = 0; i < expected.length; i++) {
    assertEquals(expected[i], normalized.get(i).score, CmpUtil.epsilon);
  }
}
示例12: makeUniformTest
import org.lemurproject.galago.utility.CmpUtil; //导入依赖的package包/类
/**
 * Checks that makeUniform gives each of the three input terms a score of 1.0.
 */
@Test
public void makeUniformTest() {
  List<ScoredTerm> input = Arrays.asList(
      new ScoredTerm("foo", 3.0),
      new ScoredTerm("bar", 2.0),
      new ScoredTerm("baz", 1.0));

  List<ScoredTerm> normalized = Scored.makeUniform(input);

  // every position is expected to carry the same score
  for (int i = 0; i < 3; i++) {
    assertEquals(1.0, normalized.get(i).score, CmpUtil.epsilon);
  }
}
示例13: testCopyStream
import org.lemurproject.galago.utility.CmpUtil; //导入依赖的package包/类
/**
 * Copies six bytes from an in-memory input stream to an in-memory output
 * stream and checks that the copied bytes compare equal to the originals.
 */
@Test
public void testCopyStream() throws IOException {
  byte[] source = {0, 1, 2, 3, 4, 5};
  ByteArrayOutputStream sink = new ByteArrayOutputStream();

  StreamUtil.copyStream(new ByteArrayInputStream(source), sink);
  byte[] copied = sink.toByteArray();

  // both the ordering comparison and the equality check must agree
  assertEquals(0, CmpUtil.compare(source, copied));
  assertTrue(CmpUtil.equals(source, copied));
}
示例14: getIterator
import org.lemurproject.galago.utility.CmpUtil; //导入依赖的package包/类
/**
 * Creates an iterator for a query node. Only the "counts" operator is
 * handled.
 *
 * @param node the query node; its default parameter is stemmed as required
 *             and used as the lookup key
 * @return a BackgroundStatsIterator when the positioned key equals the
 *         stemmed term's bytes, otherwise {@code null}
 * @throws IOException declared by this method; presumably raised by the key
 *         iterator -- its body is not visible here
 * @throws UnsupportedOperationException for any operator other than "counts"
 */
@Override
public BaseIterator getIterator(Node node) throws IOException {
  // reject unsupported operators up front
  if (!node.getOperator().equals("counts")) {
    throw new UnsupportedOperationException(
        "Index doesn't support operator: " + node.getOperator());
  }

  String stem = stemAsRequired(node.getDefaultParameter());
  KeyIterator keys = new KeyIterator(reader);
  keys.findKey(ByteUtil.fromString(stem));

  // the positioned key must equal the stemmed term's bytes to count as a hit
  boolean exactMatch = CmpUtil.compare(keys.getKey(), ByteUtil.fromString(stem)) == 0;
  return exactMatch ? new BackgroundStatsIterator(keys) : null;
}
示例15: compareTo
import org.lemurproject.galago.utility.CmpUtil; //导入依赖的package包/类
/**
 * Orders scored documents by score first. On a tie, documents whose sources
 * are both non-null and different are ordered by source. Otherwise the
 * document fields decide, compared with the operands reversed
 * ({@code other.document} first).
 *
 * @param other the scored document to compare against
 * @return a negative, zero or positive value according to the ordering above
 */
@Override
public int compareTo(ScoredDocument other) {
  int byScore = CmpUtil.compare(score, other.score);
  if (byScore != 0) {
    return byScore;
  }

  // the source only participates when both sides have one and they differ
  boolean bothHaveSource = (source != null) && (other.source != null);
  if (bothHaveSource && !source.equals(other.source)) {
    return source.compareTo(other.source);
  }

  // note the reversed operand order for the document tie-break
  return CmpUtil.compare(other.document, document);
}