This article collects typical usage examples of the Java method org.apache.lucene.index.MultiFields.getTermDocsEnum. If you are wondering what MultiFields.getTermDocsEnum does, how to call it, or what real-world uses look like, the curated examples below should help. You can also browse further usage examples of the enclosing class, org.apache.lucene.index.MultiFields.
The following presents 5 code examples of MultiFields.getTermDocsEnum, sorted by popularity by default.
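Before the examples, here is a minimal standalone sketch of the basic call pattern (not taken from the collected examples). It assumes the Lucene 5.x/6.x signature getTermDocsEnum(IndexReader, String field, BytesRef term), which returns a PostingsEnum and is what Examples 1–3 use; Examples 4 and 5 were written against the older Lucene 4.x overload that additionally takes a Bits liveDocs argument and a flags int and returns a DocsEnum. The index path, field name, and term below are placeholders.

import java.io.IOException;
import java.nio.file.Paths;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.BytesRef;

public class GetTermDocsEnumSketch {
    public static void main(String[] args) throws IOException {
        // "/path/to/index", "body" and "lucene" are placeholder values
        try (Directory dir = FSDirectory.open(Paths.get("/path/to/index"));
             IndexReader reader = DirectoryReader.open(dir)) {
            // Returns null when the term does not occur in the field, so always null-check
            PostingsEnum postings = MultiFields.getTermDocsEnum(reader, "body", new BytesRef("lucene"));
            if (postings == null) {
                System.out.println("term not found");
                return;
            }
            // Walk the postings list: one entry per document that contains the term
            while (postings.nextDoc() != PostingsEnum.NO_MORE_DOCS) {
                System.out.printf("doc=%d freq=%d%n", postings.docID(), postings.freq());
            }
        }
    }
}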
Example 1: getFeatures
import org.apache.lucene.index.MultiFields; // import the package/class this method depends on
static double[] getFeatures(IndexReader ir, String fieldName, BytesRef rawPhrase, int docId, int docSize, int numDocs, boolean inc)
        throws IOException {
    // assumes rawPhrase occurs somewhere in the field; getTermDocsEnum returns null otherwise
    PostingsEnum de = MultiFields.getTermDocsEnum(ir, fieldName, rawPhrase);
    int ret = de.advance(docId);
    if (ret == PostingsEnum.NO_MORE_DOCS) {
        throw new RuntimeException("no more docs...");
    } else {
        int freq = de.freq();
        // discard phrases that occur only once in the document
        if (freq < 2) return null;
        PostingsEnum pe = MultiFields.getTermPositionsEnum(ir, fieldName, rawPhrase);
        int ret2 = pe.advance(docId);
        if (ret2 == PostingsEnum.NO_MORE_DOCS) {
            throw new RuntimeException("no more docs...");
        } else {
            double[] features = new double[2];
            int pos = pe.nextPosition(); // first position of the phrase in the doc
            int docFreq = ir.docFreq(new Term(fieldName, rawPhrase));
            if (inc) {
                // count one additional document (e.g. a document not yet in the index)
                docFreq++;
                numDocs++;
            }
            features[0] = Commons.calcTfIdf(freq, docSize, docFreq, numDocs);
            features[1] = Commons.calcFirstOccurrence(pos, docSize);
            return features;
        }
    }
}
Example 2: getDoc
import org.apache.lucene.index.MultiFields; // import the package/class this method depends on
private Document getDoc(String s, IndexReader reader) throws IOException {
    //TODO: normalize s?
    BytesRef bytesRef = new BytesRef(s);
    PostingsEnum docsEnum = MultiFields.getTermDocsEnum(reader,
            SyntacticSynsConfig.getSynsTargetFieldName(), bytesRef);
    if (docsEnum == null) {
        // couldn't find search term
        return null;
    }
    int i = 0;
    int tmpDocID = docsEnum.nextDoc();
    int docID = -1;
    while (tmpDocID != PostingsEnum.NO_MORE_DOCS) {
        docID = tmpDocID;
        tmpDocID = docsEnum.nextDoc();
        i++;
    }
    if (i > 1) {
        //TODO: log or do something -- "there should only be one key term!"
    }
    if (docID > -1) {
        System.out.println(docID);
        return reader.document(docID);
    }
    return null;
}
Example 3: buildModel
import org.apache.lucene.index.MultiFields; // import the package/class this method depends on
static KEAModel buildModel(Map<String, Set<String>> knownKeyphrases) throws IOException {
    Directory indexDir = Commons.getLuceneDirectory(LUCENE_INDEX_DIR);
    IndexReader ir = DirectoryReader.open(indexDir);
    KEAModel model = new KEAModel(ir, knownKeyphrases);
    try {
        for (int n = 1; n <= 3; n++) {
            System.out.printf("%s : building %d-gram model\n", new Date().toString(), n);
            String fieldName = Commons.getFieldName(FIELD_NAME, n);
            Terms terms = MultiFields.getTerms(ir, fieldName);
            TermsEnum te = terms.iterator();
            for (BytesRef rawPhrase = te.next(); rawPhrase != null; rawPhrase = te.next()) {
                String phrase = rawPhrase.utf8ToString();
                // use KEAStopFilter instead
                //if(stopWords(phrase, n)) continue;
                //System.out.printf("%s ", phrase);
                PostingsEnum de = MultiFields.getTermDocsEnum(ir, fieldName, rawPhrase);
                while (de.nextDoc() != PostingsEnum.NO_MORE_DOCS) {
                    int docId = de.docID();
                    int freq = de.freq();
                    // Consider only terms that occur more than once in the document.
                    // The KEA paper says: "To reduce the size of the training set, we
                    // discard any phrase that occurs only once in the document."
                    if (freq > 1) {
                        PostingsEnum pe = MultiFields.getTermPositionsEnum(ir, fieldName, rawPhrase);
                        int ret = pe.advance(docId);
                        if (ret == PostingsEnum.NO_MORE_DOCS) {
                            System.out.printf("(NO_MORE_DOCS) %d\n", docId);
                        } else {
                            // get the first position of the term in the doc (first occurrence)
                            int pos = pe.nextPosition();
                            model.add(docId, fieldName, phrase, freq, pos);
                        }
                    }
                }
            }
        }
    } finally {
        IOUtils.closeWhileHandlingException(ir);
    }
    return model;
}
Example 4: getOrdinal
import org.apache.lucene.index.MultiFields; // import the package/class this method depends on
@Override
public int getOrdinal(FacetLabel cp) throws IOException {
    ensureOpen();
    if (cp.length == 0) {
        return ROOT_ORDINAL;
    }
    // First try to find the answer in the LRU cache:
    synchronized (ordinalCache) {
        Integer res = ordinalCache.get(cp);
        if (res != null) {
            if (res.intValue() < indexReader.maxDoc()) {
                // Since the cache is shared with DTR instances allocated from
                // doOpenIfChanged, we need to ensure that the ordinal is one that
                // this DTR instance recognizes.
                return res.intValue();
            } else {
                // If we get here, the category was found in the cache but is not
                // recognized by this TR instance. Therefore there's no need to
                // continue searching for the path on disk, because we won't find
                // it there either.
                return TaxonomyReader.INVALID_ORDINAL;
            }
        }
    }
    // If we're still here, we have a cache miss. We need to fetch the
    // value from disk, and then also put it in the cache:
    int ret = TaxonomyReader.INVALID_ORDINAL;
    DocsEnum docs = MultiFields.getTermDocsEnum(indexReader, null, Consts.FULL,
            new BytesRef(FacetsConfig.pathToString(cp.components, cp.length)), 0);
    if (docs != null && docs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
        ret = docs.docID();
        // We only store the fact that a category exists, not its nonexistence.
        // This is required because the caches are shared with new DTR instances
        // that are allocated from doOpenIfChanged. Therefore, if we only store
        // information about found categories, we cannot accidentally tell a new
        // generation of DTR that a category does not exist.
        synchronized (ordinalCache) {
            ordinalCache.put(cp, Integer.valueOf(ret));
        }
    }
    return ret;
}
Example 5: getOrdinal
import org.apache.lucene.index.MultiFields; // import the package/class this method depends on
@Override
public int getOrdinal(CategoryPath cp) throws IOException {
    ensureOpen();
    if (cp.length == 0) {
        return ROOT_ORDINAL;
    }
    // First try to find the answer in the LRU cache:
    synchronized (ordinalCache) {
        Integer res = ordinalCache.get(cp);
        if (res != null) {
            if (res.intValue() < indexReader.maxDoc()) {
                // Since the cache is shared with DTR instances allocated from
                // doOpenIfChanged, we need to ensure that the ordinal is one that
                // this DTR instance recognizes.
                return res.intValue();
            } else {
                // If we get here, the category was found in the cache but is not
                // recognized by this TR instance. Therefore there's no need to
                // continue searching for the path on disk, because we won't find
                // it there either.
                return TaxonomyReader.INVALID_ORDINAL;
            }
        }
    }
    // If we're still here, we have a cache miss. We need to fetch the
    // value from disk, and then also put it in the cache:
    int ret = TaxonomyReader.INVALID_ORDINAL;
    DocsEnum docs = MultiFields.getTermDocsEnum(indexReader, null, Consts.FULL,
            new BytesRef(cp.toString(delimiter)), 0);
    if (docs != null && docs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
        ret = docs.docID();
        // We only store the fact that a category exists, not its nonexistence.
        // This is required because the caches are shared with new DTR instances
        // that are allocated from doOpenIfChanged. Therefore, if we only store
        // information about found categories, we cannot accidentally tell a new
        // generation of DTR that a category does not exist.
        synchronized (ordinalCache) {
            ordinalCache.put(cp, Integer.valueOf(ret));
        }
    }
    return ret;
}