本文整理汇总了Java中org.apache.lucene.index.TermDocs.freq方法的典型用法代码示例。如果您正苦于以下问题:Java TermDocs.freq方法的具体用法?Java TermDocs.freq怎么用?Java TermDocs.freq使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.apache.lucene.index.TermDocs
的用法示例。
在下文中一共展示了TermDocs.freq方法的6个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: filter
import org.apache.lucene.index.TermDocs; //导入方法依赖的package包/类
/**
 * Tests whether a term's total frequency across all documents in the index
 * reaches {@code minFreq}.
 *
 * @param t term to test
 * @return {@code false} when the summed per-document frequency is below
 *         {@code minFreq}; {@code true} otherwise — including when an I/O
 *         error occurs (errors are logged and the term passes the filter)
 */
public boolean filter(Term t)
{
    int freq = 0;
    TermDocs tDocs = null;
    try {
        tDocs = indexReader.termDocs(t);
        while (tDocs.next()) {
            freq += tDocs.freq();
        }
        if (freq < minFreq) {
            return false;
        }
    }
    catch (Exception e)
    {
        // NOTE(review): failure is swallowed and the term is accepted;
        // consider propagating or using a proper logger instead of stderr.
        e.printStackTrace();
    }
    finally {
        // TermDocs holds index resources and must be closed explicitly.
        if (tDocs != null) {
            try {
                tDocs.close();
            } catch (Exception ignored) {
                // best-effort close; nothing useful to do here
            }
        }
    }
    return true;
}
示例2: getTermPostings
import org.apache.lucene.index.TermDocs; //导入方法依赖的package包/类
/**
 * Returns the postings of a given term in the "content" field.
 *
 * <p>Uses a single {@link TermPositions} enumerator ({@code TermPositions}
 * extends {@code TermDocs}), so doc id, frequency and positions stay aligned
 * per document. The original dual-enumerator version drained every document's
 * positions into the first posting, producing misaligned results.
 *
 * @param term term text to look up
 * @return list of postings (doc id, frequency, positions) for the term;
 *         partial results if an I/O error occurs mid-iteration
 */
@Override
public List<Posting> getTermPostings(String term) {
    ArrayList<Posting> postingList = new ArrayList<>();
    try {
        TermPositions termPositions = ireader.termPositions(new Term("content", term));
        try {
            while (termPositions.next()) {
                int docId = termPositions.doc();
                int freq = termPositions.freq();
                ArrayList<Long> positions = new ArrayList<>(freq);
                // Each matching document carries exactly freq positions.
                for (int i = 0; i < freq; i++) {
                    positions.add((long) termPositions.nextPosition());
                }
                postingList.add(new Posting(docId + "", freq, positions));
            }
        } finally {
            // Release the underlying index resources.
            termPositions.close();
        }
    } catch (IOException ex) {
        Logger.getLogger(LuceneIndexing.class.getName()).log(Level.SEVERE, null, ex);
    }
    return postingList;
}
示例3: getTotalTermFreq
import org.apache.lucene.index.TermDocs; //导入方法依赖的package包/类
/**
 * Computes the total number of occurrences of a term across all documents
 * that contain it (the sum of per-document frequencies, not the document
 * count).
 *
 * @param reader index reader
 * @param term   term whose total frequency is wanted
 * @return total frequency of the term over all matching documents
 * @throws Exception if reading the index fails
 */
public static long getTotalTermFreq(IndexReader reader, Term term) throws Exception {
    long totalTermFreq = 0;
    TermDocs docs = reader.termDocs(term);
    try {
        while (docs.next()) {
            totalTermFreq += docs.freq();
        }
    } finally {
        // The enumerator holds index resources; close it even on failure.
        docs.close();
    }
    return totalTermFreq;
}
示例4: accept
import org.apache.lucene.index.TermDocs; //导入方法依赖的package包/类
/**
 * Accumulates the enumerated term's per-document frequencies into the
 * {@code freq} field.
 *
 * @param termDocs postings enumerator; {@code null} is treated as empty
 * @throws IOException if advancing the enumerator fails
 */
@Override
public void accept(TermDocs termDocs) throws IOException {
    if (termDocs != null) {
        while (termDocs.next()) {
            freq += termDocs.freq();
        }
    }
}
示例5: dumpTermFreqs
import org.apache.lucene.index.TermDocs; //导入方法依赖的package包/类
/**
 * Writes one pipe-separated line per reportable term:
 * {@code field|docFreq|termFreq|text}.
 *
 * <p>For the "text" field, chunk documents are mapped to their main document
 * via {@code docNumMap} so chunk counts accumulate into the main document's
 * docFreq. Bi-grams, empty terms and special marker terms are skipped.
 *
 * @param indexReader reader over the index to dump
 * @param docNumMap   maps chunk doc numbers to main doc numbers
 * @param fields      index fields to iterate
 * @param out         destination writer (not closed by this method)
 * @throws IOException if reading the index or writing the output fails
 */
private static void dumpTermFreqs(IndexReader indexReader,
                                  DocNumMap docNumMap, String[] fields,
                                  Writer out)
    throws IOException
{
    TermDocs docs = indexReader.termDocs();
    try {
        // Iterate every field.
        for (int i = 0; i < fields.length; i++)
        {
            // Iterate all the terms for this field; the enum is positioned at
            // the first term of the field and runs until the field changes.
            TermEnum terms = indexReader.terms(new Term(fields[i], ""));
            try {
                while (terms.next())
                {
                    Term t = terms.term();
                    if (!t.field().equals(fields[i]))
                        break;
                    if (isSkippable(t.text()))
                        continue;

                    // Accumulate the total occurrences of the term in all
                    // documents. For the benefit of the 'text' field,
                    // accumulate chunk counts into the main document.
                    int prevMainDoc = -1;
                    int docFreq = 0;
                    int termFreq = 0;
                    docs.seek(terms);
                    while (docs.next())
                    {
                        int mainDoc = docs.doc();
                        if (t.field().equals("text"))
                            mainDoc = docNumMap.getDocNum(docs.doc());
                        if (mainDoc != prevMainDoc) {
                            ++docFreq;
                            prevMainDoc = mainDoc;
                        }
                        termFreq += docs.freq();
                    }

                    // Output the results.
                    out.write(
                        fields[i] + "|" + docFreq + "|" + termFreq + "|" + t.text() + "\n");
                } // while
            } finally {
                terms.close(); // one enum per field — close each before the next
            }
        } // for i
    } finally {
        docs.close(); // shared TermDocs enumerator
    }
}

/**
 * Returns true for terms that must not be reported: empty terms, bi-grams
 * (containing '~'), and terms that begin or end with the special field/
 * element/attribute marker characters (the normal, unmarked terms are also
 * present in the index).
 */
private static boolean isSkippable(String text)
{
    // Skip empty terms (there shouldn't be any though).
    if (text.length() == 0)
        return true;
    // Skip bi-grams.
    if (text.indexOf("~") >= 0)
        return true;
    // Skip special start/end of field marks and element/attribute markers.
    char c = text.charAt(0);
    if (c == Constants.FIELD_START_MARKER ||
        c == Constants.ELEMENT_MARKER ||
        c == Constants.ATTRIBUTE_MARKER)
    {
        return true;
    }
    c = text.charAt(text.length() - 1);
    return c == Constants.FIELD_END_MARKER ||
           c == Constants.ELEMENT_MARKER ||
           c == Constants.ATTRIBUTE_MARKER;
}
示例6: getCollectionInfo
import org.apache.lucene.index.TermDocs; //导入方法依赖的package包/类
/**
 * Builds the term/document occurrence relation for the collection indexed
 * at {@code defaultIndexLSIPath}.
 *
 * <p>NOTE(review): documents appear to be stored in pairs — even doc ids are
 * read for metadata (name/filepath) and odd doc ids carry the term
 * frequencies — TODO confirm against the code that builds this index.
 *
 * <p>NOTE(review): {@code reader}, {@code terms} and {@code docs} are never
 * closed, and the catch clause discards the original {@link IOException}
 * cause (only its message is kept) — consider fixing both.
 *
 * @return the document-per-term relation for the collection
 * @throws IndexException if no index exists at the configured directory or
 *         an error occurs while reading the collection's terms
 */
public CollectionInfo getCollectionInfo() throws IndexException {
try {
this.indexLSIPath = new File(defaultIndexLSIPath);
this.directory = FSDirectory.open(this.indexLSIPath);
if (IndexReader.indexExists(this.directory)) {
// verify that an index exists in the specified directory
this.reader = IndexReader.open(this.directory);
TermEnum terms = this.reader.terms(); // fetch all terms of the collection's index
Map<TermInfo, List<DocTermInfo>> termsMap = new HashMap<TermInfo, List<DocTermInfo>>();
List<DocTermInfo> list;
Term termItem;
TermDocs docs;
int docsCount = 0, termsCount = 0;
docs = this.reader.termDocs();
Document doc;
List<String> termsList = new ArrayList<String>();
Set<Integer> docsIds = new HashSet<Integer>();
docsCount = this.reader.numDocs();
Map<Integer, Integer> docsMap = new HashMap<Integer, Integer>();
List<DocInfo> docInfoList = new ArrayList<DocInfo>(docsCount);
String name, filePath;
int index = 0;
// Walk the even-numbered documents for metadata; map the following odd
// doc id (i + 1) to its position in docInfoList.
for (int i = 0; i < docsCount; i += 2) {
doc = this.reader.document(i);
name = doc.get("name");
filePath = doc.get("filepath");
docInfoList.add(new DocInfo(name, filePath));
docsMap.put(i + 1, index);
index++;
}
// Drop the mapping past the last real document (created when docsCount is odd).
docsMap.remove(docsCount + 1);
while (terms.next()) {
termItem = terms.term();
list = new ArrayList<DocTermInfo>();
docs = this.reader.termDocs(termItem);
while (docs.next()) {
int docNum = docs.doc();
// Only odd doc ids carry term statistics — see class note above.
if (!(docNum % 2 == 0)) {
doc = this.reader.document(docNum);
int termFreq = docs.freq();
list.add(new DocTermInfo(docsMap.get(docNum), termFreq));
docsIds.add(docNum);
}
}
// Record the term only if it occurred in at least one stats document.
if (!list.isEmpty()) {
termsMap.put(new TermInfo(termsCount, termItem.text(), reader.docFreq(termItem)), list);
termsList.add(termItem.text());
termsCount++;
}
}
return new CollectionInfo(termsMap, "Apache Lucene", termsList, docInfoList, singularValue);
} else {
throw new IndexException("Index invalid. Not exist index in the directory: " + defaultIndexLSIPath);
}
} catch (IOException ex) {
// NOTE(review): wrapping only the message loses the stack trace/cause.
throw new IndexException(ex.getMessage());
}
}