当前位置: 首页>>代码示例>>Java>>正文


Java TermDocs.freq方法代码示例

本文整理汇总了Java中org.apache.lucene.index.TermDocs.freq方法的典型用法代码示例。如果您正苦于以下问题:Java TermDocs.freq方法的具体用法?Java TermDocs.freq怎么用?Java TermDocs.freq使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在org.apache.lucene.index.TermDocs的用法示例。


在下文中一共展示了TermDocs.freq方法的6个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: filter

import org.apache.lucene.index.TermDocs; //导入方法依赖的package包/类
/**
 * Decides whether a term occurs often enough to be kept.
 *
 * Sums the per-document frequencies of {@code t} as reported by
 * {@code indexReader.termDocs(t)} and compares the total against
 * {@code minFreq}.
 *
 * @param t the term whose total frequency is checked
 * @return {@code false} when the summed frequency is below {@code minFreq};
 *         {@code true} otherwise, including when reading the index fails
 *         (the failure is printed and the term is accepted)
 */
public boolean filter(Term t) 
{
	int freq = 0;

	try {
		TermDocs tDocs = indexReader.termDocs(t);
		try {
			// Stop scanning as soon as the threshold is reached; the exact
			// total is not needed to answer the question.
			while( freq < minFreq && tDocs.next() ){
				freq += tDocs.freq();
			}
		}
		finally {
			// Release the enumerator's underlying index resources.
			tDocs.close();
		}
	}
	catch(Exception e)
	{
		// Best effort: an unreadable index must not reject the term.
		e.printStackTrace();
		return true;
	}

	return freq >= minFreq;
}
 
开发者ID:semanticvectors,项目名称:semanticvectors,代码行数:22,代码来源:TermFreqFilter.java

示例2: getTermPostings

import org.apache.lucene.index.TermDocs; //导入方法依赖的package包/类
/**
 * Returns the postings of a given term in the "content" field.
 *
 * One {@code Posting} is produced per document containing the term. It holds
 * the document number (as a string), the term frequency in that document and
 * the positions at which the term occurs in that document.
 *
 * @param term text of the term whose postings are requested
 * @return list of postings for the term; empty if the term is absent, and
 *         possibly partial if an I/O error (which is logged) interrupts reading
 */
@Override
public List<Posting> getTermPostings(String term) {
    ArrayList<Posting> postingList = new ArrayList<>();
    try {
        // TermPositions already exposes doc() and freq(), so a single
        // enumerator keeps documents and positions in lock-step.
        TermPositions termPositions = ireader.termPositions(new Term("content", term));
        try {
            while (termPositions.next()) {
                int docId = termPositions.doc();
                int freq = termPositions.freq();

                // nextPosition() may be called at most freq() times for the
                // current document; advancing with next() here instead would
                // skip to other documents and mix their positions together.
                ArrayList<Long> positions = new ArrayList<>(freq);
                for (int i = 0; i < freq; i++) {
                    positions.add((long) termPositions.nextPosition());
                }

                postingList.add(new Posting(docId + "", freq, positions));
            }
        } finally {
            termPositions.close();
        }
    } catch (IOException ex) {
        Logger.getLogger(LuceneIndexing.class.getName()).log(Level.SEVERE, null, ex);
    }
    return postingList;
}
 
开发者ID:garnachod,项目名称:mineria2,代码行数:34,代码来源:LuceneIndexing.java

示例3: getTotalTermFreq

import org.apache.lucene.index.TermDocs; //导入方法依赖的package包/类
/**
 * Computes the total number of occurrences of a term across all documents
 * that contain it (the sum of in-document frequencies, not the number of
 * documents).
 *
 * @param reader index reader to query
 * @param term   term to count
 * @return total frequency of the term (sum of its frequency in every document)
 * @throws Exception if the index cannot be read
 */
public static long getTotalTermFreq(IndexReader reader, Term term) throws Exception {
    long totalTermFreq = 0;
    TermDocs docs = reader.termDocs(term);
    try {
        while (docs.next()) {
            totalTermFreq += docs.freq();
        }
    } finally {
        // Always release the enumerator, even if iteration fails part-way.
        docs.close();
    }
    return totalTermFreq;
}
 
开发者ID:debop,项目名称:lucene-korean,代码行数:17,代码来源:HighFreqTerms.java

示例4: accept

import org.apache.lucene.index.TermDocs; //导入方法依赖的package包/类
/**
 * Adds the frequency of every remaining entry of the given enumeration to
 * {@code freq}, which is declared outside this method.
 *
 * @param termDocs enumeration to consume; {@code null} is ignored
 * @throws IOException if advancing the enumeration fails
 */
@Override
public void accept(TermDocs termDocs) throws IOException {
	if (termDocs != null) {
		for (boolean hasEntry = termDocs.next(); hasEntry; hasEntry = termDocs.next()) {
			freq += termDocs.freq();
		}
	}
}
 
开发者ID:jaeksoft,项目名称:opensearchserver,代码行数:8,代码来源:SuggestionItem.java

示例5: dumpTermFreqs

import org.apache.lucene.index.TermDocs; //导入方法依赖的package包/类
/**
 * Writes one line per qualifying term of each requested field, in the form
 * {@code field|docFreq|termFreq|text}.
 *
 * Terms containing "~", empty terms, and terms that start or end with one of
 * the special marker characters are skipped. For the "text" field, document
 * numbers are first mapped through {@code docNumMap} so that consecutive
 * entries mapping to the same main document are counted as one document.
 *
 * @param indexReader reader for the index being dumped
 * @param docNumMap   maps a document number to its main document number
 *                    (used for the "text" field only)
 * @param fields      names of the fields whose terms are dumped
 * @param out         destination for the output lines
 * @throws IOException if reading the index or writing the output fails
 */
private static void dumpTermFreqs(IndexReader indexReader,
                                  DocNumMap docNumMap, String[] fields,
                                  Writer out)
  throws IOException 
{
  TermDocs docs = indexReader.termDocs();
  try 
  {
    // Iterate every field.
    for (int i = 0; i < fields.length; i++) 
    {
      // Iterate all the terms for this field. The enumeration returned by
      // terms(Term) is already positioned on the first term at or after the
      // one given, so that term must be examined BEFORE calling next();
      // otherwise the first term of every field would be skipped.
      TermEnum terms = indexReader.terms(new Term(fields[i], ""));
      try 
      {
        do 
        {
          Term t = terms.term();

          // A null term means the enumeration is exhausted; a different
          // field means we have run past this field's terms.
          if (t == null || !t.field().equals(fields[i]))
            break;

          // Skip bi-grams
          String text = t.text();
          if (text.indexOf("~") >= 0)
            continue;

          // Skip empty terms (there shouldn't be any though) 
          if (text.length() == 0)
            continue;

          // Skip special start/end of field marks (normal terms will also
          // be present, without the marks.) Also skip element and attribute
          // markers.
          //
          char c = text.charAt(0);
          if (c == Constants.FIELD_START_MARKER ||
              c == Constants.ELEMENT_MARKER ||
              c == Constants.ATTRIBUTE_MARKER) 
          {
            continue;
          }

          c = text.charAt(text.length() - 1);
          if (c == Constants.FIELD_END_MARKER ||
              c == Constants.ELEMENT_MARKER ||
              c == Constants.ATTRIBUTE_MARKER) 
          {
            continue;
          }

          // Okay, we have a live one. Accumulate the total occurrences of 
          // the term in all documents. For the benefit of the 'text' field,
          // accumulate chunk counts into the main document.
          //
          int prevMainDoc = -1;
          int docFreq = 0;
          docs.seek(terms);
          int termFreq = 0;
          while (docs.next()) 
          {
            int mainDoc = docs.doc();
            if (t.field().equals("text"))
              mainDoc = docNumMap.getDocNum(docs.doc());
            if (mainDoc != prevMainDoc) {
              ++docFreq;
              prevMainDoc = mainDoc;
            }
            termFreq += docs.freq();
          }

          // Output the results.
          out.write(
            fields[i] + "|" + docFreq + "|" + termFreq + "|" + t.text() + "\n");
        } while (terms.next());
      }
      finally {
        // One enumeration is opened per field; release it before moving on.
        terms.close();
      }
    } // for i
  }
  finally {
    docs.close();
  }
}
 
开发者ID:CDLUC3,项目名称:dash-xtf,代码行数:74,代码来源:IndexDump.java

示例6: getCollectionInfo

import org.apache.lucene.index.TermDocs; //导入方法依赖的package包/类
/**
 * Builds the term-occurrence information for the index stored at
 * {@code defaultIndexLSIPath}.
 *
 * A {@code DocInfo} (stored fields "name" and "filepath") is created for
 * every even document number {@code i}, and document number {@code i + 1} is
 * mapped to that entry's position in the list. Then, for every term in the
 * index, the postings that fall on odd document numbers are collected with
 * their frequencies. Terms with at least one such posting are recorded.
 *
 * @return the documents-per-term relation for the collection
 *
 * @throws IndexException if no index exists in the directory, or if an I/O
 * error occurs while reading the terms of the collection
 */
public CollectionInfo getCollectionInfo() throws IndexException {
    try {
        this.indexLSIPath = new File(defaultIndexLSIPath);
        this.directory = FSDirectory.open(this.indexLSIPath);
        // Verify that an index exists in the specified directory.
        if (!IndexReader.indexExists(this.directory)) {
            throw new IndexException("Index invalid. Not exist index in the directory: " + defaultIndexLSIPath);
        }

        // NOTE(review): the reader is kept in a field and is not closed here;
        // presumably it is released elsewhere in the class. TODO confirm.
        this.reader = IndexReader.open(this.directory);

        Map<TermInfo, List<DocTermInfo>> termsMap = new HashMap<TermInfo, List<DocTermInfo>>();
        List<String> termsList = new ArrayList<String>();
        int termsCount = 0;

        int docsCount = this.reader.numDocs();
        Map<Integer, Integer> docsMap = new HashMap<Integer, Integer>();
        List<DocInfo> docInfoList = new ArrayList<DocInfo>(docsCount);
        int index = 0;
        for (int i = 0; i < docsCount; i += 2) {
            Document doc = this.reader.document(i);
            String name = doc.get("name");
            String filePath = doc.get("filepath");
            docInfoList.add(new DocInfo(name, filePath));
            // The odd-numbered document that follows shares this entry.
            docsMap.put(i + 1, index);
            index++;
        }

        // NOTE(review): keys inserted above never exceed docsCount, so this
        // removal appears to have no effect; kept to preserve behaviour.
        docsMap.remove(docsCount + 1);

        // Enumerate every term in the index of the collection.
        TermEnum terms = this.reader.terms();
        try {
            while (terms.next()) {
                Term termItem = terms.term();
                List<DocTermInfo> list = new ArrayList<DocTermInfo>();

                TermDocs docs = this.reader.termDocs(termItem);
                try {
                    while (docs.next()) {
                        int docNum = docs.doc();
                        // Only odd-numbered documents carry the term data of interest.
                        if (docNum % 2 != 0) {
                            list.add(new DocTermInfo(docsMap.get(docNum), docs.freq()));
                        }
                    }
                } finally {
                    // A new enumerator is opened per term; release each one.
                    docs.close();
                }

                if (!list.isEmpty()) {
                    termsMap.put(new TermInfo(termsCount, termItem.text(), reader.docFreq(termItem)), list);
                    termsList.add(termItem.text());
                    termsCount++;
                }
            }
        } finally {
            terms.close();
        }

        return new CollectionInfo(termsMap, "Apache Lucene", termsList, docInfoList, singularValue);

    } catch (IOException ex) {
        throw new IndexException(ex.getMessage());
    }

}
 
开发者ID:jcrcano,项目名称:DrakkarKeel,代码行数:74,代码来源:LuceneContext.java


注:本文中的org.apache.lucene.index.TermDocs.freq方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。