本文整理汇总了Java中org.apache.lucene.index.MultiFields.getLiveDocs方法的典型用法代码示例。如果您正苦于以下问题:Java MultiFields.getLiveDocs方法的具体用法?Java MultiFields.getLiveDocs怎么用?Java MultiFields.getLiveDocs使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.apache.lucene.index.MultiFields
的用法示例。
在下文中一共展示了MultiFields.getLiveDocs方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: iterateAllDocs
import org.apache.lucene.index.MultiFields; //导入方法依赖的package包/类
/**
 * Returns an iterator over every live (non-deleted) document in the index.
 * <p>
 * If the reader reports no deletions ({@code getLiveDocs} returns null) the
 * raw all-docs iterator is returned directly; otherwise deleted documents
 * are filtered out via the live-docs bit set.
 */
private DocIdSetIterator iterateAllDocs()
{
    IndexReader reader = searcher.getIndexReader();
    final DocIdSetIterator everyDoc = DocIdSetIterator.all( reader.maxDoc() );
    final Bits live = MultiFields.getLiveDocs( reader );
    if ( live == null )
    {
        // No deletions: every doc id up to maxDoc is valid.
        return everyDoc;
    }
    return new FilteredDocIdSetIterator( everyDoc )
    {
        @Override
        protected boolean match( int doc )
        {
            // Keep only documents still marked alive.
            return live.get( doc );
        }
    };
}
示例2: testSearchSpeed
import org.apache.lucene.index.MultiFields; //导入方法依赖的package包/类
/**
 * Benchmarks average search latency for a given global feature class.
 * <p>
 * Re-indexes the test collection with the feature, then runs one search per
 * document whose file name appears in {@code queries}, printing the average
 * elapsed milliseconds per query.
 *
 * @param images       list of image paths (currently unused by the body;
 *                     kept for interface compatibility)
 * @param featureClass LIRE feature class used for indexing and searching
 * @throws IOException if the index cannot be written or read
 */
private void testSearchSpeed(ArrayList<String> images, final Class featureClass) throws IOException {
    parallelIndexer = new ParallelIndexer(8, indexPath, testExtensive, true) {
        @Override
        public void addBuilders(ChainedDocumentBuilder builder) {
            builder.addBuilder(new GenericDocumentBuilder(featureClass, "feature"));
        }
    };
    parallelIndexer.run();
    // Load the whole index into RAM so disk I/O does not skew the timing.
    IndexReader reader = DirectoryReader.open(new RAMDirectory(FSDirectory.open(new File(indexPath)), IOContext.READONCE));
    Bits liveDocs = MultiFields.getLiveDocs(reader);
    double queryCount = 0d;
    ImageSearcher searcher = new GenericFastImageSearcher(100, featureClass, "feature");
    long ms = System.currentTimeMillis();
    for (int i = 0; i < reader.maxDoc(); i++) {
        if (reader.hasDeletions() && !liveDocs.get(i)) continue; // if it is deleted, just ignore it.
        String fileName = getIDfromFileName(reader.document(i).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]);
        // containsKey avoids materializing the key set on every iteration.
        if (queries.containsKey(fileName)) {
            queryCount += 1d;
            // ok, we've got a query here for a document ...
            Document queryDoc = reader.document(i);
            // Result set intentionally ignored; only the elapsed time matters.
            ImageSearchHits hits = searcher.search(queryDoc, reader);
        }
    }
    ms = System.currentTimeMillis() - ms;
    // NOTE(review): prints Infinity if no query matched (queryCount == 0).
    System.out.printf("%s \t %3.1f \n", featureClass.getName().substring(featureClass.getName().lastIndexOf('.')+1), (double) ms / queryCount);
}
示例3: recount
import org.apache.lucene.index.MultiFields; //导入方法依赖的package包/类
/**
 * Internal utility: recount for a facet result node.
 *
 * @param fresNode result node to be recounted
 * @param docIds   full set of matching documents
 * @throws IOException If there is a low-level I/O error.
 */
private void recount(FacetResultNode fresNode, ScoredDocIDs docIds) throws IOException {
    // TODO (Facet): change from void to return the new, smaller docSet, and use
    // that for the children, as this will make their intersection ops faster.
    // can do this only when the new set is "sufficiently" smaller.

    // The category's path name is required for recounting. It may be missing
    // when the option to label only part of the facet results was exercised,
    // so compute it here anyway; in essence sampling with recounting spends
    // extra cycles labeling results whose labels were not requested.
    if (fresNode.label == null) {
        fresNode.label = taxonomyReader.getPath(fresNode.ordinal);
    }
    Term drillDown = DrillDownQuery.term(searchParams.indexingParams, fresNode.label);
    // TODO (Facet): avoid Multi*?
    Bits live = MultiFields.getLiveDocs(indexReader);
    fresNode.value = countIntersection(
            MultiFields.getTermDocsEnum(indexReader, live, drillDown.field(), drillDown.bytes(), 0),
            docIds.iterator());
}
示例4: getNext
import org.apache.lucene.index.MultiFields; //导入方法依赖的package包/类
/**
 * Advances the docs enumeration to the next live (non-deleted) document.
 *
 * @return {@code true} if a live document was found, {@code false} when the
 *         enumeration is exhausted
 * @throws RuntimeException wrapping any {@link IOException} from the reader
 */
private boolean getNext() {
    try {
        int next = docsEnum.nextDoc();
        if (next == DocIdSetIterator.NO_MORE_DOCS) {
            return false;
        }
        Bits liveDocs = MultiFields.getLiveDocs(reader);
        if (liveDocs != null) {
            // Skip over deleted documents. The original loop kept calling
            // liveDocs.get(docID()) without checking for exhaustion, which
            // indexes the bit set with NO_MORE_DOCS (Integer.MAX_VALUE) once
            // the enum runs out — an out-of-bounds access.
            while (next != DocIdSetIterator.NO_MORE_DOCS && !liveDocs.get(next)) {
                next = docsEnum.nextDoc();
            }
        }
        return next != DocIdSetIterator.NO_MORE_DOCS;
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
示例5: BabelMorphWordIterator
import org.apache.lucene.index.MultiFields; //导入方法依赖的package包/类
/**
 * Creates an iterator over the dictionary index, starting from the last
 * document index and honouring deletions via the live-docs bit set.
 *
 * @param dictionary searcher whose underlying index reader is iterated
 */
public BabelMorphWordIterator(IndexSearcher dictionary)
{
    IndexReader ir = dictionary.getIndexReader();
    this.reader = ir;
    this.liveDocs = MultiFields.getLiveDocs(ir);
    this.currentIndex = ir.numDocs() - 1;
}
示例6: ValueSourceScorer
import org.apache.lucene.index.MultiFields; //导入方法依赖的package包/类
/**
 * Scorer driven by per-document function values rather than a query.
 *
 * @param reader reader used to resolve maxDoc and live docs
 * @param values function values consulted per matching document
 */
protected ValueSourceScorer(IndexReader reader, FunctionValues values) {
    super(null);
    this.values = values;
    this.reader = reader;
    this.maxDoc = reader.maxDoc();
    // Enable deletion checking before caching the live-docs bit set.
    setCheckDeletes(true);
    this.liveDocs = MultiFields.getLiveDocs(reader);
}
示例7: DocumentInputIterator
import org.apache.lucene.index.MultiFields; //导入方法依赖的package包/类
/**
 * Creates an iterator over term, weight and payload fields from the lucene
 * index. Setting {@code withPayload} to false implies an iterator over
 * term and weight only.
 */
public DocumentInputIterator(boolean hasPayloads, boolean hasContexts) throws IOException {
    this.hasPayloads = hasPayloads;
    this.hasContexts = hasContexts;
    docCount = reader.maxDoc() - 1;
    // An empty reader has no leaves, and getLiveDocs would be meaningless.
    liveDocs = reader.leaves().isEmpty() ? null : MultiFields.getLiveDocs(reader);
    weightValues = (weightField == null) ? null : MultiDocValues.getNumericValues(reader, weightField);
    relevantFields = getRelevantFields(new String [] {field, weightField, payloadField, contextsField});
}
示例8: buildQnANetworkMap
import org.apache.lucene.index.MultiFields; //导入方法依赖的package包/类
/**
 * Builds the in-memory Q&amp;A network maps (post id -&gt; token,
 * parent id -&gt; post id, post id -&gt; user id) by scanning every live
 * document in the index.
 * <p>
 * Fix: {@code token} was previously declared outside the loop and
 * {@code postid2Tokens.put(postId, token)} executed unconditionally, so a
 * document without a valid {@code postid} stored a stale (or null) token
 * under key -1; the parent and user maps could likewise record -1 values.
 * All map insertions are now guarded by a valid post id.
 *
 * @throws IOException if the index cannot be read
 */
public void buildQnANetworkMap() throws IOException {
    IndexReader reader = dataSearcher.getIndexReader();
    Bits liveDocs = MultiFields.getLiveDocs(reader);
    for (int i = 0; i < reader.maxDoc(); i++) {
        if (liveDocs != null && !liveDocs.get(i))
            continue; // deleted document
        Document doc = reader.document(i);
        // Missing numeric fields are normalized to -1 (invalid id).
        long postId = doc.get("postid") != null ? Long.parseLong(doc.get("postid")) : -1;
        long parentId = doc.get("parentid") != null ? Long.parseLong(doc.get("parentid")) : -1;
        long userId = doc.get("userid") != null ? Long.parseLong(doc.get("userid")) : -1;
        String text = doc.get("searchableText");
        if (postId > 0) {
            Token token = new Token(postId, text);
            token.setFrequnecy(0);
            postid2Tokens.put(postId, token);
            if (parentId > 0) {
                parentId2postIds.put(parentId, postId);
            }
            if (userId > 0) {
                postId2userId.put(postId, userId);
            }
        }
    }
}
示例9: DocumentInputIterator
import org.apache.lucene.index.MultiFields; //导入方法依赖的package包/类
/**
* Creates an iterator over fields from the lucene index.
*
* @param indexReader
*/
public DocumentInputIterator(final IndexReader indexReader) throws IOException {
__indexReader = indexReader;
__docCount = __indexReader.maxDoc() - 1;
__liveDocs = (__indexReader.leaves().size() > 0) ? MultiFields.getLiveDocs(__indexReader) : null;
__fieldsToLoad = new HashSet<>();
__fieldsToLoad.add(SEARCH_FIELD_TITLE);
__fieldsToLoad.add(SEARCH_FIELD_DESCRIPTION);
__fieldNames = new ArrayList<String>(__fieldsToLoad);
__fieldCount = __fieldNames.size();
}
示例10: TfIdfSearcher
import org.apache.lucene.index.MultiFields; //导入方法依赖的package包/类
/**
 * Opens the index at {@code Indexer.INDEX_LOC} and prepares the default
 * TF-IDF similarity plus empty caches for inverse document frequencies and
 * per-term weight maps.
 *
 * @throws IOException if the index directory cannot be opened
 */
public TfIdfSearcher() throws IOException
{
    FSDirectory dir = FSDirectory.open(new File(Indexer.INDEX_LOC));
    indexReader = DirectoryReader.open(dir);
    liveDocs = MultiFields.getLiveDocs(indexReader);
    tfidfSIM = new DefaultSimilarity();
    inverseDocFreq = new HashMap<String, Float>();
    tf_Idf_Weights = new HashMap<String, HashMap<Integer, Float>>();
}
示例11: facetCountsTruth
import org.apache.lucene.index.MultiFields; //导入方法依赖的package包/类
/**
 * Builds the ground "truth" facet counts by enumerating ALL drill-down terms
 * in the index content.
 *
 * @return map from each category path found in the index to the number of
 *         live documents containing its drill-down term
 * @throws IOException If there is a low-level I/O error.
 */
protected Map<CategoryPath, Integer> facetCountsTruth() throws IOException {
    FacetIndexingParams iParams = getFacetIndexingParams(Integer.MAX_VALUE);
    // Drill-down terms join path components with this delimiter character.
    String delim = String.valueOf(iParams.getFacetDelimChar());
    Map<CategoryPath, Integer> res = new HashMap<CategoryPath, Integer>();
    HashSet<String> handledTerms = new HashSet<String>();
    for (CategoryListParams clp : iParams.getAllCategoryListParams()) {
        if (!handledTerms.add(clp.field)) {
            continue; // already handled this term (for another list)
        }
        Terms terms = MultiFields.getTerms(indexReader, clp.field);
        if (terms == null) {
            continue; // field has no indexed terms
        }
        Bits liveDocs = MultiFields.getLiveDocs(indexReader);
        TermsEnum te = terms.iterator(null);
        DocsEnum de = null;
        while (te.next() != null) {
            // Reuse the DocsEnum across terms; liveDocs filters deleted docs.
            de = _TestUtil.docs(random(), te, liveDocs, de, DocsEnum.FLAG_NONE);
            int cnt = 0;
            // Count every live document that contains this drill-down term.
            while (de.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
                cnt++;
            }
            // The term text is the delimiter-joined category path.
            res.put(new CategoryPath(te.term().utf8ToString().split(delim)), cnt);
        }
    }
    return res;
}
示例12: DocumentInputIterator
import org.apache.lucene.index.MultiFields; //导入方法依赖的package包/类
/**
 * Creates an iterator over term, weight and payload fields from the lucene
 * index. Setting {@code withPayload} to false implies an iterator over
 * term and weight only.
 */
public DocumentInputIterator(boolean hasPayloads) throws IOException {
    this.hasPayloads = hasPayloads;
    docCount = reader.maxDoc() - 1;
    // An empty reader has no leaves, and getLiveDocs would be meaningless.
    liveDocs = reader.leaves().isEmpty() ? null : MultiFields.getLiveDocs(reader);
    weightValues = (weightField == null) ? null : MultiDocValues.getNumericValues(reader, weightField);
    relevantFields = getRelevantFields(new String [] {field, weightField, payloadField});
}
示例13: main
import org.apache.lucene.index.MultiFields; //导入方法依赖的package包/类
/**
 * Streams selected stored fields from a Lucene index into a Solr
 * auto-complete core.
 * <p>
 * Usage: {@code <indexPath> <solrUrl> <indexField,acField> ...}
 * <p>
 * Fixes: the progress check previously tested {@code docs % 1000} (a
 * constant, the index's maxDoc) instead of the loop counter, so progress
 * either spammed or never printed; the {@code batch} set was dead code —
 * never populated, flushed while always empty — and has been removed.
 *
 * @param args index path, Solr URL, then one or more field mappings
 * @throws CorruptIndexException if the index is corrupt
 * @throws IOException           on index read failure
 * @throws SolrServerException   on Solr communication failure
 */
public static void main(String[] args) throws CorruptIndexException, IOException, SolrServerException {
    if (args.length < 3) {
        System.err.println("Usage: java -Dfile.encoding=UTF8 -Dclient.encoding.override=UTF-8 -Xmx256m -Xms256m -server " + IndexLoader.class.getName()
                + " </path/to/index> <AutoCompleteSolrUrl> <indexField1,acField1> [indexField2,acField2 ... ]");
        System.exit(0);
    }
    Map<String, String> fieldMap = getFieldMapping(args, 2);
    DirectoryReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(args[0])));
    int docs = reader.maxDoc();
    SolrClient solr = new ConcurrentUpdateSolrClient.Builder(args[1]).withQueueSize(10000).withThreadCount(2).build();
    Bits liveDocs = MultiFields.getLiveDocs(reader);
    // go through all docs in the index
    for (int i = 0; i < docs; i++) {
        // process doc only if not deleted
        if (liveDocs == null || liveDocs.get(i)) {
            SolrInputDocument doc = new SolrInputDocument();
            boolean phraseFieldEmpty = false;
            // loop through all fields to be looked at
            for (Map.Entry<String, String> mapping : fieldMap.entrySet()) {
                String indexField = mapping.getKey();
                String acField = mapping.getValue();
                IndexableField field = reader.document(i).getField(indexField);
                // stringValue() of the already-fetched field avoids a second
                // stored-document load per field.
                String value = field != null ? field.stringValue() : null;
                if (field != null && value != null && !value.isEmpty()) {
                    doc.addField(acField, value);
                } else if (acField.equalsIgnoreCase("phrase")) {
                    // Without a phrase the AC document is useless — skip it.
                    System.err.println("Since AC phrase field would be null, this doc will not be created: " + reader.document(i));
                    phraseFieldEmpty = true;
                    break;
                }
            }
            if (!phraseFieldEmpty) {
                solr.add(doc);
                if (i % 1000 == 0) { // was "docs % 1000": compared a constant
                    System.out.println("Docs: " + i);
                }
            }
        }
    }
    reader.close();
    System.out.println("Optimizing...");
    solr.optimize();
    solr.close();
}
示例14: getDocuments
import org.apache.lucene.index.MultiFields; //导入方法依赖的package包/类
/**
 * Returns the list of documents, as string arrays, stored in the index for
 * a particular language. Used by the SEM GUI graphical interface.
 *
 * @param language language whose documents are requested
 * @return list of string rows to populate the GUI JTable; each row holds
 *         [uuid, body, text, level1..level6, extra], with the extra column
 *         initialized to the empty string
 */
public List<String[]> getDocuments(String language) {
    List<String[]> rows = new ArrayList<>();
    try {
        String index = getIndexFolder(language);
        File fIndex = new File(index);
        if (fIndex.exists()) {
            IndexReader reader = DirectoryReader.open(getFolderDir(index));
            final LeafReader ar = SlowCompositeReaderWrapper.wrap(reader);
            Bits liveDocs = MultiFields.getLiveDocs(reader);
            final int maxdoc = reader.maxDoc();
            for (int i = 0; i < maxdoc; i++) {
                // Skip deleted docs (liveDocs is null when there are none).
                if (liveDocs != null && !liveDocs.get(i)) {
                    continue;
                }
                Document doc = ar.document(i);
                String[] row = new String[10];
                row[9] = "";
                row[0] = doc.get(IndexManager.UUID);
                row[1] = doc.get(IndexManager.BODY);
                row[2] = doc.get(IndexManager.TEXT);
                // Category levels form a hierarchy: a deeper level is read
                // only when every ancestor level is present. Values are
                // interned because category names repeat across documents.
                String level1 = (String) intern.intern(doc.get(IndexManager.LEVEL1_NAME));
                row[3] = level1;
                if (level1 != null) {
                    String level2 = (String) intern.intern(doc.get(IndexManager.LEVEL2_NAME));
                    if (level2 != null) {
                        row[4] = level2;
                        String level3 = (String) intern.intern(doc.get(IndexManager.LEVEL3_NAME));
                        if (level3 != null) {
                            row[5] = level3;
                            String level4 = (String) intern.intern(doc.get(IndexManager.LEVEL4_NAME));
                            if (level4 != null) {
                                row[6] = level4;
                                String level5 = (String) intern.intern(doc.get(IndexManager.LEVEL5_NAME));
                                if (level5 != null) {
                                    row[7] = level5;
                                    String level6 = (String) intern.intern(doc.get(IndexManager.LEVEL6_NAME));
                                    if (level6 != null) {
                                        row[8] = level6;
                                    }
                                }
                            }
                        }
                    }
                }
                if (i % 1000 == 0) {
                    LogGui.info("Read Progress... " + i);
                }
                rows.add(row);
            }
            reader.close();
        }
    } catch (Exception e) {
        // Best-effort: log and return whatever rows were collected so far.
        LogGui.printException(e);
    }
    return rows;
}
示例15: getDocumentsExcel
import org.apache.lucene.index.MultiFields; //导入方法依赖的package包/类
/**
 * Populates an Excel sheet with the content of the index.
 *
 * @since 1.2
 * @param language     language of the index to export
 * @param sheetResults Excel sheet to populate
 * @param c1           values for the kpi1 column, keyed by document UUID
 * @param c2           values for the kpi2 column, keyed by document UUID
 */
public void getDocumentsExcel(String language, SXSSFSheet sheetResults, HashMap<String, String> c1, HashMap<String, String> c2) {
    try {
        // Row 0 is assumed to be the header; data rows start at 1.
        int rownum = 1;
        String index = getIndexFolder(language);
        IndexReader reader = DirectoryReader.open(getFolderDir(index));
        final LeafReader ar = SlowCompositeReaderWrapper.wrap(reader);
        Bits liveDocs = MultiFields.getLiveDocs(reader);
        final int maxdoc = reader.maxDoc();
        for (int i = 0; i < maxdoc; i++) {
            // Skip deleted docs (liveDocs is null when there are none).
            if (liveDocs != null && !liveDocs.get(i)) {
                continue;
            }
            Document doc = ar.document(i);
            SXSSFRow row = sheetResults.createRow(rownum++);
            String text = doc.get(IndexManager.TEXT);
            if (text == null) {
                text = "";
            }
            row.createCell(6).setCellValue(text);
            row.createCell(7).setCellValue(doc.get(IndexManager.BODY));
            String id = doc.get(IndexManager.UUID);
            // KPI columns are joined by document UUID; cells are written only
            // when a value exists for this document.
            String c1v = c1.get(id);
            String c2v = c2.get(id);
            if (c1v != null) {
                row.createCell(8).setCellValue(c1v);
            }
            if (c2v != null) {
                row.createCell(9).setCellValue(c2v);
            }
            // Category levels form a hierarchy: a deeper level is written
            // only when every ancestor level is present. Values are interned
            // because category names repeat across documents.
            String level1 = (String) intern.intern(doc.get(IndexManager.LEVEL1_NAME));
            row.createCell(0).setCellValue(level1);
            if (level1 != null) {
                String level2 = (String) intern.intern(doc.get(IndexManager.LEVEL2_NAME));
                if (level2 != null) {
                    row.createCell(1).setCellValue(level2);
                    String level3 = (String) intern.intern(doc.get(IndexManager.LEVEL3_NAME));
                    if (level3 != null) {
                        row.createCell(2).setCellValue(level3);
                        String level4 = (String) intern.intern(doc.get(IndexManager.LEVEL4_NAME));
                        if (level4 != null) {
                            row.createCell(3).setCellValue(level4);
                            String level5 = (String) intern.intern(doc.get(IndexManager.LEVEL5_NAME));
                            if (level5 != null) {
                                row.createCell(4).setCellValue(level5);
                                String level6 = (String) intern.intern(doc.get(IndexManager.LEVEL6_NAME));
                                if (level6 != null) {
                                    row.createCell(5).setCellValue(level6);
                                }
                            }
                        }
                    }
                }
            }
            if (i % 1000 == 0) {
                LogGui.info("Read Progress... " + i);
            }
        }
        reader.close();
    } catch (Exception e) {
        // Best-effort export: log and leave the sheet partially populated.
        LogGui.printException(e);
    }
}