本文整理汇总了Java中org.apache.lucene.index.MultiFields.getFields方法的典型用法代码示例。如果您正苦于以下问题:Java MultiFields.getFields方法的具体用法?Java MultiFields.getFields怎么用?Java MultiFields.getFields使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.apache.lucene.index.MultiFields
的用法示例。
在下文中一共展示了MultiFields.getFields方法的11个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: assertNormsEquals
import org.apache.lucene.index.MultiFields; //导入方法依赖的package包/类
/**
 * Verifies that norms are identical across all fields of two index readers.
 * If either reader has no postings at all, both must have none.
 *
 * @param info        context string included in assertion failure messages
 * @param leftReader  first reader to compare
 * @param rightReader second reader to compare
 * @throws IOException if norms cannot be read from either index
 */
public void assertNormsEquals(String info, IndexReader leftReader, IndexReader rightReader) throws IOException {
  Fields leftFields = MultiFields.getFields(leftReader);
  Fields rightFields = MultiFields.getFields(rightReader);
  // A null Fields means the reader has no postings; then both sides must be null.
  if (leftFields == null || rightFields == null) {
    assertNull(info, leftFields);
    assertNull(info, rightFields);
    return;
  }
  for (String field : leftFields) {
    NumericDocValues leftNorms = MultiDocValues.getNormValues(leftReader, field);
    NumericDocValues rightNorms = MultiDocValues.getNormValues(rightReader, field);
    if (leftNorms == null || rightNorms == null) {
      // Norms absent on one side must be absent on both.
      assertNull(info, leftNorms);
      assertNull(info, rightNorms);
    } else {
      assertDocValuesEquals(info, leftReader.maxDoc(), leftNorms, rightNorms);
    }
  }
}
示例2: getFirstMatch
import org.apache.lucene.index.MultiFields; //导入方法依赖的package包/类
/**
 * Returns the id of the single document containing term {@code t}, or -1
 * if the field, the term, or any matching document is absent. Asserts that
 * at most one live document matches.
 *
 * @param r reader over the index to search
 * @param t term to look up
 * @return matching document id, or -1 when there is no match
 * @throws IOException if the index cannot be read
 */
protected int getFirstMatch(IndexReader r, Term t) throws IOException {
  Fields fields = MultiFields.getFields(r);
  if (fields == null) {
    return -1;
  }
  Terms terms = fields.terms(t.field());
  if (terms == null) {
    return -1;
  }
  final TermsEnum termsEnum = terms.iterator(null);
  if (!termsEnum.seekExact(t.bytes())) {
    return -1;
  }
  DocsEnum docs = termsEnum.docs(MultiFields.getLiveDocs(r), null, DocsEnum.FLAG_NONE);
  int first = docs.nextDoc();
  if (first == DocIdSetIterator.NO_MORE_DOCS) {
    return -1;
  }
  // Uniqueness check: no second live document may carry this term.
  assertEquals(DocIdSetIterator.NO_MORE_DOCS, docs.nextDoc());
  return first;
}
示例3: LuceneUtils
import org.apache.lucene.index.MultiFields; //导入方法依赖的package包/类
/**
 * Builds a LuceneUtils instance over the Lucene index named in the flag
 * configuration, optionally loading stop-word and start-word lists.
 *
 * @param flagConfig Contains all information necessary for configuring LuceneUtils.
 *     {@link FlagConfig#luceneindexpath()} must be non-empty.
 * @throws IOException if the index directory cannot be opened or a word list fails to load
 * @throws IllegalArgumentException if no index path was configured
 */
public LuceneUtils(FlagConfig flagConfig) throws IOException {
if (flagConfig.luceneindexpath().isEmpty()) {
throw new IllegalArgumentException(
"-luceneindexpath is a required argument for initializing LuceneUtils instance.");
}
this.compositeReader = DirectoryReader.open(
FSDirectory.open(FileSystems.getDefault().getPath(flagConfig.luceneindexpath())));
this.leafReader = SlowCompositeReaderWrapper.wrap(compositeReader);
// NOTE(review): return value is discarded — presumably called to eagerly build
// the merged fields view over the composite reader; confirm this side effect
// is actually needed, otherwise the call can be removed.
MultiFields.getFields(compositeReader);
this.flagConfig = flagConfig;
// Stop-word and start-word lists are optional; load each only if configured.
if (!flagConfig.stoplistfile().isEmpty())
loadStopWords(flagConfig.stoplistfile());
if (!flagConfig.startlistfile().isEmpty())
loadStartWords(flagConfig.startlistfile());
VerbatimLogger.info("Initialized LuceneUtils from Lucene index in directory: " + flagConfig.luceneindexpath() + "\n");
VerbatimLogger.info("Fields in index are: " + String.join(", ", this.getFieldNames()) + "\n");
}
示例4: getIdfs
import org.apache.lucene.index.MultiFields; //导入方法依赖的package包/类
/**
 * Computes the inverse document frequency (idf) of every term in every field
 * of the given index, storing the results in {@code inverseDocFreq}.
 *
 * @param reader index to scan
 * @return map of term text to its inverse document frequency (the shared
 *         {@code inverseDocFreq} map, also populated as a side effect)
 * @throws IOException if the index cannot be read
 */
public Map<String, Float> getIdfs(IndexReader reader) throws IOException
{
    Fields fields = MultiFields.getFields(reader); // get the fields of the index
    if (fields == null) {
        // An index with no postings yields null; nothing to compute.
        // (The original dereferenced fields unconditionally and would NPE here.)
        return inverseDocFreq;
    }
    for (String field : fields)
    {
        TermsEnum termEnum = MultiFields.getTerms(reader, field).iterator(null);
        BytesRef bytesRef;
        while ((bytesRef = termEnum.next()) != null)
        {
            // next() has already positioned the enum on this term, so the
            // original seekExact(bytesRef) re-seek was redundant and removed.
            String term = bytesRef.utf8ToString();
            float idf = tfidfSIM.idf(termEnum.docFreq(), reader.numDocs());
            inverseDocFreq.put(term, idf);
            System.out.println(term +" idf= "+ idf);
        }
    }
    return inverseDocFreq;
}
示例5: getFirstMatch
import org.apache.lucene.index.MultiFields; //导入方法依赖的package包/类
/**
 * Looks up the single document containing term {@code t}. Returns -1 when
 * the field, the term, or any match is missing; asserts that no more than
 * one live document matches.
 *
 * @param r reader over the index to search
 * @param t term to look up
 * @return matching document id, or -1 when there is no match
 * @throws IOException if the index cannot be read
 */
protected int getFirstMatch(IndexReader r, Term t) throws IOException {
  Fields fields = MultiFields.getFields(r);
  if (fields == null) {
    return -1;
  }
  Terms terms = fields.terms(t.field());
  if (terms == null) {
    return -1;
  }
  final TermsEnum termsEnum = terms.iterator(null);
  if (!termsEnum.seekExact(t.bytes(), false)) {
    return -1;
  }
  DocsEnum docs = termsEnum.docs(MultiFields.getLiveDocs(r), null, DocsEnum.FLAG_NONE);
  int first = docs.nextDoc();
  if (first == DocIdSetIterator.NO_MORE_DOCS) {
    return -1;
  }
  // Uniqueness check: the term must occur in at most one live document.
  assertEquals(DocIdSetIterator.NO_MORE_DOCS, docs.nextDoc());
  return first;
}
示例6: generateFields
import org.apache.lucene.index.MultiFields; //导入方法依赖的package包/类
/**
 * Builds a {@link Fields} view over a small in-memory index in which every
 * requested field holds the same text, tokenized by a whitespace analyzer.
 * We deliberately keep this simple — the id text serves as the content of
 * each field — rather than pre-generating a full random document per item.
 *
 * @param fieldNames fields to populate
 * @param text       content stored in each field
 * @return fields view over the resulting one-document index
 * @throws IOException if the in-memory index cannot be read
 */
private static Fields generateFields(String[] fieldNames, String text) throws IOException {
  MemoryIndex memoryIndex = new MemoryIndex();
  for (String name : fieldNames) {
    memoryIndex.addField(name, text, new WhitespaceAnalyzer());
  }
  return MultiFields.getFields(memoryIndex.createSearcher().getIndexReader());
}
示例7: dummy
import org.apache.lucene.index.MultiFields; //导入方法依赖的package包/类
/**
 * Walks every term of the "field" field and records each term's total
 * frequency (via {@code reader.totalTermFreq}) into {@code TermFreqMap}.
 *
 * @throws IOException if the index cannot be read
 */
private void dummy() throws IOException {
    Fields fields = MultiFields.getFields(this.reader);
    Terms terms = fields.terms("field");
    TermsEnum iterator = terms.iterator(null);
    BytesRef byteRef;
    while ((byteRef = iterator.next()) != null) {
        // Lucene term bytes are UTF-8; decode explicitly instead of the original
        // new String(bytes, offset, length), which used the platform default
        // charset and could garble non-ASCII terms.
        String term = byteRef.utf8ToString();
        // NOTE(review): terms are enumerated from "field" but the frequency
        // lookup targets "tokens" — confirm this field mismatch is intentional.
        Term termInstance = new Term("tokens", term);
        long termFreq = this.reader.totalTermFreq(termInstance);
        this.TermFreqMap.put(term, termFreq);
        System.out.println(termFreq);
    }
}
示例8: searchGenesInVcfFiles
import org.apache.lucene.index.MultiFields; //导入方法依赖的package包/类
/**
 * Searches the feature indexes of the given VCF files for genes whose ID or
 * name starts with {@code gene} (case-insensitive). When {@code gene} is null
 * or empty, all gene IDs and names present in the indexes are returned instead.
 *
 * @param gene     gene prefix to search for; may be null or empty
 * @param vcfFiles VCF files whose feature indexes are searched
 * @return matching gene identifiers; empty set when there are no files, no
 *         documents, or the search fails (failures are logged and swallowed)
 * @throws IOException declared for index access
 */
public Set<String> searchGenesInVcfFiles(String gene, List<VcfFile> vcfFiles)
    throws IOException {
    if (CollectionUtils.isEmpty(vcfFiles)) {
        return Collections.emptySet();
    }
    Set<String> geneIds = new HashSet<>();
    SimpleFSDirectory[] indexes = fileManager.getIndexesForFiles(vcfFiles);
    try (MultiReader reader = openMultiReader(indexes)) {
        if (reader.numDocs() == 0) {
            return Collections.emptySet();
        }
        if (StringUtils.isEmpty(gene)) {
            // No prefix given: enumerate every gene id/name term in the index.
            Fields fields = MultiFields.getFields(reader);
            fetchTermValues(geneIds, fields, FeatureIndexFields.GENE_ID.getFieldName());
            fetchTermValues(geneIds, fields, FeatureIndexFields.GENE_NAME.getFieldName());
        } else {
            // Build the prefix query only when a prefix is actually present.
            // The original built it up front, dereferencing 'gene' before the
            // isEmpty check — an NPE whenever gene was null.
            BooleanQuery.Builder geneIdOrNameQuery = new BooleanQuery.Builder();
            geneIdOrNameQuery.add(new PrefixQuery(
                    new Term(FeatureIndexFields.GENE_ID.getFieldName(), gene.toLowerCase())),
                    BooleanClause.Occur.SHOULD);
            geneIdOrNameQuery.add(new PrefixQuery(
                    new Term(FeatureIndexFields.GENE_NAME.getFieldName(), gene.toLowerCase())),
                    BooleanClause.Occur.SHOULD);
            BooleanQuery.Builder builder = new BooleanQuery.Builder();
            builder.add(geneIdOrNameQuery.build(), BooleanClause.Occur.MUST);
            BooleanQuery query = builder.build();
            IndexSearcher searcher = new IndexSearcher(reader);
            final TopDocs docs = searcher.search(query, reader.numDocs());
            final ScoreDoc[] hits = docs.scoreDocs;
            geneIds = fetchGeneIds(hits, searcher);
        }
    } catch (IOException e) {
        LOGGER.error(getMessage(MessagesConstants.ERROR_FEATURE_INDEX_SEARCH_FAILED), e);
        return Collections.emptySet();
    }
    return geneIds;
}
示例9: testReadTokens
import org.apache.lucene.index.MultiFields; //导入方法依赖的package包/类
/**
 * Test ReadTokensTask: the token count reported by ReadTokens over the first
 * NUM_DOCS Reuters docs must equal the number of postings actually stored
 * when the same docs are indexed with the same analyzer.
 */
public void testReadTokens() throws Exception {
// We will call ReadTokens on this many docs
final int NUM_DOCS = 20;
// Read tokens from first NUM_DOCS docs from Reuters and
// then build index from the same docs
String algLines1[] = {
"# ----- properties ",
"analyzer=org.apache.lucene.analysis.core.WhitespaceAnalyzer",
"content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
"docs.file=" + getReuters20LinesFile(),
"# ----- alg ",
"{ReadTokens}: " + NUM_DOCS,
"ResetSystemErase",
"CreateIndex",
"{AddDoc}: " + NUM_DOCS,
"CloseIndex",
};
// Run algo
Benchmark benchmark = execBenchmark(algLines1);
List<TaskStats> stats = benchmark.getRunData().getPoints().taskStats();
// Count how many tokens all ReadTokens saw
int totalTokenCount1 = 0;
for (final TaskStats stat : stats) {
if (stat.getTask().getName().equals("ReadTokens")) {
totalTokenCount1 += stat.getCount();
}
}
// Separately count how many tokens are actually in the index:
IndexReader reader = DirectoryReader.open(benchmark.getRunData().getDirectory());
assertEquals(NUM_DOCS, reader.numDocs());
int totalTokenCount2 = 0;
Fields fields = MultiFields.getFields(reader);
for (String fieldName : fields) {
// Skip the bookkeeping fields DocMaker adds; ReadTokens does not count them.
if (fieldName.equals(DocMaker.ID_FIELD) || fieldName.equals(DocMaker.DATE_MSEC_FIELD) || fieldName.equals(DocMaker.TIME_SEC_FIELD)) {
continue;
}
Terms terms = fields.terms(fieldName);
if (terms == null) {
continue;
}
TermsEnum termsEnum = terms.iterator(null);
DocsEnum docs = null;
// Sum within-document frequency of every term: that total is exactly the
// number of tokens the analyzer emitted for these fields.
while(termsEnum.next() != null) {
docs = TestUtil.docs(random(), termsEnum, MultiFields.getLiveDocs(reader), docs, DocsEnum.FLAG_FREQS);
while(docs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
totalTokenCount2 += docs.freq();
}
}
}
reader.close();
// Make sure they are the same
assertEquals(totalTokenCount1, totalTokenCount2);
}
示例10: getTfIdfs
import org.apache.lucene.index.MultiFields; //导入方法依赖的package包/类
/**
 * Computes a per-term, per-document tf-idf score for every term in every
 * field of the index. Each term's idf is also recorded in
 * {@code inverseDocFreq} as a side effect.
 *
 * @return map of term text to (docID -> tf*idf) scores (the shared
 *         {@code tf_Idf_Weights} map, also populated as a side effect)
 * @throws IOException if the index cannot be read
 */
public Map<String, HashMap<Integer, Float>> getTfIdfs() throws IOException
{
    Fields fields = MultiFields.getFields(indexReader); // get the fields of the index
    if (fields == null) {
        // An index with no postings yields null; nothing to score.
        // (The original dereferenced fields unconditionally and would NPE here.)
        return tf_Idf_Weights;
    }
    for (String field : fields)
    {
        TermsEnum termEnum = MultiFields.getTerms(indexReader, field).iterator(null);
        BytesRef bytesRef;
        while ((bytesRef = termEnum.next()) != null)
        {
            // next() has already positioned the enum on this term, so the
            // original seekExact(bytesRef) re-seek was redundant and removed.
            String term = bytesRef.utf8ToString();
            float idf = tfidfSIM.idf(termEnum.docFreq(), indexReader.numDocs());
            inverseDocFreq.put(term, idf);
            System.out.println("Term = "+term);
            //System.out.println("idf= "+ idf);
            HashMap<Integer, Float> docTfIdf = new HashMap<Integer, Float>();
            DocsEnum docsEnum = termEnum.docs(liveDocs, null);
            if (docsEnum != null)
            {
                int doc;
                while ((doc = docsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
                {
                    float tf = tfidfSIM.tf(docsEnum.freq());
                    float tfidf_score = tf * idf;
                    docTfIdf.put(docsEnum.docID(), tfidf_score);
                    System.out.println("doc= "+ docsEnum.docID()+" tfidf_score= " + tfidf_score);
                }
                tf_Idf_Weights.put(term, docTfIdf);
            }
        }
    }
    return tf_Idf_Weights;
}
示例11: testReadTokens
import org.apache.lucene.index.MultiFields; //导入方法依赖的package包/类
/**
 * Test ReadTokensTask: the token count reported by ReadTokens over the first
 * NUM_DOCS Reuters docs must equal the number of postings actually stored
 * when the same docs are indexed with the same analyzer.
 */
public void testReadTokens() throws Exception {
// We will call ReadTokens on this many docs
final int NUM_DOCS = 20;
// Read tokens from first NUM_DOCS docs from Reuters and
// then build index from the same docs
String algLines1[] = {
"# ----- properties ",
"analyzer=org.apache.lucene.analysis.core.WhitespaceAnalyzer",
"content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
"docs.file=" + getReuters20LinesFile(),
"# ----- alg ",
"{ReadTokens}: " + NUM_DOCS,
"ResetSystemErase",
"CreateIndex",
"{AddDoc}: " + NUM_DOCS,
"CloseIndex",
};
// Run algo
Benchmark benchmark = execBenchmark(algLines1);
List<TaskStats> stats = benchmark.getRunData().getPoints().taskStats();
// Count how many tokens all ReadTokens saw
int totalTokenCount1 = 0;
for (final TaskStats stat : stats) {
if (stat.getTask().getName().equals("ReadTokens")) {
totalTokenCount1 += stat.getCount();
}
}
// Separately count how many tokens are actually in the index:
IndexReader reader = DirectoryReader.open(benchmark.getRunData().getDirectory());
assertEquals(NUM_DOCS, reader.numDocs());
int totalTokenCount2 = 0;
Fields fields = MultiFields.getFields(reader);
for (String fieldName : fields) {
// Skip the bookkeeping fields DocMaker adds; ReadTokens does not count them.
if (fieldName.equals(DocMaker.ID_FIELD) || fieldName.equals(DocMaker.DATE_MSEC_FIELD) || fieldName.equals(DocMaker.TIME_SEC_FIELD)) {
continue;
}
Terms terms = fields.terms(fieldName);
if (terms == null) {
continue;
}
TermsEnum termsEnum = terms.iterator(null);
DocsEnum docs = null;
// Sum within-document frequency of every term: that total is exactly the
// number of tokens the analyzer emitted for these fields.
while(termsEnum.next() != null) {
docs = _TestUtil.docs(random(), termsEnum, MultiFields.getLiveDocs(reader), docs, DocsEnum.FLAG_FREQS);
while(docs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
totalTokenCount2 += docs.freq();
}
}
}
reader.close();
// Make sure they are the same
assertEquals(totalTokenCount1, totalTokenCount2);
}