本文整理汇总了C#中Lucene.Net.Index.IndexReader.Terms方法的典型用法代码示例。如果您正苦于以下问题:C# IndexReader.Terms方法的具体用法?C# IndexReader.Terms怎么用?C# IndexReader.Terms使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类Lucene.Net.Index.IndexReader的用法示例。
在下文中一共展示了IndexReader.Terms方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C#代码示例。
示例1: RegexTermEnum
public RegexTermEnum(IndexReader reader, Term term, IRegexCapabilities regexImpl)
{
    // Remember the field being enumerated and compile the regex implementation
    // against the pattern text carried by the seed term.
    _sField = term.Field;
    _regexImpl = regexImpl;
    _regexImpl.Compile(term.Text);

    // Seed the term enumeration at the regex's literal prefix (empty when the
    // implementation reports none) so terms that cannot match are skipped.
    _sPre = _regexImpl.Prefix() ?? "";
    SetEnum(reader.Terms(new Term(term.Field, _sPre)));
}
示例2: GetFieldValues
/// <summary>
/// Enumerates the distinct term texts of <paramref name="groupByField"/>, in
/// index (term) order. Returns an empty sequence when the field has no terms.
/// </summary>
private IEnumerable<string> GetFieldValues(IndexReader reader, string groupByField)
{
    // Seek to the first term of the field (empty text sorts before all values).
    TermEnum te = reader.Terms(new Term(groupByField, string.Empty));
    try
    {
        if (te.Term() == null || te.Term().Field() != groupByField)
            return Enumerable.Empty<string>();
        var list = new List<string>();
        list.Add(te.Term().Text());
        while (te.Next())
        {
            // Field names are interned by Lucene, so reference inequality
            // signals we have walked past the target field.
            if (te.Term().Field() != groupByField)
                break;
            list.Add(te.Term().Text());
        }
        return list;
    }
    finally
    {
        // The original leaked the TermEnum; it holds index resources.
        te.Close();
    }
}
示例3: CorrectBits
/// <summary>
/// Builds a bit set marking, for each group of documents sharing a term in
/// <c>fieldName</c>, the single document to keep: the first or last occurrence
/// according to <c>keepMode</c>. All bits start clear (every doc assumed invalid).
/// </summary>
private OpenBitSet CorrectBits(IndexReader reader)
{
    OpenBitSet bits = new OpenBitSet(reader.MaxDoc); //assume all are INvalid
    Term startTerm = new Term(fieldName);
    TermEnum te = reader.Terms(startTerm);
    if (te != null)
    {
        using (te) // original never disposed the TermEnum
        {
            Term currTerm = te.Term;
            while ((currTerm != null) && (currTerm.Field == startTerm.Field)) //term fieldnames are interned
            {
                int lastDoc = -1;
                //set non duplicates
                // Dispose the per-term TermDocs; the original leaked one per term.
                using (TermDocs td = reader.TermDocs(currTerm))
                {
                    if (td.Next())
                    {
                        if (keepMode == KM_USE_FIRST_OCCURRENCE)
                        {
                            bits.Set(td.Doc);
                        }
                        else
                        {
                            // Walk to the last posting and keep only that one.
                            do
                            {
                                lastDoc = td.Doc;
                            } while (td.Next());
                            bits.Set(lastDoc);
                        }
                    }
                }
                if (!te.Next())
                {
                    break;
                }
                currTerm = te.Term;
            }
        }
    }
    return bits;
}
示例4: FillCache
/// <summary>
/// Builds a map from each term text of <paramref name="field"/> to the sorted,
/// rebased doc ids of the non-deleted documents containing that term.
/// Terms rejected by LowPrecisionNumber are skipped.
/// </summary>
private static Dictionary<string, int[]> FillCache(IndexReader reader, int docBase, string field)
{
    var cache = new Dictionary<string, int[]>();
    using (var termDocs = reader.TermDocs())
    using (var termEnum = reader.Terms(new Term(field)))
    {
        var matchingDocs = new List<int>();
        do
        {
            var current = termEnum.Term;
            if (current == null || field != current.Field)
                break;
            // NOTE: continue inside do-while still evaluates the loop
            // condition, so the enum advances.
            if (LowPrecisionNumber(current.Field, current.Text))
                continue;
            var remaining = termEnum.DocFreq(); // includes deleted docs
            termDocs.Seek(current);
            while (termDocs.Next() && remaining > 0)
            {
                remaining--;
                var doc = termDocs.Doc;
                if (!reader.IsDeleted(doc))
                    matchingDocs.Add(doc + docBase);
            }
            matchingDocs.Sort();
            cache[current.Text] = matchingDocs.ToArray();
            matchingDocs.Clear();
        } while (termEnum.Next());
    }
    return cache;
}
示例5: Load
/// <summary>
/// Loads facet data for <paramref name="fieldName"/>: walks every term of the
/// field in order, storing each term's text in a value list and, per term
/// ordinal, its document frequency and the min/max doc ids carrying it, while
/// filling orderArray with a doc-id -> term-ordinal mapping.
/// </summary>
public virtual void Load(string fieldName, IndexReader reader, TermListFactory listFactory)
{
    string field = string.Intern(fieldName);
    int maxDoc = reader.MaxDoc;
    if (orderArray == null) // we want to reuse the memory
    {
        orderArray = NewInstance(termCountSize, maxDoc);
    }
    else
    {
        orderArray.EnsureCapacity(maxDoc); // no need to fill to 0, we are reseting the data anyway
    }
    List<int> minIDList = new List<int>();
    List<int> maxIDList = new List<int>();
    List<int> freqList = new List<int>();
    int length = maxDoc + 1;
    ITermValueList list = listFactory == null ? new TermStringList() : listFactory.CreateTermList();
    TermDocs termDocs = reader.TermDocs();
    TermEnum termEnum = reader.Terms(new Term(field));
    int t = 0; // current term number
    // Ordinal 0 is a sentinel meaning "no term"; its freq/min/max slots are dummies.
    list.Add(null);
    minIDList.Add(-1);
    maxIDList.Add(-1);
    freqList.Add(0);
    //int df = 0;
    t++;
    try
    {
        do
        {
            Term term = termEnum.Term;
            // Stop once we leave the target field (ordinal string comparison).
            if (term == null || string.CompareOrdinal(term.Field, field) != 0)
                break;
            if (t >= orderArray.MaxValue())
            {
                throw new System.IO.IOException("maximum number of value cannot exceed: " + orderArray.MaxValue());
            }
            // Alexey: well, we could get now more than one term per document. Effectively, we could build facet againsts tokenized field
            /*// we expect that there is at most one term per document
            if (t >= length)
            {
                throw new RuntimeException("there are more terms than " + "documents in field \"" + field + "\", but it's impossible to sort on " + "tokenized fields");
            }*/
            // store term text
            list.Add(term.Text);
            termDocs.Seek(termEnum);
            // freqList.add(termEnum.docFreq()); // doesn't take into account deldocs
            int minID = -1;
            int maxID = -1;
            int df = 0;
            if (termDocs.Next())
            {
                // First posting establishes minID; subsequent postings only
                // update df and the doc->ordinal mapping; the last one is maxID.
                df++;
                int docid = termDocs.Doc;
                orderArray.Add(docid, t);
                minID = docid;
                while (termDocs.Next())
                {
                    df++;
                    docid = termDocs.Doc;
                    orderArray.Add(docid, t);
                }
                maxID = docid;
            }
            freqList.Add(df);
            minIDList.Add(minID);
            maxIDList.Add(maxID);
            t++;
        } while (termEnum.Next());
    }
    finally
    {
        // Always release index cursors, even if the walk throws.
        termDocs.Dispose();
        termEnum.Dispose();
    }
    list.Seal();
    this.valArray = list;
    this.freqs = freqList.ToArray();
    this.minIDs = minIDList.ToArray();
    this.maxIDs = maxIDList.ToArray();
}
示例6: AddStopWords
/// <summary>
/// Automatically adds stop words for the given field with terms exceeding the maxDocFreq.
/// </summary>
/// <param name="reader">The <c>IndexReader</c> which will be consulted to identify
/// potential stop words that exceed the required document frequency.</param>
/// <param name="fieldName">The field for which stopwords will be added.</param>
/// <param name="maxDocFreq">The maximum number of index documents which can contain a term,
/// after which the term is considered to be a stop word.</param>
/// <returns>The number of stop words identified.</returns>
public int AddStopWords(IndexReader reader, String fieldName, int maxDocFreq)
{
    var stopWords = Support.Compatibility.SetFactory.CreateHashSet<string>();
    String internedFieldName = StringHelper.Intern(fieldName);
    // Dispose the TermEnum; the original leaked it.
    using (TermEnum te = reader.Terms(new Term(fieldName)))
    {
        Term term = te.Term;
        while (term != null)
        {
            // Field names are interned, so reference inequality means we
            // walked past the target field.
            if (term.Field != internedFieldName)
            {
                break;
            }
            if (te.DocFreq() > maxDocFreq)
            {
                stopWords.Add(term.Text);
            }
            if (!te.Next())
            {
                break;
            }
            term = te.Term;
        }
    }
    // Use the indexer rather than Add: the comment below implies stop words for a
    // field may be re-computed, and Add would throw on the duplicate key.
    stopWordsPerField[fieldName] = stopWords;
    /* if the stopwords for a field are changed,
     * then saved streams for that field are erased.
     */
    IDictionary<String, SavedStreams> streamMap = (IDictionary<String, SavedStreams>) PreviousTokenStream;
    if (streamMap != null)
        streamMap.Remove(fieldName);
    return stopWords.Count;
}
示例7: ReadAllEntriesFromIndex
/// <summary>
/// Reconstructs an approximation of every index entry by walking all terms and
/// attaching each term's text to the documents containing it. A debug tool only:
/// refuses to run on indexes with more than 512K documents.
/// </summary>
public static RavenJObject[] ReadAllEntriesFromIndex(IndexReader reader)
{
    if (reader.MaxDoc > 512 * 1024)
    {
        // Message typo fixed: "entires" -> "entries".
        throw new InvalidOperationException("Refusing to extract all index entries from an index with " + reader.MaxDoc +
                                            " entries, because of the probable time / memory costs associated with that." +
                                            Environment.NewLine +
                                            "Viewing Index Entries are a debug tool, and should not be used on indexes of this size. You might want to try Luke, instead.");
    }
    var results = new RavenJObject[reader.MaxDoc];
    using (var termDocs = reader.TermDocs())
    using (var termEnum = reader.Terms())
    {
        while (termEnum.Next())
        {
            var term = termEnum.Term;
            if (term == null)
                break;
            var text = term.Text;
            termDocs.Seek(termEnum);
            // Bound the walk by DocFreq as well as by Next().
            for (int i = 0; i < termEnum.DocFreq() && termDocs.Next(); i++)
            {
                RavenJObject result = results[termDocs.Doc];
                if (result == null)
                    results[termDocs.Doc] = result = new RavenJObject();
                var propertyName = term.Field;
                // Internal bookkeeping fields are not part of the user-visible entry.
                if (propertyName.EndsWith("_ConvertToJson") ||
                    propertyName.EndsWith("_IsArray"))
                    continue;
                if (result.ContainsKey(propertyName))
                {
                    // Second (or later) value for the same property: promote to an array.
                    switch (result[propertyName].Type)
                    {
                        case JTokenType.Array:
                            ((RavenJArray)result[propertyName]).Add(text);
                            break;
                        case JTokenType.String:
                            result[propertyName] = new RavenJArray
                            {
                                result[propertyName],
                                text
                            };
                            break;
                        default:
                            throw new ArgumentException("No idea how to handle " + result[propertyName].Type);
                    }
                }
                else
                {
                    result[propertyName] = text;
                }
            }
        }
    }
    return results;
}
示例8: VerifyEquals
public static void VerifyEquals(IndexReader r1, IndexReader r2, System.String idField)
{
Assert.AreEqual(r1.NumDocs(), r2.NumDocs());
bool hasDeletes = !(r1.MaxDoc() == r2.MaxDoc() && r1.NumDocs() == r1.MaxDoc());
int[] r2r1 = new int[r2.MaxDoc()]; // r2 id to r1 id mapping
TermDocs termDocs1 = r1.TermDocs();
TermDocs termDocs2 = r2.TermDocs();
// create mapping from id2 space to id2 based on idField
idField = StringHelper.Intern(idField);
TermEnum termEnum = r1.Terms(new Term(idField, ""));
do
{
Term term = termEnum.Term();
if (term == null || (System.Object) term.Field() != (System.Object) idField)
break;
termDocs1.Seek(termEnum);
if (!termDocs1.Next())
{
// This doc is deleted and wasn't replaced
termDocs2.Seek(termEnum);
Assert.IsFalse(termDocs2.Next());
continue;
}
int id1 = termDocs1.Doc();
Assert.IsFalse(termDocs1.Next());
termDocs2.Seek(termEnum);
Assert.IsTrue(termDocs2.Next());
int id2 = termDocs2.Doc();
Assert.IsFalse(termDocs2.Next());
r2r1[id2] = id1;
// verify stored fields are equivalent
try
{
VerifyEquals(r1.Document(id1), r2.Document(id2));
}
catch (System.Exception t)
{
System.Console.Out.WriteLine("FAILED id=" + term + " id1=" + id1 + " id2=" + id2 + " term=" + term);
System.Console.Out.WriteLine(" d1=" + r1.Document(id1));
System.Console.Out.WriteLine(" d2=" + r2.Document(id2));
throw t;
}
try
{
// verify term vectors are equivalent
VerifyEquals(r1.GetTermFreqVectors(id1), r2.GetTermFreqVectors(id2));
}
catch (System.Exception e)
{
System.Console.Out.WriteLine("FAILED id=" + term + " id1=" + id1 + " id2=" + id2);
TermFreqVector[] tv1 = r1.GetTermFreqVectors(id1);
System.Console.Out.WriteLine(" d1=" + tv1);
if (tv1 != null)
for (int i = 0; i < tv1.Length; i++)
{
System.Console.Out.WriteLine(" " + i + ": " + tv1[i]);
}
TermFreqVector[] tv2 = r2.GetTermFreqVectors(id2);
System.Console.Out.WriteLine(" d2=" + tv2);
if (tv2 != null)
for (int i = 0; i < tv2.Length; i++)
{
System.Console.Out.WriteLine(" " + i + ": " + tv2[i]);
}
throw e;
}
}
while (termEnum.Next());
termEnum.Close();
// Verify postings
TermEnum termEnum1 = r1.Terms(new Term("", ""));
TermEnum termEnum2 = r2.Terms(new Term("", ""));
// pack both doc and freq into single element for easy sorting
long[] info1 = new long[r1.NumDocs()];
long[] info2 = new long[r2.NumDocs()];
for (; ; )
{
Term term1, term2;
// iterate until we get some docs
int len1;
for (; ; )
{
len1 = 0;
term1 = termEnum1.Term();
//.........这里部分代码省略.........
示例9: SingleTermEnum
/// <summary>
/// Builds a <c>SingleTermEnum</c> positioned at <paramref name="singleTerm"/>.
/// <p/>
/// Once the constructor returns, the enumeration already points to the term,
/// provided it exists in the index.
/// </summary>
public SingleTermEnum(IndexReader reader, Term singleTerm)
{
    // Record the only term this enum will ever report, then position the
    // underlying term enumeration directly at it.
    this.singleTerm = singleTerm;
    SetEnum(reader.Terms(singleTerm));
}
示例10: FillCache
/// <summary>
/// For each requested field, walks the field's terms and records every
/// non-deleted document/term pair into the searcher-holding state's cache.
/// Terms rejected by LowPrecisionNumber are skipped.
/// </summary>
private static void FillCache(IndexSearcherHolder.IndexSearcherHoldingState state, List<string> fieldsToRead, IndexReader reader)
{
    foreach (var field in fieldsToRead)
    {
        using (var termDocs = reader.TermDocs())
        using (var termEnum = reader.Terms(new Term(field)))
        {
            do
            {
                var current = termEnum.Term;
                if (current == null || field != current.Field)
                    break;
                // continue inside do-while still evaluates the loop condition,
                // so the enum advances.
                if (LowPrecisionNumber(current))
                    continue;
                var remaining = termEnum.DocFreq(); // includes deleted docs
                termDocs.Seek(current);
                while (termDocs.Next() && remaining > 0)
                {
                    remaining--;
                    if (!reader.IsDeleted(termDocs.Doc))
                        state.SetInCache(field, termDocs.Doc, current);
                }
            } while (termEnum.Next());
        }
    }
}
示例11: ComputeDistances
/// <summary>
/// Computes, for every document whose spatial-shape term parses to a point, its
/// distance from originPt; returns an array indexed by doc id (0.0 for docs
/// without a point). The array is allocated lazily, only once a point term is seen.
/// </summary>
protected internal double[] ComputeDistances(IndexReader reader)
{
    double[] retArray = null;
    var termDocs = reader.TermDocs();
    var termEnum = reader.Terms(new Term(Constants.SpatialShapeFieldName));
    try
    {
        do
        {
            Term term = termEnum.Term();
            if (term == null)
                break;
            Debug.Assert(Constants.SpatialShapeFieldName.Equals(term.Field()));
            Shape termval;
            try
            {
                termval = SpatialIndex.RavenSpatialContext.ReadShape(term.Text()); // read shape
            }
            catch (InvalidShapeException)
            {
                // Malformed shape text: skip this term. continue in a do-while
                // still evaluates the loop condition, so the enum advances.
                continue;
            }
            var pt = termval as Point;
            if (pt == null)
                continue; // only point shapes get a single distance; skip other geometries
            var distance = SpatialIndex.RavenSpatialContext.GetDistCalc().Distance(pt, originPt);
            if (retArray == null)
                // late init
                retArray = new double[reader.MaxDoc()];
            // Assign the same distance to every document carrying this point term.
            termDocs.Seek(termEnum);
            while (termDocs.Next())
            {
                retArray[termDocs.Doc()] = distance;
            }
        } while (termEnum.Next());
    }
    finally
    {
        // Always release index cursors, even if shape parsing throws unexpectedly.
        termDocs.Close();
        termEnum.Close();
    }
    return retArray ?? new double[reader.MaxDoc()];
}
示例12: TermsEnumCompatibility
/// <summary>
/// Wraps a classic <c>TermEnum</c> over <paramref name="fieldName"/>,
/// starting at the field's first term.
/// </summary>
public TermsEnumCompatibility(IndexReader reader, String fieldName)
{
    // Intern the field name so later reference comparisons against other
    // interned field strings remain valid.
    this.fieldName = string.Intern(fieldName);
    this.reader = reader;
    this.termEnum = reader.Terms(new Term(this.fieldName));
}
示例13: FastBits
/// <summary>
/// Builds a bit set in which every document starts out valid and, for each term
/// of <c>fieldName</c> with more than one posting, the duplicate occurrences are
/// cleared — keeping the first or last occurrence according to <c>keepMode</c>.
/// </summary>
private OpenBitSet FastBits(IndexReader reader)
{
    OpenBitSet bits = new OpenBitSet(reader.MaxDoc());
    bits.Set(0, reader.MaxDoc()); //assume all are valid
    Term startTerm = new Term(fieldName);
    TermEnum te = reader.Terms(startTerm);
    if (te != null)
    {
        try
        {
            Term currTerm = te.Term();
            while ((currTerm != null) && (currTerm.Field() == startTerm.Field())) //term fieldnames are interned
            {
                if (te.DocFreq() > 1)
                {
                    int lastDoc = -1;
                    //unset potential duplicates
                    TermDocs td = reader.TermDocs(currTerm);
                    try
                    {
                        td.Next();
                        if (keepMode == KM_USE_FIRST_OCCURRENCE)
                        {
                            // Skip the first occurrence so its bit stays set.
                            td.Next();
                        }
                        do
                        {
                            lastDoc = td.Doc();
                            bits.Clear(lastDoc);
                        } while (td.Next());
                        if (keepMode == KM_USE_LAST_OCCURRENCE)
                        {
                            //restore the last bit
                            bits.Set(lastDoc);
                        }
                    }
                    finally
                    {
                        // Original leaked one TermDocs per duplicated term.
                        td.Close();
                    }
                }
                if (!te.Next())
                {
                    break;
                }
                currTerm = te.Term();
            }
        }
        finally
        {
            // Original never released the TermEnum.
            te.Close();
        }
    }
    return bits;
}
示例14: AssertIndexEquals
/// <summary>
/// Asserts that two indexes are equivalent: doc counts, field names, norms,
/// deletions, stored fields, and the full term dictionary with postings/positions.
/// </summary>
public static void AssertIndexEquals(IndexReader index1, IndexReader index2)
{
    Assert.AreEqual(index1.NumDocs(), index2.NumDocs(), "IndexReaders have different values for numDocs.");
    Assert.AreEqual(index1.MaxDoc, index2.MaxDoc, "IndexReaders have different values for maxDoc.");
    Assert.AreEqual(index1.HasDeletions, index2.HasDeletions, "Only one IndexReader has deletions.");
    Assert.AreEqual(index1.IsOptimized(), index2.IsOptimized(), "Only one index is optimized.");
    // check field names
    System.Collections.Generic.ICollection<string> fieldsNames1 = index1.GetFieldNames(FieldOption.ALL);
    // BUG FIX: the original read fieldsNames2 from index1, so the field-name
    // comparison below always trivially passed.
    System.Collections.Generic.ICollection<string> fieldsNames2 = index2.GetFieldNames(FieldOption.ALL);
    System.Collections.Generic.ICollection<IFieldable> fields1 = null;
    System.Collections.Generic.ICollection<IFieldable> fields2 = null;
    Assert.AreEqual(fieldsNames1.Count, fieldsNames2.Count, "IndexReaders have different numbers of fields.");
    System.Collections.IEnumerator it1 = fieldsNames1.GetEnumerator();
    System.Collections.IEnumerator it2 = fieldsNames2.GetEnumerator();
    while (it1.MoveNext() && it2.MoveNext())
    {
        Assert.AreEqual((System.String) it1.Current, (System.String) it2.Current, "Different field names.");
    }
    // check norms
    it1 = fieldsNames1.GetEnumerator();
    while (it1.MoveNext())
    {
        System.String curField = (System.String) it1.Current;
        byte[] norms1 = index1.Norms(curField);
        byte[] norms2 = index2.Norms(curField);
        if (norms1 != null && norms2 != null)
        {
            Assert.AreEqual(norms1.Length, norms2.Length);
            for (int i = 0; i < norms1.Length; i++)
            {
                Assert.AreEqual(norms1[i], norms2[i], "Norm different for doc " + i + " and field '" + curField + "'.");
            }
        }
        else
        {
            // Either both fields lack norms, or only one does (failure).
            Assert.AreSame(norms1, norms2);
        }
    }
    // check deletions
    for (int i = 0; i < index1.MaxDoc; i++)
    {
        Assert.AreEqual(index1.IsDeleted(i), index2.IsDeleted(i), "Doc " + i + " only deleted in one index.");
    }
    // check stored fields
    for (int i = 0; i < index1.MaxDoc; i++)
    {
        if (!index1.IsDeleted(i))
        {
            Document doc1 = index1.Document(i);
            Document doc2 = index2.Document(i);
            fields1 = doc1.GetFields();
            fields2 = doc2.GetFields();
            Assert.AreEqual(fields1.Count, fields2.Count, "Different numbers of fields for doc " + i + ".");
            it1 = fields1.GetEnumerator();
            it2 = fields2.GetEnumerator();
            while (it1.MoveNext() && it2.MoveNext())
            {
                Field curField1 = (Field) it1.Current;
                Field curField2 = (Field) it2.Current;
                Assert.AreEqual(curField1.Name, curField2.Name, "Different fields names for doc " + i + ".");
                Assert.AreEqual(curField1.StringValue, curField2.StringValue, "Different field values for doc " + i + ".");
            }
        }
    }
    // check dictionary and posting lists
    TermEnum enum1 = index1.Terms();
    TermEnum enum2 = index2.Terms();
    TermPositions tp1 = index1.TermPositions();
    TermPositions tp2 = index2.TermPositions();
    while (enum1.Next())
    {
        Assert.IsTrue(enum2.Next());
        Assert.AreEqual(enum1.Term, enum2.Term, "Different term in dictionary.");
        // Terms were just asserted equal, so seeking both readers by enum1.Term is valid.
        tp1.Seek(enum1.Term);
        tp2.Seek(enum1.Term);
        while (tp1.Next())
        {
            Assert.IsTrue(tp2.Next());
            Assert.AreEqual(tp1.Doc, tp2.Doc, "Different doc id in postinglist of term " + enum1.Term + ".");
            Assert.AreEqual(tp1.Freq, tp2.Freq, "Different term frequence in postinglist of term " + enum1.Term + ".");
            for (int i = 0; i < tp1.Freq; i++)
            {
                Assert.AreEqual(tp1.NextPosition(), tp2.NextPosition(), "Different positions in postinglist of term " + enum1.Term + ".");
            }
        }
    }
}
示例15: Load
public override void Load(string fieldName, IndexReader reader, TermListFactory listFactory, BoboIndexReader.WorkArea workArea)
{
long t0 = System.Environment.TickCount;
int maxdoc = reader.MaxDoc;
BigNestedIntArray.BufferedLoader loader = GetBufferedLoader(maxdoc, workArea);
BigNestedIntArray.BufferedLoader weightLoader = GetBufferedLoader(maxdoc, null);
TermEnum tenum = null;
TermDocs tdoc = null;
var list = (listFactory == null ? new TermStringList() : listFactory.CreateTermList());
List<int> minIDList = new List<int>();
List<int> maxIDList = new List<int>();
List<int> freqList = new List<int>();
OpenBitSet bitset = new OpenBitSet(maxdoc + 1);
int negativeValueCount = GetNegativeValueCount(reader, string.Intern(fieldName));
int t = 0; // current term number
list.Add(null);
minIDList.Add(-1);
maxIDList.Add(-1);
freqList.Add(0);
t++;
_overflow = false;
string pre = null;
int df = 0;
int minID = -1;
int maxID = -1;
int valId = 0;
try
{
tdoc = reader.TermDocs();
tenum = reader.Terms(new Term(fieldName, ""));
if (tenum != null)
{
do
{
Term term = tenum.Term;
if (term == null || !fieldName.Equals(term.Field))
break;
string val = term.Text;
if (val != null)
{
int weight = 0;
string[] split = val.Split(new char[] { '\0' }, StringSplitOptions.RemoveEmptyEntries);
if (split.Length > 1)
{
val = split[0];
weight = int.Parse(split[split.Length - 1]);
}
if (pre == null || !val.Equals(pre))
{
if (pre != null)
{
freqList.Add(df);
minIDList.Add(minID);
maxIDList.Add(maxID);
}
list.Add(val);
df = 0;
minID = -1;
maxID = -1;
valId = (t - 1 < negativeValueCount) ? (negativeValueCount - t + 1) : t;
t++;
}
tdoc.Seek(tenum);
if (tdoc.Next())
{
df++;
int docid = tdoc.Doc;
if (!loader.Add(docid, valId)) LogOverflow(fieldName);
else weightLoader.Add(docid, weight);
if (docid < minID) minID = docid;
bitset.FastSet(docid);
while (tdoc.Next())
{
df++;
docid = tdoc.Doc;
if (!loader.Add(docid, valId)) LogOverflow(fieldName);
else weightLoader.Add(docid, weight);
bitset.FastSet(docid);
}
if (docid > maxID) maxID = docid;
}
pre = val;
}
}
while (tenum.Next());
//.........这里部分代码省略.........