本文整理汇总了C#中Raven.Database.Indexing.IndexingBatch类的典型用法代码示例。如果您正苦于以下问题：C# IndexingBatch类的具体用法？C# IndexingBatch怎么用？C# IndexingBatch使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
IndexingBatch类属于Raven.Database.Indexing命名空间,在下文中一共展示了IndexingBatch类的14个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C#代码示例。
示例1: IndexDocuments
public override void IndexDocuments(AbstractViewGenerator viewGenerator, IndexingBatch batch, IStorageActionsAccessor actions, DateTime minimumTimestamp)
{
var count = 0;
var sourceCount = 0;
var sw = Stopwatch.StartNew();
var start = SystemTime.UtcNow;
int loadDocumentCount = 0;
long loadDocumentDuration = 0;
Write((indexWriter, analyzer, stats) =>
{
var processedKeys = new HashSet<string>();
var batchers = context.IndexUpdateTriggers.Select(x => x.CreateBatcher(indexId))
.Where(x => x != null)
.ToList();
try
{
var indexingPerfStats = RecordCurrentBatch("Current", batch.Docs.Count);
batch.SetIndexingPerformance(indexingPerfStats);
var docIdTerm = new Term(Constants.DocumentIdFieldName);
var documentsWrapped = batch.Docs.Select((doc, i) =>
{
Interlocked.Increment(ref sourceCount);
if (doc.__document_id == null)
throw new ArgumentException(
string.Format("Cannot index something which doesn't have a document id, but got: '{0}'", doc));
string documentId = doc.__document_id.ToString();
if (processedKeys.Add(documentId) == false)
return doc;
InvokeOnIndexEntryDeletedOnAllBatchers(batchers, docIdTerm.CreateTerm(documentId.ToLowerInvariant()));
if (batch.SkipDeleteFromIndex[i] == false ||
context.ShouldRemoveFromIndex(documentId)) // maybe it is recently deleted?
indexWriter.DeleteDocuments(docIdTerm.CreateTerm(documentId.ToLowerInvariant()));
return doc;
})
.Where(x => x is FilteredDocument == false)
.ToList();
var allReferencedDocs = new ConcurrentQueue<IDictionary<string, HashSet<string>>>();
var allReferenceEtags = new ConcurrentQueue<IDictionary<string, Etag>>();
BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, documentsWrapped, (partition) =>
{
var anonymousObjectToLuceneDocumentConverter = new AnonymousObjectToLuceneDocumentConverter(context.Database, indexDefinition, viewGenerator, logIndexing);
var luceneDoc = new Document();
var documentIdField = new Field(Constants.DocumentIdFieldName, "dummy", Field.Store.YES,
Field.Index.NOT_ANALYZED_NO_NORMS);
using (CurrentIndexingScope.Current = new CurrentIndexingScope(context.Database, PublicName))
{
string currentDocId = null;
int outputPerDocId = 0;
Action<Exception, object> onErrorFunc;
bool skipDocument = false;
foreach (var doc in RobustEnumerationIndex(partition, viewGenerator.MapDefinitions, stats, out onErrorFunc))
{
float boost;
IndexingResult indexingResult;
try
{
indexingResult = GetIndexingResult(doc, anonymousObjectToLuceneDocumentConverter, out boost);
}
catch (Exception e)
{
onErrorFunc(e, doc);
continue;
}
// ReSharper disable once RedundantBoolCompare --> code clarity
if (indexingResult.NewDocId == null || indexingResult.ShouldSkip != false)
{
continue;
}
if (currentDocId != indexingResult.NewDocId)
{
currentDocId = indexingResult.NewDocId;
outputPerDocId = 0;
skipDocument = false;
}
if (skipDocument)
continue;
outputPerDocId++;
if (EnsureValidNumberOfOutputsForDocument(currentDocId, outputPerDocId) == false)
{
skipDocument = true;
continue;
}
Interlocked.Increment(ref count);
luceneDoc.GetFields().Clear();
luceneDoc.Boost = boost;
documentIdField.SetValue(indexingResult.NewDocId.ToLowerInvariant());
luceneDoc.Add(documentIdField);
foreach (var field in indexingResult.Fields)
{
luceneDoc.Add(field);
}
//.........这里部分代码省略.........
示例2: IndexDocuments
/// <summary>
/// Feeds one batch of documents to the named index through the index storage,
/// logging the ids involved when debug logging is enabled.
/// </summary>
/// <remarks>
/// Does nothing when no view generator exists for <paramref name="index"/>.
/// Cancellation is rethrown to the caller. Every other exception is swallowed:
/// silently when <c>actions.IsWriteConflict</c> reports a write conflict,
/// otherwise after writing a warning to the log.
/// </remarks>
private void IndexDocuments(IStorageActionsAccessor actions, string index, IndexingBatch batch)
{
    var generator = context.IndexDefinitionStorage.GetViewGenerator(index);
    if (generator == null)
    {
        // index was deleted, probably
        return;
    }

    try
    {
        if (Log.IsDebugEnabled)
        {
            // Batches of 256 ids or more are abbreviated to the first and last 128 ids.
            var ids = batch.Ids.Count < 256
                ? string.Join(",", batch.Ids)
                : string.Join(", ", batch.Ids.Take(128)) + " ... " + string.Join(", ", batch.Ids.Skip(batch.Ids.Count - 128));

            Log.Debug("Indexing {0} documents for index: {1}. ({2})", batch.Docs.Count, index, ids);
        }

        context.CancellationToken.ThrowIfCancellationRequested();
        context.IndexStorage.Index(index, generator, batch, context, actions, batch.DateTime ?? DateTime.MinValue);
    }
    catch (Exception e)
    {
        // Cancellation must reach the caller untouched.
        if (e is OperationCanceledException)
            throw;

        if (actions.IsWriteConflict(e) == false)
            Log.WarnException(string.Format("Failed to index documents for index: {0}", index), e);
    }
}
示例3: Index
/// <summary>
/// Looks up the index registered under <paramref name="index"/>, lets it process
/// <paramref name="batch"/>, and then raises a <c>MapCompleted</c> change notification.
/// </summary>
/// <returns>
/// Whatever the index's <c>IndexDocuments</c> call returned, or <c>null</c> when no
/// index is registered under <paramref name="index"/>.
/// </returns>
public IndexingPerformanceStats Index(int index, AbstractViewGenerator viewGenerator, IndexingBatch batch, WorkContext context, IStorageActionsAccessor actions, DateTime minimumTimestamp, CancellationToken token)
{
    Index target;
    if (!indexes.TryGetValue(index, out target))
    {
        log.Debug("Tried to index on a non existent index {0}, ignoring", index);
        return null;
    }

    using (EnsureInvariantCulture())
    {
        using (DocumentCacher.SkipSettingDocumentsInDocumentCache())
        {
            var stats = target.IndexDocuments(viewGenerator, batch, actions, minimumTimestamp, token);

            // The notification is only built and raised once indexing has returned.
            var mapCompleted = new IndexChangeNotification
            {
                Name = target.PublicName,
                Type = IndexChangeTypes.MapCompleted
            };
            context.RaiseIndexChangeNotification(mapCompleted);

            return stats;
        }
    }
}
示例4: IndexDocuments
/// <summary>
/// Indexes the documents carried by <paramref name="batch"/>. Each concrete index
/// type supplies its own implementation.
/// </summary>
public abstract void IndexDocuments(
    AbstractViewGenerator viewGenerator,
    IndexingBatch batch,
    IStorageActionsAccessor actions,
    DateTime minimumTimestamp);
示例5: FilterIndexes
private IEnumerable<IndexingBatchForIndex> FilterIndexes(IList<IndexToWorkOn> indexesToWorkOn, List<JsonDocument> jsonDocs, Etag highestETagInBatch)
{
var last = jsonDocs.Last();
Debug.Assert(last.Etag != null);
Debug.Assert(last.LastModified != null);
var lastEtag = last.Etag;
var lastModified = last.LastModified.Value;
var documentRetriever = new DocumentRetriever(null, context.ReadTriggers, context.Database.InFlightTransactionalState);
var filteredDocs =
BackgroundTaskExecuter.Instance.Apply(context, jsonDocs, doc =>
{
var filteredDoc = documentRetriever.ExecuteReadTriggers(doc, null, ReadOperation.Index);
return filteredDoc == null ? new
{
Doc = doc,
Json = (object)new FilteredDocument(doc)
} : new
{
Doc = filteredDoc,
Json = JsonToExpando.Convert(doc.ToJson())
};
});
Log.Debug("After read triggers executed, {0} documents remained", filteredDocs.Count);
var results = new IndexingBatchForIndex[indexesToWorkOn.Count];
var actions = new Action<IStorageActionsAccessor>[indexesToWorkOn.Count];
BackgroundTaskExecuter.Instance.ExecuteAll(context, indexesToWorkOn, (indexToWorkOn, i) =>
{
var indexName = indexToWorkOn.IndexName;
var viewGenerator = context.IndexDefinitionStorage.GetViewGenerator(indexName);
if (viewGenerator == null)
return; // probably deleted
var batch = new IndexingBatch(highestETagInBatch);
foreach (var item in filteredDocs)
{
if (prefetchingBehavior.FilterDocuments(item.Doc) == false)
continue;
// did we already indexed this document in this index?
var etag = item.Doc.Etag;
if (etag == null)
continue;
// is the Raven-Entity-Name a match for the things the index executes on?
if (viewGenerator.ForEntityNames.Count != 0 &&
viewGenerator.ForEntityNames.Contains(item.Doc.Metadata.Value<string>(Constants.RavenEntityName)) == false)
{
continue;
}
batch.Add(item.Doc, item.Json, prefetchingBehavior.ShouldSkipDeleteFromIndex(item.Doc));
if (batch.DateTime == null)
batch.DateTime = item.Doc.LastModified;
else
batch.DateTime = batch.DateTime > item.Doc.LastModified
? item.Doc.LastModified
: batch.DateTime;
}
if (batch.Docs.Count == 0)
{
Log.Debug("All documents have been filtered for {0}, no indexing will be performed, updating to {1}, {2}", indexName,
lastEtag, lastModified);
// we use it this way to batch all the updates together
actions[i] = accessor => accessor.Indexing.UpdateLastIndexed(indexName, lastEtag, lastModified);
return;
}
if (Log.IsDebugEnabled)
{
Log.Debug("Going to index {0} documents in {1}: ({2})", batch.Ids.Count, indexToWorkOn, string.Join(", ", batch.Ids));
}
results[i] = new IndexingBatchForIndex
{
Batch = batch,
IndexName = indexToWorkOn.IndexName,
Index = indexToWorkOn.Index,
LastIndexedEtag = indexToWorkOn.LastIndexedEtag
};
});
transactionalStorage.Batch(actionsAccessor =>
{
foreach (var action in actions)
{
if (action != null)
action(actionsAccessor);
}
});
return results.Where(x => x != null);
//.........这里部分代码省略.........
示例6: IndexDocuments
public override IndexingPerformanceStats IndexDocuments(AbstractViewGenerator viewGenerator, IndexingBatch batch, IStorageActionsAccessor actions, DateTime minimumTimestamp, CancellationToken token)
{
token.ThrowIfCancellationRequested();
var count = 0;
var sourceCount = 0;
var deleted = new Dictionary<ReduceKeyAndBucket, int>();
var performance = RecordCurrentBatch("Current Map", "Map", batch.Docs.Count);
var performanceStats = new List<BasePerformanceStats>();
var usedStorageAccessors = new ConcurrentSet<IStorageActionsAccessor>();
if (usedStorageAccessors.TryAdd(actions))
{
var storageCommitDuration = new Stopwatch();
actions.BeforeStorageCommit += storageCommitDuration.Start;
actions.AfterStorageCommit += () =>
{
storageCommitDuration.Stop();
performanceStats.Add(PerformanceStats.From(IndexingOperation.StorageCommit, storageCommitDuration.ElapsedMilliseconds));
};
}
var deleteMappedResultsDuration = new Stopwatch();
var documentsWrapped = batch.Docs.Select(doc =>
{
token.ThrowIfCancellationRequested();
sourceCount++;
var documentId = doc.__document_id;
using (StopwatchScope.For(deleteMappedResultsDuration))
{
actions.MapReduce.DeleteMappedResultsForDocumentId((string)documentId, indexId, deleted);
}
return doc;
})
.Where(x => x is FilteredDocument == false)
.ToList();
performanceStats.Add(new PerformanceStats
{
Name = IndexingOperation.Map_DeleteMappedResults,
DurationMs = deleteMappedResultsDuration.ElapsedMilliseconds,
});
var allReferencedDocs = new ConcurrentQueue<IDictionary<string, HashSet<string>>>();
var allReferenceEtags = new ConcurrentQueue<IDictionary<string, Etag>>();
var allState = new ConcurrentQueue<Tuple<HashSet<ReduceKeyAndBucket>, IndexingWorkStats, Dictionary<string, int>>>();
var parallelOperations = new ConcurrentQueue<ParallelBatchStats>();
var parallelProcessingStart = SystemTime.UtcNow;
BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, documentsWrapped, partition =>
{
token.ThrowIfCancellationRequested();
var parallelStats = new ParallelBatchStats
{
StartDelay = (long)(SystemTime.UtcNow - parallelProcessingStart).TotalMilliseconds
};
var localStats = new IndexingWorkStats();
var localChanges = new HashSet<ReduceKeyAndBucket>();
var statsPerKey = new Dictionary<string, int>();
var linqExecutionDuration = new Stopwatch();
var reduceInMapLinqExecutionDuration = new Stopwatch();
var putMappedResultsDuration = new Stopwatch();
var convertToRavenJObjectDuration = new Stopwatch();
allState.Enqueue(Tuple.Create(localChanges, localStats, statsPerKey));
using (CurrentIndexingScope.Current = new CurrentIndexingScope(context.Database, PublicName))
{
// we are writing to the transactional store from multiple threads here, and in a streaming fashion
// should result in less memory and better perf
context.TransactionalStorage.Batch(accessor =>
{
if (usedStorageAccessors.TryAdd(accessor))
{
var storageCommitDuration = new Stopwatch();
accessor.BeforeStorageCommit += storageCommitDuration.Start;
accessor.AfterStorageCommit += () =>
{
storageCommitDuration.Stop();
parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.StorageCommit, storageCommitDuration.ElapsedMilliseconds));
};
}
var mapResults = RobustEnumerationIndex(partition, viewGenerator.MapDefinitions, localStats, linqExecutionDuration);
var currentDocumentResults = new List<object>();
string currentKey = null;
//.........这里部分代码省略.........
示例7: IndexDocuments
public override void IndexDocuments(
AbstractViewGenerator viewGenerator,
IndexingBatch batch,
IStorageActionsAccessor actions,
DateTime minimumTimestamp)
{
var count = 0;
var sourceCount = 0;
var sw = Stopwatch.StartNew();
var start = SystemTime.UtcNow;
var deleted = new Dictionary<ReduceKeyAndBucket, int>();
var indexPerfStats = RecordCurrentBatch("Current Map", batch.Docs.Count);
batch.SetIndexingPerformance(indexPerfStats);
var documentsWrapped = batch.Docs.Select(doc =>
{
sourceCount++;
var documentId = doc.__document_id;
actions.MapReduce.DeleteMappedResultsForDocumentId((string)documentId, indexId, deleted);
return doc;
})
.Where(x => x is FilteredDocument == false)
.ToList();
var allReferencedDocs = new ConcurrentQueue<IDictionary<string, HashSet<string>>>();
var allReferenceEtags = new ConcurrentQueue<IDictionary<string, Etag>>();
var allState = new ConcurrentQueue<Tuple<HashSet<ReduceKeyAndBucket>, IndexingWorkStats, Dictionary<string, int>>>();
int loadDocumentCount = 0;
long loadDocumentDuration = 0;
BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, documentsWrapped, partition =>
{
var localStats = new IndexingWorkStats();
var localChanges = new HashSet<ReduceKeyAndBucket>();
var statsPerKey = new Dictionary<string, int>();
allState.Enqueue(Tuple.Create(localChanges, localStats, statsPerKey));
using (CurrentIndexingScope.Current = new CurrentIndexingScope(context.Database, PublicName))
{
// we are writing to the transactional store from multiple threads here, and in a streaming fashion
// should result in less memory and better perf
context.TransactionalStorage.Batch(accessor =>
{
var mapResults = RobustEnumerationIndex(partition, viewGenerator.MapDefinitions, localStats);
var currentDocumentResults = new List<object>();
string currentKey = null;
bool skipDocument = false;
foreach (var currentDoc in mapResults)
{
var documentId = GetDocumentId(currentDoc);
if (documentId != currentKey)
{
count += ProcessBatch(viewGenerator, currentDocumentResults, currentKey, localChanges, accessor, statsPerKey);
currentDocumentResults.Clear();
currentKey = documentId;
}
else if (skipDocument)
{
continue;
}
currentDocumentResults.Add(new DynamicJsonObject(RavenJObject.FromObject(currentDoc, jsonSerializer)));
if (EnsureValidNumberOfOutputsForDocument(documentId, currentDocumentResults.Count) == false)
{
skipDocument = true;
currentDocumentResults.Clear();
continue;
}
Interlocked.Increment(ref localStats.IndexingSuccesses);
}
count += ProcessBatch(viewGenerator, currentDocumentResults, currentKey, localChanges, accessor, statsPerKey);
});
allReferenceEtags.Enqueue(CurrentIndexingScope.Current.ReferencesEtags);
allReferencedDocs.Enqueue(CurrentIndexingScope.Current.ReferencedDocuments);
Interlocked.Add(ref loadDocumentCount, CurrentIndexingScope.Current.LoadDocumentCount);
Interlocked.Add(ref loadDocumentDuration, CurrentIndexingScope.Current.LoadDocumentDuration.ElapsedMilliseconds);
}
});
UpdateDocumentReferences(actions, allReferencedDocs, allReferenceEtags);
var changed = allState.SelectMany(x => x.Item1).Concat(deleted.Keys)
.Distinct()
.ToList();
var stats = new IndexingWorkStats(allState.Select(x => x.Item2));
var reduceKeyStats = allState.SelectMany(x => x.Item3)
.GroupBy(x => x.Key)
.Select(g => new { g.Key, Count = g.Sum(x => x.Value) })
.ToList();
BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, reduceKeyStats, enumerator => context.TransactionalStorage.Batch(accessor =>
{
while (enumerator.MoveNext())
{
var reduceKeyStat = enumerator.Current;
accessor.MapReduce.IncrementReduceKeyCounter(indexId, reduceKeyStat.Key, reduceKeyStat.Count);
}
//.........这里部分代码省略.........
示例8: FilterIndexes
/// <summary>
/// Works out, for every index in <paramref name="indexesToWorkOn"/>, which of the
/// documents in <paramref name="jsonDocs"/> it still has to index.
/// </summary>
/// <remarks>
/// Documents first go through the read triggers. An index is skipped entirely when
/// its last indexed etag is not below the etag of the last document in
/// <paramref name="jsonDocs"/>, or when it has no view generator. An index for which
/// every document is filtered out gets its last-indexed marker advanced instead;
/// those updates are collected and applied together in one storage batch.
/// </remarks>
/// <returns>One (index, batch) pair per index that has at least one document to process.</returns>
private IEnumerable<Tuple<IndexToWorkOn, IndexingBatch>> FilterIndexes(IList<IndexToWorkOn> indexesToWorkOn, JsonDocument[] jsonDocs)
{
    var newest = jsonDocs.Last();
    Debug.Assert(newest.Etag != null);
    Debug.Assert(newest.LastModified != null);
    var lastEtag = newest.Etag.Value;
    var lastModified = newest.LastModified.Value;
    var batchUpperBound = new ComparableByteArray(lastEtag.ToByteArray());

    var retriever = new DocumentRetriever(null, context.ReadTriggers);
    var visibleDocs = BackgroundTaskExecuter.Instance.Apply(jsonDocs, source =>
    {
        JsonDocument afterTriggers = retriever.ExecuteReadTriggers(source, null, ReadOperation.Index);
        return afterTriggers == null
            ? null
            : new { Doc = afterTriggers, Json = JsonToExpando.Convert(afterTriggers.ToJson()) };
    });
    log.Debug("After read triggers executed, {0} documents remained", visibleDocs.Count);

    // Slot i of each array belongs to indexesToWorkOn[i]; at most one of the two is filled.
    var batchesPerIndex = new Tuple<IndexToWorkOn, IndexingBatch>[indexesToWorkOn.Count];
    var deferredUpdates = new Action<IStorageActionsAccessor>[indexesToWorkOn.Count];

    BackgroundTaskExecuter.Instance.ExecuteAll(context.Configuration, scheduler, indexesToWorkOn, (indexToWorkOn, i) =>
    {
        var alreadyIndexedUpTo = new ComparableByteArray(indexToWorkOn.LastIndexedEtag.ToByteArray());
        if (alreadyIndexedUpTo.CompareTo(batchUpperBound) >= 0)
            return;

        var indexName = indexToWorkOn.IndexName;
        var viewGenerator = context.IndexDefinitionStorage.GetViewGenerator(indexName);
        if (viewGenerator == null)
            return; // probably deleted

        var batch = new IndexingBatch();
        foreach (var candidate in visibleDocs)
        {
            // did we already index this document in this index?
            var candidateEtag = new ComparableByteArray(candidate.Doc.Etag.Value.ToByteArray());
            if (alreadyIndexedUpTo.CompareTo(candidateEtag) >= 0)
                continue;

            // is the Raven-Entity-Name a match for the things the index executes on?
            var matchesEntityName =
                viewGenerator.ForEntityNames.Count == 0 ||
                viewGenerator.ForEntityNames.Contains(candidate.Doc.Metadata.Value<string>(Constants.RavenEntityName));
            if (!matchesEntityName)
                continue;

            batch.Add(candidate.Doc, candidate.Json);

            // Track the earliest LastModified among the documents added to the batch.
            if (batch.DateTime == null || batch.DateTime > candidate.Doc.LastModified)
                batch.DateTime = candidate.Doc.LastModified;
        }

        if (batch.Docs.Count == 0)
        {
            log.Debug("All documents have been filtered for {0}, no indexing will be performed, updating to {1}, {2}", indexName,
                lastEtag, lastModified);
            // we use it this way to batch all the updates together
            deferredUpdates[i] = accessor => accessor.Indexing.UpdateLastIndexed(indexName, lastEtag, lastModified);
            return;
        }

        log.Debug("Going to index {0} documents in {1}", batch.Ids.Count, indexToWorkOn);
        batchesPerIndex[i] = Tuple.Create(indexToWorkOn, batch);
    });

    transactionalStorage.Batch(actionsAccessor =>
    {
        foreach (var update in deferredUpdates.Where(pending => pending != null))
        {
            update(actionsAccessor);
        }
    });

    return batchesPerIndex.Where(entry => entry != null);
}
示例9: IndexDocuments
public override void IndexDocuments(AbstractViewGenerator viewGenerator, IndexingBatch batch, IStorageActionsAccessor actions, DateTime minimumTimestamp)
{
var count = 0;
var sourceCount = 0;
var sw = Stopwatch.StartNew();
var start = SystemTime.UtcNow;
Write((indexWriter, analyzer, stats) =>
{
var processedKeys = new HashSet<string>();
var batchers = context.IndexUpdateTriggers.Select(x => x.CreateBatcher(name))
.Where(x => x != null)
.ToList();
try
{
RecordCurrentBatch("Current", batch.Docs.Count);
var docIdTerm = new Term(Constants.DocumentIdFieldName);
var documentsWrapped = batch.Docs.Select((doc, i) =>
{
Interlocked.Increment(ref sourceCount);
if (doc.__document_id == null)
throw new ArgumentException(
string.Format("Cannot index something which doesn't have a document id, but got: '{0}'", doc));
string documentId = doc.__document_id.ToString();
if (processedKeys.Add(documentId) == false)
return doc;
batchers.ApplyAndIgnoreAllErrors(
exception =>
{
logIndexing.WarnException(
string.Format("Error when executed OnIndexEntryDeleted trigger for index '{0}', key: '{1}'",
name, documentId),
exception);
context.AddError(name,
documentId,
exception.Message,
"OnIndexEntryDeleted Trigger"
);
},
trigger => trigger.OnIndexEntryDeleted(documentId));
if (batch.SkipDeleteFromIndex[i] == false ||
context.ShouldRemoveFromIndex(documentId)) // maybe it is recently deleted?
indexWriter.DeleteDocuments(docIdTerm.CreateTerm(documentId.ToLowerInvariant()));
return doc;
})
.Where(x => x is FilteredDocument == false)
.ToList();
var allReferencedDocs = new ConcurrentQueue<IDictionary<string, HashSet<string>>>();
BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, documentsWrapped, (partition) =>
{
var anonymousObjectToLuceneDocumentConverter = new AnonymousObjectToLuceneDocumentConverter(context.Database, indexDefinition, viewGenerator);
var luceneDoc = new Document();
var documentIdField = new Field(Constants.DocumentIdFieldName, "dummy", Field.Store.YES,
Field.Index.NOT_ANALYZED_NO_NORMS);
using (CurrentIndexingScope.Current = new CurrentIndexingScope(LoadDocument, allReferencedDocs.Enqueue))
{
foreach (var doc in RobustEnumerationIndex(partition, viewGenerator.MapDefinitions, stats))
{
float boost;
var indexingResult = GetIndexingResult(doc, anonymousObjectToLuceneDocumentConverter, out boost);
if (indexingResult.NewDocId != null && indexingResult.ShouldSkip == false)
{
Interlocked.Increment(ref count);
luceneDoc.GetFields().Clear();
luceneDoc.Boost = boost;
documentIdField.SetValue(indexingResult.NewDocId.ToLowerInvariant());
luceneDoc.Add(documentIdField);
foreach (var field in indexingResult.Fields)
{
luceneDoc.Add(field);
}
batchers.ApplyAndIgnoreAllErrors(
exception =>
{
logIndexing.WarnException(
string.Format("Error when executed OnIndexEntryCreated trigger for index '{0}', key: '{1}'",
name, indexingResult.NewDocId),
exception);
context.AddError(name,
indexingResult.NewDocId,
exception.Message,
"OnIndexEntryCreated Trigger"
);
},
trigger => trigger.OnIndexEntryCreated(indexingResult.NewDocId, luceneDoc));
LogIndexedDocument(indexingResult.NewDocId, luceneDoc);
AddDocumentToIndex(indexWriter, luceneDoc, analyzer);
}
Interlocked.Increment(ref stats.IndexingSuccesses);
}
}
});
var dic = context.ReferencingDocumentsByChildKeysWhichMightNeedReindexing_SimpleIndex;
//.........这里部分代码省略.........
示例10: Index
/// <summary>
/// Looks up the index registered under the name <paramref name="index"/>, lets it
/// process <paramref name="batch"/>, and then raises a <c>MapCompleted</c> change
/// notification carrying that name.
/// </summary>
/// <remarks>
/// When no index is registered under that name the call is logged at debug level and ignored.
/// </remarks>
public void Index(string index,
    AbstractViewGenerator viewGenerator,
    IndexingBatch batch,
    WorkContext context,
    IStorageActionsAccessor actions,
    DateTime minimumTimestamp)
{
    Index registered;
    var found = indexes.TryGetValue(index, out registered);
    if (!found)
    {
        log.Debug("Tried to index on a non existent index {0}, ignoring", index);
        return;
    }

    using (EnsureInvariantCulture())
    {
        using (DocumentCacher.SkipSettingDocumentsInDocumentCache())
        {
            registered.IndexDocuments(viewGenerator, batch, context, actions, minimumTimestamp);

            // The notification is only built and raised once indexing has returned.
            var mapCompleted = new IndexChangeNotification
            {
                Name = index,
                Type = IndexChangeTypes.MapCompleted
            };
            context.RaiseIndexChangeNotification(mapCompleted);
        }
    }
}
示例11: IndexDocuments
public override IndexingPerformanceStats IndexDocuments(AbstractViewGenerator viewGenerator, IndexingBatch batch, IStorageActionsAccessor actions, DateTime minimumTimestamp, CancellationToken token)
{
token.ThrowIfCancellationRequested();
var count = 0;
var sourceCount = 0;
var writeToIndexStats = new List<PerformanceStats>();
IndexingPerformanceStats performance = null;
var performanceStats = new List<BasePerformanceStats>();
var storageCommitDuration = new Stopwatch();
actions.BeforeStorageCommit += storageCommitDuration.Start;
actions.AfterStorageCommit += () =>
{
storageCommitDuration.Stop();
performanceStats.Add(PerformanceStats.From(IndexingOperation.StorageCommit, storageCommitDuration.ElapsedMilliseconds));
};
Write((indexWriter, analyzer, stats) =>
{
var processedKeys = new HashSet<string>();
var batchers = context.IndexUpdateTriggers.Select(x => x.CreateBatcher(indexId))
.Where(x => x != null)
.ToList();
try
{
performance = RecordCurrentBatch("Current", "Index", batch.Docs.Count);
var deleteExistingDocumentsDuration = new Stopwatch();
var docIdTerm = new Term(Constants.DocumentIdFieldName);
var documentsWrapped = batch.Docs.Select((doc, i) =>
{
token.ThrowIfCancellationRequested();
Interlocked.Increment(ref sourceCount);
if (doc.__document_id == null)
throw new ArgumentException(
string.Format("Cannot index something which doesn't have a document id, but got: '{0}'", doc));
string documentId = doc.__document_id.ToString();
if (processedKeys.Add(documentId) == false)
return doc;
InvokeOnIndexEntryDeletedOnAllBatchers(batchers, docIdTerm.CreateTerm(documentId.ToLowerInvariant()));
if (batch.SkipDeleteFromIndex[i] == false ||
context.ShouldRemoveFromIndex(documentId)) // maybe it is recently deleted?
{
using (StopwatchScope.For(deleteExistingDocumentsDuration))
{
indexWriter.DeleteDocuments(docIdTerm.CreateTerm(documentId.ToLowerInvariant()));
}
}
return doc;
})
.Where(x => x is FilteredDocument == false)
.ToList();
performanceStats.Add(new PerformanceStats
{
Name = IndexingOperation.Lucene_DeleteExistingDocument,
DurationMs = deleteExistingDocumentsDuration.ElapsedMilliseconds
});
var allReferencedDocs = new ConcurrentQueue<IDictionary<string, HashSet<string>>>();
var allReferenceEtags = new ConcurrentQueue<IDictionary<string, Etag>>();
var parallelOperations = new ConcurrentQueue<ParallelBatchStats>();
var parallelProcessingStart = SystemTime.UtcNow;
BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, documentsWrapped, (partition) =>
{
token.ThrowIfCancellationRequested();
var parallelStats = new ParallelBatchStats
{
StartDelay = (long) (SystemTime.UtcNow - parallelProcessingStart).TotalMilliseconds
};
var anonymousObjectToLuceneDocumentConverter = new AnonymousObjectToLuceneDocumentConverter(context.Database, indexDefinition, viewGenerator, logIndexing);
var luceneDoc = new Document();
var documentIdField = new Field(Constants.DocumentIdFieldName, "dummy", Field.Store.YES,
Field.Index.NOT_ANALYZED_NO_NORMS);
using (CurrentIndexingScope.Current = new CurrentIndexingScope(context.Database, PublicName))
{
string currentDocId = null;
int outputPerDocId = 0;
Action<Exception, object> onErrorFunc;
bool skipDocument = false;
var linqExecutionDuration = new Stopwatch();
var addDocumentDutation = new Stopwatch();
var convertToLuceneDocumentDuration = new Stopwatch();
//.........这里部分代码省略.........
示例12: IndexDocuments
public override void IndexDocuments(AbstractViewGenerator viewGenerator, IndexingBatch batch, WorkContext context, IStorageActionsAccessor actions, DateTime minimumTimestamp)
{
var count = 0;
var sourceCount = 0;
var sw = Stopwatch.StartNew();
var start = SystemTime.UtcNow;
Write(context, (indexWriter, analyzer, stats) =>
{
var processedKeys = new HashSet<string>();
var batchers = context.IndexUpdateTriggers.Select(x => x.CreateBatcher(name))
.Where(x => x != null)
.ToList();
try
{
var docIdTerm = new Term(Constants.DocumentIdFieldName);
var documentsWrapped = batch.Docs.Select((doc,i) =>
{
Interlocked.Increment(ref sourceCount);
if (doc.__document_id == null)
throw new ArgumentException(
string.Format("Cannot index something which doesn't have a document id, but got: '{0}'", doc));
string documentId = doc.__document_id.ToString();
if (processedKeys.Add(documentId) == false)
return doc;
batchers.ApplyAndIgnoreAllErrors(
exception =>
{
logIndexing.WarnException(
string.Format("Error when executed OnIndexEntryDeleted trigger for index '{0}', key: '{1}'",
name, documentId),
exception);
context.AddError(name,
documentId,
exception.Message
);
},
trigger => trigger.OnIndexEntryDeleted(documentId));
if(batch.SkipDeleteFromIndex[i] == false)
indexWriter.DeleteDocuments(docIdTerm.CreateTerm(documentId.ToLowerInvariant()));
return doc;
})
.Where(x => x is FilteredDocument == false)
.ToList();
BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, documentsWrapped, (partition) =>
{
var anonymousObjectToLuceneDocumentConverter = new AnonymousObjectToLuceneDocumentConverter(indexDefinition);
var luceneDoc = new Document();
var documentIdField = new Field(Constants.DocumentIdFieldName, "dummy", Field.Store.YES,
Field.Index.NOT_ANALYZED_NO_NORMS);
foreach (var doc in RobustEnumerationIndex(partition, viewGenerator.MapDefinitions, actions, stats))
{
float boost;
var indexingResult = GetIndexingResult(doc, anonymousObjectToLuceneDocumentConverter, out boost);
if (indexingResult.NewDocId != null && indexingResult.ShouldSkip == false)
{
Interlocked.Increment(ref count);
luceneDoc.GetFields().Clear();
luceneDoc.Boost = boost;
documentIdField.SetValue(indexingResult.NewDocId.ToLowerInvariant());
luceneDoc.Add(documentIdField);
foreach (var field in indexingResult.Fields)
{
luceneDoc.Add(field);
}
batchers.ApplyAndIgnoreAllErrors(
exception =>
{
logIndexing.WarnException(
string.Format("Error when executed OnIndexEntryCreated trigger for index '{0}', key: '{1}'",
name, indexingResult.NewDocId),
exception);
context.AddError(name,
indexingResult.NewDocId,
exception.Message
);
},
trigger => trigger.OnIndexEntryCreated(indexingResult.NewDocId, luceneDoc));
LogIndexedDocument(indexingResult.NewDocId, luceneDoc);
AddDocumentToIndex(indexWriter, luceneDoc, analyzer);
}
Interlocked.Increment(ref stats.IndexingSuccesses);
}
});
}
catch(Exception e)
{
batchers.ApplyAndIgnoreAllErrors(
ex =>
{
logIndexing.WarnException("Failed to notify index update trigger batcher about an error", ex);
context.AddError(name, null, ex.Message);
},
x => x.AnErrorOccured(e));
throw;
//.........这里部分代码省略.........
示例13: IndexDocuments
/// <summary>
/// Map phase of a map/reduce index: replaces the stored mapped results for every
/// document in <paramref name="batch"/> and schedules reductions for the affected
/// reduce key / bucket pairs.
/// </summary>
/// <remarks>
/// Steps, in order:
/// 1. delete the existing mapped results of each document in the batch and drop filtered documents;
/// 2. run the map definitions over the remaining documents, partition by partition;
/// 3. persist the document references collected while mapping;
/// 4. store the new mapped results and schedule reductions for everything that changed;
/// 5. record indexing and performance statistics.
/// </remarks>
public override void IndexDocuments(
    AbstractViewGenerator viewGenerator,
    IndexingBatch batch,
    IStorageActionsAccessor actions,
    DateTime minimumTimestamp)
{
    var count = 0;
    var sourceCount = 0;
    var sw = Stopwatch.StartNew();
    var start = SystemTime.UtcNow;
    var changed = new HashSet<ReduceKeyAndBucket>();

    // ToList() forces the deletes (and the sourceCount tally) to run here, before any mapping starts.
    var documentsWrapped = batch.Docs.Select(doc =>
        {
            sourceCount++;
            var documentId = doc.__document_id;
            actions.MapReduce.DeleteMappedResultsForDocumentId((string)documentId, name, changed);
            return doc;
        })
        .Where(x => x is FilteredDocument == false)
        .ToList();

    var items = new ConcurrentQueue<MapResultItem>();
    var stats = new IndexingWorkStats();
    var allReferencedDocs = new ConcurrentQueue<IDictionary<string, HashSet<string>>>();

    if (documentsWrapped.Count > 0)
        actions.MapReduce.UpdateRemovedMapReduceStats(name, changed);

    BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, documentsWrapped, partition =>
    {
        using (CurrentIndexingScope.Current = new CurrentIndexingScope(LoadDocument, allReferencedDocs.Enqueue))
        {
            var mapResults = RobustEnumerationIndex(partition, viewGenerator.MapDefinitions, stats);
            var currentDocumentResults = new List<object>();
            string currentKey = null;
            foreach (var currentDoc in mapResults)
            {
                var documentId = GetDocumentId(currentDoc);
                if (documentId != currentKey)
                {
                    // A new source document starts: flush the results gathered for the previous one.
                    // `count` is shared by every partition callback; the thread-safe queues used
                    // alongside it indicate the callbacks may run concurrently, so the tally is
                    // updated atomically rather than with a plain `+=`.
                    System.Threading.Interlocked.Add(ref count, ProcessBatch(viewGenerator, currentDocumentResults, currentKey, items));
                    currentDocumentResults.Clear();
                    currentKey = documentId;
                }
                currentDocumentResults.Add(new DynamicJsonObject(RavenJObject.FromObject(currentDoc, jsonSerializer)));
            }
            // Flush the results of the last document in this partition.
            System.Threading.Interlocked.Add(ref count, ProcessBatch(viewGenerator, currentDocumentResults, currentKey, items));
        }
    });

    // Persist the document references gathered by the indexing scopes.
    IDictionary<string, HashSet<string>> result;
    while (allReferencedDocs.TryDequeue(out result))
    {
        foreach (var referencedDocument in result)
        {
            actions.Indexing.UpdateDocumentReferences(name, referencedDocument.Key, referencedDocument.Value);
            actions.General.MaybePulseTransaction();
        }
    }

    // Store the new mapped results and remember which reduce key / bucket pairs they touch.
    foreach (var mapResultItem in items)
    {
        changed.Add(new ReduceKeyAndBucket(mapResultItem.Bucket, mapResultItem.ReduceKey));
        actions.MapReduce.PutMappedResult(name, mapResultItem.DocId, mapResultItem.ReduceKey, mapResultItem.Data);
        actions.General.MaybePulseTransaction();
    }

    UpdateIndexingStats(context, stats);
    actions.MapReduce.ScheduleReductions(name, 0, changed);
    AddindexingPerformanceStat(new IndexingPerformanceStats
    {
        OutputCount = count,
        InputCount = sourceCount,
        Operation = "Map",
        Duration = sw.Elapsed,
        Started = start
    });
    logIndexing.Debug("Mapped {0} documents for {1}", count, name);
}
示例14: IndexDocuments
/// <summary>
/// Map phase of a map/reduce index, run sequentially: replaces the stored mapped
/// results for every document in <paramref name="batch"/> and schedules reductions
/// for the affected reduce key / bucket pairs.
/// </summary>
/// <remarks>
/// Map output whose reduce key extracts to <c>null</c> is logged and skipped, but
/// still counted in the reported output count.
/// </remarks>
public override void IndexDocuments(
    AbstractViewGenerator viewGenerator,
    IndexingBatch batch,
    IStorageActionsAccessor actions,
    DateTime minimumTimestamp)
{
    var outputCount = 0;
    var inputCount = 0;
    var elapsed = Stopwatch.StartNew();
    var startedAt = SystemTime.UtcNow;
    var touched = new HashSet<ReduceKeyAndBucket>();

    // Left lazy on purpose: the deletes and the input tally only run while the
    // map enumeration below pulls documents through this sequence.
    var pendingDocs = batch.Docs
        .Select(doc =>
        {
            inputCount++;
            var documentId = doc.__document_id;
            actions.MapReduce.DeleteMappedResultsForDocumentId((string)documentId, name, touched);
            return doc;
        })
        .Where(x => x is FilteredDocument == false);

    var mappedItems = new List<MapResultItem>();
    var workStats = new IndexingWorkStats();
    var referencedDocs = new ConcurrentQueue<IDictionary<string, HashSet<string>>>();

    using (CurrentIndexingScope.Current = new CurrentIndexingScope(LoadDocument, referencedDocs.Enqueue))
    {
        // ToList() drives the whole map (and with it the deletes above) to completion.
        var mapOutput = RobustEnumerationIndex(
                pendingDocs.GetEnumerator(),
                viewGenerator.MapDefinitions,
                actions,
                workStats)
            .ToList();

        actions.MapReduce.UpdateRemovedMapReduceStats(name, touched);

        foreach (var perDocument in mapOutput.GroupBy(GetDocumentId))
        {
            var wrappedResults = perDocument
                .Select(x => (object)new DynamicJsonObject(RavenJObject.FromObject(x, jsonSerializer)))
                .ToList();

            var reducedDuringMap = RobustEnumerationReduceDuringMapPhase(
                wrappedResults.GetEnumerator(), viewGenerator.ReduceDefinition, actions, context);

            foreach (var doc in reducedDuringMap)
            {
                outputCount++;

                var reduceValue = viewGenerator.GroupByExtraction(doc);
                if (reduceValue == null)
                {
                    logIndexing.Debug("Field {0} is used as the reduce key and cannot be null, skipping document {1}",
                        viewGenerator.GroupByExtraction, perDocument.Key);
                    continue;
                }

                var reduceKey = ReduceKeyToString(reduceValue);
                var docId = perDocument.Key.ToString();
                var data = GetMappedData(doc);

                mappedItems.Add(new MapResultItem
                {
                    Data = data,
                    DocId = docId,
                    ReduceKey = reduceKey
                });
                touched.Add(new ReduceKeyAndBucket(IndexingUtil.MapBucket(docId), reduceKey));
            }
        }
    }

    // Persist the document references gathered by the indexing scope.
    IDictionary<string, HashSet<string>> references;
    while (referencedDocs.TryDequeue(out references))
    {
        foreach (var reference in references)
        {
            actions.Indexing.UpdateDocumentReferences(name, reference.Key, reference.Value);
            actions.General.MaybePulseTransaction();
        }
    }

    // Store the freshly mapped results.
    foreach (var item in mappedItems)
    {
        actions.MapReduce.PutMappedResult(name, item.DocId, item.ReduceKey, item.Data);
        actions.General.MaybePulseTransaction();
    }

    UpdateIndexingStats(context, workStats);
    actions.MapReduce.ScheduleReductions(name, 0, touched);

    var performance = new IndexingPerformanceStats
    {
        OutputCount = outputCount,
        InputCount = inputCount,
        Operation = "Map",
        Duration = elapsed.Elapsed,
        Started = startedAt
    };
    AddindexingPerformanceStat(performance);

    logIndexing.Debug("Mapped {0} documents for {1}", outputCount, name);
}