本文整理汇总了C#中Document.GetAnnotatedBlocks方法的典型用法代码示例。如果您正苦于以下问题：C# Document.GetAnnotatedBlocks方法的具体用法？C# Document.GetAnnotatedBlocks怎么用？C# Document.GetAnnotatedBlocks使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类Document的用法示例。
在下文中一共展示了Document.GetAnnotatedBlocks方法的13个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C#代码示例。
示例1: ProcessDocument
/*protected*/
/// <summary>
/// Passes the blocks selected by <c>mBlockSelector</c> to <c>ProcessTokens</c>.
/// When <c>mTokenGroupSelector</c> is set, the blocks are fetched and processed
/// separately for the span of every token group; otherwise they are fetched
/// once for the whole document.
/// </summary>
/// <param name="document">Document to process. It is skipped unless its
/// "contentType" feature equals "Text".</param>
public override void ProcessDocument(Document document)
{
    if (document.Features.GetFeatureValue("contentType") != "Text")
    {
        return;
    }

    try
    {
        if (mTokenGroupSelector != null)
        {
            // The annotation index is only built on the span-restricted path.
            document.CreateAnnotationIndex();
            foreach (TextBlock groupBlock in document.GetAnnotatedBlocks(mTokenGroupSelector))
            {
                ProcessTokens(document.GetAnnotatedBlocks(mBlockSelector, groupBlock.SpanStart, groupBlock.SpanEnd));
            }
        }
        else
        {
            ProcessTokens(document.GetAnnotatedBlocks(mBlockSelector));
        }
    }
    catch (Exception exception)
    {
        // Failures are logged and not propagated to the caller.
        mLogger.Error("ProcessDocument", exception);
    }
}
示例2: ProcessDocument
/// <summary>
/// Feeds the tokens (text, span start and "posTag" feature) of every sentence
/// inside the selected blocks to an <c>EntityRecognitionEngine.Document</c>,
/// then adds one annotation to <paramref name="document"/> for each discovered
/// entity that resolves to an instance URI.
/// </summary>
/// <param name="document">Document to process. It is skipped unless its
/// "contentType" feature equals "Text".</param>
public override void ProcessDocument(Document document)
{
    if (document.Features.GetFeatureValue("contentType") != "Text")
    {
        return;
    }

    try
    {
        document.CreateAnnotationIndex();
        EntityRecognitionEngine.Document erDoc = new EntityRecognitionEngine.Document();

        foreach (TextBlock block in document.GetAnnotatedBlocks(mBlockSelector))
        {
            erDoc.BeginNewTextBlock();
            // *** sentence selector hardcoded
            foreach (TextBlock sentence in document.GetAnnotatedBlocks("Sentence", block.SpanStart, block.SpanEnd))
            {
                ArrayList<string> words = new ArrayList<string>();
                ArrayList<string> tags = new ArrayList<string>();
                ArrayList<int> starts = new ArrayList<int>();
                // *** token selector hardcoded
                foreach (TextBlock tokenBlock in document.GetAnnotatedBlocks("Token", sentence.SpanStart, sentence.SpanEnd))
                {
                    words.Add(tokenBlock.Text);
                    // *** POS tag feature name hardcoded
                    tags.Add(tokenBlock.Annotation.Features.GetFeatureValue("posTag"));
                    starts.Add(tokenBlock.SpanStart);
                }
                erDoc.AddSentence(words, starts, tags);
            }
        }

        ArrayList<Pair<int, int>> spans;
        ArrayList<string> entities = erDoc.DiscoverEntities(mEntityRecognitionEngine, out spans);

        // entities[idx] and spans[idx] are used as a pair, so the index advances
        // even for entities that do not resolve to an instance.
        for (int idx = 0; idx < entities.Count; idx++)
        {
            string gazetteerUri = entities[idx];
            string instanceUri = mEntityRecognitionEngine.GetIdentifiedInstance(gazetteerUri);
            if (instanceUri == null)
            {
                continue;
            }

            string classPath = mEntityRecognitionEngine.GetInstanceClassPath(instanceUri);
            Annotation annotation = new Annotation(spans[idx].First, spans[idx].Second, GetAnnotationName(classPath));
            document.AddAnnotation(annotation);
            annotation.Features.SetFeatureValue("gazetteerUri", gazetteerUri);
            annotation.Features.SetFeatureValue("instanceUri", instanceUri);
            annotation.Features.SetFeatureValue("instanceClassUri", mEntityRecognitionEngine.GetInstanceClass(instanceUri));
            // TODO: instanceLabel, instanceClassLabel
        }
    }
    catch (Exception exception)
    {
        // Failures are logged and not propagated to the caller.
        mLogger.Error("ProcessDocument", exception);
    }
}
示例3: ProcessDocument
/*protected*/
/// <summary>
/// Runs <c>mSentenceDetector</c> over the text of every selected block and adds
/// a "Sentence" annotation for each detected sentence, with the offsets
/// reported by <c>GetTrimOffsets</c> removed from both ends of the span.
/// </summary>
/// <param name="document">Document to process. It is skipped unless its
/// "contentType" feature equals "Text".</param>
public override void ProcessDocument(Document document)
{
    if (document.Features.GetFeatureValue("contentType") != "Text")
    {
        return;
    }

    try
    {
        foreach (TextBlock block in document.GetAnnotatedBlocks(mBlockSelector))
        {
            OpenNLP.Tools.Util.Pair<int, int>[] positions;
            string[] sentences = mSentenceDetector.SentenceDetect(block.Text, out positions);

            for (int i = 0; i < positions.Length; i++)
            {
                OpenNLP.Tools.Util.Pair<int, int> pos = positions[i];

                int leadTrim, tailTrim;
                GetTrimOffsets(sentences[i], out leadTrim, out tailTrim);

                // Sentence positions are relative to the block text, so shift them by
                // the block's start. SecondValue is presumably the sentence length
                // (hence the "- 1" for an inclusive end) -- TODO confirm.
                int sentenceBase = block.SpanStart + pos.FirstValue;
                int startIdx = sentenceBase + leadTrim;
                int endIdx = sentenceBase + (pos.SecondValue - 1) - tailTrim;

                // Spans whose end falls before their start after trimming are skipped.
                if (endIdx < startIdx)
                {
                    continue;
                }
                document.AddAnnotation(new Annotation(startIdx, endIdx, "Sentence"));
            }
        }
    }
    catch (Exception exception)
    {
        // Failures are logged and not propagated to the caller.
        mLogger.Error("ProcessDocument", exception);
    }
}
示例4: ProcessDocument
/// <summary>
/// Concatenates the text of the selected blocks (one per line), converts it to
/// a bag-of-words vector via <c>mBowSpace</c>, and stores the categories
/// collected by <c>Categorize</c> as document features: "NumCategories" plus
/// "Category0", "Category1", ...
/// </summary>
/// <param name="document">Document to process. It is skipped unless its
/// "contentType" feature equals "Text".</param>
public override void ProcessDocument(Document document)
{
    if (document.Features.GetFeatureValue("contentType") != "Text")
    {
        return;
    }

    try
    {
        StringBuilder content = new StringBuilder();
        TextBlock[] blocks = document.GetAnnotatedBlocks(mBlockSelector);
        foreach (TextBlock block in blocks)
        {
            content.AppendLine(block.Text);
        }

        SparseVector<double> bow = mBowSpace.ProcessDocument(content.ToString());
        ArrayList<string> categories = new ArrayList<string>();
        Categorize(/*prefix=*/"", mTolerance, bow, categories);

        document.Features.SetFeatureValue("NumCategories", categories.Count.ToString());

        // Feature keys are numbered from zero, in the order the categories were collected.
        int idx = 0;
        foreach (string category in categories)
        {
            document.Features.SetFeatureValue("Category" + idx, category);
            idx++;
        }
    }
    catch (Exception exception)
    {
        // Failures are logged and not propagated to the caller.
        mLogger.Error("ProcessDocument", exception);
    }
}
示例5: ProcessDocument
/*protected*/
/// <summary>
/// Concatenates the text of the selected blocks (one per line) and stores two
/// document features: "detectedLanguage" (only when the text has at least
/// <c>mMinTextLen</c> characters and a language profile is returned) and
/// "detectedCharRange" (only when the text is non-empty).
/// </summary>
/// <param name="document">Document to process. It is skipped unless its
/// "contentType" feature equals "Text".</param>
public override void ProcessDocument(Document document)
{
    if (document.Features.GetFeatureValue("contentType") != "Text")
    {
        return;
    }

    try
    {
        StringBuilder buffer = new StringBuilder();
        foreach (TextBlock block in document.GetAnnotatedBlocks(mBlockSelector))
        {
            buffer.AppendLine(block.Text);
        }
        string text = buffer.ToString();

        // Language detection is only attempted on sufficiently long text.
        if (text.Length >= mMinTextLen)
        {
            LanguageProfile profile = mLanguageDetector.DetectLanguage(text);
            if (profile != null)
            {
                document.Features.SetFeatureValue("detectedLanguage", profile.Language.ToString());
            }
        }

        if (text.Length > 0)
        {
            document.Features.SetFeatureValue("detectedCharRange", TextMiningUtils.GetCharRange(text));
        }
    }
    catch (Exception exception)
    {
        // Failures are logged and not propagated to the caller.
        mLogger.Error("ProcessDocument", exception);
    }
}
示例6: ProcessDocument
/// <summary>
/// Tokenizes the text of every <c>SRC_ANNOT_TYPE</c> block with
/// <c>mTokenizer</c> and adds one <c>DEST_ANNOT_TYPE</c> annotation per token.
/// </summary>
/// <param name="document">Document whose blocks are tokenized and which
/// receives the new annotations.</param>
protected override void ProcessDocument(Document document)
{
    foreach (TextBlock block in document.GetAnnotatedBlocks(SRC_ANNOT_TYPE))
    {
        mTokenizer.Text = block.Text;
        RegexTokenizer.Enumerator tokens = (RegexTokenizer.Enumerator)mTokenizer.GetEnumerator();
        while (tokens.MoveNext())
        {
            // Token indices are relative to the block text; shift them by the block's
            // start. The end position is inclusive, hence the "- 1".
            int first = block.SpanStart + tokens.CurrentTokenIdx;
            int last = first + tokens.Current.Length - 1;
            document.AddAnnotation(new Annotation(first, last, DEST_ANNOT_TYPE));
        }
    }
}
示例7: ProcessDocument
/*protected*/
/// <summary>
/// Runs <c>mAnnotator.ExtractEntities</c> over the text of every selected
/// block, passing the block's span start as the offset, and adds the returned
/// annotations to the document.
/// </summary>
/// <param name="document">Document to process. It is skipped unless its
/// "contentType" feature equals "Text".</param>
public override void ProcessDocument(Document document)
{
    if (document.Features.GetFeatureValue("contentType") != "Text")
    {
        return;
    }

    try
    {
        foreach (TextBlock block in document.GetAnnotatedBlocks(mBlockSelector))
        {
            document.AddAnnotations(mAnnotator.ExtractEntities(block.Text, /*offset=*/block.SpanStart));
        }
    }
    catch (Exception exception)
    {
        // Failures are logged and not propagated to the caller.
        mLogger.Error("ProcessDocument", exception);
    }
}
示例8: ProcessDocument
/// <summary>
/// Classifies the document (its name followed by the text of the selected
/// blocks) with <c>mClassifier</c> and stores the best score, normalized by
/// twice the average distance of the predicted side, in the "pumpIndex"
/// document feature.
/// </summary>
/// <param name="document">Document to process. It is skipped unless its
/// "contentType" feature equals "Text".</param>
/// <remarks>
/// The feature value is formatted with the invariant culture so that the
/// stored number does not change its decimal separator with the host locale.
/// </remarks>
public override void ProcessDocument(Document document)
{
    string contentType = document.Features.GetFeatureValue("contentType");
    if (contentType != "Text") { return; }
    try
    {
        // NOTE(review): the document name is followed directly by the first block's
        // text, with no separator in between -- confirm this is intended.
        StringBuilder text = new StringBuilder(document.Name);
        TextBlock[] blocks = document.GetAnnotatedBlocks(mBlockSelector);
        foreach (TextBlock block in blocks) { text.AppendLine(block.Text); }
        SparseVector<double> bow = mBowSpace.ProcessDocument(text.ToString());
        Prediction<int> p = mClassifier.Predict(bow);
        // A positive best label is normalized with mAvgDistPos, anything else with mAvgDistNeg.
        double avgDist = p.BestClassLabel > 0.0 ? mAvgDistPos : mAvgDistNeg;
        double nrmDist = p.BestScore / (2.0 * avgDist);
        // Culture-invariant formatting: a plain ToString() would emit e.g. "0,5"
        // under locales that use a decimal comma.
        document.Features.SetFeatureValue("pumpIndex", nrmDist.ToString(System.Globalization.CultureInfo.InvariantCulture));
    }
    catch (Exception e)
    {
        // Failures are logged and not propagated to the caller.
        mLogger.Error("ProcessDocument", e);
    }
}
示例9: ProcessDocument
/*protected*/
/// <summary>
/// Tokenizes the text of every selected block with <c>mTokenizer</c> and adds
/// one "Token" annotation per token.
/// </summary>
/// <param name="document">Document to process. It is skipped unless its
/// "contentType" feature equals "Text".</param>
public override void ProcessDocument(Document document)
{
    if (document.Features.GetFeatureValue("contentType") != "Text")
    {
        return;
    }

    try
    {
        foreach (TextBlock block in document.GetAnnotatedBlocks(mBlockSelector))
        {
            mTokenizer.Text = block.Text;
            RegexTokenizer.Enumerator tokens = (RegexTokenizer.Enumerator)mTokenizer.GetEnumerator();
            while (tokens.MoveNext())
            {
                // Token indices are relative to the block text; shift them by the
                // block's start. The end position is inclusive, hence the "- 1".
                int first = block.SpanStart + tokens.CurrentTokenIdx;
                int last = first + tokens.Current.Length - 1;
                document.AddAnnotation(new Annotation(first, last, "Token"));
            }
        }
    }
    catch (Exception exception)
    {
        // Failures are logged and not propagated to the caller.
        mLogger.Error("ProcessDocument", exception);
    }
}
示例10: ProcessDocument
/*protected*/
/// <summary>
/// Stores a "stem" and/or "lemma" feature on the annotation of every selected
/// block, depending on <c>mType</c>: the stemmer output for
/// <c>Type.PorterStemmer</c>, the lemmatizer output for
/// <c>Type.RdrLemmatizer</c>, and both for <c>Type.Both</c>.
/// </summary>
/// <param name="document">Document to process. It is skipped unless its
/// "contentType" feature equals "Text".</param>
public override void ProcessDocument(Document document)
{
    if (document.Features.GetFeatureValue("contentType") != "Text")
    {
        return;
    }

    try
    {
        bool wantStem = mType == Type.PorterStemmer || mType == Type.Both;
        bool wantLemma = mType == Type.RdrLemmatizer || mType == Type.Both;

        foreach (TextBlock block in document.GetAnnotatedBlocks(mBlockSelector))
        {
            if (wantStem)
            {
                string stem = mStemmer.GetStem(block.Text);
                block.Annotation.Features.SetFeatureValue("stem", stem);
            }
            if (wantLemma)
            {
                string lemma = mLemmatizer.GetStem(block.Text);
                block.Annotation.Features.SetFeatureValue("lemma", lemma);
            }
        }
    }
    catch (Exception exception)
    {
        // Failures are logged and not propagated to the caller.
        mLogger.Error("ProcessDocument", exception);
    }
}
示例11: ProcessDocument
/// <summary>
/// Ontology-based check for financial documents: sets the "isFinancial"
/// document feature to "True" when at least one "SentimentObject" block has an
/// annotation type that does not start with
/// "SentimentObject/GeographicalRegion", and to "False" otherwise.
/// </summary>
/// <param name="document">Document to process. It is skipped unless its
/// "contentType" feature equals "Text".</param>
public override void ProcessDocument(Document document)
{
    string contentType = document.Features.GetFeatureValue("contentType");
    if (contentType != "Text") { return; }
    try
    {
        bool isFinancial = false;
        foreach (TextBlock s in document.GetAnnotatedBlocks("SentimentObject"))
        {
            // Annotation types are identifiers, not natural-language text, so compare
            // them ordinally instead of with the current culture's rules.
            if (!s.Annotation.Type.StartsWith("SentimentObject/GeographicalRegion", StringComparison.Ordinal))
            {
                isFinancial = true;
                break; // one non-geographical sentiment object is enough
            }
        }
        document.Features.SetFeatureValue("isFinancial", isFinancial.ToString());
    }
    catch (Exception e)
    {
        // Failures are logged and not propagated to the caller.
        mLogger.Error("ProcessDocument", e);
    }
}
示例12: Text
/// <summary>
/// Returns the text of a stored document as a "text/plain" response, one block
/// per line.
/// </summary>
/// <param name="docId">Identifier validated by <c>CheckRequest</c>, which also
/// yields the file name to load.</param>
/// <param name="includeBoilerplate">When true, every "TextBlock" is emitted;
/// when false or omitted, only "TextBlock/Content" blocks are.</param>
/// <returns>The "Error" view when the request check fails; otherwise the
/// concatenated block text.</returns>
public ActionResult Text(string docId, bool? includeBoilerplate)
{
    string fileName;
    if (!CheckRequest(docId, out fileName))
    {
        return View("Error");
    }

    // A missing flag is treated as "false".
    bool withBoilerplate = includeBoilerplate ?? false;
    string selector = withBoilerplate ? "TextBlock" : "TextBlock/Content";

    Document doc = new Document("", "");
    doc.ReadXmlCompressed(fileName);

    StringBuilder body = new StringBuilder();
    foreach (TextBlock block in doc.GetAnnotatedBlocks(selector))
    {
        body.AppendLine(block.Text);
    }
    return Content(body.ToString(), "text/plain");
}
示例13: ProcessDocument
/// <summary>
/// Builds a bag-of-words vector from the document name followed by the text of
/// the selected blocks, collects the predicted categories via
/// <c>GetPredictedCategories</c>, and stores them as document features
/// "category1", "category2", ...
/// </summary>
/// <param name="document">Document to process. It is skipped unless its
/// "contentType" feature equals "Text".</param>
public override void ProcessDocument(Document document)
{
    if (document.Features.GetFeatureValue("contentType") != "Text")
    {
        return;
    }

    try
    {
        // *** document title used as part of content (make configurable?)
        StringBuilder content = new StringBuilder(document.Name);
        foreach (TextBlock block in document.GetAnnotatedBlocks(mBlockSelector))
        {
            content.AppendLine(block.Text);
        }

        SparseVector<double> docVec = mBowSpace.ProcessDocument(content.ToString());
        ArrayList<string> categories = new ArrayList<string>();
        GetPredictedCategories(/*prefix=*/"", mThresh, docVec, categories);

        // Feature keys are numbered from one.
        for (int idx = 0; idx < categories.Count; idx++)
        {
            document.Features.SetFeatureValue("category" + (idx + 1), categories[idx]);
        }
    }
    catch (Exception e)
    {
        // Failures are logged and not propagated to the caller.
        mLogger.Error("ProcessDocument", e);
    }
}