当前位置: 首页>>代码示例>>C#>>正文


C# Document.GetAnnotatedBlocks方法代码示例

本文整理汇总了C#中Document.GetAnnotatedBlocks方法的典型用法代码示例。如果您正苦于以下问题:C# Document.GetAnnotatedBlocks方法的具体用法?C# Document.GetAnnotatedBlocks怎么用?C# Document.GetAnnotatedBlocks使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在Document的用法示例。


在下文中一共展示了Document.GetAnnotatedBlocks方法的13个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C#代码示例。

示例1: ProcessDocument

 /*protected*/
 /// <summary>
 /// Passes the blocks selected by <c>mBlockSelector</c> to <c>ProcessTokens</c>,
 /// either all at once or, when <c>mTokenGroupSelector</c> is set, once per token group.
 /// </summary>
 /// <remarks>
 /// Documents whose "contentType" feature is not "Text" are left untouched.
 /// Any exception is handed to <c>mLogger.Error</c> and is not rethrown.
 /// </remarks>
 /// <param name="document">Document to process.</param>
 public override void ProcessDocument(Document document)
 {
     string kind = document.Features.GetFeatureValue("contentType");
     if (kind != "Text")
     {
         return;
     }
     try
     {
         if (mTokenGroupSelector != null)
         {
             // Grouped mode: the annotation index is created before the span-restricted
             // queries below, then each group's span is processed as its own batch.
             document.CreateAnnotationIndex();
             TextBlock[] groupBlocks = document.GetAnnotatedBlocks(mTokenGroupSelector);
             foreach (TextBlock groupBlock in groupBlocks)
             {
                 TextBlock[] blocksInGroup = document.GetAnnotatedBlocks(mBlockSelector, groupBlock.SpanStart, groupBlock.SpanEnd);
                 ProcessTokens(blocksInGroup);
             }
         }
         else
         {
             // No grouping configured: every selected block goes into a single batch.
             TextBlock[] allBlocks = document.GetAnnotatedBlocks(mBlockSelector);
             ProcessTokens(allBlocks);
         }
     }
     catch (Exception ex)
     {
         mLogger.Error("ProcessDocument", ex);
     }
 }
开发者ID:viidea,项目名称:latino,代码行数:28,代码来源:PosTaggerComponent.cs

示例2: ProcessDocument

 /// <summary>
 /// Copies the document's sentences (tokens, token start offsets and POS tags) into an
 /// <c>EntityRecognitionEngine.Document</c>, runs entity discovery on it, and adds one
 /// annotation to the document for every discovered entity that resolves to an instance.
 /// </summary>
 /// <remarks>
 /// Documents whose "contentType" feature is not "Text" are skipped. The selectors
 /// "Sentence" and "Token" and the feature name "posTag" are hardcoded. Any exception
 /// is handed to <c>mLogger.Error</c> and is not rethrown.
 /// </remarks>
 /// <param name="document">Document to read tokens from and to annotate.</param>
 public override void ProcessDocument(Document document)
 {
     string kind = document.Features.GetFeatureValue("contentType");
     if (kind != "Text")
     {
         return;
     }
     try
     {
         document.CreateAnnotationIndex();
         EntityRecognitionEngine.Document erDoc = new EntityRecognitionEngine.Document();
         foreach (TextBlock block in document.GetAnnotatedBlocks(mBlockSelector))
         {
             erDoc.BeginNewTextBlock();
             // *** sentence selector hardcoded
             foreach (TextBlock sentence in document.GetAnnotatedBlocks("Sentence", block.SpanStart, block.SpanEnd))
             {
                 ArrayList<string> words = new ArrayList<string>();
                 ArrayList<string> tags = new ArrayList<string>();
                 ArrayList<int> offsets = new ArrayList<int>();
                 // *** token selector hardcoded
                 foreach (TextBlock tok in document.GetAnnotatedBlocks("Token", sentence.SpanStart, sentence.SpanEnd))
                 {
                     words.Add(tok.Text);
                     // *** POS tag feature name hardcoded
                     tags.Add(tok.Annotation.Features.GetFeatureValue("posTag"));
                     offsets.Add(tok.SpanStart);
                 }
                 erDoc.AddSentence(words, offsets, tags);
             }
         }
         ArrayList<Pair<int, int>> spans;
         ArrayList<string> entities = erDoc.DiscoverEntities(mEntityRecognitionEngine, out spans);
         // entities[idx] is paired with spans[idx]; entries that do not resolve to an
         // instance are skipped but still consume their index.
         for (int idx = 0; idx < entities.Count; idx++)
         {
             string gazetteerUri = entities[idx];
             string instanceUri = mEntityRecognitionEngine.GetIdentifiedInstance(gazetteerUri);
             if (instanceUri == null)
             {
                 continue;
             }
             string annotationName = GetAnnotationName(mEntityRecognitionEngine.GetInstanceClassPath(instanceUri));
             Annotation entityAnnotation = new Annotation(spans[idx].First, spans[idx].Second, annotationName);
             document.AddAnnotation(entityAnnotation);
             entityAnnotation.Features.SetFeatureValue("gazetteerUri", gazetteerUri);
             entityAnnotation.Features.SetFeatureValue("instanceUri", instanceUri);
             entityAnnotation.Features.SetFeatureValue("instanceClassUri", mEntityRecognitionEngine.GetInstanceClass(instanceUri));
             // TODO: instanceLabel, instanceClassLabel
         }
     }
     catch (Exception ex)
     {
         mLogger.Error("ProcessDocument", ex);
     }
 }
开发者ID:project-first,项目名称:latinoworkflows,代码行数:49,代码来源:EntityRecognitionComponent.cs

示例3: ProcessDocument

 /*protected*/
 /// <summary>
 /// Runs <c>mSentenceDetector</c> over every block selected by <c>mBlockSelector</c>
 /// and adds a "Sentence" annotation for each detected sentence whose trimmed span
 /// is not empty.
 /// </summary>
 /// <remarks>
 /// Documents whose "contentType" feature is not "Text" are skipped. Any exception
 /// is handed to <c>mLogger.Error</c> and is not rethrown.
 /// </remarks>
 /// <param name="document">Document to split into sentences.</param>
 public override void ProcessDocument(Document document)
 {
     string kind = document.Features.GetFeatureValue("contentType");
     if (kind != "Text")
     {
         return;
     }
     try
     {
         foreach (TextBlock block in document.GetAnnotatedBlocks(mBlockSelector))
         {
             OpenNLP.Tools.Util.Pair<int, int>[] positions;
             string[] sentences = mSentenceDetector.SentenceDetect(block.Text, out positions);
             // sentences[idx] and positions[idx] describe the same sentence.
             for (int idx = 0; idx < positions.Length; idx++)
             {
                 OpenNLP.Tools.Util.Pair<int, int> pos = positions[idx];
                 int trimStart, trimEnd;
                 GetTrimOffsets(sentences[idx], out trimStart, out trimEnd);
                 // NOTE(review): FirstValue looks like the sentence's offset within the block
                 // and SecondValue like its length - confirm against the sentence detector.
                 int sentenceBase = block.SpanStart + pos.FirstValue;
                 int startIdx = sentenceBase + trimStart;
                 int endIdx = sentenceBase + (pos.SecondValue - 1) - trimEnd;
                 if (endIdx < startIdx)
                 {
                     // Nothing is left of the sentence after applying the trim offsets.
                     continue;
                 }
                 document.AddAnnotation(new Annotation(startIdx, endIdx, "Sentence"));
             }
         }
     }
     catch (Exception ex)
     {
         mLogger.Error("ProcessDocument", ex);
     }
 }
开发者ID:viidea,项目名称:latino,代码行数:32,代码来源:SentenceSplitterComponent.cs

示例4: ProcessDocument

 /// <summary>
 /// Categorizes the text of the blocks selected by <c>mBlockSelector</c> and stores
 /// the result as document features: "NumCategories" plus "Category0", "Category1", ...
 /// </summary>
 /// <remarks>
 /// Documents whose "contentType" feature is not "Text" are skipped. Any exception
 /// is handed to <c>mLogger.Error</c> and is not rethrown.
 /// </remarks>
 /// <param name="document">Document to categorize.</param>
 public override void ProcessDocument(Document document)
 {
     string kind = document.Features.GetFeatureValue("contentType");
     if (kind != "Text")
     {
         return;
     }
     try
     {
         // One line per selected block.
         StringBuilder content = new StringBuilder();
         foreach (TextBlock block in document.GetAnnotatedBlocks(mBlockSelector))
         {
             content.AppendLine(block.Text);
         }
         SparseVector<double> bow = mBowSpace.ProcessDocument(content.ToString());
         ArrayList<string> categories = new ArrayList<string>();
         Categorize(/*prefix=*/"", mTolerance, bow, categories);
         document.Features.SetFeatureValue("NumCategories", categories.Count.ToString());
         // Feature names are numbered from zero in list order.
         int index = 0;
         foreach (string category in categories)
         {
             document.Features.SetFeatureValue("Category" + index, category);
             index++;
         }
     }
     catch (Exception ex)
     {
         mLogger.Error("ProcessDocument", ex);
     }
 }
开发者ID:project-first,项目名称:latinoworkflows,代码行数:25,代码来源:DocumentCategorizerComponent.cs

示例5: ProcessDocument

 /*protected*/
 /// <summary>
 /// Detects the language and character range of the text in the blocks selected by
 /// <c>mBlockSelector</c> and stores them in the "detectedLanguage" and
 /// "detectedCharRange" document features.
 /// </summary>
 /// <remarks>
 /// Documents whose "contentType" feature is not "Text" are skipped. Language
 /// detection is attempted only when the text has at least <c>mMinTextLen</c>
 /// characters; the character range is set for any non-empty text. Any exception is
 /// handed to <c>mLogger.Error</c> and is not rethrown.
 /// </remarks>
 /// <param name="document">Document to inspect.</param>
 public override void ProcessDocument(Document document)
 {
     string kind = document.Features.GetFeatureValue("contentType");
     if (kind != "Text")
     {
         return;
     }
     StringBuilder content = new StringBuilder();
     try
     {
         // One line per selected block.
         foreach (TextBlock block in document.GetAnnotatedBlocks(mBlockSelector))
         {
             content.AppendLine(block.Text);
         }
         string text = content.ToString();
         if (text.Length >= mMinTextLen)
         {
             LanguageProfile profile = mLanguageDetector.DetectLanguage(text);
             if (profile != null)
             {
                 document.Features.SetFeatureValue("detectedLanguage", profile.Language.ToString());
             }
         }
         if (text.Length != 0)
         {
             document.Features.SetFeatureValue("detectedCharRange", TextMiningUtils.GetCharRange(text));
         }
     }
     catch (Exception ex)
     {
         mLogger.Error("ProcessDocument", ex);
     }
 }
开发者ID:project-first,项目名称:latinoworkflows,代码行数:32,代码来源:LanguageDetectorComponent.cs

示例6: ProcessDocument

 /// <summary>
 /// Tokenizes the text of every <c>SRC_ANNOT_TYPE</c> block with <c>mTokenizer</c> and
 /// adds one <c>DEST_ANNOT_TYPE</c> annotation per token.
 /// </summary>
 /// <param name="document">Document to tokenize; receives the token annotations.</param>
 protected override void ProcessDocument(Document document)
 {
     foreach (TextBlock block in document.GetAnnotatedBlocks(SRC_ANNOT_TYPE))
     {
         mTokenizer.Text = block.Text;
         RegexTokenizer.Enumerator tokens = (RegexTokenizer.Enumerator)mTokenizer.GetEnumerator();
         while (tokens.MoveNext())
         {
             // Token offsets are relative to the block; shift them by the block's start.
             int first = block.SpanStart + tokens.CurrentTokenIdx;
             int last = first + tokens.Current.Length - 1; // inclusive end
             document.AddAnnotation(new Annotation(first, last, DEST_ANNOT_TYPE));
         }
     }
 }
开发者ID:viidea,项目名称:latino,代码行数:15,代码来源:RegexTokenizerComponent.cs

示例7: ProcessDocument

 /*protected*/
 /// <summary>
 /// Asks <c>mAnnotator</c> to extract entities from every block selected by
 /// <c>mBlockSelector</c> and adds the returned annotations to the document.
 /// </summary>
 /// <remarks>
 /// Documents whose "contentType" feature is not "Text" are skipped. Any exception
 /// is handed to <c>mLogger.Error</c> and is not rethrown.
 /// </remarks>
 /// <param name="document">Document to annotate.</param>
 public override void ProcessDocument(Document document)
 {
     string kind = document.Features.GetFeatureValue("contentType");
     if (kind != "Text")
     {
         return;
     }
     try
     {
         foreach (TextBlock block in document.GetAnnotatedBlocks(mBlockSelector))
         {
             // The block's start is passed as the offset for the extracted annotations.
             ArrayList<Annotation> found = mAnnotator.ExtractEntities(block.Text, /*offset=*/block.SpanStart);
             document.AddAnnotations(found);
         }
     }
     catch (Exception ex)
     {
         mLogger.Error("ProcessDocument", ex);
     }
 }
开发者ID:viidea,项目名称:latino,代码行数:19,代码来源:SemanticAnnotationComponent.cs

示例8: ProcessDocument

 /// <summary>
 /// Computes the "pumpIndex" feature of a text document: the classifier's best score
 /// for the document's bag-of-words vector, divided by twice <c>mAvgDistPos</c> when
 /// the best class label is positive and by twice <c>mAvgDistNeg</c> otherwise.
 /// </summary>
 /// <remarks>
 /// Documents whose "contentType" feature is not "Text" are skipped. The document
 /// name and the blocks selected by <c>mBlockSelector</c> form the classified text,
 /// one per line. The value is written with the invariant culture so that it does
 /// not depend on the machine's regional settings. Any exception is handed to
 /// <c>mLogger.Error</c> and is not rethrown.
 /// </remarks>
 /// <param name="document">Document to score; receives the "pumpIndex" feature.</param>
 public override void ProcessDocument(Document document)
 {
     string contentType = document.Features.GetFeatureValue("contentType");
     if (contentType != "Text") { return; }
     try
     {
         StringBuilder text = new StringBuilder();
         // The title is part of the content but goes on its own line: seeding the
         // builder with it fused the title's last word with the first block's first word.
         string title = document.Name;
         if (!string.IsNullOrEmpty(title)) { text.AppendLine(title); }
         TextBlock[] blocks = document.GetAnnotatedBlocks(mBlockSelector);
         foreach (TextBlock block in blocks) { text.AppendLine(block.Text); }
         SparseVector<double> bow = mBowSpace.ProcessDocument(text.ToString());
         Prediction<int> p = mClassifier.Predict(bow);
         double nrmDist = p.BestScore / (2.0 * (p.BestClassLabel > 0.0 ? mAvgDistPos : mAvgDistNeg));
         // Feature values are machine-readable: format culture-independently so the
         // decimal separator is always '.' regardless of the current culture.
         document.Features.SetFeatureValue("pumpIndex", nrmDist.ToString(System.Globalization.CultureInfo.InvariantCulture));
     }
     catch (Exception e)
     {
         mLogger.Error("ProcessDocument", e);
     }
 }
开发者ID:project-first,项目名称:monitorpipeline,代码行数:19,代码来源:PumpIndexComponent.cs

示例9: ProcessDocument

 /*protected*/
 /// <summary>
 /// Tokenizes the text of every block selected by <c>mBlockSelector</c> with
 /// <c>mTokenizer</c> and adds one "Token" annotation per token.
 /// </summary>
 /// <remarks>
 /// Documents whose "contentType" feature is not "Text" are skipped. Any exception
 /// is handed to <c>mLogger.Error</c> and is not rethrown.
 /// </remarks>
 /// <param name="document">Document to tokenize; receives the token annotations.</param>
 public override void ProcessDocument(Document document)
 {
     string kind = document.Features.GetFeatureValue("contentType");
     if (kind != "Text")
     {
         return;
     }
     try
     {
         foreach (TextBlock block in document.GetAnnotatedBlocks(mBlockSelector))
         {
             mTokenizer.Text = block.Text;
             RegexTokenizer.Enumerator tokens = (RegexTokenizer.Enumerator)mTokenizer.GetEnumerator();
             while (tokens.MoveNext())
             {
                 // Token offsets are relative to the block; shift them by the block's start.
                 int first = block.SpanStart + tokens.CurrentTokenIdx;
                 int last = first + tokens.Current.Length - 1; // inclusive end
                 document.AddAnnotation(new Annotation(first, last, "Token"));
             }
         }
     }
     catch (Exception ex)
     {
         mLogger.Error("ProcessDocument", ex);
     }
 }
开发者ID:project-first,项目名称:latinoworkflows,代码行数:22,代码来源:RegexTokenizerComponent.cs

示例10: ProcessDocument

 /*protected*/
 /// <summary>
 /// Stores a "stem" and/or "lemma" feature on the annotation of every block selected
 /// by <c>mBlockSelector</c>, depending on <c>mType</c>.
 /// </summary>
 /// <remarks>
 /// Documents whose "contentType" feature is not "Text" are skipped.
 /// <c>Type.PorterStemmer</c> writes "stem" from <c>mStemmer</c>,
 /// <c>Type.RdrLemmatizer</c> writes "lemma" from <c>mLemmatizer</c>, and
 /// <c>Type.Both</c> writes both. Any exception is handed to <c>mLogger.Error</c>
 /// and is not rethrown.
 /// </remarks>
 /// <param name="document">Document whose selected annotations are enriched.</param>
 public override void ProcessDocument(Document document)
 {
     string kind = document.Features.GetFeatureValue("contentType");
     if (kind != "Text")
     {
         return;
     }
     try
     {
         TextBlock[] blocks = document.GetAnnotatedBlocks(mBlockSelector);
         // The mode does not change while iterating, so name the two decisions once.
         bool wantStem = mType == Type.PorterStemmer || mType == Type.Both;
         bool wantLemma = mType == Type.RdrLemmatizer || mType == Type.Both;
         foreach (TextBlock block in blocks)
         {
             if (wantStem)
             {
                 block.Annotation.Features.SetFeatureValue("stem", mStemmer.GetStem(block.Text));
             }
             if (wantLemma)
             {
                 block.Annotation.Features.SetFeatureValue("lemma", mLemmatizer.GetStem(block.Text));
             }
         }
     }
     catch (Exception ex)
     {
         mLogger.Error("ProcessDocument", ex);
     }
 }
开发者ID:viidea,项目名称:latino,代码行数:25,代码来源:EnglishLemmatizerComponent.cs

示例11: ProcessDocument

 /// <summary>
 /// Sets the document's "isFinancial" feature to "True" when the document has at
 /// least one "SentimentObject" block whose annotation type does not start with
 /// "SentimentObject/GeographicalRegion", and to "False" otherwise.
 /// </summary>
 /// <remarks>
 /// Documents whose "contentType" feature is not "Text" are skipped. Any exception
 /// is handed to <c>mLogger.Error</c> and is not rethrown.
 /// </remarks>
 /// <param name="document">Document to check; receives the "isFinancial" feature.</param>
 public override void ProcessDocument(Document document)
 {
     string contentType = document.Features.GetFeatureValue("contentType");
     if (contentType != "Text") { return; }
     try
     {
         // Ontology-based check for financial documents.
         bool isFinancial = false; // value of the document's "isFinancial" feature
         foreach (TextBlock s in document.GetAnnotatedBlocks("SentimentObject"))
         {
             // Annotation types are identifiers, not natural-language text: compare them
             // ordinally so the result does not depend on the current culture.
             if (!s.Annotation.Type.StartsWith("SentimentObject/GeographicalRegion", StringComparison.Ordinal))
             {
                 isFinancial = true;
                 break; // one non-geographical sentiment object is enough
             }
         }
         document.Features.SetFeatureValue("isFinancial", isFinancial.ToString());
     }
     catch (Exception e)
     {
         mLogger.Error("ProcessDocument", e);
     }
 }
开发者ID:project-first,项目名称:monitorpipeline,代码行数:24,代码来源:OntologyCategorizerComponent.cs

示例12: Text

 /// <summary>
 /// Returns the text of a stored document as a "text/plain" response, one line per
 /// text block.
 /// </summary>
 /// <param name="docId">Document identifier, validated by <c>CheckRequest</c>.</param>
 /// <param name="includeBoilerplate">
 /// When true, blocks are selected with "TextBlock"; when false or omitted, only
 /// "TextBlock/Content" blocks are selected.
 /// </param>
 /// <returns>The "Error" view if the request check fails; otherwise the plain-text content.</returns>
 public ActionResult Text(string docId, bool? includeBoilerplate)
 {
     string fileName;
     bool requestOk = CheckRequest(docId, out fileName);
     if (!requestOk)
     {
         return View("Error");
     }
     // A missing flag means "no boilerplate".
     bool withBoilerplate = includeBoilerplate ?? false;
     string selector;
     if (withBoilerplate)
     {
         selector = "TextBlock";
     }
     else
     {
         selector = "TextBlock/Content";
     }
     Document doc = new Document("", "");
     doc.ReadXmlCompressed(fileName);
     StringBuilder content = new StringBuilder();
     foreach (TextBlock block in doc.GetAnnotatedBlocks(selector))
     {
         content.AppendLine(block.Text);
     }
     return Content(content.ToString(), "text/plain");
 }
开发者ID:project-first,项目名称:documentviewerservice,代码行数:21,代码来源:DocumentViewerController.cs

示例13: ProcessDocument

 /// <summary>
 /// Predicts categories for the document's title and the blocks selected by
 /// <c>mBlockSelector</c>, and stores them as the document features "category1",
 /// "category2", ... in list order.
 /// </summary>
 /// <remarks>
 /// Documents whose "contentType" feature is not "Text" are skipped. Any exception
 /// is handed to <c>mLogger.Error</c> and is not rethrown.
 /// </remarks>
 /// <param name="document">Document to categorize.</param>
 public override void ProcessDocument(Document document)
 {
     string contentType = document.Features.GetFeatureValue("contentType");
     if (contentType != "Text") { return; }
     try
     {
         // *** document title used as part of content (make configurable?)
         // The title goes on its own line: seeding the builder with it fused the title's
         // last word with the first word of the first block.
         StringBuilder text = new StringBuilder();
         string title = document.Name;
         if (!string.IsNullOrEmpty(title)) { text.AppendLine(title); }
         TextBlock[] blocks = document.GetAnnotatedBlocks(mBlockSelector);
         foreach (TextBlock block in blocks) { text.AppendLine(block.Text); }
         SparseVector<double> docVec = mBowSpace.ProcessDocument(text.ToString());
         ArrayList<string> categories = new ArrayList<string>();
         GetPredictedCategories(/*prefix=*/"", mThresh, docVec, categories);
         int i = 0;
         foreach (string category in categories)
         {
             // Feature names are numbered from 1.
             string key = "category" + ++i;
             document.Features.SetFeatureValue(key, category);
         }
     }
     catch (Exception e)
     {
         mLogger.Error("ProcessDocument", e);
     }
 }
开发者ID:project-first,项目名称:monitorpipeline,代码行数:24,代码来源:DocumentCategorizerComponent.cs


注:本文中的Document.GetAnnotatedBlocks方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。