本文整理汇总了C#中System.Xml.Linq.XDocument.GetElementById方法的典型用法代码示例。如果您正苦于以下问题:C# XDocument.GetElementById方法的具体用法?C# XDocument.GetElementById怎么用?C# XDocument.GetElementById使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类System.Xml.Linq.XDocument的用法示例。
在下文中一共展示了XDocument.GetElementById方法的2个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C#代码示例。
示例1: AppendNextPage
/// <summary>
/// Recursively appends subsequent pages of a multipage article.
/// </summary>
/// <remarks>
/// Each call increments the page counter, fetches <paramref name="url"/>, transcodes it,
/// and appends the contents of its "readInner" element to the "readInner" element of
/// <paramref name="document"/> inside a new <c>div</c> with class "page". Once the page
/// counter exceeds the configured maximum, a "View Next Page" link to
/// <paramref name="url"/> is appended instead and the recursion stops.
/// </remarks>
/// <param name="document">Compiled document</param>
/// <param name="url">Url of current page</param>
private void AppendNextPage(XDocument document, string url)
{
    _curPageNum++;

    var contentDiv = document.GetElementById("readInner");

    if (contentDiv == null)
    {
        // Without the content container there is nothing to append to; bail out
        // before doing any fetching instead of failing later on a null reference.
        return;
    }

    if (_curPageNum > _MaxPages)
    {
        // Build the link as LINQ to XML nodes rather than parsing concatenated markup:
        // attribute values are escaped on serialization, so URLs containing '&', '<' or
        // quotes no longer break parsing, and an XElement (unlike an XDocument) is a
        // valid child of another XElement.
        contentDiv.Add(
            new XElement("div",
                new XAttribute("style", "text-align: center"),
                new XElement("a",
                    new XAttribute("href", url ?? string.Empty),
                    "View Next Page")));
        return;
    }

    string nextContent = _urlFetcher.Fetch(url);

    if (string.IsNullOrEmpty(nextContent))
    {
        return;
    }

    bool mainContentExtracted;
    string extractedTitle;
    string nextPageLink;

    var nextDocument = _transcoder.TranscodeToXml(nextContent, url, out mainContentExtracted, out extractedTitle, out nextPageLink);
    var nextInner = nextDocument == null ? null : nextDocument.GetElementById("readInner");

    if (nextInner == null)
    {
        // The transcoded page exposes no content container; nothing to merge.
        return;
    }

    // Drop the first h1 child of the next page's content, if there is one.
    var header = nextInner.Element("h1");

    if (header != null)
    {
        header.Remove();
    }

    /*
     * Anti-duplicate mechanism. Essentially, get the first paragraph of our new page.
     * Compare it against all of the previous documents we've gotten. If the previous
     * document contains exactly the text of this first paragraph, it's probably a duplicate.
     */
    var firstP = nextInner.GetElementsByTagName("p").FirstOrDefault();

    if (firstP != null && firstP.GetInnerHtml().Length > 100)
    {
        // TODO: This test could probably be improved to compare the actual markup.
        string existingContent = contentDiv.Value;
        string innerHtml = firstP.Value;

        // Ordinal search: this is an exact-containment check, not a linguistic comparison.
        if (!string.IsNullOrEmpty(existingContent)
            && !string.IsNullOrEmpty(innerHtml)
            && existingContent.IndexOf(innerHtml, System.StringComparison.Ordinal) != -1)
        {
            _parsedPages.Add(url);
            return;
        }
    }

    /* Add the content to the existing html */
    var nextDiv = new XElement("div");

    if (_pageSeparatorBuilder != null)
    {
        nextDiv.SetInnerHtml(_pageSeparatorBuilder(_curPageNum));
    }

    nextDiv.SetId(_PageIdPrefix + _curPageNum);
    nextDiv.SetClass("page");
    nextDiv.Add(nextInner.Nodes());
    contentDiv.Add(nextDiv);
    _parsedPages.Add(url);

    /* Only continue if we haven't already seen the next page */
    if (!string.IsNullOrEmpty(nextPageLink) && !_parsedPages.Contains(nextPageLink))
    {
        AppendNextPage(document, nextPageLink);
    }
}
示例2: TryFindArticleContentElement
/// <summary>
/// Looks up the article content element of <paramref name="document"/> by element id.
/// </summary>
/// <param name="document">Document to search.</param>
/// <param name="articleContentElementHint">Id passed to the element lookup.</param>
/// <returns>Whatever <c>GetElementById</c> yields for the given id.</returns>
/// <exception cref="ArgumentNullException"><paramref name="document"/> is null.</exception>
/// <exception cref="ArgumentException"><paramref name="articleContentElementHint"/> is null or empty.</exception>
private static XElement TryFindArticleContentElement(XDocument document, string articleContentElementHint)
{
    if (document == null)
    {
        throw new ArgumentNullException("document");
    }

    bool hintMissing = string.IsNullOrEmpty(articleContentElementHint);

    if (hintMissing)
    {
        throw new ArgumentException("Argument can't be null nor empty.", "articleContentElementHint");
    }

    // Lookup is by id. An earlier variant matched by tag name instead
    // (GetElementsByTagName(hint).FirstOrDefault()), but per the original author's note
    // that only works for HTML5 pages, and filtering by div id currently gives better
    // results on sites in China.
    XElement contentElement = document.GetElementById(articleContentElementHint);

    return contentElement;
}