本文整理汇总了C#中HtmlAgilityPack.GetElementbyId方法的典型用法代码示例。如果您正苦于以下问题:C# HtmlAgilityPack.GetElementbyId方法的具体用法?C# HtmlAgilityPack.GetElementbyId怎么用?C# HtmlAgilityPack.GetElementbyId使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类HtmlAgilityPack.HtmlDocument的用法示例。
在下文中一共展示了HtmlAgilityPack.GetElementbyId方法的5个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C#代码示例。
示例1: GetDescription
/// <summary>
/// Extracts the plain-text product description from a parsed product page.
/// </summary>
/// <remarks>
/// The element with id "productDescription" is used when present. Otherwise the node two
/// siblings after the element with id "pd-available" is read: its inner text is passed to
/// GetJavaIFrame (defined elsewhere), the result is URL-decoded and parsed as HTML, and
/// "productDescription" is looked up inside that parsed fragment.
/// </remarks>
/// <param name="Html">Parsed product page.</param>
/// <returns>
/// Inner text of the description element, or an empty string when no description is found
/// or the fallback fails.
/// </returns>
private string GetDescription(HtmlAgilityPack.HtmlDocument Html)
{
    var description = Html.GetElementbyId("productDescription");
    if (description == null)
    {
        try
        {
            // Walk to the fallback node with explicit null checks instead of relying on a
            // NullReferenceException being swallowed below.
            var anchor = Html.GetElementbyId("pd-available");
            var scriptNode = (anchor == null || anchor.NextSibling == null)
                ? null
                : anchor.NextSibling.NextSibling;
            if (scriptNode != null)
            {
                var frame = GetJavaIFrame(scriptNode.InnerText);
                var htmlcode = WebUtility.UrlDecode(frame);
                var doc = new HtmlAgilityPack.HtmlDocument();
                doc.LoadHtml(htmlcode);
                // Assign only the final lookup result: the intermediate script node must
                // never be returned as the description if a step above throws.
                description = doc.GetElementbyId("productDescription");
            }
        }
        catch (Exception)
        {
            // Deliberate best effort: a failure while decoding the embedded frame leaves
            // description null, so the caller gets an empty string.
        }
    }
    return description == null ? "" : description.InnerText;
}
示例2: Validate
/// <summary>
/// Checks that every id registered in elementXPath exists in the document and that the
/// element found for it has the XPath recorded for that id.
/// </summary>
/// <param name="document">Document to inspect.</param>
/// <param name="status">Not read by this method.</param>
/// <returns>
/// null when every registered element is present at its recorded XPath; otherwise the
/// concatenated messages, one per missing or mismatching element.
/// </returns>
public string Validate(HtmlAgilityPack.HtmlDocument document, HtmlStaticizeContext status)
{
    var report = new StringBuilder();
    foreach (var id in this.elementXPath.Keys)
    {
        String expectedXPath = elementXPath[id];
        var node = document.GetElementbyId(id);
        if (node == null)
        {
            // The id is registered but absent from the document.
            report.AppendFormat("\r\n元素 \"{0}\" 在文档中不存在。", id);
        }
        else if (node.XPath != expectedXPath)
        {
            // The element exists but sits at a different place in the tree.
            report.AppendFormat("\r\n元素 \"{0}\" XPath 不匹配,应为\"{1}\",但实际为\"{2}\"。\r\n行号:{3}\r\n源HTML:\r\n{4}\r\n", id, expectedXPath, node.XPath, node.Line.ToString(), node.OuterHtml);
        }
    }
    if (report.Length > 0)
    {
        return report.ToString();
    }
    return null;
}
示例3: parsecontent
/// <summary>
/// Downloads one content page, saves the inner HTML of its "read_tpc" element to a .txt
/// file, and saves each direct child img of that element as a numbered .jpg beside it.
/// </summary>
/// <remarks>
/// Nothing is downloaded when the .txt file already exists. The method is synchronous, so
/// the HTTP tasks are waited on with Result. Failures are logged to the console and never
/// propagate to the caller.
/// </remarks>
/// <param name="httpClient">Client used for the page and image requests.</param>
/// <param name="folderpath">Directory that receives the output files.</param>
/// <param name="htmlDoc">Document instance the page is loaded into.</param>
/// <param name="lstImgurl">Receives the URL of every image whose request was started.</param>
/// <param name="link">
/// Page link appended to baseurl. The text after its last '/' up to the first '.' is used
/// as the base name of the output files.
/// </param>
private static void parsecontent(HttpClient httpClient, string folderpath, HtmlAgilityPack.HtmlDocument htmlDoc, List<string> lstImgurl, string link)
{
    var name = link.Substring(link.LastIndexOf('/') + 1);
    int dot = name.IndexOf('.');
    // A link without any '.' used to make Substring throw; use the whole segment instead.
    string urlPageName = dot >= 0 ? name.Substring(0, dot) : name;
    string resname = Path.Combine(folderpath, urlPageName);
    string contenturl = baseurl + link;
    string txtfullname = resname + ".txt";
    if (File.Exists(txtfullname))
    {
        Console.WriteLine("page {0} has download =>{1}", link, urlPageName);
        return;
    }

    try
    {
        // Dispose the response stream once the document has been parsed.
        using (var pageStream = httpClient.GetStreamAsync(contenturl).Result)
        {
            htmlDoc.Load(pageStream, Encoding.UTF8);
        }
        Console.WriteLine("load html " + contenturl);
    }
    catch (Exception ex)
    {
        Console.WriteLine("load html error: " + ex.Message);
        return;
    }

    var contentNode = htmlDoc.GetElementbyId("read_tpc");
    if (contentNode == null)
    {
        // The page loaded but does not contain the expected container element.
        Console.WriteLine("load html error: element read_tpc not found in " + contenturl);
        return;
    }

    var content = contentNode.InnerHtml;
    try
    {
        // WriteAllBytes closes the file even when the write fails.
        File.WriteAllBytes(txtfullname, Encoding.UTF8.GetBytes(content));
    }
    catch (Exception ex)
    {
        Console.WriteLine("get torrent failed! " + ex.Message);
    }

    int imgIndex = 0;
    foreach (var img in contentNode.Elements("img"))
    {
        // Declared outside the try so the failure log can name the URL that failed.
        string imgurl = null;
        try
        {
            imgurl = img.Attributes["src"].Value;
            var imgstream = httpClient.GetStreamAsync(imgurl);
            var downImgname = resname + "-" + (++imgIndex) + ".jpg";
            lstImgurl.Add(imgurl);
            // Wait for the response before opening the file so a failed download does not
            // leave an empty file behind; both streams are released on every exit path.
            using (var source = imgstream.Result)
            using (FileStream fsimg = File.OpenWrite(downImgname))
            {
                source.CopyTo(fsimg);
                if (fsimg.Length < 100)
                {
                    // Tiny results are not reported as saved images.
                    continue;
                }
            }
            Console.WriteLine("save img => " + downImgname);
        }
        catch (Exception ex)
        {
            Console.WriteLine("get img {0} failed! {1} ", imgurl, ex.Message);
        }
    }
}
示例4: GetTitle
/// <summary>
/// Reads the product title from a parsed product page.
/// </summary>
/// <remarks>
/// The element with id "productTitle" is preferred; the element with id "btAsinTitle" is
/// used when the first one is absent.
/// </remarks>
/// <param name="Html">Parsed product page.</param>
/// <returns>
/// Trimmed inner text of the title element, or an empty string when neither id is present.
/// </returns>
private string GetTitle(HtmlAgilityPack.HtmlDocument Html)
{
    var title = Html.GetElementbyId("productTitle");
    if (title == null)
    {
        title = Html.GetElementbyId("btAsinTitle");
    }
    if (title == null)
    {
        // Neither id exists on this page; report "no title" instead of dereferencing null.
        return "";
    }
    return title.InnerText.Trim();
}
示例5: updateCraigslistInfoFromFullItemDetailsPage
/// <summary>
/// Populates this item from a parsed full item details page.
/// </summary>
/// <remarks>
/// Title comes from the first h2, DatePosted from the first time element, and the
/// description, image list, phone number and dodginess score from the element with id
/// "userbody". Parts of the page that are missing are skipped rather than throwing.
/// </remarks>
/// <param name="htmlDoc">Parsed item details page.</param>
/// <param name="url">URL of the page; stored as LinkURL.</param>
public void updateCraigslistInfoFromFullItemDetailsPage(HtmlAgilityPack.HtmlDocument htmlDoc, string url)
{
    HtmlAgilityPack.HtmlNode time = htmlDoc.DocumentNode.SelectSingleNode("//time");
    HtmlAgilityPack.HtmlNode title = htmlDoc.DocumentNode.SelectSingleNode("//h2");
    HtmlAgilityPack.HtmlNode bodyElement = htmlDoc.GetElementbyId("userbody");
    // Read the body text once; it is needed for both the image list and the description.
    string bodyText = bodyElement == null ? null : bodyElement.InnerText;

    //
    // Example HTML for these pages can be seen at URLs like:
    // view-source:http://vancouver.en.craigslist.ca/van/bik/3436265260.html - no images
    // view-source:http://vancouver.en.craigslist.ca/rds/bik/3451242524.html - got images
    //
    // The image URLs are taken from the text that follows "imgList =" in the body rather
    // than from the img tags. This is brittle string parsing.
    // TODO: revert to parsing the <img> tags, since iterating them is the better approach.
    //
    int markerIndex = bodyText == null ? -1 : bodyText.IndexOf("imgList =", StringComparison.Ordinal);
    if (markerIndex >= 0)
    {
        string scriptText = bodyText.Substring(markerIndex).Replace("\"", "").Replace("\n", String.Empty).Replace("\r", String.Empty);
        int open = scriptText.IndexOf('[');
        int close = open < 0 ? -1 : scriptText.IndexOf(']', open + 1);
        // Only parse when a complete [...] list follows the marker; Images is left as it
        // was otherwise.
        if (close > open)
        {
            string[] images = scriptText.Substring(open + 1, close - open - 1).Split(',');
            this.Images = images.ToList();
        }
    }

    //
    // The title has the format
    // <h2 class="postingtitle">2008 Kona Dawg - $1200 (Delta, BC)</h2>
    //
    if (title != null)
    {
        this.Title = title.InnerText;
    }
    this.LinkURL = url;
    if (time != null)
    {
        this.DatePosted = time.InnerText;
    }

    if (bodyElement == null)
    {
        // Without the body element there is nothing to derive the remaining fields from.
        return;
    }
    this.DescriptionHTML = bodyText.Replace("\t", String.Empty).Replace("\n", String.Empty);
    this.PhoneNumber = this.extractPhoneNumber(bodyElement);
    this.calculateDodginessScore(bodyElement);
}