本文整理汇总了C#中HtmlAgilityPack.HtmlDocument.LoadHtml方法的典型用法代码示例。如果您正苦于以下问题：C# HtmlAgilityPack.LoadHtml方法的具体用法？C# HtmlAgilityPack.LoadHtml怎么用？C# HtmlAgilityPack.LoadHtml使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类HtmlAgilityPack.HtmlDocument的用法示例。
在下文中一共展示了HtmlAgilityPack.LoadHtml方法的3个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C#代码示例。
示例1: get_wenti
private void get_wenti(HtmlNodeCollection dorlist, HtmlAgilityPack.HtmlDocument dorducoment, int k)
{
HtmlNode question_page;
HtmlNode ht1 = null;
HtmlNode ht2 = null;
HtmlNode ht3 = null;
String sql = "";
string wenti = "";
string desc = "";
//多个回答
try
{
string dordetailurl = dorlist[k].Attributes["href"].Value;//医生详细页面URL
string dordetailstring = "";
string wenti_bianhao = dordetailurl.Substring(31).Split('.')[0];
bool exist = false;
question_list.TryGetValue(wenti_bianhao, out exist);
if (exist)
{
return;
}
int trynum = 0;
while (trynum <= 5)
{
trynum++;
try
{
dordetailstring = CreateGetHttpResponse(dordetailurl, http_timeout, null, null);
//dordetailstring = dordetail.DownloadString(dordetailurl);
}
catch (System.Exception ex)
{
}
if (dordetailstring.Length > 99)
{
break;
}
Thread.Sleep(trynum * 1000);
}
if (dordetailstring.Length < 100)
{
File.AppendAllText(log_file1, "[ID = " + doc_num.ToString("D2") + "] "
+ "访问到该问题的网址无效:" + dordetailurl);
File.AppendAllText(log_file1, Environment.NewLine);
return;
}
dorducoment.LoadHtml(dordetailstring);
question_page = dorducoment.DocumentNode;
//获取问题和描述
ht1 = question_page.SelectSingleNode("//div[@class='b_askti']");
if (ht1 == null)
{
return;
}
ht2 = ht1.SelectSingleNode("h1");
wenti = ht2.InnerText.Trim().Replace("‘", " ").Replace("'", " ");
ht1 = question_page.SelectSingleNode("//div[@class='b_askcont']");
if (ht1 != null)
{
ht2 = ht1.SelectNodes("//p[@class='crazy_new']")[0];
desc = ht2.InnerText.Trim().Replace("‘", " ").Replace("'", " ");
}
//获取回答的网页
HtmlNodeCollection ht_collect = question_page.SelectNodes("//div[@class='crazy_new']");
Random rd = new Random();
//获取问题分类并将问题加入数据库
int cid = 0, cid1 = 0, cid2 = 0, cid3 = 0;
string dept = "";
if (get_questiong_category(question_page, ref dept, false))
{
get_category(dept, ref cid, ref cid1, ref cid2, ref cid3);
}
sql = "INSERT INTO ask_question set "
+ "cid='" + cid + "'"
+ ",cid1='" + cid1 + "'"
+ ",cid2='" + cid2 + "'"
+ ",cid3='" + cid3 + "'"
+ ",author='游客'"
+ ",authorid=433"
+ ",answers='" + ht_collect.Count.ToString() + "'"
+ ",title='" + (wenti) + "'"
+ ",description='" + (desc) + "'"
+ ",views='" + rd.Next(1000,10000000).ToString("D") + "'"
+ ",supply='" + "0" + "'"
+ ";commit;\n";
sql_exec(sql);
question_list.TryGetValue(wenti_bianhao, out exist);
if (!exist)
//.........这里部分代码省略.........
示例2: GetNextInSeriesTitle2
/// <summary>
/// Reads the series information from a book's Shelfari page, downloads the series page,
/// and returns the title of the next book in the series.
/// </summary>
/// <remarks>
/// When a series link is found, the series page is loaded into
/// <paramref name="searchHtmlDoc"/> itself, so on return the document no longer holds
/// the book page that was passed in.
/// </remarks>
/// <param name="searchHtmlDoc">Book's Shelfari page, pre-downloaded</param>
/// <returns>
/// The title of the next book in the series, or an empty string when the page has no
/// series info, the series page cannot be read, this is the last book, or no matching
/// entry is found in the series list.
/// </returns>
private string GetNextInSeriesTitle2(HtmlAgilityPack.HtmlDocument searchHtmlDoc)
{
    //Check if book's Shelfari page contains series info
    HtmlAgilityPack.HtmlNode seriesNode = searchHtmlDoc.DocumentNode.SelectSingleNode("//span[@class='series']");
    if (seriesNode == null)
    {
        return "";
    }

    //Series name and book number
    string series = seriesNode.InnerText.Trim();

    //Convert book number string (the text after the last space) to integer; stays 0 if it is not a number
    int currentSeriesIndex;
    Int32.TryParse(series.Substring(series.LastIndexOf(' ') + 1), out currentSeriesIndex);

    //Parse series Shelfari URL; the link may be missing, so do not dereference blindly
    HtmlAgilityPack.HtmlNode linkNode = seriesNode.SelectSingleNode("//span[@class='series']/a[@href]");
    string seriesURL = linkNode == null ? "" : linkNode.GetAttributeValue("href", "");
    if (String.IsNullOrEmpty(seriesURL))
    {
        return "";
    }

    //Short series name comes from the first child node; fall back to the full text if there is none
    string seriesShort = seriesNode.FirstChild == null
        ? series
        : seriesNode.FirstChild.InnerText.Trim();

    //Download the series page. The URL is passed as-is: running it through String.Format
    //would throw on any '{' or '}' it contains.
    string seriesHtml = HttpDownloader.GetPageHtml(seriesURL);
    if (String.IsNullOrEmpty(seriesHtml))
    {
        return "";
    }
    searchHtmlDoc.LoadHtml(seriesHtml);

    //Parse number of books in series and convert to integer
    int currentSeriesCount = 0;
    HtmlAgilityPack.HtmlNode countNode = searchHtmlDoc.DocumentNode.SelectSingleNode("//h2[@class='f_m']");
    if (countNode != null && countNode.FirstChild != null)
    {
        Match match = Regex.Match(countNode.FirstChild.InnerText.Trim(), @"\d+");
        if (match.Success)
        {
            Int32.TryParse(match.Value, out currentSeriesCount);
        }
    }

    //Check if there is a next book
    if (currentSeriesIndex >= currentSeriesCount)
    {
        return "";
    }

    //Add series name and book number to log
    main.Log(String.Format("This is book {0} of {1} in the {2} Series...",
        currentSeriesIndex, currentSeriesCount, seriesShort));

    //SelectNodes returns null (not an empty collection) when nothing matches
    HtmlAgilityPack.HtmlNodeCollection seriesItems = searchHtmlDoc.DocumentNode.SelectNodes(".//ol/li");
    if (seriesItems == null)
    {
        return "";
    }

    //Match the next book number as a whole number so that e.g. "2" does not match "12" or "20"
    string nextIndexPattern = @"(?<!\d)" + Regex.Escape((currentSeriesIndex + 1).ToString()) + @"(?!\d)";

    foreach (HtmlAgilityPack.HtmlNode seriesItem in seriesItems)
    {
        HtmlAgilityPack.HtmlNode numberNode = seriesItem.SelectSingleNode(".//div/span[@class='series bold']");
        if (numberNode == null || !Regex.IsMatch(numberNode.InnerText, nextIndexPattern))
        {
            continue;
        }

        //Parse title of the next book; stop searching if the matching entry has no title link
        HtmlAgilityPack.HtmlNode titleNode = seriesItem.SelectSingleNode(".//h3/a");
        if (titleNode == null)
        {
            return "";
        }
        string nextTitle = titleNode.InnerText.Trim();

        //Add next book in series to log
        main.Log(String.Format("The next book in this series is {0}!", nextTitle));
        return nextTitle;
    }

    return "";
}
示例3: get_doc
private void get_doc(HtmlNodeCollection dorlist, HtmlAgilityPack.HtmlDocument dorducoment, int k, string hos_jx)
{
HtmlNode dor;
HtmlNode ht1 = null;
HtmlNode ht2 = null;
HtmlNode ht3 = null;
HtmlNodeCollection hts = null;
string doc_key = "";
string name = "";
string dept = "";
string hos = "";
string skill = "";
string word = "";
string img = "";
string zhicheng = "";
string dordetailurl = "";
string dordetailstring = "";
try
{
dordetailurl = dorlist[k].SelectNodes("div")[0].
SelectSingleNode("a").Attributes["href"].Value;//医生详细页面URL
dordetailurl = dordetailurl.Replace('\t', ' ');
dordetailurl = dordetailurl.Replace('\r', ' ');
dordetailurl = dordetailurl.Replace('\n', ' ');
dordetailurl = dordetailurl.Replace(" ", "");
//http://www.guahao.com/expert/9a722630-0d6f-446f-9b4f-4bf89d54fee8000
doc_key = dordetailurl.Substring(29);
bool exist = false;
doc_list.TryGetValue(doc_key, out exist);
if (exist)
{
return;
}
WebClient wc = new WebClient();
wc.Encoding = Encoding.UTF8;
int i = 0;
while (i < 6)
{
try
{
dordetailstring = wc.DownloadString(dordetailurl);
}
catch (System.Exception ex)
{
}
if (dordetailstring.Length > 99)
{
break;
}
Thread.Sleep(i * 1000);
i++;
}
if (dordetailstring.Length < 2)
{
//MessageBox.Show("获取网页失败");
}
dorducoment.LoadHtml(dordetailstring);
dor = dorducoment.DocumentNode;
ht1 = dor.SelectSingleNode("//div[@class='detail word-break']");
if (ht1 != null)
{
name = ht1.SelectSingleNode("h1").SelectSingleNode("strong").InnerText;
HtmlNode tmp = ht1.SelectSingleNode("h1").SelectSingleNode("span");
if (tmp != null)
{
zhicheng = tmp.InnerText;
}
}
hts = dor.SelectNodes("/html/body/div/div/div/div/section/div/div/div/p/a[@target='_blank']");
if (hts != null)
{
if (hts.Count >= 2)
{
dept = hts[1].InnerText.Replace("\n", string.Empty).Replace("\r", string.Empty).Trim();
hos = hts[0].InnerText.Replace("\n", string.Empty).Replace("\r", string.Empty).Trim();
}
else
{
//MessageBox.Show("error");
}
}
else
{
//MessageBox.Show("error");
}
ht1 = dor.SelectSingleNode("//div[@class='detail word-break']");
ht2 = ht1.SelectSingleNode("//div[@class='goodat']");
ht3 = ht2.SelectSingleNode("span");
if (ht3 != null)
//.........这里部分代码省略.........