当前位置: 首页>>代码示例>>C#>>正文


C# HtmlAgilityPack.LoadHtml方法代码示例

本文整理汇总了C#中HtmlAgilityPack.HtmlDocument.LoadHtml方法的典型用法代码示例。如果您正苦于以下问题:C# HtmlAgilityPack的LoadHtml方法的具体用法是什么?LoadHtml怎么用?有哪些LoadHtml的使用例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在的HtmlAgilityPack库的其他用法示例。


在下文中一共展示了HtmlAgilityPack.LoadHtml方法的3个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C#代码示例。

示例1: get_wenti

        /// <summary>
        /// Downloads the question page linked from <c>dorlist[k]</c>, extracts the question
        /// title, description and answer count, and inserts a row into <c>ask_question</c>
        /// through <c>sql_exec</c>. The tail of this method (including the handler for the
        /// outer <c>try</c>) is omitted from this excerpt, so only the visible part is described.
        /// </summary>
        /// <param name="dorlist">Node collection; the element at index <paramref name="k"/> must carry an <c>href</c> attribute.</param>
        /// <param name="dorducoment">Document object used to parse the downloaded page; its content is replaced by <c>LoadHtml</c>.</param>
        /// <param name="k">Index of the link inside <paramref name="dorlist"/> to process.</param>
        private void get_wenti(HtmlNodeCollection dorlist, HtmlAgilityPack.HtmlDocument dorducoment, int k)
        {
            HtmlNode question_page;
            HtmlNode ht1 = null;
            HtmlNode ht2 = null;
            HtmlNode ht3 = null; // not used in the visible part of the method

            String sql = "";
            string wenti = "";   // question title
            string desc = "";    // question description
            // multiple answers

            try
            {
                // The original comment called this the "doctor detail page URL"; in this
                // method it is the href of the question link at index k.
                string dordetailurl = dorlist[k].Attributes["href"].Value;
                string dordetailstring = "";

                // Question id: the text after the first 31 characters of the URL, up to the first '.'.
                // NOTE(review): presumably 31 is the length of a fixed URL prefix - confirm;
                // Substring throws if the URL is shorter than 31 characters.
                string wenti_bianhao = dordetailurl.Substring(31).Split('.')[0];

                // Skip questions whose id is already flagged true in question_list.
                bool exist = false;
                question_list.TryGetValue(wenti_bianhao, out exist);
                if (exist)
                {
                    return;
                }

                // Download with retries: up to 6 attempts (trynum runs 1..6), sleeping
                // trynum seconds after each attempt that yields fewer than 100 characters.
                int trynum = 0;
                while (trynum <= 5)
                {
                    trynum++;
                    try
                    {
                        dordetailstring = CreateGetHttpResponse(dordetailurl, http_timeout, null, null);
                        //dordetailstring = dordetail.DownloadString(dordetailurl);
                    }
                    catch (System.Exception ex)
                    {
                        // NOTE(review): the exception is swallowed without logging; the loop
                        // simply retries because dordetailstring stays short.
                    }
                    if (dordetailstring.Length > 99)
                    {
                        break;
                    }
                    Thread.Sleep(trynum * 1000);
                }

                // Responses shorter than 100 characters are treated as a failed download:
                // log the URL and give up on this question.
                if (dordetailstring.Length < 100)
                {
                    File.AppendAllText(log_file1, "[ID = " + doc_num.ToString("D2") + "] "
                                        + "访问到该问题的网址无效:" + dordetailurl);
                    File.AppendAllText(log_file1, Environment.NewLine);
                    return;
                }
                dorducoment.LoadHtml(dordetailstring);
                question_page = dorducoment.DocumentNode;

                // Extract the question title and description
                ht1 = question_page.SelectSingleNode("//div[@class='b_askti']");
                if (ht1 == null)
                {
                    return;
                }
                // NOTE(review): ht2 is dereferenced without a null check; a title block
                // without an <h1> child would throw here.
                ht2 = ht1.SelectSingleNode("h1");
                // Quote characters are replaced with spaces before the text is embedded in SQL below.
                wenti = ht2.InnerText.Trim().Replace("‘", " ").Replace("'", " ");

                ht1 = question_page.SelectSingleNode("//div[@class='b_askcont']");
                if (ht1 != null)
                {
                    // NOTE(review): an XPath starting with "//" is evaluated from the document
                    // root, not relative to ht1, and SelectNodes returns null when nothing
                    // matches - confirm this indexer cannot hit a null collection.
                    ht2 = ht1.SelectNodes("//p[@class='crazy_new']")[0];
                    desc = ht2.InnerText.Trim().Replace("‘", " ").Replace("'", " ");
                }

                // Collect the answer nodes of the page
                // NOTE(review): ht_collect may be null when the page has no matching div;
                // ht_collect.Count below would then throw.
                HtmlNodeCollection ht_collect = question_page.SelectNodes("//div[@class='crazy_new']");
                Random rd = new Random(); // used below to generate a random view count

                // Resolve the question category and insert the question into the database
                int cid = 0, cid1 = 0, cid2 = 0, cid3 = 0;
                string dept = "";
                if (get_questiong_category(question_page, ref dept, false))
                {
                    get_category(dept, ref cid, ref cid1, ref cid2, ref cid3);
                }

                // NOTE(review): the statement is built by string concatenation from scraped
                // text; only quote characters were stripped above. Parameterized SQL would be
                // safer if sql_exec supports it.
                sql = "INSERT INTO ask_question set "
                    + "cid='" + cid + "'"
                    + ",cid1='" + cid1 + "'"
                    + ",cid2='" + cid2 + "'"
                    + ",cid3='"  + cid3 + "'"
                    + ",author='游客'"
                    + ",authorid=433"
                    + ",answers='" + ht_collect.Count.ToString() + "'"
                    + ",title='" +  (wenti) + "'"
                    + ",description='" +  (desc) + "'"
                    + ",views='" + rd.Next(1000,10000000).ToString("D") + "'"
                    + ",supply='" + "0" + "'"
                    + ";commit;\n";
                sql_exec(sql);

                // Re-check the id after the insert; the branch body is not shown in this excerpt.
                question_list.TryGetValue(wenti_bianhao, out exist);
                if (!exist)
//.........这里部分代码省略.........
开发者ID:c-ber,项目名称:cber,代码行数:101,代码来源:Form1.cs

示例2: GetNextInSeriesTitle2

 /// <summary>
 /// Search Shelfari for series info, scrape the series page, and return the next title in the series.
 /// </summary>
 /// <param name="searchHtmlDoc">Book's Shelfari page, pre-downloaded. When a series link is
 /// found, this same document instance is reloaded with the series page HTML, so after the
 /// call it no longer holds the book page.</param>
 /// <returns>
 /// The title of the next book in the series, or an empty string when the book has no series
 /// information, the series page cannot be parsed, or there is no next book.
 /// </returns>
 private string GetNextInSeriesTitle2(HtmlAgilityPack.HtmlDocument searchHtmlDoc)
 {
     //Check if book's Shelfari page contains series info
     HtmlAgilityPack.HtmlNode node = searchHtmlDoc.DocumentNode.SelectSingleNode("//span[@class='series']");
     if (node == null)
     {
         return "";
     }

     //Series name and book number
     string series = node.InnerText.Trim();
     //Convert book number string (the text after the last space) to integer; stays 0 if it is not numeric
     int currentSeriesIndex = 0;
     Int32.TryParse(series.Substring(series.LastIndexOf(' ') + 1), out currentSeriesIndex);

     //Parse series Shelfari URL. The leading "//" makes this XPath document-rooted,
     //exactly as in the original lookup; a series span without a link yields null.
     HtmlAgilityPack.HtmlNode linkNode = node.SelectSingleNode("//span[@class='series']/a[@href]");
     if (linkNode == null)
     {
         return "";
     }
     string seriesURL = linkNode.GetAttributeValue("href", "");
     if (String.IsNullOrEmpty(seriesURL))
     {
         return "";
     }

     //Short series name comes from the first child of the series span; fall back to the
     //full text when the span has no child nodes.
     string seriesShort = node.FirstChild != null ? node.FirstChild.InnerText.Trim() : series;

     //Download the series page and load it into the same document.
     //The URL is passed as-is: running it through String.Format would throw on '{' or '}'.
     string seriesHtml = HttpDownloader.GetPageHtml(seriesURL);
     if (String.IsNullOrEmpty(seriesHtml))
     {
         return "";
     }
     searchHtmlDoc.LoadHtml(seriesHtml);

     //Parse number of books in series and convert to integer
     int currentSeriesCount = 0;
     node = searchHtmlDoc.DocumentNode.SelectSingleNode("//h2[@class='f_m']");
     if (node != null && node.FirstChild != null)
     {
         Match match = Regex.Match(node.FirstChild.InnerText.Trim(), @"\d+");
         if (match.Success)
         {
             Int32.TryParse(match.Value, out currentSeriesCount);
         }
     }

     //Check if there is a next book
     if (currentSeriesIndex >= currentSeriesCount)
     {
         return "";
     }

     //Add series name and book number to log
     main.Log(String.Format("This is book {0} of {1} in the {2} Series...",
         currentSeriesIndex, currentSeriesCount, seriesShort));

     //SelectNodes returns null (not an empty collection) when nothing matches
     HtmlAgilityPack.HtmlNodeCollection seriesItems = searchHtmlDoc.DocumentNode.SelectNodes(".//ol/li");
     if (seriesItems == null)
     {
         return "";
     }

     string nextIndexText = (currentSeriesIndex + 1).ToString();
     foreach (HtmlAgilityPack.HtmlNode seriesItem in seriesItems)
     {
         node = seriesItem.SelectSingleNode(".//div/span[@class='series bold']");
         //Substring match on the book number, first hit wins (same rule as before)
         if (node == null || !node.InnerText.Contains(nextIndexText))
         {
             continue;
         }

         HtmlAgilityPack.HtmlNode titleNode = seriesItem.SelectSingleNode(".//h3/a");
         if (titleNode == null)
         {
             continue;
         }

         //Parse title of the next book
         string nextTitle = titleNode.InnerText.Trim();
         //Add next book in series to log
         main.Log(String.Format("The next book in this series is {0}!", nextTitle));
         return nextTitle;
     }

     return "";
 }
开发者ID:cainstudios,项目名称:xray-builder.gui,代码行数:61,代码来源:EndActions.cs

示例3: get_doc

        /// <summary>
        /// Downloads the doctor detail page linked from <c>dorlist[k]</c> and extracts the
        /// doctor's name, title, hospital and department from it. The tail of this method
        /// (including the handler for the outer <c>try</c>) is omitted from this excerpt, so
        /// only the visible part is described.
        /// </summary>
        /// <param name="dorlist">Node collection; the element at index <paramref name="k"/> must contain a <c>div</c> whose <c>a</c> child carries the detail page <c>href</c>.</param>
        /// <param name="dorducoment">Document object used to parse the downloaded page; its content is replaced by <c>LoadHtml</c>.</param>
        /// <param name="k">Index of the entry inside <paramref name="dorlist"/> to process.</param>
        /// <param name="hos_jx">Not referenced in the visible part of the method.</param>
        private void get_doc(HtmlNodeCollection dorlist, HtmlAgilityPack.HtmlDocument dorducoment, int k, string hos_jx)
        {
            HtmlNode dor;
            HtmlNode ht1 = null;
            HtmlNode ht2 = null;
            HtmlNode ht3 = null;
            HtmlNodeCollection hts = null;

            string doc_key = "";
            string name = "";
            string dept = "";
            string hos = "";
            string skill = "";
            string word = "";
            string img = "";
            string zhicheng = ""; // doctor's title, read from the <span> inside the <h1> below

            string dordetailurl = "";
            string dordetailstring = "";
            try
            {

                // URL of the doctor detail page
                dordetailurl = dorlist[k].SelectNodes("div")[0].
                                      SelectSingleNode("a").Attributes["href"].Value;
                // Strip tabs, line breaks and spaces from the href value.
                dordetailurl = dordetailurl.Replace('\t', ' ');
                dordetailurl = dordetailurl.Replace('\r', ' ');
                dordetailurl = dordetailurl.Replace('\n', ' ');
                dordetailurl = dordetailurl.Replace(" ", "");

                // Sample URL; its "http://www.guahao.com/expert/" prefix is 29 characters long,
                // so Substring(29) keeps the trailing identifier as the document key.
                //http://www.guahao.com/expert/9a722630-0d6f-446f-9b4f-4bf89d54fee8000
                doc_key = dordetailurl.Substring(29);
                // Skip doctors whose key is already flagged true in doc_list.
                bool exist = false;
                doc_list.TryGetValue(doc_key, out exist);
                if (exist)
                {
                    return;
                }

                // NOTE(review): WebClient is IDisposable and is not disposed in the visible code.
                WebClient wc = new WebClient();
                wc.Encoding = Encoding.UTF8;
                // Download with retries: up to 6 attempts, sleeping i seconds after a short
                // result (so the first retry happens without delay, since i is 0).
                int i = 0;
                while (i < 6)
                {
                    try
                    {
                        dordetailstring = wc.DownloadString(dordetailurl);
                    }
                    catch (System.Exception ex)
                    {
                        // NOTE(review): the exception is swallowed without logging; the loop
                        // simply retries because dordetailstring stays short.

                    }
                    if (dordetailstring.Length > 99)
                    {
                        break;
                    }
                    Thread.Sleep(i * 1000);
                    i++;
                }

                // NOTE(review): this failure check has no effect (its only statement is
                // commented out); parsing continues even with an empty download.
                if (dordetailstring.Length < 2)
                {
                    //MessageBox.Show("获取网页失败");
                }

                dorducoment.LoadHtml(dordetailstring);
                dor = dorducoment.DocumentNode;

                // Name and title come from the <h1> inside the detail block.
                ht1 = dor.SelectSingleNode("//div[@class='detail word-break']");
                if (ht1 != null)
                {
                    // NOTE(review): the <h1> and <strong> lookups are dereferenced without null checks.
                    name = ht1.SelectSingleNode("h1").SelectSingleNode("strong").InnerText;
                    HtmlNode tmp = ht1.SelectSingleNode("h1").SelectSingleNode("span");
                    if (tmp != null)
                    {
                        zhicheng = tmp.InnerText;
                    }
                }

                // Hospital and department: the first matching link is stored as the hospital,
                // the second as the department, with line breaks removed and whitespace trimmed.
                hts = dor.SelectNodes("/html/body/div/div/div/div/section/div/div/div/p/a[@target='_blank']");
                if (hts != null)
                {
                    if (hts.Count >= 2)
                    {
                        dept = hts[1].InnerText.Replace("\n", string.Empty).Replace("\r", string.Empty).Trim();
                        hos = hts[0].InnerText.Replace("\n", string.Empty).Replace("\r", string.Empty).Trim();
                    }
                    else
                    {
                        //MessageBox.Show("error");
                    }
                }
                else
                {
                    //MessageBox.Show("error");
                }

                // NOTE(review): unlike the block above, ht1 and ht2 are not null-checked here;
                // a page without the detail or 'goodat' block would throw. The "//" XPath on
                // ht1 is evaluated from the document root, not relative to ht1.
                ht1 = dor.SelectSingleNode("//div[@class='detail word-break']");
                ht2 = ht1.SelectSingleNode("//div[@class='goodat']");
                ht3 = ht2.SelectSingleNode("span");
                if (ht3 != null)
//.........这里部分代码省略.........
开发者ID:c-ber,项目名称:cber,代码行数:101,代码来源:Form1.cs


注:本文中的HtmlAgilityPack.LoadHtml方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。