当前位置: 首页>>代码示例>>C#>>正文


C# HtmlDocument.DetectEncodingHtml方法代码示例

本文整理汇总了C#中HtmlAgilityPack.HtmlDocument.DetectEncodingHtml方法的典型用法代码示例。如果您正苦于以下问题:C# HtmlDocument.DetectEncodingHtml方法的具体用法?C# HtmlDocument.DetectEncodingHtml怎么用?C# HtmlDocument.DetectEncodingHtml使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在HtmlAgilityPack.HtmlDocument的用法示例。


在下文中一共展示了HtmlDocument.DetectEncodingHtml方法的3个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C#代码示例。

示例1: GetReadings

        public static string GetReadings(DateTime dateTime)
        {
            string date = "";
            date += dateTime.ToString("MM:dd:yy");
            date = date.Replace(":", "");

            WebClient w = new WebClient();
            string s = w.DownloadString("http://www.usccb.org/bible/readings/" + date + ".cfm");

            var htmldoc = new HtmlDocument();
            var encoding =  htmldoc.DetectEncodingHtml(s);

            htmldoc.LoadHtml(s);

            if (htmldoc.ParseErrors != null && htmldoc.ParseErrors.Any())
            {
                //handle errors
            }

                if (htmldoc.DocumentNode != null)
                {
                    HtmlNode bodyNode = htmldoc.DocumentNode.SelectSingleNode("//div[@id='CS_Element_maincontent']");
                    string returnString = bodyNode.WriteTo();

                    returnString = returnString.Replace("“", "\"");
                    returnString = returnString.Replace("”", "\"");
                    return returnString;
                }

            return null;
        }
开发者ID:j-hayes,项目名称:Blog,代码行数:31,代码来源:DailyReadingsScraper.cs

示例2: Retrive

 public List<BindData> Retrive(string Data)
 {
     List<BindData> list = new List<BindData>();
     try
     {
     XDocument doc = XDocument.Parse(Data);
         foreach (XElement ele in doc.Descendants("item"))
         {
             BindData d = new BindData();
             d.Tag = ele.Element("link").Value;
             d.Content = ele.Element("title").Value;
             string destocheck = ele.Element("description").Value;
             HtmlDocument HTdoc = new HtmlDocument();
             HTdoc.LoadHtml(destocheck);
             HTdoc.DetectEncodingHtml(destocheck);
             d.Description = HttpUtility.HtmlDecode(HTdoc.DocumentNode.InnerText);
             list.Add(d);
         }
         return list;
     }
     catch (Exception c)
     {
         list=null;
         return list;
     }
 }
开发者ID:jsandip57,项目名称:PinUrRss,代码行数:26,代码来源:XML.cs

示例3: GetLunchRestaurantDocumentForUrl

        /// <summary>
        /// Attempts to fetch and load a HtmlDocument for a given URL.
        /// Also determines the MIME-type for the stream and computes a hash if needed.
        /// </summary>
        /// <param name="url">URL to be loaded.</param>
        /// <param name="timeout">Timeout for HttpWebRequest in seconds.</param>
        public static LunchRestaurantDocument GetLunchRestaurantDocumentForUrl(string url, int timeout)
        {
            var document = new LunchRestaurantDocument { URL = url };
            var htmlDoc = new HtmlDocument();
            var allowedmimetypes = new[] { "text/html", "text/xml" };

            const int buffsize = 1024;

            try
            {
                var request = (HttpWebRequest)WebRequest.Create(GetUri(url));
                request.Timeout = timeout * 1000;
                using (var response = (HttpWebResponse)request.GetResponse())
                {
                    var headerEncoding = TryGetEncoding(response.ContentEncoding) ??
                                         TryGetEncoding(response.CharacterSet) ??
                                         Encoding.UTF8;

                    var buf = new byte[buffsize];
                    var ms = new MemoryStream();
                    var responseStream = response.GetResponseStream();
                    if (responseStream == null)
                    {
                        return null;
                    }
                    var count = responseStream.Read(buf, 0, buffsize);

                    document.MimeType = MimeDetector.DetermineMIMEType(buf);

                    if (Array.Exists(allowedmimetypes, mimetype => mimetype.Equals(document.MimeType)))
                    {
                        do
                            ms.Write(buf, 0, count);
                        while ((count = responseStream.Read(buf, 0, buffsize)) != 0);

                        var bytes = ms.GetBuffer();

                        var docEncoding = htmlDoc.DetectEncodingHtml(headerEncoding.GetString(bytes));
                        var convertedBytes = Encoding.Convert(docEncoding ?? headerEncoding, Encoding.Unicode, bytes);
                        var convertedData = Encoding.Unicode.GetString(convertedBytes);

                        htmlDoc.LoadHtml(convertedData);
                    }
                    else
                    {
                        _logger.Info("Discarded invalid mimetype '{0}' for URL: {1}", document.MimeType, url);
                    }
                }
            }
            catch
            {
                return null;
            }

            if (htmlDoc.ParseErrors != null && htmlDoc.ParseErrors.Count() > 0)
            {
                // TODO: handle any parse errors
            }

            if (htmlDoc.DocumentNode != null)
            {
                document.HtmlDocument = htmlDoc;

                // let's also compute a hash for the document
                document.Hash = ComputeHashForDocument(htmlDoc, url);
            }

            return document;
        }
开发者ID:mikkoj,项目名称:LunchCrawler,代码行数:75,代码来源:Utils.Html.cs


注:本文中的HtmlAgilityPack.HtmlDocument.DetectEncodingHtml方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。