当前位置: 首页>>代码示例>>C#>>正文


C# HtmlAgilityPack.HtmlWeb类代码示例

本文整理汇总了C#中HtmlAgilityPack.HtmlWeb的典型用法代码示例。如果您正苦于以下问题：C# HtmlWeb类的具体用法？C# HtmlWeb怎么用？C# HtmlWeb使用的例子？那么，这里精选的类代码示例或许可以为您提供帮助。


HtmlWeb类属于HtmlAgilityPack命名空间，在下文中一共展示了HtmlWeb类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的C#代码示例。

示例1: datascraper

        /// <summary>
        /// Downloads the BBC football results fragment and walks every table row that
        /// carries an <c>id</c> attribute, extracting the match id, score, teams and date.
        /// </summary>
        /// <remarks>
        /// The extracted values are only held in locals; nothing is stored or returned.
        /// </remarks>
        public datascraper()
        {
            string url = @"http://www.bbc.co.uk/sport/football/results/partial/competition-118996114";
            HtmlWeb htmlWeb = new HtmlWeb();

            // HtmlWeb.Load builds its own document, so no pre-configured HtmlDocument is needed.
            HtmlDocument doc = htmlWeb.Load(url);
            HtmlNodeCollection mtchrslts = doc.DocumentNode.SelectNodes("//tr[@id]");

            // SelectNodes yields null (not an empty collection) when nothing matches.
            if (mtchrslts == null)
            {
                return;
            }

            foreach (HtmlNode matchresult in mtchrslts)
            {
                // The leading "." keeps each lookup inside the current row; a bare "//"
                // would search the whole document and return the first match every time.
                HtmlNode scoreNode = matchresult.SelectSingleNode(".//abbr[@title='Score']");
                HtmlNode homeNode = matchresult.SelectSingleNode(".//p[(@class='team-home teams')]");
                HtmlNode awayNode = matchresult.SelectSingleNode(".//p[(@class='team-away teams')]");
                HtmlNode dateNode = matchresult.SelectSingleNode(".//td[(@class='match-date')]");

                // Skip rows that do not have the full set of expected cells.
                if (scoreNode == null || homeNode == null || awayNode == null || dateNode == null)
                {
                    continue;
                }

                // The row itself was selected by "//tr[@id]", so its id is read directly.
                string idmess = matchresult.Id;
                string idnum = idmess.Replace("match-row-", "");

                string score = scoreNode.InnerText;
                string[] teamscores = score.Split('-');
                if (teamscores.Length < 2)
                {
                    // Not in "home-away" form; nothing to split.
                    continue;
                }

                string teamscoreh = teamscores[0];
                string teamscorea = teamscores[1];
                string hteam = homeNode.InnerText;
                string ateam = awayNode.InnerText;
                string date = dateNode.InnerText;
            }
        }
开发者ID:mrwebed,项目名称:take1-minimumpoints4safety,代码行数:34,代码来源:datascraperController.cs

示例2: GetText2

        /// <summary>
        /// Loads a Yandex search-result page decoded as UTF-8 and writes the inner HTML of
        /// every <c>div</c> element to the console, or "found nothing" when there are none.
        /// </summary>
        public static void GetText2()
        {
            string html = "https://yandex.by/search/?numdoc=10&p=0&rdrnd=601861&text=kinogo.co%20Один%20дома%201990%20&lr=157";

            // Encoding detection is switched off and UTF-8 is forced
            // (the original author also noted windows-1251 as an alternative).
            var web = new HtmlWeb
            {
                AutoDetectEncoding = false,
                OverrideEncoding = Encoding.UTF8
            };

            HtmlDocument HD = web.Load(html);
            HtmlNodeCollection NoAltElements = HD.DocumentNode.SelectNodes("//div");

            // TODO: still needs finishing (original author's note).
            if (NoAltElements == null)
            {
                // SelectNodes returns null when the XPath matches nothing.
                Console.WriteLine("found nothing");
                return;
            }

            foreach (HtmlNode node in NoAltElements)
            {
                Console.WriteLine(node.InnerHtml);
            }
        }
开发者ID:horbel,项目名称:FilmCatalog,代码行数:30,代码来源:Parser.cs

示例3: GetServersFromMap

        /// <summary>
        /// Queries gametracker.com for Dota 2 servers by map and appends selected
        /// table-cell texts from the result page to <paramref name="list"/>.
        /// </summary>
        /// <param name="list">Collection the trimmed cell texts are appended to.</param>
        /// <param name="map">Map name; trimmed and concatenated into the query string as-is.</param>
        /// <returns>The same <paramref name="list"/> instance that was passed in.</returns>
        public static List<string> GetServersFromMap(List<string> list, string map)
        {
            HtmlWeb htmlWeb = new HtmlWeb();

            // Creates an HtmlDocument object from an URL
            HtmlAgilityPack.HtmlDocument document = htmlWeb.Load("http://www.gametracker.com/search/dota2/?search_by=map&query="+map.Trim()+"&searchipp=50");

            // SelectNodes returns null rather than an empty collection when nothing matches,
            // so every level falls back to an empty sequence instead of throwing.
            var cellTexts = from table in ((IEnumerable<HtmlNode>)document.DocumentNode.SelectNodes("//table") ?? Enumerable.Empty<HtmlNode>())
                            from row in ((IEnumerable<HtmlNode>)table.SelectNodes("tr") ?? Enumerable.Empty<HtmlNode>())
                            from cell in ((IEnumerable<HtmlNode>)row.SelectNodes("td") ?? Enumerable.Empty<HtmlNode>())
                            select cell.InnerText;

            bool started = false;
            bool stopped = true;
            foreach (string cellText in cellTexts)
            {
                // A "Rank" header cell toggles the stopped flag and ends the current capture run.
                if (cellText.Contains("Rank&darr"))
                {
                    stopped = !stopped;
                    started = false;
                }

                if (started && !stopped)
                {
                    list.Add(cellText.Trim());
                }

                // Capturing begins with the cell that follows the "Server Map" header cell.
                if (cellText.Contains("Server Map&nbsp;"))
                {
                    started = true;
                }
            }
            return list;
        }
开发者ID:Canardlaquay,项目名称:Dota2Mods-Client,代码行数:36,代码来源:Form1.cs

示例4: HentBilinformation

        /// <summary>
        /// Looks up a licence plate on nummerplade.net and copies the make, model, variant,
        /// chassis number, model year and plate cells into a <c>Bilinformation</c>.
        /// </summary>
        /// <param name="nummerplade">Plate number appended to the lookup URL.</param>
        /// <returns>The populated <c>Bilinformation</c>.</returns>
        /// <exception cref="IngenBilinformationException">
        /// Wraps any exception raised while loading or reading the page.
        /// </exception>
        public static Bilinformation HentBilinformation(string nummerplade)
        {
            try
            {
                Bilinformation result = new Bilinformation();

                HtmlDocument page = new HtmlWeb().Load("http://www.nummerplade.net/soeg/?regnr=" + nummerplade);
                HtmlNode root = page.DocumentNode;
                if (root == null)
                {
                    // No document node: hand back the untouched instance.
                    return result;
                }

                // A missing cell makes SelectSingleNode return null; the resulting
                // exception is converted by the catch block below.
                result.Maerke = root.SelectSingleNode("//td[@id='maerke']").InnerText;
                result.Model = root.SelectSingleNode("//td[@id='model']").InnerText;
                result.Variant = root.SelectSingleNode("//td[@id='variant']").InnerText;
                result.Stelnummer = root.SelectSingleNode("//td[@id='stelnr']").InnerText;
                result.Aargang = root.SelectSingleNode("//td[@id='model_aar']").InnerText;
                result.Nummerplade = root.SelectSingleNode("//td[@id='regnr']").InnerText;

                return result;
            }
            catch (Exception failure)
            {
                throw new IngenBilinformationException("Der blev ikke fundet nogen bilinformation på nummerpladen.", failure);
            }
        }
开发者ID:aroesdal,项目名称:Hovedopgave,代码行数:27,代码来源:Bilinformation.cs

示例5: crawlingPhase

        /// <summary>
        /// Processes one queued URL: if it has not been visited yet, loads the page, writes one
        /// table entry per title word, and enqueues the qualifying links found on the page.
        /// The message is always removed from the queue afterwards.
        /// </summary>
        /// <param name="urlMessage">Queue message whose string payload is the URL to crawl.</param>
        public void crawlingPhase(CloudQueueMessage urlMessage)
        {
            totalUrls++;
            String url = urlMessage.AsString;
            if (!alreadyVisitedUrls.Contains(url))
            {
                alreadyVisitedUrls.Add(url);
                try
                {
                    HtmlWeb hw = new HtmlWeb();
                    HtmlAgilityPack.HtmlDocument doc = hw.Load(url);

                    // Publication date: the first 10 characters of the og:pubdate content
                    // (yyyy-MM-dd). A missing or malformed value leaves the date empty
                    // instead of abandoning the whole page.
                    String date = "";
                    HtmlNode pubDateNode = doc.DocumentNode.SelectSingleNode("//head/meta[@property='og:pubdate']");
                    if (pubDateNode != null)
                    {
                        String content = pubDateNode.GetAttributeValue("content", "default");
                        DateTime published;
                        if (content.Length >= 10 &&
                            DateTime.TryParseExact(content.Substring(0, 10), "yyyy-MM-dd", CultureInfo.InvariantCulture, DateTimeStyles.None, out published))
                        {
                            // Invariant culture keeps "/" literal regardless of the host's locale.
                            date = published.ToString("yyyy/MM/dd", CultureInfo.InvariantCulture);
                        }
                    }

                    // A page without a <title> contributes no title entries but is still scanned for links.
                    HtmlNode titleNode = doc.DocumentNode.SelectSingleNode("//head/title");
                    String fullTitle = titleNode != null ? titleNode.InnerText : "";
                    String[] titles = fullTitle.Split(' ');
                    foreach (string partTitle in titles)
                    {
                        if (!partTitle.Equals(" ") && !partTitle.Equals("-") && !partTitle.Equals("CNN.com") && !partTitle.Equals(""))
                        {
                            CrawlerEntry entry = new CrawlerEntry(url, fullTitle, date, partTitle);
                            TableOperation insertOperation = TableOperation.Insert(entry);
                            table.Execute(insertOperation);
                            tableSize++;
                        }
                    }

                    //get urls in page
                    HtmlNodeCollection anchors = doc.DocumentNode.SelectNodes("//a[@href]");
                    if (anchors != null) // SelectNodes returns null when the page has no links
                    {
                        // Declared once per page so that repeated links are actually detected.
                        HashSet<String> links = new HashSet<String>();
                        foreach (HtmlNode link in anchors)
                        {
                            string href = link.GetAttributeValue("href", string.Empty);
                            String[] hrefSplit = href.Split('/');
                            String html = hrefSplit[hrefSplit.Length - 1];
                            //if the href is not in the disallowed urls, is not already crawled, is not a duplicate link, is a valid html page, and on cnn or bleacherreport
                            if (!disallowedUrls.Any(s => href.Contains(s)) && !alreadyVisitedUrls.Any(s => s.Equals(href)) && !links.Contains(href) && rgx.IsMatch(html) && (href.Contains("cnn.com") || href.Contains("bleacherreport.com")))
                            {
                                //store remaining into queue
                                urlQueue.AddMessage(new CloudQueueMessage(href));

                                //adds link to current link set
                                links.Add(href);
                            }
                        }
                    }
                }
                catch
                {
                    // Best effort: a page that fails to load or store is skipped so the
                    // bookkeeping below still runs and the message is removed from the queue.
                }
            }

            updateTotalUrls();
            //Update last 10 urls crawled
            updateLastUrl(urlMessage.AsString);

            urlQueue.DeleteMessage(urlMessage);
        }
开发者ID:kinderst,项目名称:Web-Service-Like-Google,代码行数:60,代码来源:Crawler.cs

示例6: GetFromTerra

        /// <summary>
        /// Fetches lyrics for the given artist and title from letras.mus.br.
        /// </summary>
        /// <param name="artist">Artist name; null is treated as empty and the value is lower-cased.</param>
        /// <param name="title">Song title; null is treated as empty and the value is lower-cased.</param>
        /// <returns>
        /// The HTML-decoded lyrics text with <c>&lt;br&gt;</c> turned into line breaks, or an
        /// empty string when the page contains no lyrics paragraph.
        /// </returns>
        public static string GetFromTerra(string artist, string title)
        {
            artist = (artist + "").ToLowerInvariant();
            title = (title + "").ToLowerInvariant();

            // Fetch the lyrics page
            HtmlWeb web = new HtmlWeb();
            HtmlDocument doc = web.Load(string.Format("http://letras.mus.br/winamp.php?t={0}-{1}", HttpUtility.UrlEncode(artist, ISOEncoding), HttpUtility.UrlEncode(title, ISOEncoding)));
            HtmlNode node = doc.DocumentNode.SelectSingleNode("//div[@id='letra']/p");

            if (node == null)
            {
                // Retry once with '&' replaced by 'e'. The retry cannot recurse again
                // because the replaced strings no longer contain '&'.
                if (artist.Contains("&") || title.Contains("&"))
                {
                    return GetFromTerra(artist.Replace('&', 'e'), title.Replace('&', 'e'));
                }

                // Nothing found and nothing left to try.
                return string.Empty;
            }

            // Lyrics found: convert explicit line breaks before the markup is stripped.
            node.InnerHtml = node.InnerHtml.Replace("<br>", "\r\n");

            return WebUtility.HtmlDecode(node.InnerText);
        }
开发者ID:mrmarino,项目名称:lyricsthingie,代码行数:26,代码来源:Providers.cs

示例7: getBibTex

        /// <summary>
        /// Reads the BibTeX paragraph from a "viewdoc" page.
        /// </summary>
        /// <param name="url">Page address; only URLs containing "viewdoc" are loaded.</param>
        /// <returns>
        /// The paragraph text with a line break after every comma and <c>&amp;nbsp;</c>
        /// replaced by spaces, or an empty string when the URL is of another kind, the
        /// paragraph is absent, or reading it fails.
        /// </returns>
        public string getBibTex(string url)
        {
            // Other page kinds, e.g. http://citeseer.ist.psu.edu/showciting?cid=2131272
            if (!url.Contains("viewdoc"))
            {
                return "";
            }

            // e.g. http://citeseer.ist.psu.edu/viewdoc/summary?doi=10.1.1.31.3487
            HtmlDocument doc = new HtmlWeb().Load(url);
            Console.WriteLine(doc != null ? "Document Loaded!" : "Load Error!");

            string bibtex = "";
            try
            {
                HtmlNode entry = doc.DocumentNode.SelectSingleNode("//*[@id=\"bibtex\"]/p");
                if (entry != null)
                {
                    bibtex = entry.InnerText.Replace(",", ",\n").Replace("&nbsp;", " ");
                }
            }
            catch (Exception)
            {
                // Any failure while reading the node leaves the result empty.
            }

            return bibtex;
        }
开发者ID:patwaria,项目名称:pubcite,代码行数:32,代码来源:CSXParser.cs

示例8: AddGithubJobs

        /// <summary>
        /// Loads a GitHub Jobs listing page and appends one <c>JobListing</c> per title cell
        /// of the "positionlist" table that has exactly five child nodes.
        /// </summary>
        /// <param name="url">Address of the listing page.</param>
        /// <param name="jobListings">List that receives the parsed entries.</param>
        private void AddGithubJobs(string url, List<JobListing> jobListings)
        {
            const string siteRoot = "https://jobs.github.com";

            var document = new HtmlWeb().Load(url);

            try
            {
                HtmlNode positions = document.DocumentNode.SelectSingleNode("//table[@class='positionlist']");
                HtmlNodeCollection titleCells = positions.SelectNodes(".//td[@class='title']");

                foreach (HtmlNode cell in titleCells)
                {
                    if (cell.ChildNodes.Count != 5)
                    {
                        continue;
                    }

                    // Child 1 holds the title and its link, child 3 holds the company.
                    HtmlNode titleNode = cell.ChildNodes[1];
                    HtmlNode companyNode = cell.ChildNodes[3];

                    string jobTitle = titleNode.InnerText;
                    string company = companyNode.ChildNodes[1].InnerText;
                    string jobUrl = siteRoot + titleNode.ChildNodes[0].Attributes[0].Value;

                    jobListings.Add(new JobListing()
                    {
                        SearchEngine = SearchEngines.GitHub,
                        Title = jobTitle,
                        Company = company,
                        URL = jobUrl
                    });
                }
            }
            catch
            {
                // Parsing is best effort: any failure (missing table, unexpected markup)
                // stops processing and keeps whatever was added so far.
            }
        }
开发者ID:rushfive,项目名称:teklist,代码行数:32,代码来源:ListBuilder.cs

示例9: GetProjectIds

        /// <summary>
        /// Retrieves list of Houzz project IDs
        /// </summary>
        /// <param name="username">Houzz Username</param>
        /// <returns>
        /// List of project IDs parsed from the sidebar links, or <c>null</c> when the page
        /// has no sidebar links at all.
        /// </returns>
        public IEnumerable<int> GetProjectIds(string username)
        {
            var url = string.Format("http://www.houzz.com/projects/users/{0}", username);
            HtmlDocument htmlDoc = new HtmlWeb().Load(url);

            var nodes = htmlDoc.DocumentNode.SelectNodes("//div[@class='sidebar-body']//a");

            // Null is kept as the "nothing found" result for existing callers.
            if (nodes == null || !nodes.Any())
            {
                return null;
            }

            var projects = new List<int>();

            foreach (var node in nodes.Skip(1)) // Skip "All Projects"
            {
                // Anchors without an href yield an empty string instead of null.
                var splicedUrl = node.GetAttributeValue("href", string.Empty).Split('/');

                // Index 4 needs at least five segments; links whose fifth segment is
                // not a number are skipped rather than aborting the whole lookup.
                int projectId;
                if (splicedUrl.Length > 4 && int.TryParse(splicedUrl[4], out projectId))
                {
                    projects.Add(projectId);
                }
            }

            return projects;
        }
开发者ID:devfunkd,项目名称:Houzz,代码行数:33,代码来源:ProjectService.cs

示例10: GetTimetableForYear

        /// <summary>
        /// Downloads and parses the timetable page for a study year and optional half-year.
        /// </summary>
        /// <param name="year">Study year; its enum name becomes part of the page file name.</param>
        /// <param name="halfYear">
        /// Half-year; its enum name is appended to the file name, except <c>None</c>, which adds nothing.
        /// </param>
        /// <returns>
        /// The parsed timetable, or <c>null</c> when a <c>WebException</c> or
        /// <c>NotSupportedException</c> was logged.
        /// </returns>
        public List<TimetableItem> GetTimetableForYear(StudyYear year, HalfYear halfYear = HalfYear.None)
        {
            List<TimetableItem> timetable;

            string tempYear = Enum.GetName(typeof(StudyYear), year);
            string tempHalfYear = Enum.GetName(typeof(HalfYear), halfYear);
            if (tempHalfYear == "None")
            {
                tempHalfYear = String.Empty;
            }

            try
            {
                HtmlWeb hw = new HtmlWeb();

                // Use the normalised half-year text; formatting the raw enum value would put
                // the literal "None" into the file name.
                HtmlDocument doc = hw.Load(String.Format("http://thor.info.uaic.ro/~orar/participanti/orar_{0}{1}.html", tempYear, tempHalfYear));

                // Line breaks are removed from the markup before the table is parsed.
                doc.DocumentNode.InnerHtml = doc.DocumentNode.InnerHtml.Replace("\r\n", "");

                timetable = ParseTable(doc, TimetableType.Year);
            }
            catch (WebException ex)
            {
                Logger.ExceptionLogger.Log(ex);
                timetable = null;
            }
            catch (NotSupportedException ex)
            {
                Logger.ExceptionLogger.Log(ex);
                timetable = null;
            }
            return timetable;
        }
开发者ID:alexkiro,项目名称:Timr,代码行数:28,代码来源:Parser.cs

示例11: addPageType

        /// <summary>
        /// Rebuilds <c>mCarTypeList</c> from the type nodes on the given page, starts one
        /// image-download thread per car type, then passes the matching price URL to <c>setPrice</c>.
        /// </summary>
        /// <param name="pageUrl">Page path appended to <c>WebConstants.BASE_URL</c>.</param>
        private void addPageType(String pageUrl)
        {
            mCarTypeList.Clear();

            HtmlNodeCollection typeNodes = new HtmlWeb()
                .Load(WebConstants.BASE_URL + pageUrl)
                .DocumentNode
                .SelectNodes(WebConstants.TYPE_NODE);

            if (typeNodes != null)
            {
                foreach (HtmlNode sourceNode in typeNodes)
                {
                    // Each node is re-parsed from its own markup before it is queried.
                    // NOTE(review): presumably so the XPath constants are evaluated against
                    // the fragment rather than the whole page; confirm.
                    HtmlNode detachedType = HtmlNode.CreateNode(sourceNode.OuterHtml);
                    CarType carType = new CarType(mCarFactory);

                    HtmlNode nameFragment = HtmlNode.CreateNode(
                        detachedType.SelectSingleNode(WebConstants.TYPE_NAME).OuterHtml);
                    carType.Name = nameFragment.SelectSingleNode(WebConstants.LINK_HREF).InnerText;

                    HtmlNode imageFragment = HtmlNode.CreateNode(
                        detachedType.SelectSingleNode(WebConstants.TYPE_IMAGE).OuterHtml);
                    carType.ImageUrl = imageFragment
                        .SelectSingleNode(WebConstants.IMAGE_SRC)
                        .Attributes[WebConstants.SRC]
                        .Value;

                    // The image download runs on its own thread.
                    Thread downloader = new Thread(new TypeImageDownloadTask(carType).Download);
                    downloader.Start();

                    mCarTypeList.Add(carType);
                }
            }

            setPrice(pageUrl.Replace(WebConstants.PHOTO, WebConstants.PRICE));
        }
开发者ID:SevanJoe,项目名称:CarImageDownloader,代码行数:25,代码来源:WebFactoryTask.cs

示例12: getNews

 /// <summary>
 /// Lazily reads the mangahere.com "latest" page and yields one <c>MangaData</c> per
 /// update entry, walking the entries from last to first.
 /// </summary>
 /// <param name="source">Source used to construct the manga data and its detail and chapter sources.</param>
 /// <returns>
 /// A sequence of manga with their listed chapters; empty when the page contains no update entries.
 /// </returns>
 public static IEnumerable<MangaData> getNews(Source source)
 {
     var web = new HtmlAgilityPack.HtmlWeb();
     web.AutoDetectEncoding = true;
     var htmlMainDoc = web.Load(@"http://www.mangahere.com/latest/");
     var itemsManga = htmlMainDoc.DocumentNode.SelectNodes(@"/html/body/section[@class='page_main']/div[@class='latest_released']/div[@class='manga_updates']/dl");

     // SelectNodes returns null when the layout does not match.
     if (itemsManga == null)
     {
         yield break;
     }

     // ">= 0" so that the first entry (index 0) is included as well.
     for (int i = itemsManga.Count - 1; i >= 0; i--)
     {
         var itemManga = itemsManga[i];
         var mangaNode = itemManga.SelectSingleNode(@"dt");
         var mangaLink = mangaNode != null ? mangaNode.SelectSingleNode("a") : null;
         if (mangaLink == null)
         {
             // Entry without a title link: nothing usable to report.
             continue;
         }

         MangaData manga = new MangaData(source, true);
         var mangaName = mangaLink.InnerHtml;
         var mangaDetailLink = mangaLink.GetAttributeValue("href", "");
         manga.DetailMangaSource = source.CreateDetailMangaSource(manga, mangaDetailLink);
         manga.Name = mangaName;

         var chapterItems = itemManga.SelectNodes("dd");
         if (chapterItems != null)
         {
             foreach (var itemChapter in chapterItems)
             {
                 var chapterAnchor = itemChapter.SelectSingleNode("a");
                 if (chapterAnchor == null)
                 {
                     continue;
                 }

                 ChapterData chapter = new ChapterData();
                 chapter.Name = chapterAnchor.InnerText;
                 chapter.ChapterSource = source.CreateChapterSource(chapterAnchor.GetAttributeValue("href", ""));
                 manga.ChaptersData.Add(chapter);
             }
         }

         yield return manga;
     }
 }
开发者ID:guipasmoi,项目名称:MangaTracker,代码行数:29,代码来源:MangaHere.cs

示例13: getChapters

        /// <summary>
        /// Loads a chapter's first page, starts loading every further page listed in its page
        /// selector, and collects the image URL of each page in order.
        /// </summary>
        /// <param name="source">Not used by this method.</param>
        /// <param name="link">URL of the chapter's first page.</param>
        /// <returns>A <c>ChapterData</c> whose <c>Images</c> holds one URL per page.</returns>
        public static ChapterData getChapters(Source source, string link)
        {
            ChapterData chapter = new ChapterData();
            var web = new HtmlAgilityPack.HtmlWeb();
            web.AutoDetectEncoding = true;
            var htmlpage1 = web.Load(link);

            var pages = new List<IObservable<HtmlDocument>>();
            pages.Add(Observable.Return(htmlpage1));

            var linksToPages = htmlpage1.DocumentNode.SelectNodes(@"/html/body/section[@class='readpage_top']/div[@class='go_page clearfix']/span[@class='right']/select[@class='wid60']/option");

            // SelectNodes returns null when there is no page selector; only page one exists then.
            if (linksToPages != null)
            {
                // Index 0 is skipped because the first page has already been loaded above.
                for (int i = 1; i < linksToPages.Count; i++)
                {
                    var linkToPage = linksToPages[i].GetAttributeValue("value", "");
                    pages.Add(Observable.Start<HtmlDocument>(
                        () =>
                        {
                            // Each background load gets its own HtmlWeb and touches no shared
                            // state, so the started loads cannot interfere with one another.
                            var pageWeb = new HtmlAgilityPack.HtmlWeb();
                            pageWeb.AutoDetectEncoding = true;
                            return pageWeb.Load(linkToPage);
                        }
                    ));
                }
            }

            // Waiting in list order keeps the images in page order.
            foreach (IObservable<HtmlDocument> item in pages)
            {
                HtmlDocument pagehtml = item.Wait();
                chapter.Images.Add(pagehtml.DocumentNode.SelectSingleNode(@"/html/body/section[@id='viewer']/a/img[@id='image']/@src").GetAttributeValue("src", ""));
            }
            return chapter;
        }
开发者ID:guipasmoi,项目名称:MangaTracker,代码行数:29,代码来源:MangaHere.cs

示例14: GetFaceBookLikes

        /// <summary>
        /// WebCrawl facebook to get likes from ordbogen.com page
        /// </summary>
        /// <returns>
        /// The parsed like count; <c>-1</c> when the count could not be located or parsed;
        /// <c>-3</c> when loading or reading the page threw an exception.
        /// </returns>
        public int GetFaceBookLikes()
        {
            const string searchStart = "omBeskedDelMere";
            const string searchEnd = " ";
            try
            {
                HtmlDocument doc = new HtmlWeb().Load("https://m.facebook.com/ordbogen");
                if (doc == null)
                {
                    return -1;
                }

                // SelectNodes returns null when the page contains no div at all.
                var divNodes = doc.DocumentNode.SelectNodes("//div");
                if (divNodes == null)
                {
                    return -1;
                }

                foreach (var div in divNodes)
                {
                    // Read InnerText once per node instead of on every use.
                    string text = div.InnerText;
                    if (!text.Contains("personer synes godt om dette"))
                    {
                        continue;
                    }

                    // The number sits between the start marker and the next space.
                    int markerIndex = text.IndexOf(searchStart, StringComparison.Ordinal);
                    if (markerIndex < 0)
                    {
                        // Marker missing: report "not found" rather than parsing unrelated text.
                        return -1;
                    }

                    int start = markerIndex + searchStart.Length;
                    int end = text.IndexOf(searchEnd, start, StringComparison.Ordinal);
                    if (end < 0)
                    {
                        // No trailing space: the number runs to the end of the text.
                        end = text.Length;
                    }

                    int numOfLikes;
                    return int.TryParse(text.Substring(start, end - start), out numOfLikes)
                        ? numOfLikes
                        : -1;
                }

                return -1;
            }
            catch (Exception)
            {
                return -3;
            }
        }
开发者ID:Ravnii,项目名称:ATISv2,代码行数:39,代码来源:SocialMediaStatistics.cs

示例15: _GetSerieMiniatureUrl

 /// <summary>
 /// Loads the series page and returns the <c>src</c> of its cover image.
 /// </summary>
 /// <param name="serie">Series whose <c>URL</c> is loaded.</param>
 /// <returns>
 /// The image address, or an empty string when the page has no cover image or the
 /// image has no <c>src</c> attribute.
 /// </returns>
 protected override string _GetSerieMiniatureUrl(Serie serie)
 {
     var web = new HtmlWeb();
     var doc = web.Load(serie.URL);
     var img = doc.DocumentNode.SelectSingleNode("//div[@id='series_info']/div[@class='cover']/img");

     // SelectSingleNode returns null when nothing matches; use the same empty default
     // that a missing attribute already produces.
     return img == null ? "" : img.GetAttributeValue("src", "");
 }
开发者ID:tundy,项目名称:MangaCrawler,代码行数:7,代码来源:MangaFoxCrawler.cs


注:本文中的HtmlAgilityPack.HtmlWeb类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。