本文整理汇总了C#中HtmlAgilityPack.HtmlWeb类的典型用法代码示例。如果您正苦于以下问题：C# HtmlWeb类的具体用法？C# HtmlWeb怎么用？C# HtmlWeb使用的例子？那么，这里精选的类代码示例或许可以为您提供帮助。
HtmlWeb类属于HtmlAgilityPack命名空间，在下文中一共展示了HtmlWeb类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的C#代码示例。
示例1: datascraper
/// <summary>
/// Downloads the BBC football results fragment and walks every result row
/// (each <c>tr</c> element carrying an <c>id</c> attribute), extracting the match id,
/// the date, both team names and both halves of the score for that row.
/// </summary>
/// <remarks>
/// The extracted values are held in locals only and are discarded when the
/// constructor returns; this block neither stores nor returns them.
/// </remarks>
public datascraper()
{
    string url = @"http://www.bbc.co.uk/sport/football/results/partial/competition-118996114";
    HtmlWeb htmlWeb = new HtmlWeb();

    // The document returned by Load is the one that gets queried; a separately
    // constructed HtmlDocument would simply be overwritten here.
    HtmlDocument doc = htmlWeb.Load(url);

    // SelectNodes returns null (not an empty collection) when nothing matches.
    HtmlNodeCollection matchRows = doc.DocumentNode.SelectNodes("//tr[@id]");
    if (matchRows == null)
    {
        return;
    }

    foreach (HtmlNode matchRow in matchRows)
    {
        // The leading '.' scopes each query to the current row. A bare "//" is
        // evaluated from the document root and would return the first match of
        // the whole page on every iteration.
        HtmlNode scoreNode = matchRow.SelectSingleNode(".//abbr[@title='Score']");
        HtmlNode homeNode = matchRow.SelectSingleNode(".//p[(@class='team-home teams')]");
        HtmlNode awayNode = matchRow.SelectSingleNode(".//p[(@class='team-away teams')]");
        HtmlNode dateNode = matchRow.SelectSingleNode(".//td[(@class='match-date')]");
        if (scoreNode == null || homeNode == null || awayNode == null || dateNode == null)
        {
            // Row does not have the expected layout; skip it instead of crashing.
            continue;
        }

        string score = scoreNode.InnerText;
        string[] teamscores = score.Split('-');
        if (teamscores.Length < 2)
        {
            // No "home-away" separator in the score text.
            continue;
        }

        // The row itself is the element that carries the id attribute.
        string idmess = matchRow.Id;
        string idnum = idmess.Replace("match-row-", "");
        string teamscoreh = teamscores[0];
        string teamscorea = teamscores[1];
        string hteam = homeNode.InnerText;
        string ateam = awayNode.InnerText;
        string date = dateNode.InnerText;
    }
}
示例2: GetText2
/// <summary>
/// Loads a fixed Yandex search-results URL with the response decoding forced to
/// UTF-8 and writes the inner HTML of every <c>div</c> element to the console.
/// Writes "found nothing" when the page contains no <c>div</c> element.
/// </summary>
public static void GetText2()
{
    string html = "https://yandex.by/search/?numdoc=10&p=0&rdrnd=601861&text=kinogo.co%20Один%20дома%201990%20&lr=157";
    var web = new HtmlWeb
    {
        AutoDetectEncoding = false,
        OverrideEncoding = Encoding.UTF8 // alternative noted by the author: GetEncoding("windows-1251")
    };
    HtmlDocument document = web.Load(html);

    // TODO: unfinished - the selection below still needs refining.
    // SelectNodes returns null when nothing matches.
    HtmlNodeCollection divNodes = document.DocumentNode.SelectNodes("//div");
    if (divNodes == null)
    {
        Console.WriteLine("found nothing");
        return;
    }

    foreach (HtmlNode node in divNodes)
    {
        Console.WriteLine(node.InnerHtml);
    }
}
示例3: GetServersFromMap
/// <summary>
/// Queries gametracker.com for Dota 2 servers by map name and appends selected
/// table-cell texts from the result page to <paramref name="list"/>.
/// </summary>
/// <remarks>
/// Cells are visited table by table, row by row. Collection is driven by two marker
/// cells: a cell containing "Rank&amp;darr" toggles the stopped flag and resets the
/// started flag; a cell containing "Server Map " sets the started flag. A cell's
/// trimmed text is appended only while started is set and stopped is clear.
/// </remarks>
/// <param name="list">List that receives the collected cell texts.</param>
/// <param name="map">Map name to search for; it is trimmed and URL-escaped.</param>
/// <returns>The same <paramref name="list"/> instance that was passed in.</returns>
public static List<string> GetServersFromMap(List<string> list, string map)
{
    HtmlWeb htmlWeb = new HtmlWeb();

    // Escape the map name so characters such as '&', '#' or spaces cannot break
    // or extend the query string.
    string query = System.Uri.EscapeDataString((map ?? string.Empty).Trim());

    // Creates an HtmlDocument object from an URL
    HtmlAgilityPack.HtmlDocument document = htmlWeb.Load("http://www.gametracker.com/search/dota2/?search_by=map&query=" + query + "&searchipp=50");

    // SelectNodes returns null when nothing matches, so every level is checked.
    HtmlNodeCollection tables = document.DocumentNode.SelectNodes("//table");
    if (tables == null)
    {
        return list;
    }

    bool started = false;
    bool stopped = true;
    foreach (HtmlNode table in tables)
    {
        HtmlNodeCollection rows = table.SelectNodes("tr");
        if (rows == null)
        {
            continue;
        }

        foreach (HtmlNode row in rows)
        {
            HtmlNodeCollection cells = row.SelectNodes("td");
            if (cells == null)
            {
                continue;
            }

            foreach (HtmlNode cell in cells)
            {
                string cellText = cell.InnerText;

                // The order of these three checks matters: the marker cells
                // themselves are never appended to the list.
                if (cellText.Contains("Rank&darr"))
                {
                    stopped = !stopped;
                    started = false;
                }
                if (started && !stopped)
                {
                    list.Add(cellText.Trim());
                }
                if (cellText.Contains("Server Map "))
                {
                    started = true;
                }
            }
        }
    }
    return list;
}
示例4: HentBilinformation
/// <summary>
/// Looks up a licence plate on nummerplade.net and copies the text of the result
/// cells (make, model, variant, chassis number, model year, plate) into a new
/// <c>Bilinformation</c>.
/// </summary>
/// <param name="nummerplade">Licence plate appended to the lookup URL.</param>
/// <returns>The populated <c>Bilinformation</c>; left unpopulated if the page has no document node.</returns>
/// <exception cref="IngenBilinformationException">
/// Wraps any exception raised during the lookup, including a missing result cell.
/// </exception>
public static Bilinformation HentBilinformation(string nummerplade)
{
    try
    {
        var result = new Bilinformation();
        var lookupUrl = "http://www.nummerplade.net/soeg/?regnr=" + nummerplade;
        var document = new HtmlWeb().Load(lookupUrl);

        if (document.DocumentNode == null)
        {
            return result;
        }

        // A missing cell makes SelectSingleNode return null; the resulting
        // exception is converted by the catch block below.
        result.Maerke = document.DocumentNode.SelectSingleNode("//td[@id='maerke']").InnerText;
        result.Model = document.DocumentNode.SelectSingleNode("//td[@id='model']").InnerText;
        result.Variant = document.DocumentNode.SelectSingleNode("//td[@id='variant']").InnerText;
        result.Stelnummer = document.DocumentNode.SelectSingleNode("//td[@id='stelnr']").InnerText;
        result.Aargang = document.DocumentNode.SelectSingleNode("//td[@id='model_aar']").InnerText;
        result.Nummerplade = document.DocumentNode.SelectSingleNode("//td[@id='regnr']").InnerText;
        return result;
    }
    catch (Exception inner)
    {
        throw new IngenBilinformationException("Der blev ikke fundet nogen bilinformation på nummerpladen.", inner);
    }
}
示例5: crawlingPhase
/// <summary>
/// Processes one queued URL. If the URL has not been visited yet, the page is
/// loaded, one <c>CrawlerEntry</c> per title word is inserted into the table, and
/// every qualifying link on the page is added to the URL queue. The message is
/// always removed from the queue afterwards.
/// </summary>
/// <param name="urlMessage">Queue message whose text is the URL to crawl.</param>
public void crawlingPhase(CloudQueueMessage urlMessage)
{
    totalUrls++;
    String url = urlMessage.AsString;
    if (!alreadyVisitedUrls.Contains(url))
    {
        alreadyVisitedUrls.Add(url);
        try
        {
            HtmlWeb hw = new HtmlWeb();
            HtmlAgilityPack.HtmlDocument doc = hw.Load(url);

            // Publication date is optional; an absent, short or malformed value
            // leaves the date empty instead of aborting the whole page.
            String date = "";
            HtmlNode pubDateNode = doc.DocumentNode.SelectSingleNode("//head/meta[@property='og:pubdate']");
            if (pubDateNode != null)
            {
                String rawDate = pubDateNode.GetAttributeValue("content", string.Empty);
                DateTime parsedDate;
                if (rawDate.Length >= 10 &&
                    DateTime.TryParseExact(rawDate.Substring(0, 10), "yyyy-MM-dd", CultureInfo.InvariantCulture, DateTimeStyles.None, out parsedDate))
                {
                    // Invariant culture keeps '/' literal; with the current culture
                    // '/' is replaced by that culture's date separator.
                    date = parsedDate.ToString("yyyy/MM/dd", CultureInfo.InvariantCulture);
                }
            }

            // A page without a <title> produces no entries but is still scanned for links.
            HtmlNode titleNode = doc.DocumentNode.SelectSingleNode("//head/title");
            if (titleNode != null)
            {
                String fullTitle = titleNode.InnerText;
                String[] titles = fullTitle.Split(' ');
                foreach (string partTitle in titles)
                {
                    if (!partTitle.Equals(" ") && !partTitle.Equals("-") && !partTitle.Equals("CNN.com") && !partTitle.Equals(""))
                    {
                        CrawlerEntry entry = new CrawlerEntry(url, fullTitle, date, partTitle);
                        TableOperation insertOperation = TableOperation.Insert(entry);
                        table.Execute(insertOperation);
                        tableSize++;
                    }
                }
            }

            //get urls in page
            // SelectNodes returns null when the page has no anchors.
            HtmlNodeCollection anchors = doc.DocumentNode.SelectNodes("//a[@href]");
            if (anchors != null)
            {
                // One set for the whole page, so a link repeated on the page is
                // queued only once. Creating it per link made the check a no-op.
                HashSet<String> links = new HashSet<String>();
                foreach (HtmlNode link in anchors)
                {
                    string href = link.GetAttributeValue("href", string.Empty);
                    String[] hrefSplit = href.Split('/');
                    String html = hrefSplit[hrefSplit.Length - 1];
                    //if the href is not in the disallowed urls, is not already crawled, is not a duplicate link, is a valid html page, and on cnn or bleacherreport
                    if (!disallowedUrls.Any(s => href.Contains(s)) && !alreadyVisitedUrls.Any(s => s.Equals(href)) && !links.Contains(href) && rgx.IsMatch(html) && (href.Contains("cnn.com") || href.Contains("bleacherreport.com")))
                    {
                        //store remaining into queue
                        urlQueue.AddMessage(new CloudQueueMessage(href));
                        //adds link to current link set
                        links.Add(href);
                    }
                }
            }
        }
        catch (Exception ex)
        {
            // Crawling stays best-effort (one bad page must not stop the worker),
            // but the failure is recorded instead of vanishing silently.
            System.Diagnostics.Trace.TraceError("crawlingPhase failed for '{0}': {1}", url, ex);
        }
    }
    updateTotalUrls();
    //Update last 10 urls crawled
    updateLastUrl(urlMessage.AsString);
    urlQueue.DeleteMessage(urlMessage);
}
示例6: GetFromTerra
/// <summary>
/// Fetches song lyrics from letras.mus.br for the given artist and title.
/// </summary>
/// <remarks>
/// When no lyrics node is found and either argument contains '&amp;', the lookup is
/// retried once with every '&amp;' replaced by 'e'. The retry cannot recurse further
/// because the replaced strings no longer contain '&amp;'.
/// </remarks>
/// <param name="artist">Artist name; null is treated as an empty string.</param>
/// <param name="title">Song title; null is treated as an empty string.</param>
/// <returns>The HTML-decoded lyrics with line breaks as "\r\n", or an empty string when none are found.</returns>
public static string GetFromTerra(string artist, string title)
{
    artist = (artist + "").ToLowerInvariant();
    title = (title + "").ToLowerInvariant();

    // Fetch the lyrics page
    HtmlWeb web = new HtmlWeb();
    HtmlDocument doc = web.Load(string.Format("http://letras.mus.br/winamp.php?t={0}-{1}", HttpUtility.UrlEncode(artist, ISOEncoding), HttpUtility.UrlEncode(title, ISOEncoding)));
    HtmlNode node = doc.DocumentNode.SelectSingleNode("//div[@id='letra']/p");

    if (node == null)
    {
        if (artist.Contains("&") || title.Contains("&"))
        {
            return GetFromTerra(artist.Replace('&', 'e'), title.Replace('&', 'e'));
        }

        // Nothing found and nothing left to retry: report "no lyrics" rather
        // than dereferencing the missing node.
        return string.Empty;
    }

    // Lyrics were found: turn <br> into line breaks, then decode HTML entities.
    node.InnerHtml = node.InnerHtml.Replace("<br>", "\r\n");
    return WebUtility.HtmlDecode(node.InnerText);
}
示例7: getBibTex
/// <summary>
/// Reads the BibTeX entry from a "viewdoc" page and returns its text with a line
/// break inserted after every comma.
/// </summary>
/// <param name="url">Page address; only addresses containing "viewdoc" are loaded.</param>
/// <returns>
/// The reformatted BibTeX text, or an empty string when the address is not a
/// "viewdoc" page, the entry is absent, or reading it fails.
/// </returns>
public string getBibTex(string url)
{
    // e.g. http://citeseer.ist.psu.edu/showciting?cid=2131272
    if (!url.Contains("viewdoc"))
    {
        return "";
    }

    // e.g. http://citeseer.ist.psu.edu/viewdoc/summary?doi=10.1.1.31.3487
    HtmlWeb client = new HtmlWeb();
    HtmlDocument page = client.Load(url);
    Console.WriteLine(page != null ? "Document Loaded!" : "Load Error!");

    string bibtex = "";
    try
    {
        HtmlNode entry = page.DocumentNode.SelectSingleNode("//*[@id=\"bibtex\"]/p");
        if (entry != null)
        {
            // NOTE(review): the second Replace looks like a no-op as written;
            // it may originally have targeted a non-breaking space - confirm.
            bibtex = entry.InnerText.Replace(",", ",\n").Replace(" ", " ");
        }
    }
    catch (Exception)
    {
        // Any failure while reading the entry yields the empty result.
    }
    return bibtex;
}
示例8: AddGithubJobs
/// <summary>
/// Loads a GitHub Jobs listing page and appends one <c>JobListing</c> per
/// well-formed title cell of the "positionlist" table to <paramref name="jobListings"/>.
/// </summary>
/// <remarks>
/// Parsing is best-effort. A missing table or a row with an unexpected shape is
/// skipped, and any unexpected parsing failure is traced rather than propagated.
/// Exceptions raised while loading the page are not caught.
/// </remarks>
/// <param name="url">Address of the listing page.</param>
/// <param name="jobListings">List that receives the parsed listings.</param>
private void AddGithubJobs(string url, List<JobListing> jobListings)
{
    HtmlWeb page = new HtmlWeb();
    var document = page.Load(url);
    string baseURL = "https://jobs.github.com";
    try
    {
        // Both lookups return null when nothing matches.
        HtmlNode table = document.DocumentNode.SelectSingleNode("//table[@class='positionlist']");
        if (table == null)
        {
            return;
        }

        HtmlNodeCollection rows = table.SelectNodes(".//td[@class='title']");
        if (rows == null)
        {
            return;
        }

        foreach (HtmlNode row in rows)
        {
            if (row.ChildNodes.Count != 5)
            {
                continue;
            }

            HtmlNode titleNode = row.ChildNodes[1];
            HtmlNode companyHolder = row.ChildNodes[3];

            // Validate the shape up front so one malformed row is skipped
            // instead of aborting every remaining row via an exception.
            if (titleNode.ChildNodes.Count < 1 || companyHolder.ChildNodes.Count < 2)
            {
                continue;
            }

            HtmlNode linkNode = titleNode.ChildNodes[0];
            if (linkNode.Attributes.Count < 1)
            {
                continue;
            }

            jobListings.Add(new JobListing()
            {
                SearchEngine = SearchEngines.GitHub,
                Title = titleNode.InnerText,
                Company = companyHolder.ChildNodes[1].InnerText,
                // NOTE(review): relies on the link's first attribute being the
                // relative job URL (presumably href) - confirm against the markup.
                URL = baseURL + linkNode.Attributes[0].Value
            });
        }
    }
    catch (System.Exception ex)
    {
        // Keep the method non-throwing for parse problems, but leave a trail.
        System.Diagnostics.Trace.TraceWarning("AddGithubJobs could not parse '{0}': {1}", url, ex);
    }
}
示例9: GetProjectIds
/// <summary>
/// Retrieves list of Houzz project IDs
/// </summary>
/// <remarks>
/// IDs are read from the fifth '/'-separated segment of each sidebar link. Links
/// that are too short, have no href, or whose segment is not an integer are skipped.
/// </remarks>
/// <param name="username">Houzz Username</param>
/// <returns>List of project IDs, or <c>null</c> when the sidebar contains no links.</returns>
public IEnumerable<int> GetProjectIds(string username)
{
    var url = string.Format("http://www.houzz.com/projects/users/{0}", username);
    HtmlDocument htmlDoc = new HtmlWeb().Load(url);

    // SelectNodes returns null when nothing matches.
    var nodes = htmlDoc.DocumentNode.SelectNodes("//div[@class='sidebar-body']//a");
    if (nodes == null || !nodes.Any())
    {
        return null;
    }

    var projects = new List<int>();
    foreach (var node in nodes.Skip(1)) // Skip "All Projects"
    {
        // Empty default: an anchor without href must not cause a null dereference.
        string link = node.GetAttributeValue("href", string.Empty);
        var splicedUrl = link.Split('/');

        // Index 4 needs at least five segments (Length > 4); the previous
        // "Length >= 4" check let a four-segment link index out of range.
        int projectId;
        if (splicedUrl.Length > 4 && int.TryParse(splicedUrl[4], out projectId))
        {
            projects.Add(projectId);
        }
    }
    return projects;
}
示例10: GetTimetableForYear
/// <summary>
/// Downloads and parses the timetable page for a study year, optionally narrowed
/// to one half-year group.
/// </summary>
/// <param name="year">Study year; its enum name forms part of the page file name.</param>
/// <param name="halfYear">
/// Half-year group; its enum name is appended to the file name, except that
/// <c>None</c> contributes nothing.
/// </param>
/// <returns>
/// The parsed timetable, or <c>null</c> when loading fails with a
/// <see cref="WebException"/> or <see cref="NotSupportedException"/> (both are logged).
/// </returns>
public List<TimetableItem> GetTimetableForYear(StudyYear year, HalfYear halfYear = HalfYear.None)
{
    List<TimetableItem> timetable;
    string tempYear = Enum.GetName(typeof(StudyYear), year);
    string tempHalfYear = Enum.GetName(typeof(HalfYear), halfYear);
    if (tempHalfYear == "None")
    {
        tempHalfYear = String.Empty;
    }

    try
    {
        HtmlWeb hw = new HtmlWeb();

        // Use the normalised name. Formatting the raw enum value would put the
        // literal text "None" into the file name for the default argument.
        HtmlDocument doc = hw.Load(String.Format("http://thor.info.uaic.ro/~orar/participanti/orar_{0}{1}.html", tempYear, tempHalfYear));
        doc.DocumentNode.InnerHtml = doc.DocumentNode.InnerHtml.Replace("\r\n", "");
        timetable = ParseTable(doc, TimetableType.Year);
    }
    catch (WebException ex)
    {
        Logger.ExceptionLogger.Log(ex);
        timetable = null;
    }
    catch (NotSupportedException ex)
    {
        Logger.ExceptionLogger.Log(ex);
        timetable = null;
    }
    return timetable;
}
示例11: addPageType
/// <summary>
/// Rebuilds the car-type list from a page: clears the list, loads the page, and for
/// every node matched by <c>WebConstants.TYPE_NODE</c> creates a <c>CarType</c> with
/// its name and image URL, starts a thread that runs the image download task, and
/// adds the type to the list. Finally calls <c>setPrice</c> with the page address in
/// which <c>WebConstants.PHOTO</c> is replaced by <c>WebConstants.PRICE</c>.
/// </summary>
/// <param name="pageUrl">Page address relative to <c>WebConstants.BASE_URL</c>.</param>
private void addPageType(String pageUrl)
{
    mCarTypeList.Clear();

    HtmlWeb client = new HtmlWeb();
    HtmlDocument page = client.Load(WebConstants.BASE_URL + pageUrl);
    HtmlNodeCollection matches = page.DocumentNode.SelectNodes(WebConstants.TYPE_NODE);

    if (matches != null)
    {
        foreach (HtmlNode match in matches)
        {
            // Each fragment is re-parsed from its outer HTML before being queried.
            // NOTE(review): presumably so the XPath constants are evaluated against
            // the fragment alone rather than the whole page - confirm.
            HtmlNode fragment = HtmlNode.CreateNode(match.OuterHtml);
            CarType entry = new CarType(mCarFactory);

            HtmlNode nameHolder = fragment.SelectSingleNode(WebConstants.TYPE_NAME);
            HtmlNode nameFragment = HtmlNode.CreateNode(nameHolder.OuterHtml);
            entry.Name = nameFragment.SelectSingleNode(WebConstants.LINK_HREF).InnerText;

            HtmlNode imageHolder = fragment.SelectSingleNode(WebConstants.TYPE_IMAGE);
            HtmlNode imageFragment = HtmlNode.CreateNode(imageHolder.OuterHtml);
            HtmlNode imageTag = imageFragment.SelectSingleNode(WebConstants.IMAGE_SRC);
            entry.ImageUrl = imageTag.Attributes[WebConstants.SRC].Value;

            // The download is started on its own thread before the entry is listed.
            TypeImageDownloadTask download = new TypeImageDownloadTask(entry);
            Thread worker = new Thread(download.Download);
            worker.Start();

            mCarTypeList.Add(entry);
        }
    }

    setPrice(pageUrl.Replace(WebConstants.PHOTO, WebConstants.PRICE));
}
示例12: getNews
/// <summary>
/// Lazily reads the "latest releases" page of mangahere.com and yields one
/// <c>MangaData</c> per update entry, iterating the entries from last to first.
/// Each result carries its name, a detail source and the chapters listed for it.
/// </summary>
/// <param name="source">Source used to create the manga, detail and chapter sources.</param>
/// <returns>
/// A deferred sequence; the page is downloaded when enumeration starts. The sequence
/// is empty when the update list is not found, and entries without a title link are skipped.
/// </returns>
public static IEnumerable<MangaData> getNews(Source source)
{
    var web = new HtmlAgilityPack.HtmlWeb();
    web.AutoDetectEncoding = true;
    var htmlMainDoc = web.Load(@"http://www.mangahere.com/latest/");

    // SelectNodes returns null when the expected layout is not present.
    var itemsManga = htmlMainDoc.DocumentNode.SelectNodes(@"/html/body/section[@class='page_main']/div[@class='latest_released']/div[@class='manga_updates']/dl");
    if (itemsManga == null)
    {
        yield break;
    }

    // ">= 0" so that the first entry (index 0) is included as well.
    for (int i = itemsManga.Count - 1; i >= 0; i--)
    {
        var itemManga = itemsManga[i];
        var mangaNode = itemManga.SelectSingleNode(@"dt");
        var mangaLinkNode = mangaNode == null ? null : mangaNode.SelectSingleNode("a");
        if (mangaLinkNode == null)
        {
            // Entry without a title link: nothing usable to report.
            continue;
        }

        MangaData manga = new MangaData(source, true);
        var mangaName = mangaLinkNode.InnerHtml;
        var mangaDetailLink = mangaLinkNode.GetAttributeValue("href", "");
        manga.DetailMangaSource = source.CreateDetailMangaSource(manga, mangaDetailLink);
        manga.Name = mangaName;

        // An entry may list no chapters at all; SelectNodes then returns null.
        var chapterNodes = itemManga.SelectNodes("dd");
        if (chapterNodes != null)
        {
            foreach (var itemChapter in chapterNodes)
            {
                var chapterLinkNode = itemChapter.SelectSingleNode("a");
                if (chapterLinkNode == null)
                {
                    continue;
                }

                ChapterData chapter = new ChapterData();
                chapter.Name = chapterLinkNode.InnerText;
                chapter.ChapterSource = source.CreateChapterSource(chapterLinkNode.GetAttributeValue("href", ""));
                manga.ChaptersData.Add(chapter);
            }
        }
        yield return manga;
    }
}
示例13: getChapters
/// <summary>
/// Loads the first page of a chapter, starts the download of every further page
/// listed in the page selector, then waits for each page in order and adds its
/// image address to the returned <c>ChapterData</c>.
/// </summary>
/// <param name="source">Not used by this method.</param>
/// <param name="link">Address of the chapter's first page.</param>
/// <returns>A <c>ChapterData</c> whose <c>Images</c> holds one entry per page, in page order.</returns>
public static ChapterData getChapters(Source source, string link)
{
    ChapterData chapter = new ChapterData();
    var web = new HtmlAgilityPack.HtmlWeb();
    web.AutoDetectEncoding = true;
    var htmlpage1 = web.Load(link);

    var pages = new List<IObservable<HtmlDocument>>();
    pages.Add(Observable.Return(htmlpage1));

    // SelectNodes returns null when the page selector is absent; the chapter then
    // consists of the already-loaded first page only.
    var linksToPages = htmlpage1.DocumentNode.SelectNodes(@"/html/body/section[@class='readpage_top']/div[@class='go_page clearfix']/span[@class='right']/select[@class='wid60']/option");
    if (linksToPages != null)
    {
        // Index 0 is skipped: the first page has already been loaded above.
        for (int i = 1; i < linksToPages.Count; i++)
        {
            var linkToPage = linksToPages[i].GetAttributeValue("value", "");
            pages.Add(Observable.Start<HtmlDocument>(
                () =>
                {
                    // Each background download uses its own HtmlWeb instead of
                    // sharing one across concurrently running delegates, and no
                    // longer writes to the captured first-page variable.
                    var web2 = new HtmlAgilityPack.HtmlWeb();
                    web2.AutoDetectEncoding = true;
                    return web2.Load(linkToPage);
                }
            ));
        }
    }

    foreach (IObservable<HtmlDocument> item in pages)
    {
        // Blocks until this page's download has finished.
        HtmlDocument pagehtml = item.Wait();
        chapter.Images.Add(pagehtml.DocumentNode.SelectSingleNode(@"/html/body/section[@id='viewer']/a/img[@id='image']/@src").GetAttributeValue("src", ""));
    }
    return chapter;
}
示例14: GetFaceBookLikes
/// <summary>
/// Scrapes the mobile Facebook page of ordbogen and extracts the like count from
/// the first div whose text contains "personer synes godt om dette".
/// </summary>
/// <returns>
/// The parsed like count (0 when the extracted text is not an integer);
/// -1 when the document is null or no div contains the phrase;
/// -3 when any exception occurs.
/// </returns>
public int GetFaceBookLikes()
{
    const string marker = "omBeskedDelMere";
    const string terminator = " ";

    try
    {
        HtmlDocument page = new HtmlWeb().Load("https://m.facebook.com/ordbogen");
        if (page == null)
        {
            return -1;
        }

        foreach (var candidate in page.DocumentNode.SelectNodes("//div"))
        {
            string text = candidate.InnerText;
            if (!text.Contains("personer synes godt om dette"))
            {
                continue;
            }

            // The number sits between the marker and the next space.
            int from = text.IndexOf(marker, 0) + marker.Length;
            int to = text.IndexOf(terminator, from);
            string digits = text.Substring(from, to - from);

            int likes;
            int.TryParse(digits, out likes);
            return likes;
        }

        return -1;
    }
    catch (Exception)
    {
        return -3;
    }
}
示例15: _GetSerieMiniatureUrl
/// <summary>
/// Loads the series page and returns the <c>src</c> of the cover image found under
/// the "series_info" block.
/// </summary>
/// <param name="serie">Series whose <c>URL</c> is loaded.</param>
/// <returns>The image address, or an empty string when the image or its <c>src</c> is missing.</returns>
protected override string _GetSerieMiniatureUrl(Serie serie)
{
    var web = new HtmlWeb();
    var doc = web.Load(serie.URL);
    var img = doc.DocumentNode.SelectSingleNode("//div[@id='series_info']/div[@class='cover']/img");

    // SelectSingleNode returns null when the cover is absent; answer with the
    // same empty value already used for a missing src attribute.
    if (img == null)
    {
        return "";
    }
    return img.GetAttributeValue("src", "");
}