本文整理汇总了 C# 中 BaseLib.GlobusHttpHelper.GetTextDataByTagAndAttributeName 方法的典型用法代码示例。如果您正苦于以下问题:C# GlobusHttpHelper.GetTextDataByTagAndAttributeName 方法的具体用法?C# GlobusHttpHelper.GetTextDataByTagAndAttributeName 怎么用?C# GlobusHttpHelper.GetTextDataByTagAndAttributeName 使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 BaseLib.GlobusHttpHelper 的用法示例。
在下文中一共展示了 GlobusHttpHelper.GetTextDataByTagAndAttributeName 方法的 2 个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的 C# 代码示例。
示例1: CrawlingLinkedInPage
//.........这里部分代码省略.........
{
int startindex3 = item.IndexOf("startdate_my");
string start3 = item.Substring(startindex3).Replace("startdate_my", "");
int endindex3 = start3.IndexOf(",");
SessionStart = start3.Substring(0, endindex3).Replace("\\u002d", string.Empty).Replace(":", string.Empty).Replace("\"", string.Empty);
}
catch { }
if (SessionStart == string.Empty && SessionEnd == string.Empty)
{
Education = " [" + School + "] Degree: " + Degree;
}
else
{
Education = " [" + School + "] Degree: " + Degree + " Session: " + SessionStart + "-" + SessionEnd;
}
//University = item.Substring(item.IndexOf(":"), (item.IndexOf(",", item.IndexOf(":")) - item.IndexOf(":"))).Replace(":", string.Empty).Replace("\\u002d", string.Empty).Replace("\"", string.Empty).Replace(",", string.Empty).Replace(":", string.Empty).Trim();
}
catch { }
EducationList.Add(Education);
}
}
else
{
str_UniversityName = Regex.Split(stringSource, "<div class=\"education");
foreach (string tempItem in str_UniversityName)
{
try
{
if (!tempItem.Contains("<!DOCTYPE html>"))
{
List<string> lstSchool = HttpHelper.GetTextDataByTagAndAttributeName(tempItem, "h4", "summary fn org");
List<string> lstDegree = HttpHelper.GetTextDataByTagAndAttributeName(tempItem, "span", "degree");
List<string> lstSession = HttpHelper.GetTextDataByTagAndAttributeName(tempItem, "span", "education-date");
if (lstSession.Count == 0)
{
Education = " [" + lstSchool[0] + "] Degree: " + lstDegree[0];
}
else
{
Education = " [" + lstSchool[0] + "] Degree: " + lstDegree[0] + " Session: " + lstSession[0].Replace("–", "-").Replace(",", "").Trim();
}
EducationList.Add(Education);
}
}
catch { }
}
}
}
catch { }
}
EducationList = EducationList.Distinct().ToList();
foreach (string item in EducationList)
{
if (string.IsNullOrEmpty(EducationCollection))
{
EducationCollection = item.Replace("}", "").Replace("]", "").Replace("&", "&");
}
else
示例2: CrawlingLinkedInPageRecruiter
public bool CrawlingLinkedInPageRecruiter(string Url, ref GlobusHttpHelper HttpHelper)
{
bool isscraped = false;
string Jobtitle = string.Empty;
string Location = string.Empty;
string PersonUrlLink = string.Empty;
string FirstName = string.Empty;
string LastName = string.Empty;
string specialites = string.Empty;
string Website = string.Empty;
string Industry = string.Empty;
string ProfileUrl = string.Empty;
string strFamilyName = string.Empty;
string careerDetails = string.Empty;
try
{
//Url = "https://www.linkedin.com/jobs2/view/38612041?trk=vsrp_jobs_res_name&trkInfo=VSRPsearchId%3A82134271427382117065%2CVSRPtargetId%3A38612041%2CVSRPcmpt%3Aprimary";
string pagesource = HttpHelper.getHtmlfromUrl(new Uri(Url));
if (!pagesource.Contains("Contact the job poster"))
{
Log("[ " + DateTime.Now + " ] => [ No Data Found For Url "+Url+" ] ");
return false; ;
}
Jobtitle = Utils.getBetween(pagesource, "itemprop=\"title\">", "</h1>").Replace(",",";").Replace("&","&");
Location = Utils.getBetween(pagesource, "itemprop=\"description\">", "</span>").Replace(",", ";").Replace("&", "&");
if (string.IsNullOrEmpty(Location))
{
Location = Utils.getBetween(pagesource, "location\":", ",").Replace(",", ";").Replace("&", "&");
}
careerDetails = Utils.getBetween(pagesource, "companyPageNameLink\":", ",").Replace("\"", "").Replace(",", ";").Replace("&", "&").Replace("careers?", "home?");
string subPagedetails = HttpHelper.getHtmlfromUrl(new Uri(careerDetails));
List<string> websiteAddress = HttpHelper.GetTextDataByTagAndAttributeName(subPagedetails, "li", "website");
if (websiteAddress.Count > 0)
{
Website = websiteAddress[0].Replace("Website", "").Replace(",", ";").Replace("&", "&");
}
List<string> specialtiesAddress = HttpHelper.GetTextDataByTagAndAttributeName(subPagedetails, "div", "specialties");
if (specialtiesAddress.Count > 0)
{
specialites = specialtiesAddress[0].Replace("specialties", "").Replace(",", ";").Replace("&", "&");
}
List<string> lstIndustry = HttpHelper.GetTextDataByTagAndAttributeName(subPagedetails, "li", "industry");
if (lstIndustry.Count > 0)
{
Industry = lstIndustry[0].Replace("Industry", "").Replace(",", ";").Replace("&", "&");
}
string tempPagesource = Utils.getBetween(pagesource, "<div class=\"poster\"", "</div>");
ProfileUrl = Utils.getBetween(tempPagesource, "<a href=", ">").Replace("\"", "").Trim();
if (!string.IsNullOrEmpty(ProfileUrl))
{
string pagesourceProfildetails = HttpHelper.getHtmlfromUrl(new Uri(ProfileUrl));
#region Name
try
{
try
{
try
{
int StartIndex = pagesourceProfildetails.IndexOf("<title>");
string Start = pagesourceProfildetails.Substring(StartIndex).Replace("<title>", string.Empty);
int EndIndex = Start.IndexOf("| LinkedIn</title>");
string End = Start.Substring(0, EndIndex).Replace(":", string.Empty).Replace("'", string.Empty).Replace(",", string.Empty).Trim();
strFamilyName = End.Trim();
}
catch
{ }
}
catch
{
try
{
strFamilyName = pagesourceProfildetails.Substring(pagesourceProfildetails.IndexOf("fmt__full_name\":"), (pagesourceProfildetails.IndexOf(",", pagesourceProfildetails.IndexOf("fmt__full_name\":")) - pagesourceProfildetails.IndexOf("fmt__full_name\":"))).Replace("fmt__full_name\":", string.Empty).Replace("\\", string.Empty).Replace("\"", string.Empty).Replace(",", string.Empty).Trim();
}
catch { }
}
if (string.IsNullOrEmpty(strFamilyName))
{
try
{
strFamilyName = pagesourceProfildetails.Substring(pagesourceProfildetails.IndexOf("<span class=\"full-name\">"), (pagesourceProfildetails.IndexOf("</span><span></span></span></h1></div></div><div id=\"headline-container\" data-li-template=\"headline\">", pagesourceProfildetails.IndexOf("</span><span></span></span></h1></div></div><div id=\"headline-container\" data-li-template=\"headline\">")) - pagesourceProfildetails.IndexOf("<span class=\"full-name\">"))).Replace("<span class=\"full-name\">", string.Empty).Replace("\\", string.Empty).Replace("\"", string.Empty).Replace(",", string.Empty).Trim();
}
catch
{ }
}
if (string.IsNullOrEmpty(strFamilyName))
{
try
{
int StartIndex = pagesourceProfildetails.IndexOf("<span class=\"full-name\">");
string Start = pagesourceProfildetails.Substring(StartIndex).Replace("<span class=\"full-name\">", string.Empty);
int EndIndex = Start.IndexOf("</span>");
string End = Start.Substring(0, EndIndex).Replace("</span>", string.Empty);
strFamilyName = End.Trim();
//.........这里部分代码省略.........