本文整理汇总了C#中System.Uri.GetBaseDomain方法的典型用法代码示例。如果您正苦于以下问题：C# Uri.GetBaseDomain方法的具体用法？C# Uri.GetBaseDomain怎么用？C# Uri.GetBaseDomain使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。注意：GetBaseDomain并非.NET框架中System.Uri类的内置成员，而是示例项目为Uri定义的扩展方法。您也可以进一步了解该方法所作用的类System.Uri的用法示例。
在下文中一共展示了Uri.GetBaseDomain方法的7个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的C#代码示例。
示例1: CreateLinkToCrawl
/// <summary>
/// Creates the <see cref="LinkToCrawl"/> that schedules <paramref name="targetUri"/>, a link
/// parsed out of the already crawled <paramref name="page"/>.
/// </summary>
/// <param name="page">The crawled page on which the link was found; it becomes the link's source.</param>
/// <param name="targetUri">The parsed link that must be scheduled.</param>
/// <param name="sessionId">Identifier of the crawl session the link belongs to.</param>
/// <returns>
/// A non-root link one level deeper than <paramref name="page"/>, flagged as internal when the
/// source and target share the same base domain.
/// </returns>
public virtual LinkToCrawl CreateLinkToCrawl(CrawledPage page, Uri targetUri, int sessionId)
{
    // Resolve each base domain once instead of re-deriving the target's for the comparison.
    var sourceBaseDomain = page.Uri.GetBaseDomain();
    var targetBaseDomain = targetUri.GetBaseDomain();

    var link = new LinkToCrawl();
    link.SessionId = sessionId;
    // this was the link that was just crawled to produce the CrawledPage
    link.SourceUrl = page.Uri.AbsoluteUri;
    // this is the link parsed that must be scheduled
    link.TargetUrl = targetUri.AbsoluteUri;
    link.TargetBaseDomain = targetBaseDomain;
    // creating a link from a crawled page, so it will not be the root
    link.IsRoot = false;
    // Domain names are identifiers, not linguistic text: compare them ordinally so the
    // result does not depend on the current culture's casing rules.
    link.IsInternal = string.Equals(sourceBaseDomain, targetBaseDomain, StringComparison.OrdinalIgnoreCase);
    // increasing depth is also done in the default scheduler
    link.CrawlDepth = page.CrawlDepth + 1;
    return link;
}
示例2: IsPageToBeProcessed_Returns_True_If_Status_Is_Ok_And_Url_Is_Not_Blacklisted_Or_Processed
/// <summary>
/// Verifies that a page answered with HTTP 200 whose base domain is not blacklisted and whose
/// URL has not been processed yet is reported as "to be processed".
/// </summary>
public void IsPageToBeProcessed_Returns_True_If_Status_Is_Ok_And_Url_Is_Not_Blacklisted_Or_Processed()
{
    //Arrange
    var mockProvider = new Mock<ILogicProvider>();
    var mockRepo = new Mock<IRepository>();
    var uri = new Uri("http://www.x.com");
    var code = HttpStatusCode.OK;

    #region Set expectations
    mockRepo.Setup(m => m.IsBlackListed(uri.GetBaseDomain()))
            .Returns(false);
    // Stub the lookup for the URL under test. The previous literal ("blah") could never match
    // the page's AbsoluteUri, so the stub was dead and the test only passed thanks to the
    // mock's default return value.
    mockRepo.Setup(m => m.IsPageProcessed(uri.AbsoluteUri))
            .Returns(false);
    #endregion

    //Act
    var processor = new CrawlDaddy(mockProvider.Object, mockRepo.Object);
    var result = processor.IsPageToBeProcessed(uri, code);

    //Assert
    Assert.True(result);
}
示例3: InitializeCrawler
/// <summary>
/// Stores the session and crawler identifiers, parses <paramref name="seedUrl"/> into
/// <c>Seed</c> and derives <c>BaseDomain</c> from it.
/// </summary>
/// <param name="seedUrl">Absolute URL the crawl starts from; passed to the <see cref="Uri"/> constructor.</param>
/// <param name="sessionId">Value assigned to <c>SessionId</c>.</param>
/// <param name="crawlerId">Value assigned to <c>CrawlerId</c>.</param>
/// <returns>Always <c>true</c> when the method returns normally.</returns>
public bool InitializeCrawler(string seedUrl, int sessionId, int crawlerId)
{
SessionId = sessionId;
CrawlerId = crawlerId;
// The Uri constructor throws on a null or malformed seedUrl; in that case the two
// identifiers above have already been assigned.
Seed = new Uri(seedUrl);
BaseDomain = Seed.GetBaseDomain();
return true;
}
示例4: ProcessLink_Adds_Duplicate_To_List_Of_Links_To_Bypass
/// <summary>
/// Checks that a link already present in the processor's map of links to crawl is recorded in
/// the bypass list instead of being scheduled a second time.
/// </summary>
/// <param name="currentLinksToCrawl">URLs pre-loaded into the map of links to crawl.</param>
/// <param name="duplicateLink">URL handed to ProcessLink.</param>
/// <param name="expectedLinksToBypass">URLs expected in the bypass list afterwards.</param>
public void ProcessLink_Adds_Duplicate_To_List_Of_Links_To_Bypass
    (string[] currentLinksToCrawl, string duplicateLink, string[] expectedLinksToBypass)
{
    // Arrange
    var crawledPage = new CrawledPage(new Uri("http://www.z.com"))
    {
        ParsedLinks = new List<Uri>()
    };
    var duplicateUri = new Uri(duplicateLink);

    var providerMock = new Mock<ILogicProvider>();
    var factoryMock = new Mock<IModelFactory>();

    var processor = new ParsedLinksProcessor(providerMock.Object)
    {
        LinksToByPass = new List<CrawledLink>(),
        MapOfLinksToCrawl = new Dictionary<string, LinkToCrawl>()
    };

    // Seed the map with the links that are considered already scheduled.
    foreach (var rawUrl in currentLinksToCrawl)
    {
        var parsed = new Uri(rawUrl);
        var scheduled = new LinkToCrawl
        {
            TargetUrl = rawUrl,
            TargetBaseDomain = parsed.GetBaseDomain()
        };
        processor.MapOfLinksToCrawl.Add(parsed.AbsoluteUri, scheduled);
    }

    // Expectations: the factory hands back one bypassed link pointing at the duplicate.
    var bypassedStub = new CrawledLink { TargetUrl = duplicateLink };
    factoryMock
        .Setup(m => m.CreateCrawledLink(It.IsAny<Uri>(), It.IsAny<Uri>(), It.IsAny<int>(), It.IsAny<int>()))
        .Returns(bypassedStub)
        .Verifiable();

    // Act
    processor.ProcessLink(crawledPage, factoryMock.Object, duplicateUri, 3, 4);
    var bypassed = processor.LinksToByPass;

    // Assert
    Assert.NotNull(bypassed);
    Assert.Equal(expectedLinksToBypass.Length, bypassed.Count);
    Assert.Equal(expectedLinksToBypass[0], bypassed[0].TargetUrl);
    factoryMock.Verify();
}
示例5: ProcessLink
/// <summary>
/// Processes the Uri specified by <paramref name="targetUri"/> as a potential link to be crawled
/// or bypassed.
/// </summary>
/// <remarks>
/// A link is bypassed (added to <c>LinksToByPass</c>, never scheduled) when it uses the mailto
/// scheme, points back at the page it was found on, or is already present in
/// <c>MapOfLinksToCrawl</c>. Any other link is added to <c>MapOfLinksToCrawl</c>, and
/// <c>ExternalLinksFound</c> is set when its base domain differs from the page's.
/// </remarks>
/// <param name="page">The CrawledPage from which the targetUri was parsed.</param>
/// <param name="factory">An instance of IModelFactory used to create the link models.</param>
/// <param name="targetUri">The target Uri being processed.</param>
/// <param name="sessionId">Identifier of the crawl session, forwarded to the factory.</param>
/// <param name="crawlerId">Identifier of the crawler, forwarded to the factory for bypassed links.</param>
internal void ProcessLink(Abot.Poco.CrawledPage page, IModelFactory factory, Uri targetUri, int sessionId, int crawlerId)
{
    // The three bypass reasons are handled identically, so evaluate them as one
    // short-circuiting condition (same order as before: mailto, exact self loop, duplicate).
    // URLs are machine identifiers, hence the ordinal rather than culture-sensitive comparison.
    bool shouldBypass =
        targetUri.Scheme == Uri.UriSchemeMailto
        || string.Equals(page.Uri.AbsoluteUri, targetUri.AbsoluteUri, StringComparison.Ordinal)
        || MapOfLinksToCrawl.ContainsKey(targetUri.AbsoluteUri);

    if (shouldBypass)
    {
        var bypassedLink = factory.CreateCrawledLink(page.Uri, targetUri, sessionId, crawlerId);
        bypassedLink.IsRoot = false;
        bypassedLink.CrawlDepth = page.CrawlDepth + 1;
        bypassedLink.StatusCode = HttpStatusCode.OK;
        bypassedLink.Bypassed = true;
        LinksToByPass.Add(bypassedLink);
        return;
    }

    // process link to be crawled that was parsed from a crawled page, so
    // it will not be a root.
    var link = factory.CreateLinkToCrawl(page, targetUri, sessionId);
    MapOfLinksToCrawl.Add(targetUri.AbsoluteUri, link);

    // Host names are case-insensitive; compare them ordinally so the outcome does not
    // depend on the current culture.
    if (!string.Equals(page.Uri.GetBaseDomain(), targetUri.GetBaseDomain(), StringComparison.OrdinalIgnoreCase))
    {
        ExternalLinksFound = true;
    }
}
示例6: IsPageToBeProcessed
/// <summary>
/// Decides whether the page at <paramref name="uri"/> should be processed.
/// </summary>
/// <param name="uri">Address of the page; its base domain and absolute URL are checked against the repository.</param>
/// <param name="code">HTTP status code received for the page.</param>
/// <returns>
/// <c>true</c> only when <paramref name="code"/> is OK, the repository does not report the
/// base domain as blacklisted and does not report the absolute URL as already processed.
/// </returns>
public bool IsPageToBeProcessed(Uri uri, HttpStatusCode code)
{
    // Anything other than 200 OK is rejected without touching the repository.
    if (code != System.Net.HttpStatusCode.OK)
    {
        return false;
    }

    // The processed-page lookup is only made for domains that are not blacklisted.
    if (_repo.IsBlackListed(uri.GetBaseDomain()))
    {
        return false;
    }

    return !_repo.IsPageProcessed(uri.AbsoluteUri);
}
示例7: InitializeCrawler
/// <summary>
/// Prepares a crawl for <paramref name="seedUrl"/>: registers a new CrawlerRun in the
/// repository, builds the scheduler and the PoliteWebCrawler, and subscribes the page event
/// handlers.
/// </summary>
/// <param name="seedUrl">Absolute URL the crawl starts from.</param>
/// <param name="sessionId">Identifier of the crawl session.</param>
/// <param name="crawlerId">Identifier of the crawler within the session.</param>
/// <param name="config">Crawl configuration stored in <c>_config</c> and handed to the crawler.</param>
/// <returns>
/// <c>false</c> when the repository already holds a crawl for the given session and crawler
/// ids (an error is logged and nothing else is set up); otherwise <c>true</c>.
/// </returns>
public bool InitializeCrawler(string seedUrl, int sessionId, int crawlerId, CrawlConfiguration config)
{
    _config = config;

    // Bail out if a crawl is already defined for this session/crawler pair.
    if (_repo.GetCrawl(sessionId, crawlerId) != null)
    {
        _logger.Error(string.Format("CrawlerRun exists with sessionId: {0} and crawlerId: {1}; cancelling run ...", sessionId, crawlerId));
        return false;
    }

    Seed = new Uri(seedUrl);

    // Describe the run, then persist it before any crawler component is created.
    var definition = new CrawlerRun();
    definition.SessionId = sessionId;
    definition.SeedUrl = Seed.AbsoluteUri;
    definition.CrawlerId = crawlerId;
    definition.BaseDomain = Seed.GetBaseDomain();
    CrawlerDefinition = definition;
    _repo.AddCrawl(CrawlerDefinition);

    _scheduler = new MyScheduler(new LogicProvider(), CrawlerDefinition, _repo);
    _crawler = new PoliteWebCrawler(_config, null, null, _scheduler, null, null, null, null, null);

    // Make the run identifiers available through the crawler's bag.
    _crawler.CrawlBag.SessionId = CrawlerDefinition.SessionId;
    _crawler.CrawlBag.CrawlerId = CrawlerDefinition.CrawlerId;

    _crawler.ShouldScheduleLink(ShouldScheduleLink);
    _crawler.ShouldCrawlPage(ShouldCrawlPage);

    // Subscribe the same four handlers to either the synchronous or the *Async events.
    if (!IsAsync)
    {
        _crawler.PageCrawlStarting += crawler_ProcessPageCrawlStarting;
        _crawler.PageCrawlCompleted += crawler_ProcessPageCrawlCompleted;
        _crawler.PageCrawlDisallowed += crawler_PageCrawlDisallowed;
        _crawler.PageLinksCrawlDisallowed += crawler_PageLinksCrawlDisallowed;
    }
    else
    {
        _crawler.PageCrawlStartingAsync += crawler_ProcessPageCrawlStarting;
        _crawler.PageCrawlCompletedAsync += crawler_ProcessPageCrawlCompleted;
        _crawler.PageCrawlDisallowedAsync += crawler_PageCrawlDisallowed;
        _crawler.PageLinksCrawlDisallowedAsync += crawler_PageLinksCrawlDisallowed;
    }

    return true;
}