当前位置: 首页>>代码示例>>C#>>正文


C# Uri.GetBaseDomain方法代码示例

本文整理汇总了C#中Uri.GetBaseDomain方法的典型用法代码示例。需要注意的是,.NET的System.Uri类本身并不包含GetBaseDomain成员;从下文的调用方式来看,它应当是示例所属项目(ThrongBot)为Uri类型定义的扩展方法。如果您正苦于以下问题:C# Uri.GetBaseDomain方法的具体用法?C# Uri.GetBaseDomain怎么用?C# Uri.GetBaseDomain使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解System.Uri的用法示例。


在下文中一共展示了Uri.GetBaseDomain方法的7个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C#代码示例。

示例1: CreateLinkToCrawl

 /// <summary>
 /// Creates the <see cref="LinkToCrawl"/> that represents <paramref name="targetUri"/>,
 /// a link parsed from the already crawled <paramref name="page"/>.
 /// </summary>
 /// <param name="page">The crawled page on which the link was found.</param>
 /// <param name="targetUri">The parsed link that must be scheduled.</param>
 /// <param name="sessionId">Value stored in the link's SessionId.</param>
 /// <returns>A non-root link whose depth is one greater than the page's depth.</returns>
 public virtual LinkToCrawl CreateLinkToCrawl(CrawledPage page, Uri targetUri, int sessionId)
 {
     var link = new LinkToCrawl();
     link.SessionId = sessionId;
     // this was the link that was just crawled to produce the CrawledPage
     link.SourceUrl = page.Uri.AbsoluteUri;
     // this is the link parsed that must be scheduled
     link.TargetUrl = targetUri.AbsoluteUri;

     // Resolve the target's base domain once; it is both stored and compared below.
     var targetBaseDomain = targetUri.GetBaseDomain();
     link.TargetBaseDomain = targetBaseDomain;

     // creating a link from a crawled page, so it will not be the root
     link.IsRoot = false;

     // Domain names are identifiers, not linguistic text: compare them ordinally
     // (ignoring case) so the result does not depend on the current culture.
     link.IsInternal = string.Equals(page.Uri.GetBaseDomain(), targetBaseDomain, StringComparison.OrdinalIgnoreCase);

     // increasing depth is also done in the default scheduler
     link.CrawlDepth = page.CrawlDepth + 1;
     return link;
 }
开发者ID:BgRva,项目名称:ThrongBot,代码行数:16,代码来源:ModelFactory.cs

示例2: IsPageToBeProcessed_Returns_True_If_Status_Is_Ok_And_Url_Is_Not_Blacklisted_Or_Processed

        /// <summary>
        /// Verifies that IsPageToBeProcessed returns true for a 200 response when the
        /// repository reports the domain as not blacklisted and the page as not processed.
        /// </summary>
        public void IsPageToBeProcessed_Returns_True_If_Status_Is_Ok_And_Url_Is_Not_Blacklisted_Or_Processed()
        {
            //Arrange
            var mockProvider = new Mock<ILogicProvider>();
            var mockRepo = new Mock<IRepository>();
            var uri = new Uri("http://www.x.com");
            var code = HttpStatusCode.OK;

            #region Set expectations

            mockRepo.Setup(m => m.IsBlackListed(uri.GetBaseDomain()))
                    .Returns(false);

            // The code under test looks the page up by its absolute URI, so the
            // expectation must use that same argument; a placeholder value would
            // never match and the test would rely on the mock's default return.
            mockRepo.Setup(m => m.IsPageProcessed(uri.AbsoluteUri))
                    .Returns(false);

            #endregion

            //Act
            var processor = new CrawlDaddy(mockProvider.Object, mockRepo.Object);
            var result = processor.IsPageToBeProcessed(uri, code);

            //Assert
            Assert.True(result);
        }
开发者ID:BgRva,项目名称:ThrongBot,代码行数:25,代码来源:CrawlDaddyFixture.cs

示例3: InitializeCrawler

 /// <summary>
 /// Records the session id, crawler id, seed Uri and the seed's base domain on this instance.
 /// </summary>
 /// <param name="seedUrl">Url string used to build the seed Uri.</param>
 /// <param name="sessionId">Value stored in SessionId.</param>
 /// <param name="crawlerId">Value stored in CrawlerId.</param>
 /// <returns>Always true.</returns>
 public bool InitializeCrawler(string seedUrl, int sessionId, int crawlerId)
 {
     // Ids are stored before the Uri is built, so they are set even if parsing throws.
     SessionId = sessionId;
     CrawlerId = crawlerId;

     var seedUri = new Uri(seedUrl);
     Seed = seedUri;
     BaseDomain = seedUri.GetBaseDomain();

     return true;
 }
开发者ID:BgRva,项目名称:ThrongBot,代码行数:8,代码来源:FakeCrawlDaddy.cs

示例4: ProcessLink_Adds_Duplicate_To_List_Of_Links_To_Bypass

        /// <summary>
        /// Verifies that a link already present in the processor's map of links to crawl
        /// ends up in the list of links to bypass when it is processed again.
        /// </summary>
        public void ProcessLink_Adds_Duplicate_To_List_Of_Links_To_Bypass(
            string[] currentLinksToCrawl, string duplicateLink, string[] expectedLinksToBypass)
        {
            // Arrange: a crawled page that carries an empty list of parsed links
            var page = new CrawledPage(new Uri("http://www.z.com"));
            page.ParsedLinks = new List<Uri>();

            var targetUri = new Uri(duplicateLink);

            var mockProvider = new Mock<ILogicProvider>();
            var mockFactory = new Mock<IModelFactory>();
            var processor = new ParsedLinksProcessor(mockProvider.Object)
            {
                LinksToByPass = new List<CrawledLink>(),
                MapOfLinksToCrawl = new Dictionary<string, LinkToCrawl>()
            };

            // Seed the map with the links that are already scheduled
            foreach (var scheduledUrl in currentLinksToCrawl)
            {
                var scheduledUri = new Uri(scheduledUrl);
                var key = scheduledUri.AbsoluteUri;
                var scheduledLink = new LinkToCrawl
                {
                    TargetUrl = scheduledUrl,
                    TargetBaseDomain = scheduledUri.GetBaseDomain()
                };
                processor.MapOfLinksToCrawl.Add(key, scheduledLink);
            }

            // Expectation: the factory is asked to build the bypassed link
            var bypassedLink = new CrawledLink { TargetUrl = duplicateLink };
            mockFactory
                .Setup(m => m.CreateCrawledLink(It.IsAny<Uri>(), It.IsAny<Uri>(), It.IsAny<int>(), It.IsAny<int>()))
                .Returns(bypassedLink)
                .Verifiable();

            // Act
            processor.ProcessLink(page, mockFactory.Object, targetUri, 3, 4);
            var bypassed = processor.LinksToByPass;

            // Assert
            Assert.NotNull(bypassed);
            Assert.Equal(expectedLinksToBypass.Length, bypassed.Count);
            Assert.Equal(expectedLinksToBypass[0], bypassed[0].TargetUrl);
            mockFactory.Verify();
        }
开发者ID:BgRva,项目名称:ThrongBot,代码行数:42,代码来源:ParsedLinksProcessorFixture.cs

示例5: ProcessLink

        /// <summary>
        /// Processes the Uri specified by <paramref name="targetUri"/> as a potential link to be crawled
        /// or bypassed. A link is bypassed when it uses the mailto scheme, when it points at the page it
        /// was parsed from, or when it is already present in MapOfLinksToCrawl; otherwise it is added to
        /// MapOfLinksToCrawl.
        /// </summary>
        /// <param name="page">The CrawledPage from which the targetUri was parsed.</param>
        /// <param name="factory">An instance of IModelFactory</param>
        /// <param name="targetUri">The target Uri being processed</param>
        /// <param name="sessionId">Session id passed through to the factory when creating links.</param>
        /// <param name="crawlerId">Crawler id passed through to the factory when creating bypassed links.</param>
        internal void ProcessLink(Abot.Poco.CrawledPage page, IModelFactory factory, Uri targetUri, int sessionId, int crawlerId)
        {
            // The three bypass cases are checked in this order and short-circuit:
            //   1. mailto scheme
            //   2. exact self loop (URLs are compared ordinally, matching the
            //      ordinal key comparison used by the map lookup in case 3)
            //   3. duplicate of a link that is already scheduled
            bool bypass =
                targetUri.Scheme == Uri.UriSchemeMailto
                || string.Equals(page.Uri.AbsoluteUri, targetUri.AbsoluteUri, StringComparison.Ordinal)
                || MapOfLinksToCrawl.ContainsKey(targetUri.AbsoluteUri);

            if (bypass)
            {
                var bypassedLink = factory.CreateCrawledLink(page.Uri, targetUri, sessionId, crawlerId);
                bypassedLink.IsRoot = false;
                bypassedLink.CrawlDepth = page.CrawlDepth + 1;
                bypassedLink.StatusCode = HttpStatusCode.OK;
                bypassedLink.Bypassed = true;
                LinksToByPass.Add(bypassedLink);
                return;
            }

            // process link to be crawled that was parsed from a crawled page, so
            // it will not be a root.
            var link = factory.CreateLinkToCrawl(page, targetUri, sessionId);
            MapOfLinksToCrawl.Add(targetUri.AbsoluteUri, link);

            // Domain names are identifiers: compare them ordinally, ignoring case,
            // so the outcome does not depend on the current culture.
            if (!string.Equals(page.Uri.GetBaseDomain(), targetUri.GetBaseDomain(), StringComparison.OrdinalIgnoreCase))
            {
                ExternalLinksFound = true;
            }
        }
开发者ID:BgRva,项目名称:ThrongBot,代码行数:52,代码来源:ParsedLinksProcessor.cs

示例6: IsPageToBeProcessed

        /// <summary>
        /// Decides whether the page at <paramref name="uri"/> should be processed.
        /// </summary>
        /// <param name="uri">The Uri of the page.</param>
        /// <param name="code">The status code to check against OK.</param>
        /// <returns>
        /// True only when the status code is OK, the repository does not report the Uri's
        /// base domain as blacklisted, and the repository does not report the page as processed.
        /// </returns>
        public bool IsPageToBeProcessed(Uri uri, HttpStatusCode code)
        {
            // Any status other than OK rules the page out without touching the repository.
            if (code != System.Net.HttpStatusCode.OK)
            {
                return false;
            }

            // The blacklist is consulted before the per-page lookup.
            if (_repo.IsBlackListed(uri.GetBaseDomain()))
            {
                return false;
            }

            return !_repo.IsPageProcessed(uri.AbsoluteUri);
        }
开发者ID:BgRva,项目名称:ThrongBot,代码行数:21,代码来源:CrawlDaddy.cs

示例7: InitializeCrawler

        /// <summary>
        /// Prepares a crawl for the given seed: stores the configuration, registers a new
        /// CrawlerRun with the repository, builds the scheduler and crawler, and wires up
        /// the crawl callbacks and event handlers.
        /// </summary>
        /// <param name="seedUrl">Url string used to build the seed Uri.</param>
        /// <param name="sessionId">Session id of the run.</param>
        /// <param name="crawlerId">Crawler id of the run.</param>
        /// <param name="config">Configuration stored and handed to the crawler.</param>
        /// <returns>
        /// False when the repository already has a crawl for the session/crawler pair
        /// (an error is logged and nothing else is set up); otherwise true.
        /// </returns>
        public bool InitializeCrawler(string seedUrl, int sessionId, int crawlerId, CrawlConfiguration config)
        {
            _config = config;

            // Bail out if a crawl is already defined for this session/crawler pair.
            if (_repo.GetCrawl(sessionId, crawlerId) != null)
            {
                _logger.Error(string.Format("CrawlerRun exists with sessionId: {0} and crawlerId: {1}; cancelling run ...", sessionId, crawlerId));
                return false;
            }

            Seed = new Uri(seedUrl);

            // Describe the run and register it with the repository.
            var run = new CrawlerRun();
            run.SessionId = sessionId;
            run.SeedUrl = Seed.AbsoluteUri;
            run.CrawlerId = crawlerId;
            run.BaseDomain = Seed.GetBaseDomain();
            CrawlerDefinition = run;
            _repo.AddCrawl(CrawlerDefinition);

            _scheduler = new MyScheduler(new LogicProvider(), CrawlerDefinition, _repo);

            // Only the configuration and the scheduler are supplied; the other arguments are null.
            _crawler = new PoliteWebCrawler(_config, null, null, _scheduler, null, null, null, null, null);
            _crawler.CrawlBag.SessionId = CrawlerDefinition.SessionId;
            _crawler.CrawlBag.CrawlerId = CrawlerDefinition.CrawlerId;
            _crawler.ShouldScheduleLink(ShouldScheduleLink);
            _crawler.ShouldCrawlPage(ShouldCrawlPage);

            // The same four handlers are attached either to the synchronous events
            // or to their *Async counterparts, depending on IsAsync.
            if (!IsAsync)
            {
                _crawler.PageCrawlStarting += crawler_ProcessPageCrawlStarting;
                _crawler.PageCrawlCompleted += crawler_ProcessPageCrawlCompleted;
                _crawler.PageCrawlDisallowed += crawler_PageCrawlDisallowed;
                _crawler.PageLinksCrawlDisallowed += crawler_PageLinksCrawlDisallowed;
            }
            else
            {
                _crawler.PageCrawlStartingAsync += crawler_ProcessPageCrawlStarting;
                _crawler.PageCrawlCompletedAsync += crawler_ProcessPageCrawlCompleted;
                _crawler.PageCrawlDisallowedAsync += crawler_PageCrawlDisallowed;
                _crawler.PageLinksCrawlDisallowedAsync += crawler_PageLinksCrawlDisallowed;
            }

            return true;
        }
开发者ID:BgRva,项目名称:ThrongBot,代码行数:46,代码来源:CrawlDaddy.cs


注:本文中的System.Uri.GetBaseDomain方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。