本文整理汇总了 C# 中 Filter.resetDictionary 方法的典型用法代码示例。如果您正苦于以下问题:C# Filter.resetDictionary 方法的具体用法是什么?C# Filter.resetDictionary 怎么用?有哪些 C# Filter.resetDictionary 的使用例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 Filter 的用法示例。
下文共展示了 Filter.resetDictionary 方法的 1 个代码示例,示例默认按受欢迎程度排序。您可以为喜欢或觉得有用的代码点赞,您的评价将有助于系统推荐出更好的 C# 代码示例。
示例1: Test2
public void Test2()
{
List<String> urls = new List<string>();
urls.Add("http://www.autonews.com/");
urls.Add("http://www.geonius.com/www/");
urls.Add("http://en.wikipedia.org/wiki/Main_Page");
urls.Add("http://www.computerworld.com/");
List<string> seeds = StorageSystem.StorageSystem.getInstance().getSeedList(taskId);
foreach (string seed in seeds)
{
urls.Add(seed);
}
List<Category> _categories;
Constraints _constraints;
_categories = StorageSystem.StorageSystem.getInstance().getCategories(taskId);
_constraints = StorageSystem.StorageSystem.getInstance().getRestrictions(taskId);
StorageSystem.StorageSystem.getInstance().getSeedList(taskId);
Filter filter = new Filter("http://", _constraints);
Categorizer categorizer = new Categorizer(_categories);
Ranker ranker = new Ranker(categorizer);
Extractor extractor = new Extractor();
HttpResourceFetcher httpfetcher = new HttpResourceFetcher();
foreach (String url in urls)
{
DateTime startTime = DateTime.Now;
ResourceContent resource = null;
if (httpfetcher.canFetch(url))
resource = httpfetcher.fetch(url, 10000, 100);
DateTime fetchEndTime = DateTime.Now;
if ((resource == null)||(resource.getResourceContent()==null))
continue;
/*** 0. fetching the link from the internet ***/
TimeSpan fetchingTime = fetchEndTime - startTime;
List<LinkItem> listOfLinks = new List<LinkItem>();
//extract all the links in page
listOfLinks = extractor.extractLinks(resource.getResourceUrl(), resource.getResourceContent());
RuntimeStatistics.addToExtractedUrls(listOfLinks.Count);
DateTime extEndTime = DateTime.Now;
/*** 1. Extracting the link from the request ***/
TimeSpan extRequest = extEndTime - fetchEndTime;
//reset the dictionary in filter that contains the urls from the same page
filter.resetDictionary();
int filteredUrlsCount = 0;
foreach (LinkItem item in listOfLinks)
{
//Filter the links and return only links that can be crawled
List<String> links = new List<String>();
links.Add(item.getLink());
List<String> filteredLinks = filter.filterLinks(links);
//If filteredLinks is not empty
if (filteredLinks.Count > 0)
{
filteredUrlsCount++;
Url url1 = new Url(filteredLinks[0], hashUrl(filteredLinks[0]), ranker.rankUrl(resource, item),
item.getDomainUrl(), hashUrl(item.getDomainUrl()));
deployLinksToFrontier(url1);
RuntimeStatistics.addToFeedUrls(1);
}
}
DateTime catStartTime = DateTime.Now;
/*** 2. Ranking and deployment to the frontier ***/
TimeSpan rankTotalRequest = catStartTime - extEndTime;
//Ascribe the url to all the categories it is belonged to.
List<Result> classifiedResults = categorizer.classifyContent(resource.getResourceContent(),
resource.getResourceUrl());
if (classifiedResults.Count != 0) RuntimeStatistics.addToCrawledUrls(1);
DateTime catEndTime = DateTime.Now;
/*** 3. Classification of the current request ***/
TimeSpan catTotalRequest = catEndTime - catStartTime;
foreach (Result classifiedResult in classifiedResults)
{
Result result = new Result("0", classifiedResult.getUrl(), classifiedResult.getCategoryID(),
resource.getRankOfUrl(), classifiedResult.getTrustMeter());
deployResourceToStorage(result);
}
DateTime endTime = DateTime.Now;
/*** 4. deployment to the database (result) ***/
TimeSpan deployRequest = endTime - catEndTime;
//.........这里部分代码省略.........