This article collects typical usage examples of the Python crawler.Crawler method. If you have been wondering what exactly crawler.Crawler does and how to use it, the curated examples below may help; you can also look further into the crawler class that the method belongs to.
The following presents 5 code examples of crawler.Crawler, sorted by popularity by default.
Example 1: DoCrawler
# Required import: import crawler [as alias]
# Or: from crawler import Crawler [as alias]
def DoCrawler(message):
    print("DO CRAWLER MESSAGE : " + message)
    import crawler
    import json  # used by json.loads below; presumably imported at module level in the original
    Jconf = json.loads(message)
    RunnerID = Jconf["RunnerID"]
    RunnerList = Jconf["RunnerList"]
    JobID = Jconf["JobID"]
    JobOwner = Jconf["JobOwner"]
    client.JobDict[JobID] = Jconf  # `client` is a module of the surrounding project, not shown here
    Cclass = crawler.Crawler(JobID, RunnerID, RunnerList, JobOwner)
    Cclass.Run()
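For reference, a message that satisfies the keys the handler reads above might look like the following; the field names mirror what DoCrawler accesses, but the concrete values are hypothetical, not taken from the original project:

import json

# Hypothetical payload for DoCrawler; values are illustrative only.
message = json.dumps({
    "RunnerID": "runner-01",
    "RunnerList": ["runner-01", "runner-02"],
    "JobID": "job-42",
    "JobOwner": "alice",
})
DoCrawler(message)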
Example 2: scan_web
# Required import: import crawler [as alias]
# Or: from crawler import Crawler [as alias]
def scan_web(self, url):
    '''Crawl `url` and scan every request the crawler discovers.'''
    w = Crawler()
    req_list = w.crawl(url)
    for item in req_list:
        print(item)
        self.scan_request(item)
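Example 2 only assumes that Crawler exposes a crawl(url) method returning an iterable of discovered requests. A minimal stand-in that satisfies this contract, hypothetical but handy for exercising scan_web without a network, could be:

class Crawler:
    '''Hypothetical stub of the interface scan_web relies on.'''
    def crawl(self, url):
        # A real crawler would fetch `url`, follow links, and return the
        # requests it discovered; this stub returns a fixed list instead.
        return [url + '/login', url + '/search?q=test']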
Example 3: test_load_crawler
# Required import: import crawler [as alias]
# Or: from crawler import Crawler [as alias]
def test_load_crawler():
    ini = Ini('files/config.ini')
    crawler = Crawler(ini)
    assert crawler
    report = crawler.scan('http://wikitjerrta4qgz4.onion')
    assert type(report) == DynamicObject
    assert report.webpage.url == 'http://wikitjerrta4qgz4.onion'
    assert report.webpage.domain == 'wikitjerrta4qgz4.onion'
    del crawler
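DynamicObject is not defined in this excerpt; the attribute-style access (report.webpage.url) suggests a wrapper that exposes nested dict keys as attributes. A minimal hedged equivalent of that idea:

class DynamicObject:
    '''Hypothetical stand-in: exposes dict keys as attributes, recursively.'''
    def __init__(self, data):
        for key, value in data.items():
            setattr(self, key,
                    DynamicObject(value) if isinstance(value, dict) else value)

report = DynamicObject({'webpage': {'url': 'http://x.onion', 'domain': 'x.onion'}})
assert report.webpage.domain == 'x.onion'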
Example 4: test_sortcrawl_sd_dir
# Required import: import crawler [as alias]
# Or: from crawler import Crawler [as alias]
def test_sortcrawl_sd_dir(self):
    with Sorter(db_handler=self.db_handler) as sortbot9k:
        sortbot9k.scrape_directories(self.sd_directory)
        sortbot9k.sort_onions(self.class_tests)
    uptodate_class, uptodate_name = \
        self.db_handler.get_onion_class(self.get_cur_runtime(), True)
    self.assertEqual(type(uptodate_class), dict)
    # At least 10 of our instances should be on the latest version
    self.assertGreaterEqual(len(uptodate_class), 10)
    self.assertRegex(list(uptodate_class)[0], "http")
    self.assertRegex(list(uptodate_class)[0], ".onion")
    outofdate_class, outofdate_name = \
        self.db_handler.get_onion_class(self.get_cur_runtime(), False)
    self.assertEqual(type(outofdate_class), dict)
    # At least 1 of our instances will be lagging behind versions :'(
    self.assertGreaterEqual(len(outofdate_class), 1)
    self.assertRegex(list(outofdate_class)[0], "http")
    self.assertRegex(list(outofdate_class)[0], ".onion")
    class_data = self.db_handler.get_onions(self.get_cur_runtime())
    nonmonitored_name, monitored_name = class_data.keys()
    # Test that we get the expected class names, and data types back
    self.assertEqual(nonmonitored_name, 'nonmonitored')
    self.assertRegex(monitored_name, 'sd')
    nonmonitored_class, monitored_class = class_data.values()
    self.assertEqual(type(nonmonitored_class), dict)
    self.assertEqual(type(monitored_class), dict)
    with Crawler(db_handler=self.db_handler) as crawlbot9k:
        crawlbot9k.collect_set_of_traces(nonmonitored_class)
    # There are not yet methods to query crawled data, but in the future,
    # tests will be added here to verify Crawler-related data is being
    # read/written to the database in the expected manner.
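Both Sorter and Crawler are used as context managers here, which implies they implement __enter__/__exit__, typically to set up and tear down a browser session or database connection. A sketch of that pattern, assuming nothing about the real setup and teardown logic:

class Crawler:
    def __init__(self, db_handler=None, restart_on_sketchy_exception=False):
        # Constructor keywords match the ones used in Examples 4 and 5.
        self.db_handler = db_handler
        self.restart_on_sketchy_exception = restart_on_sketchy_exception

    def __enter__(self):
        # e.g. launch the browser/session used to collect traces
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # e.g. release resources even if trace collection raised
        return False  # do not suppress exceptions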
Example 5: test_crawl_of_bad_sites
# Required import: import crawler [as alias]
# Or: from crawler import Crawler [as alias]
def test_crawl_of_bad_sites(self):
    with Crawler(restart_on_sketchy_exception=True) as crawler:
        crawler.collect_set_of_traces(self.bad_sites)
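self.bad_sites is not shown in the source; by analogy with collect_set_of_traces(nonmonitored_class) in Example 4, it is presumably a collection of onion URLs expected to misbehave, which the restart_on_sketchy_exception=True flag lets the crawler survive. A purely hypothetical fixture:

# Hypothetical fixture -- the real self.bad_sites is not shown in the source.
bad_sites = [
    'http://timeout-example.onion',
    'http://connection-reset-example.onion',
]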