本文整理汇总了 Python 中 scrapy.spiders.CrawlSpider 类的典型用法代码示例。如果您正苦于以下问题:Python CrawlSpider 类的具体用法?Python CrawlSpider 怎么用?Python CrawlSpider 使用的例子?那么,这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了 CrawlSpider 类的 15 个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: __init__
def __init__(self):
    """Prepare spider state, then initialize the ``CrawlSpider`` base.

    Creates the empty general-purpose containers, runs ``setup_hooks`` and
    ``setup_domains``, and only afterwards calls ``CrawlSpider.__init__``.
    """
    # General-purpose data containers (dictionary / list).
    self.generalData_dict = {}
    self.generalData_list = []

    self.setup_hooks()    # hook setup
    self.setup_domains()  # domain-name setup

    # Initialize the crawl spider; the original author stresses that doing
    # this last is the important part.
    CrawlSpider.__init__(self)
示例2: __init__
def __init__(self):
    """Configure the gsmarena spider and reset its crawl bookkeeping.

    Identity attributes are assigned before ``CrawlSpider.__init__`` runs;
    the start URLs and counters are assigned afterwards.
    """
    site_host = "www.gsmarena.com"

    self.domain = site_host
    self.name = "gsmarena"
    # NOTE(review): Scrapy normally reads custom_settings from the class;
    # confirm this instance-level assignment has the intended effect.
    self.custom_settings = dict()
    self.allowed_domains = [site_host]

    CrawlSpider.__init__(self)

    self.start_urls = [
        "http://www.gsmarena.com/",
        "http://www.gsmarena.com/makers.php3",
    ]

    # Crawl bookkeeping, all starting from zero / empty.
    self.count = 0
    self.deny = ""
    self.crawl_limt = 0  # attribute name kept exactly as in the original (sic)
    self.real_count = 0
示例3: __init__
def __init__(self):
    """Configure the gsmarena spider with maker pages and batch state.

    Identity attributes are assigned before ``CrawlSpider.__init__`` runs;
    the start URLs, counters and batch buffer are assigned afterwards.
    """
    site_host = "www.gsmarena.com"

    self.domain = site_host
    self.name = "gsmarena"
    # NOTE(review): Scrapy normally reads custom_settings from the class;
    # confirm this instance-level assignment has the intended effect.
    self.custom_settings = dict()
    self.allowed_domains = [site_host]

    CrawlSpider.__init__(self)

    self.start_urls = [
        "http://www.gsmarena.com/makers.php3",
        "http://www.gsmarena.com/acer-phones-59.php",
        "http://www.gsmarena.com/alcatel-phones-5.php",
    ]

    # Crawl bookkeeping, all starting from zero / empty.
    self.count = 0
    self.deny = ""
    self.crawl_limt = 0  # attribute name kept exactly as in the original (sic)
    self.real_count = 0

    # Batch size and the (initially empty) list of collected products.
    self.batch_size = 300
    self.mobile_product = list()
示例4: __init__
def __init__(self, rule, worksheet, logging):
    """Set up a ranking spider driven by a rule mapping.

    Args:
        rule: Mapping describing the crawl. ``"ranking_name"`` and
            ``"start_urls"`` are read unconditionally; ``"next_page"`` and
            ``"flag"`` are optional and default to ``["NONE"]``.
        worksheet: Stored unchanged as ``self.worksheet``.
        logging: Logger-like object exposing ``info()``; stored as
            ``self.logging``. Inside this method it shadows the stdlib
            module of the same name.

    Raises:
        KeyError: If ``rule`` is a dict without ``"ranking_name"`` or
            ``"start_urls"``.
    """
    CrawlSpider.__init__(self)
    # Use any browser you wish.
    self.browser = webdriver.Firefox()
    self.logging = logging
    self.rule = rule
    # NOTE(review): the name is assigned after the base initializer has run;
    # confirm the class also provides a class-level ``name`` if the base
    # initializer requires one.
    self.name = rule["ranking_name"]
    self.logging.info("==============================")
    # Each logged value is wrapped in a 1-tuple so that a tuple-valued rule
    # entry is formatted as a whole rather than unpacked as % arguments
    # (which would raise TypeError for tuples of length != 1).
    self.logging.info("self.rule[start_urls]: %s" % (rule["start_urls"],))
    self.start_urls = rule["start_urls"]

    # self.next_page is expected to be a list.
    if "next_page" in rule:
        self.next_page = rule["next_page"]
    else:
        self.next_page = ["NONE"]
    self.logging.info("#### self.next_page %s" % (self.next_page,))

    if "flag" in rule:
        self.flag = rule["flag"]
    else:
        self.flag = ["NONE"]
    self.logging.info("#### self.flag %s" % (self.flag,))

    self.worksheet = worksheet
    self.logging.info("Finish the __init__ method ... ")
示例5: __init__
def __init__(self, **kwargs):
    """Read the user-supplied arguments and initialize spider state.

    :param kwargs: must contain ``outDir``, ``startYear`` and ``endYear``;
        each is stored on the instance and echoed with ``print``. None of
        the keyword arguments are forwarded to ``CrawlSpider.__init__``.
    """
    CrawlSpider.__init__(self)

    self.outDir = kwargs['outDir']
    self.startYear = kwargs['startYear']
    self.endYear = kwargs['endYear']

    # Echo the received configuration, in the same order as before.
    for label, value in (
        ('startYear: ', self.startYear),
        ('self.endYear: ', self.endYear),
        ('self.outDir: ', self.outDir),
    ):
        print(label, value)

    # Request headers kept on the instance for later use.
    self.headers = {
        'User-Agent': 'Mozilla/5.0',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
        'X-Requested-With': 'XMLHttpRequest',
    }

    # Placeholder credentials / keys; the bracketed values are literal
    # placeholders in the source.
    self.payload = {
        'username': '[user name for The Globe and Mail]',
        'password': '[password for The Globe and Mail]',
    }
    self.apikey = '[API Key for Gigya]'
    self.categoryID = 'Production'
示例6: __init__
def __init__(self):
    """Initialize the spider and create a fresh SQLite database.

    An existing database file is first moved aside to
    ``<dbfile>.<unix-timestamp>``. A new database is then created at the
    same path and every statement in ``conf.SQLITE['tables']`` is executed
    and committed.

    The connection is opened *before* the ``try`` block so that ``finally``
    never touches an unbound ``conn``; previously a failure while building
    the path or moving the old file was masked by a secondary error raised
    from ``conn.close()``.

    Raises:
        Whatever ``shutil.move``, ``sqlite3.connect`` or the table
        statements raise; nothing is swallowed.
    """
    CrawlSpider.__init__(self)

    # Create the database.
    dbfile = '%s/%s' % (conf.PROJECT_PATH['data'], conf.SQLITE['file'])
    if os.path.exists(dbfile):
        moveto = '%s.%d' % (dbfile, int(time.time()))
        shutil.move(dbfile, moveto)
        # Single-argument parenthesised print behaves identically on
        # Python 2 and Python 3.
        print('old db file %s is moved to %s.' % (dbfile, moveto))

    conn = sqlite3.connect(dbfile)
    try:
        cursor = conn.cursor()
        for table in conf.SQLITE['tables']:
            cursor.execute(table['sql'])
        conn.commit()
        print('db initialization complete!')
    finally:
        conn.close()
示例7: __init__
def __init__(self):
    """Initialize the spider base, an error list and a Firefox WebDriver."""
    CrawlSpider.__init__(self)

    # Starts out empty.
    self.verificationErrors = list()

    # Disabled alternative carried over from the original:
    # self.selenium=selenium('localhost',4444,"*chrome")
    firefox = webdriver.Firefox()
    self.driver = firefox
示例8: __init__
def __init__(self, *arg, **karg):
    """Name the spider from ``karg['name']`` and run ``init_yaml`` for it.

    ``init_yaml`` is called with the product template path and the spider
    name before the base initializer runs. Only positional arguments are
    passed on to ``CrawlSpider.__init__``; keyword arguments are not
    forwarded.

    Raises:
        KeyError: If ``name`` is not supplied as a keyword argument.
    """
    template_path = 'scrapy_service/templates/product.yaml'

    self.name = karg['name']
    self.init_yaml(template_path, self.name)

    CrawlSpider.__init__(self, *arg)
示例9: parse
def parse(self, response):
    """Yield ``self.get_request(url)`` for each result of ``CrawlSpider.parse``.

    Every object produced by the base ``parse`` is reduced to its ``url``
    attribute, which is handed to ``self.get_request``; the value returned
    by that call is what this generator yields.
    """
    base_results = CrawlSpider.parse(self, response)
    for produced in base_results:
        target_url = produced.url
        yield self.get_request(target_url)
示例10: __init__
def __init__(self, *args, **kwargs):
    """Forward all arguments to ``CrawlSpider.__init__`` and attach the proxy pool."""
    CrawlSpider.__init__(self, *args, **kwargs)

    # ``proxy_list`` is not defined in this method; it is looked up from the
    # enclosing scope when the constructor runs.
    pool = proxy_list
    self.proxy_pool = pool
示例11: _requests_to_follow
def _requests_to_follow(self, response):
    """Follow links only for responses that carry an encoding.

    Args:
        response: The response object being processed.

    Returns:
        An empty list when ``response`` has no ``encoding`` attribute or the
        attribute is ``None``; otherwise whatever
        ``CrawlSpider._requests_to_follow`` returns for this response.
    """
    # Identity check instead of ``!= None``: it cannot be affected by a
    # custom ``__ne__`` on the encoding object.
    if getattr(response, "encoding", None) is None:
        return []
    return CrawlSpider._requests_to_follow(self, response)
示例12: __init__
def __init__(self, *arg, **karg):
    """Run ``init_yaml`` for ``lazada_sitemap``, then initialize the base.

    Only positional arguments are passed on to ``CrawlSpider.__init__``;
    keyword arguments are accepted but not used or forwarded.
    """
    template_path = 'scrapy_service/templates/product.yaml'
    template_key = 'lazada_sitemap'
    self.init_yaml(template_path, template_key)

    CrawlSpider.__init__(self, *arg)
示例13: __init__
def __init__(self):
    """Initialize the spider base and start a ``selenium`` client.

    The client is constructed for ``localhost:4444`` with the ``*chrome``
    launcher string and the base URL ``http://www.domain.com``, stored as
    ``self.selenium`` and started immediately.
    """
    CrawlSpider.__init__(self)

    # Starts out empty.
    self.verificationErrors = list()

    client = selenium("localhost", 4444, "*chrome", "http://www.domain.com")
    self.selenium = client
    client.start()
示例14: __init__
def __init__(self, *args, **kwargs):
    """Pass every positional and keyword argument to ``CrawlSpider.__init__``.

    No additional state is set up here.
    """
    CrawlSpider.__init__(self, *args, **kwargs)
示例15: set_crawler
def set_crawler(self, crawler):
    """Print a trace line, then delegate to ``CrawlSpider.set_crawler``.

    Args:
        crawler: Passed through unchanged to the base implementation.
    """
    # Parenthesised single-argument print emits the same text on Python 2
    # and Python 3; the bare print statement is a SyntaxError on Python 3.
    print('call set_crawler')
    CrawlSpider.set_crawler(self, crawler)