本文整理汇总了 Python 中 scrapy.spider.Spider 类的典型用法代码示例。如果您正苦于以下问题：Python spider.Spider 的具体用法？Python spider.Spider 怎么用？Python spider.Spider 使用的例子？那么，这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该类所在模块 scrapy.spider 的用法示例。
在下文中一共展示了 spider.Spider 类的 4 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: parse_page
# 需要导入模块: from scrapy import spider [as 别名]
# 或者: from scrapy.spider import Spider [as 别名]
def parse_page(self, response):
    """Collect e-mail addresses from a ProductionHub page and record new ones.

    Every address matched in ``response`` is handled except the site's own
    ``press@productionhub.com`` contact. An address that has not been seen
    yet is written to ``file`` (one per line) and appended to
    ``current_session_emails``; otherwise a notice is printed.

    Relies on names defined outside this function: ``Selector``,
    ``email_in_file``, ``current_session_emails`` and ``file``.
    NOTE(review): ``email_in_file`` presumably holds the lines already
    present in the output file (each ending in a newline) -- confirm.

    :param response: the response object handed to this callback.
    """
    sel = Selector(response)
    # Raw string: ``\w`` and ``\.`` are regex escapes, not Python string escapes.
    emails = sel.re(r'(\w+@[a-zA-Z_]+?\.[a-zA-Z]{2,6})')
    for email in emails:
        if email == 'press@productionhub.com':
            # The site's own press contact is never recorded or reported.
            continue
        if (email + "\n" not in email_in_file
                and email not in current_session_emails):
            file.write(email + '\n')
            current_session_emails.append(email)
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print('Spider: ProductionHub. Email {0} added to file'.format(email))
        else:
            print('Spider: ProductionHub. Email {0} already in the file'.format(email))
示例2: parse_page
# 需要导入模块: from scrapy import spider [as 别名]
# 或者: from scrapy.spider import Spider [as 别名]
def parse_page(self, response):
    """Record the first e-mail address found on a Mandy page, if it is new.

    Only the first regex match in ``response`` is considered. A new address
    is written to ``file`` (one per line) and appended to ``added_email``;
    otherwise a notice is printed. Nothing happens when the page contains
    no address.

    Relies on names defined outside this function: ``Selector``,
    ``email_in_file``, ``added_email`` and ``file``.
    NOTE(review): ``email_in_file`` presumably holds the lines already
    present in the output file (each ending in a newline) -- confirm.

    :param response: the response object handed to this callback.
    """
    sel = Selector(response)
    # Raw string: ``\w`` and ``\.`` are regex escapes, not Python string escapes.
    matches = sel.re(r'(\w+@[a-zA-Z_]+?\.[a-zA-Z]{2,6})')
    if not matches:
        return

    email = matches[0]
    if email + "\n" not in email_in_file and email not in added_email:
        file.write(email + '\n')
        added_email.append(email)
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("Spider: Mandy. Email {0} added to file".format(email))
    else:
        print("Spider: Mandy. Email {0} already in the file".format(email))
示例3: parse
# 需要导入模块: from scrapy import spider [as 别名]
# 或者: from scrapy.spider import Spider [as 别名]
def parse(self, response):
    """Record e-mail addresses from NewenglandFilm entries dated ``current_date``.

    Walks the first 30 ``div`` children of ``#mainContent``. For each one
    whose date (``M/D/YYYY`` text inside its ``span``) equals
    ``current_date``, every address found in the entry body is either
    written to ``file_output`` and appended to ``email_current_session``,
    or reported as already known.

    A ``div`` without a parsable date is skipped; previously indexing the
    empty match list raised ``IndexError`` and aborted the whole callback.

    Relies on names defined outside this function: ``Selector``,
    ``current_date``, ``email_in_file``, ``email_current_session`` and
    ``file_output``.
    NOTE(review): ``email_in_file`` presumably holds the lines already
    present in the output file (each ending in a newline) -- confirm.

    :param response: the response object handed to this callback.
    """
    sel = Selector(response)
    # XPath positions are 1-based; ``range`` works on both Python 2 and 3.
    for num_div in range(1, 31):
        div_xpath = '//*[@id="mainContent"]/div[{0}]'.format(num_div)

        dates = sel.xpath(div_xpath + '/span/text()').re(
            r'(\d{1,2}\/\d{1,2}\/\d{4})')
        if not dates:
            # No date in this div (or the div does not exist): nothing to compare.
            continue
        if dates[0] != current_date:
            continue

        # Only extract addresses once the entry is known to be from the wanted date.
        addresses = sel.xpath(div_xpath + '/div/text()').re(
            r'(\w+@[a-zA-Z0-9_]+?\.[a-zA-Z]{2,6})')
        for address in addresses:
            if (address + "\n" not in email_in_file
                    and address not in email_current_session):
                file_output.write(address + "\n")
                email_current_session.append(address)
                # Single-argument print(...) behaves the same on Python 2 and 3.
                print("Spider: NewenglandFilm. Email {0} added to file".format(address))
            else:
                print("Spider: NewenglandFilm. Email {0} already in the file".format(address))
示例4: parse_page
# 需要导入模块: from scrapy import spider [as 别名]
# 或者: from scrapy.spider import Spider [as 别名]
def parse_page(self, response):
    """Record the e-mail address exposed on a CraigList page, if it is new.

    The address is taken from the first ``value`` attribute matched by
    ``//div/ul//input/@value``. A new address is written to ``file_out``
    (one per line) and appended to ``emails_current_session``; otherwise a
    notice is printed. Nothing happens when no such attribute exists.

    Relies on names defined outside this function: ``Selector``,
    ``emails_in_file``, ``emails_current_session`` and ``file_out``.
    NOTE(review): ``emails_in_file`` presumably holds the lines already
    present in the output file (each ending in a newline) -- confirm.

    :param response: the response object handed to this callback.
    """
    sel = Selector(response)
    values = sel.xpath('//div/ul//input/@value').extract()
    if not values:
        return

    email = values[0]
    if email + "\n" not in emails_in_file and email not in emails_current_session:
        file_out.write("{}\n".format(email))
        emails_current_session.append(email)
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("Spider: CraigList. Email {0} added to file".format(email))
    else:
        print("Spider: CraigList. Email {0} already in the file".format(email))