本文整理汇总了Python中utils.CanadianLegislator.image方法的典型用法代码示例。如果您正苦于以下问题：Python CanadianLegislator.image方法的具体用法？Python CanadianLegislator.image怎么用？Python CanadianLegislator.image使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类utils.CanadianLegislator的用法示例。
在下文中一共展示了CanadianLegislator.image方法的5个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: get_people
# 需要导入模块: from utils import CanadianLegislator [as 别名]
# 或者: from utils.CanadianLegislator import image [as 别名]
def get_people(self):
    """Yield a Legislator for the mayor and each councillor on COUNCIL_PAGE.

    Every ``<h1 class="title">`` heading whose text contains a comma is
    split into ``name, district``.  A heading whose district part contains
    "Mayor" produces the 'Maire' of 'Beaconsfield'; any other heading
    produces a 'Conseiller' whose post is the text after the first '-' in
    the district part.

    Raises:
        IndexError: if an expected image, script, or (for the mayor) phone
            node is missing from the page.
        ValueError: if a heading contains more than one comma.
    """
    page = lxmlize(COUNCIL_PAGE)
    councillors = page.xpath('//h1[@class="title"]')
    for councillor in councillors:
        # Read the heading once; it is needed for both the test and the split.
        heading = councillor.text_content()
        if ',' not in heading:
            # Headings without a comma are not "name, district" entries.
            continue
        name, district = heading.split(',')
        name = name.strip()

        if 'Mayor' in district:
            p = Legislator(name=name, post_id='Beaconsfield', role='Maire')
            p.add_source(COUNCIL_PAGE)
            p.image = councillor.xpath('./parent::div/parent::div/p//img/@src')[0]
            # The mayor's contact details live in sibling <div> elements and
            # the phone number is required (a missing one raises IndexError).
            phone = councillor.xpath('.//parent::div/following-sibling::div[contains(text(), "514")]/text()')[0]
            # Keep the text after the first ':' and dash-separate the digits.
            phone = phone.split(':')[1].strip().replace(' ', '-')
            p.add_contact('voice', phone, 'legislature')
            script = councillor.xpath('.//parent::div/following-sibling::div/script')[0].text_content()
            # NOTE(review): get_email presumably extracts the address from
            # the inline script -- confirm against its definition.
            p.add_contact('email', get_email(script), None)
            yield p
            continue

        # Councillors: the district name is the text after the first '-'.
        district = district.split('-')[1].strip()
        p = Legislator(name=name, post_id=district, role='Conseiller')
        p.add_source(COUNCIL_PAGE)
        p.image = councillor.xpath('./parent::div/parent::div/p//img/@src')[0]
        # Councillor contact details live in sibling <p> elements and the
        # phone number is optional.
        phone = councillor.xpath('.//parent::div/following-sibling::p[contains(text(), "514")]/text()')
        if phone:
            phone = phone[0]
            phone = phone.split(':')[1].strip().replace(' ', '-')
            p.add_contact('voice', phone, 'legislature')
        script = councillor.xpath('.//parent::div/following-sibling::p/script')[0].text_content()
        p.add_contact('email', get_email(script), None)
        yield p
示例2: get_people
# 需要导入模块: from utils import CanadianLegislator [as 别名]
# 或者: from utils.CanadianLegislator import image [as 别名]
def get_people(self):
    """Yield a Legislator for each matching table row on COUNCIL_PAGE.

    For every row selected from the ``WebPartWPQ1`` table the first cell
    supplies the name, role, district and image, and the second cell
    supplies the "Residence:" and "Municipal Office:" contact text, which
    is handed to ``self.get_contacts``.

    Raises:
        IndexError: if an expected node or contact section is missing.
    """
    page = lxmlize(COUNCIL_PAGE)
    councillors = page.xpath('//div[@id="WebPartWPQ1"]/table/tbody/tr[1]')
    for councillor in councillors:
        # Prefer the four-deep nested <strong>; fall back to any <strong>
        # in the first cell when that nesting is absent.
        node = councillor.xpath(".//td[1]//strong//strong//strong//strong") or councillor.xpath(".//td[1]//strong")
        text = node[0].text_content()

        # Strip the title words to isolate the name; whatever was removed
        # from the original text is the role.
        name = text.strip().replace("Deputy ", "").replace("Warden ", "").replace("Mayor", "")
        role = text.replace(name, "").strip() or "Councillor"
        if "," in name:
            name = name.split(",")[0]
        # Removing "Mayor" can leave surrounding whitespace even when the
        # name has no comma, so always trim it.
        name = name.strip()

        district = councillor.xpath('.//td[1]//p[contains(text(),",")]/text()')[0].split(",")[1].strip()
        # Dropping a leading "<Type> of" leaves a leading space behind;
        # trim it so the post id is clean.
        district = re.sub(r"\A(?:City|Municipality|Town|Township|Village) of\b| Township\Z", "", district).strip()

        p = Legislator(name=name, post_id=district, role=role)
        p.add_source(COUNCIL_PAGE)
        p.image = councillor.xpath(".//td[1]//img/@src")[0]

        info = councillor.xpath(".//td[2]")[0].text_content()
        # Text between "Residence:" and "Municipal Office:" is the home
        # contact block; everything after "Municipal Office:" is the office.
        residential_info = re.findall(r"(?<=Residence:)(.*)(?=Municipal Office:)", info, flags=re.DOTALL)[0]
        self.get_contacts(residential_info, "residence", p)
        municipal_info = re.findall(r"(?<=Municipal Office:)(.*)", info, flags=re.DOTALL)[0]
        self.get_contacts(municipal_info, "legislature", p)
        yield p
示例3: scrape_mayor
# 需要导入模块: from utils import CanadianLegislator [as 别名]
# 或者: from utils.CanadianLegislator import image [as 别名]
def scrape_mayor(self, name, url):
    """Return the Legislator record for the mayor of Burlington.

    Phone, fax, email and image are read from the sidebar of the page at
    ``url``; the address is read from the "Contact" page linked from the
    menu of http://www.burlingtonmayor.com.

    Args:
        name: the mayor's name, passed through to Legislator.
        url: the mayor's page, fetched with lxmlize.

    Raises:
        IndexError: if any expected node is missing from a fetched page.
    """
    profile = lxmlize(url)
    contact_lines = profile.xpath('//div[@id="secondary align_RightSideBar"]/blockquote/p/text()')
    # The first two text nodes of the sidebar blockquote are used as the
    # phone and fax values respectively.
    voice, fax = contact_lines[0], contact_lines[1]
    email = profile.xpath('//div[@id="secondary align_RightSideBar"]/blockquote/p/a[contains(@href, "mailto:")]/text()')[0]

    # The address is only available from the mayor's own site, one hop
    # past its landing page.
    mayor_site = 'http://www.burlingtonmayor.com'
    contact_link = lxmlize(mayor_site).xpath('//div[@class="menu"]//a[contains(text(),"Contact")]')[0]
    contact_page = lxmlize(contact_link.attrib['href'])
    address = contact_page.xpath('//div[@class="entry-content"]//p[contains(text(),"City Hall")]')[0].text_content()

    mayor = Legislator(name=name, post_id="Burlington", role='Mayor')
    for source in (COUNCIL_PAGE, url, mayor_site):
        mayor.add_source(source)
    mayor.image = profile.xpath('//div[@id="secondary align_RightSideBar"]/p/img/@src')[0]

    contacts = (
        ('voice', voice, 'legislature'),
        ('fax', fax, 'legislature'),
        ('email', email, None),
        ('address', address, 'legislature'),
    )
    for kind, value, note in contacts:
        mayor.add_contact(kind, value, note)
    return mayor
示例4: councillor_data
# 需要导入模块: from utils import CanadianLegislator [as 别名]
# 或者: from utils.CanadianLegislator import image [as 别名]
def councillor_data(url):
    """Return a Legislator with role 'Councillor' built from the page at ``url``.

    Name, district, address and photo are read with XPath ``string()``
    expressions, so a missing node yields an empty string rather than an
    error.  The phone comes from ``get_phone_data`` and the email from
    ``email_js`` applied to the text of the page's ``<span><script>``.
    """
    doc = lxmlize(url)

    full_name = doc.xpath('string(//h1[@id="TitleOfPage"])')
    ward = doc.xpath('string(//h2)')
    # TODO: Councillor emails are built with JS to prevent scraping, but the JS can be scraped.
    office_address = doc.xpath('string(//div[@class="asideContent"])')
    photo_src = doc.xpath('string(//div[@id="contentright"]//img[1]/@src)')
    voice = get_phone_data(doc)
    email = email_js(doc.xpath('string(//span/script)'))

    councillor = Legislator(name=full_name, post_id=ward, role='Councillor')
    for source in (COUNCIL_PAGE, url):
        councillor.add_source(source)

    contacts = (
        ('address', office_address, 'legislature'),
        ('voice', voice, 'legislature'),
        ('email', email, None),
    )
    for kind, value, note in contacts:
        councillor.add_contact(kind, value, note)

    councillor.image = photo_src
    return councillor
示例5: mayor_data
# 需要导入模块: from utils import CanadianLegislator [as 别名]
# 或者: from utils.CanadianLegislator import image [as 别名]
def mayor_data(url):
    """Return the Legislator record for the mayor of Waterloo from ``url``.

    The name is the text of the page's first ``<h1>`` with its first six
    characters removed; the image comes from ``photo_url(page)``.
    """
    page = lxmlize(url)
    heading = page.xpath('string(//h1)')
    # Skip the leading "Mayor " (six characters) that precedes the name.
    mayor_name = heading[len('Mayor '):]

    mayor = Legislator(name=mayor_name, post_id='Waterloo', role='Mayor')
    for source in (COUNCIL_PAGE, url):
        mayor.add_source(source)
    mayor.image = photo_url(page)
    return mayor