本文整理汇总了Python中utils.CanadianLegislator.add_source方法的典型用法代码示例。如果您正苦于以下问题：Python CanadianLegislator.add_source方法的具体用法？Python CanadianLegislator.add_source怎么用？Python CanadianLegislator.add_source使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类utils.CanadianLegislator的用法示例。
在下文中一共展示了CanadianLegislator.add_source方法的4个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: get_people
# 需要导入模块: from utils import CanadianLegislator [as 别名]
# 或者: from utils.CanadianLegislator import add_source [as 别名]
def get_people(self):
    """Yield the mayor, then up to two councillors for every ward heading.

    The mayor record is produced by ``self.scrape_mayor`` from the first
    paragraph that follows the "MAYOR" ``<h2>``.  Each ``<h3>`` on the
    council page is treated as a ward heading; its leading ``WARD <n> - ``
    prefix is removed to obtain the district name.

    For every councillor paragraph the first text node is recorded as the
    address, the remaining text nodes are handed to ``self.get_tel_numbers``
    and the text of the ``<a>`` child is recorded as the e-mail address.
    """
    # Compiled once instead of on every text line.  Written as ``u''``
    # literals because the ``ur''`` prefix only exists in Python 2.
    separator_re = re.compile(u'(\xbb)|(\xa0)')
    nbsp_re = re.compile(u'\xa0')

    page = lxmlize(COUNCIL_PAGE)

    mayor_info = page.xpath('//h2[contains(text(), "MAYOR")]//following-sibling::p')[0]
    yield self.scrape_mayor(mayor_info)

    for ward in page.xpath('//h3'):
        district = re.sub(r'\AWARD \d+ - ', '', ward.text_content())

        # ``following-sibling::p`` selects every later sibling paragraph, so
        # only the first two are used.  Slicing (rather than comparing with
        # ``councillors[1]``) avoids an IndexError when a single paragraph
        # follows the heading.
        for councillor in ward.xpath('following-sibling::p')[:2]:
            name = councillor.xpath('./strong')[0].text_content()

            p = Legislator(name=name, post_id=district, role='Councillor')
            p.add_source(COUNCIL_PAGE)

            info = councillor.xpath('./text()')
            address = info.pop(0)
            p.add_contact('address', address, 'legislature')

            # get phone numbers: split each remaining line on U+00BB / U+00A0
            # and drop empty pieces and the U+00A0 separators themselves.
            for line in info:
                pieces = separator_re.split(line)
                tmp = [y for y in pieces if y and not nbsp_re.match(y)]
                self.get_tel_numbers(tmp, p)

            email = councillor.xpath('string(./a)')
            p.add_contact('email', email, None)
            yield p
示例2: get_people
# 需要导入模块: from utils import CanadianLegislator [as 别名]
# 或者: from utils.CanadianLegislator import add_source [as 别名]
def get_people(self):
    """Yield the mayor, then one councillor for every district found.

    The mayor's name and photo are read from ``MAYOR_URL``.  Councillors are
    discovered by running a regular expression over the raw text of
    ``COUNCIL_PAGE``; each match gives a district name and a relative URL to
    the councillor's own page, from which name, e-mail and photo are read.
    """
    # District names that are kept verbatim; every other district has its
    # leading "de", "de la", "des" or "du" removed.
    keep_article = ("des Estacades", "des Plateaux", "des Terrasses", "du Sanctuaire")

    # mayor first, can't find email
    page = lxmlize(MAYOR_URL)
    photo_url = page.xpath('string(//img/@src[contains(., "Maire")])')
    name = page.xpath('string(//td[@class="contenu"]/text()[last()])')
    p = Legislator(name=name, post_id=u"Trois-Rivières", role="Maire", image=photo_url)
    p.add_source(MAYOR_URL)
    yield p

    resp = requests.get(COUNCIL_PAGE)
    # page rendering through JS on the client, so the district names and
    # links are extracted from the response text instead of parsed markup
    page_re = re.compile(r'createItemNiv3.+"District (.+?)".+(index.+)\\"')
    for district, url_rel in page_re.findall(resp.text):
        if district not in keep_article:
            # Raw string: "\A" is not a valid escape in a normal literal.
            district = re.sub(r"\A(?:de(?: la)?|des|du) ", "", district)

        url = urljoin(COUNCIL_PAGE, url_rel)
        page = lxmlize(url)
        name = page.xpath("string(//h2)")
        # Drop the "mailto:" scheme from the href to keep the bare address.
        email = page.xpath('string(//a/@href[contains(., "mailto:")])')[len("mailto:"):]
        photo_url = page.xpath('string(//img/@src[contains(., "Conseiller")])')

        p = Legislator(name=name, post_id=district, role="Conseiller", image=photo_url)
        p.add_source(url)
        p.add_contact("email", email, None)
        yield p
示例3: councillor_data
# 需要导入模块: from utils import CanadianLegislator [as 别名]
# 或者: from utils.CanadianLegislator import add_source [as 别名]
def councillor_data(url):
    """Build and return a councillor record from the profile page at ``url``.

    Name, district, address and photo are read straight from the page with
    XPath; the phone number comes from ``get_phone_data`` and the e-mail
    address from ``email_js``.  Both ``COUNCIL_PAGE`` and ``url`` are
    recorded as sources.
    """
    doc = lxmlize(url)

    full_name = doc.xpath('string(//h1[@id="TitleOfPage"])')
    ward_name = doc.xpath('string(//h2)')
    office_address = doc.xpath('string(//div[@class="asideContent"])')
    portrait_src = doc.xpath('string(//div[@id="contentright"]//img[1]/@src)')
    phone_number = get_phone_data(doc)

    # Councillor e-mails are built with JS to prevent scraping, so the
    # inline script text is passed to email_js to obtain the address.
    script_text = doc.xpath('string(//span/script)')
    email_address = email_js(script_text)

    member = Legislator(name=full_name, post_id=ward_name, role='Councillor')

    for source in (COUNCIL_PAGE, url):
        member.add_source(source)

    contact_rows = (
        ('address', office_address, 'legislature'),
        ('voice', phone_number, 'legislature'),
        ('email', email_address, None),
    )
    for kind, value, note in contact_rows:
        member.add_contact(kind, value, note)

    member.image = portrait_src
    return member
示例4: scrape_mayor
# 需要导入模块: from utils import CanadianLegislator [as 别名]
# 或者: from utils.CanadianLegislator import add_source [as 别名]
def scrape_mayor(self, div):
    """Build and return the mayor's record.

    ``div`` is the element linking to the mayor's page: its ``href``
    attribute is followed, and its text (minus the ``'Mayor '`` prefix) is
    used as the name.  From the mayor's page the "Contact" entry of the
    secondary navigation is followed, and address, phone, fax and e-mail are
    read from the second ``div.col`` of that contact page.

    Raises IndexError when any of the expected elements is missing.
    """
    url = div.attrib['href']
    page = lxmlize(url)
    name = div.text_content().replace('Mayor ', '')

    contact_url = page.xpath('//ul[@class="navSecondary"]//a[contains(text(),"Contact")]')[0].attrib['href']
    page = lxmlize(contact_url)
    contact_div = page.xpath('//div[@class="col"][2]')[0]

    address = contact_div.xpath('.//p[1]')[0].text_content()
    # Keep everything from "City of Greater ..." onwards; DOTALL lets the
    # match run across line breaks inside the paragraph.
    address = re.findall(r'(City of Greater .*)', address, flags=re.DOTALL)[0]

    phone = contact_div.xpath('.//p[2]')[0].text_content()
    phone = phone.replace('Phone: ', '')

    fax = contact_div.xpath('.//p[3]')[0].text_content()
    # The fax number is the last space-separated token of the paragraph.
    fax = fax.split(' ')[-1]

    # A path starting with "//" is evaluated against the whole document even
    # when called on an element, so look inside the contact block first and
    # only fall back to the document-wide search if nothing is found there.
    mailto_xpath = 'a[contains(@href, "mailto:")]'
    mailto_links = contact_div.xpath('.//' + mailto_xpath) or contact_div.xpath('//' + mailto_xpath)
    email = mailto_links[0].text_content()

    p = Legislator(name=name, post_id='Greater Sudbury', role='Mayor')
    p.add_source(COUNCIL_PAGE)
    p.add_source(contact_url)
    p.add_contact('address', address, 'legislature')
    p.add_contact('voice', phone, 'legislature')
    p.add_contact('fax', fax, 'legislature')
    p.add_contact('email', email, None)
    return p