本文整理汇总了Python中pupa.scrape.Person.extras['district_name']属性的典型用法代码示例。如果您正苦于以下问题:Python Person.extras['district_name']的具体用法?Python Person.extras['district_name']怎么用?Python Person.extras['district_name']使用的例子?那么恭喜您,这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该属性所在类pupa.scrape.Person的用法示例。
在下文中一共展示了Person.extras['district_name']属性的1个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: scrape_reps
# 需要导入模块: from pupa.scrape import Person [as 别名]
# 注意: extras['district_name'] 无法通过 import 语句单独导入;请先导入 Person,再通过实例访问 person.extras['district_name']
def scrape_reps(self):
url = 'http://www.maine.gov/legis/house/dist_mem.htm'
page = self.get(url).text
page = lxml.html.fromstring(page)
page.make_links_absolute(url)
# These do not include the non-voting tribal representatives
# They do not have numbered districts, and lack a good deal of
# the standard profile information about representatives
for district in page.xpath('//a[contains(@href, "dist_twn")]/..'):
if "- Vacant" in district.text_content():
self.warning("District is vacant: '{}'".
format(district.text_content()))
continue
_, district_number = district.xpath('a[1]/@href')[0].split('#')
leg_url = district.xpath('a[2]/@href')[0]
leg_info = district.xpath('a[2]/text()')[0]
INFO_RE = r'''
Representative\s
(?P<member_name>.+?)
\s\(
(?P<party>[DRCUI])
-
(?P<district_name>.+?)
\)
'''
info_search = re.search(INFO_RE, leg_info, re.VERBOSE)
member_name = info_search.group('member_name')
party = _party_map[info_search.group('party')]
district_name = info_search.group('district_name')
# Get the photo url.
html = self.get(leg_url).text
doc = lxml.html.fromstring(html)
doc.make_links_absolute(leg_url)
(photo_url,) = doc.xpath('//img[contains(@src, ".jpg")]/@src')
# Add contact information from personal page
office_address = re.search(
r'<B>Address: </B>(.+?)\n?</?P>', html, re.IGNORECASE).group(1)
office_email = doc.xpath(
'//a[starts-with(@href, "mailto:")]/text()')
business_phone = re.search(
r'<B>Business Telephone: </B>(.+?)</?P>', html, re.IGNORECASE)
home_phone = re.search(
r'<B>Home Telephone: </B>(.+?)</?P>', html, re.IGNORECASE)
cell_phone = re.search(
r'<B>Cell Telephone: </B>(.+?)</?P>', html, re.IGNORECASE)
person = Person(
name=member_name,
district=district_number,
primary_org='lower',
party=party,
image=photo_url,
)
person.extras['district_name'] = district_name
person.add_link(leg_url)
person.add_source(leg_url)
if office_address:
leg_address = office_address
person.add_contact_detail(
type='address', value=leg_address, note='District Office')
else:
# If no address for legislator
if party == 'Democratic':
leg_address = (
'House Democratic Office, Room 333 State House, 2 State House Station, '
'Augusta, Maine 04333-0002'
)
person.add_contact_detail(
type='address', value=leg_address, note='Party Office')
elif party == 'Republican':
leg_address = (
'House GOP Office, Room 332 State House, 2 State House Station, '
'Augusta, Maine 04333-0002'
)
person.add_contact_detail(
type='address', value=leg_address, note='Party Office')
if office_email:
office_email = office_email[0]
person.add_contact_detail(type='email', value=office_email, note='District Office')
if business_phone:
person.add_contact_detail(
type='voice', value=clean_phone(business_phone.group(1)),
note='Business Phone')
if home_phone:
person.add_contact_detail(
type='voice', value=clean_phone(home_phone.group(1)),
#.........这里部分代码省略.........