本文整理汇总了 Python 中 pupa.scrape.Person 对象的 extras['first_name'] 字段的典型用法代码示例。如果您正苦于以下问题:Python Person.extras['first_name'] 的具体用法?Python Person.extras['first_name'] 怎么用?Python Person.extras['first_name'] 的使用例子?那么恭喜您,这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该字段所在类 pupa.scrape.Person 的用法示例。
在下文中一共展示了 Person.extras['first_name'] 字段的 1 个代码示例,示例默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: scrape_senators
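# Required import: from pupa.scrape import Person  (optionally "as <alias>")
# Note: extras is used as a mapping on Person instances (person.extras['first_name'] = ...);
# it cannot be imported on its own. The function below also uses re, xlrd, lxml.html
# and a clean_phone helper, which must be in scope.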
def scrape_senators(self):
    """Yield one ``Person`` per seated senator listed in the Senate spreadsheet.

    Downloads the member spreadsheet, reads every row after the first from
    its first sheet, and pairs each row with that district's page linked
    from the online roster. The district page supplies the person's link,
    source and (when a ``.png`` image is present) photo.

    Rows that are entirely blank are skipped. Rows whose district does not
    match exactly one roster link are logged as vacant and skipped.

    Yields:
        Person: built with name, district, party and image, carrying the
        individual name parts in ``extras`` plus an address contact detail
        and, when non-empty, phone and email contact details.

    Raises:
        ValueError: if a non-blank row's district is not an integer.
    """
    # Spreadsheet column index for each field read from a row.
    mapping = {
        'district': 0,
        'first_name': 2,
        'middle_name': 3,
        'last_name': 4,
        'suffixes': 5,
        'party': 1,
        'street_addr': 6,
        'city': 7,
        'state': 8,
        'zip_code': 9,
        'phone1': 10,
        'phone2': 11,
        'email': 12,
    }
    url = (
        'https://mainelegislature.org/uploads/visual_edit/'
        '128th-senate-members-for-distribution-1.xlsx'
    )
    # Only the local filename is needed; the second return value is unused.
    fn, _ = self.urlretrieve(url)
    wb = xlrd.open_workbook(fn)
    sh = wb.sheet_by_index(0)

    LEGISLATOR_ROSTER_URL = \
        'https://mainelegislature.org/senate/128th-senators/9332'
    roster_doc = lxml.html.fromstring(self.get(LEGISLATOR_ROSTER_URL).text)
    roster_doc.make_links_absolute(LEGISLATOR_ROSTER_URL)

    # Loop-invariant XPath expressions, built once.
    URL_XPATH = '//li/a[contains(text(), "District {:02d}")]/@href'
    photo_xpath = '//img[contains(@src, ".png")]/@src'

    # Row 0 is skipped (presumably a header row).
    for rownum in range(1, sh.nrows):
        # get fields out of mapping
        d = {}
        for field, col_num in mapping.items():
            try:
                value = sh.cell(rownum, col_num).value
            except IndexError:
                # This col_num doesn't exist in the sheet; record it as
                # empty so later lookups and the address format don't
                # fail with KeyError.
                value = ''
            # xlrd hands back numeric cells as floats; render whole
            # numbers without a trailing ".0" (zip codes, phone numbers).
            if isinstance(value, float) and value.is_integer():
                value = int(value)
            d[field] = str(value).strip()

        if not any(d.values()):
            # Entirely blank row: nothing to scrape.
            continue

        first_name = d['first_name']
        middle_name = d['middle_name']
        last_name = d['last_name']

        # Collapse the double space left behind by an empty middle name.
        full_name = " ".join((first_name, middle_name, last_name))
        full_name = re.sub(r'\s+', ' ', full_name).strip()

        address = "{street_addr}\n{city}, ME {zip_code}".format(**d)

        # Prefer the first phone number, fall back to the second.
        phone = d['phone1'] or d['phone2'] or None

        district = d['district'].split('.')[0]
        party = d['party'].split('.')[0]

        # Determine legislator's URL to get their photo
        try:
            (leg_url, ) = roster_doc.xpath(URL_XPATH.format(int(district)))
        except ValueError:
            # Unpacking fails unless exactly one roster link matched.
            self.warning('vacant seat %s', district)
            continue  # Seat is vacant

        html = self.get(leg_url).text
        doc = lxml.html.fromstring(html)
        doc.make_links_absolute(leg_url)
        photo_urls = doc.xpath(photo_xpath)
        # When several images match, the last one is used.
        photo_url = photo_urls.pop() if photo_urls else None

        person = Person(
            name=full_name,
            district=district,
            image=photo_url,
            primary_org='upper',
            party=party,
        )

        person.add_link(leg_url)
        person.add_source(leg_url)
        person.extras['first_name'] = first_name
        person.extras['middle_name'] = middle_name
        person.extras['last_name'] = last_name

        person.add_contact_detail(type='address', value=address, note='District Office')
        if phone:
            person.add_contact_detail(
                type='voice', value=clean_phone(phone), note='District Phone')
        if d['email']:
            # Same guard as the phone: don't record an empty contact detail.
            person.add_contact_detail(type='email', value=d['email'], note='District Email')

        yield person