本文整理汇总了Python中pupa.scrape.Person.extras['notice']的典型用法代码示例。如果您正苦于以下问题:Python Person.extras['notice']的具体用法?Python Person.extras['notice']怎么用?Python Person.extras['notice']使用的例子?那么恭喜您,这里精选的代码示例或许可以为您提供帮助。注意extras是Person实例上的字典属性,'notice'是其中的一个键,而不是方法。您也可以进一步了解该属性所在类pupa.scrape.Person的用法示例。
在下文中一共展示了Person.extras['notice']方法的1个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: scrape_chamber
# Required import: from pupa.scrape import Person
# Note: extras is a dict-like attribute of a Person instance; extras['notice'] is assigned on the instance and cannot be imported.
def scrape_chamber(self, chamber):
    """Yield a ``Person`` for every active legislator listed for ``chamber``.

    Downloads the member list from ncga.state.nc.us, skips members whose
    notice mentions 'Resigned' or 'Deceased', then downloads each member's
    detail page to collect the photo URL and contact details.

    Args:
        chamber: ``'lower'`` selects the House list; any other value selects
            the Senate list. The value is also passed to ``Person`` as
            ``primary_org``.

    Yields:
        Person: one per remaining member, with the member page added as link
        and source, ``extras['notice']`` set (``None`` when the row has no
        notice span), and a contact detail for every value that was found.

    Raises:
        ValueError: if a member row does not have exactly four children.
        KeyError: if the party text of a row is not a key of ``party_map``.
        IndexError: if a row has no member link, or the detail page has no
            picture link, ``mailto:`` link or capitol phone span.
    """
    url = ("http://www.ncga.state.nc.us/gascripts/members/"
           "memberListNoPic.pl?sChamber=")
    url += 'House' if chamber == 'lower' else 'Senate'

    doc = lxml.html.fromstring(self.get(url).text)
    doc.make_links_absolute('http://www.ncga.state.nc.us')
    rows = doc.xpath('//div[@id="mainBody"]/table/tr')

    # rows[0] is skipped -- presumably the table's header row.
    for row in rows[1:]:
        # Iterating an element yields its direct children; this replaces the
        # deprecated getchildren() call. The fourth cell is not used.
        party, district, full_name, _counties = row

        party = party_map[party.text_content().strip("()")]
        district = district.text_content().replace("District", "").strip()

        notice_spans = full_name.xpath('span')
        notice = notice_spans[0].text_content() if notice_spans else None
        # skip resigned or deceased legislators
        if notice is not None and ('Resigned' in notice
                                   or 'Deceased' in notice):
            continue

        link = full_name.xpath('a/@href')[0]
        full_name = full_name.xpath('a')[0].text_content()
        # Replace non-breaking spaces with ordinary spaces in the name.
        full_name = full_name.replace(u'\u00a0', ' ')

        # scrape legislator page details
        ldoc = lxml.html.fromstring(self.get(link).text)
        ldoc.make_links_absolute('http://www.ncga.state.nc.us')

        photo_url = ldoc.xpath('//a[contains(@href, "pictures")]/@href')[0]
        phone = get_table_item(ldoc, 'Phone:') or None
        address = get_table_item(ldoc, 'Address:') or None

        # The capitol phone and address are located relative to the table
        # row that holds the first mailto: link.
        email = ldoc.xpath('//a[starts-with(@href, "mailto:")]')[0]
        capitol_email = email.text
        phone_span = email.xpath(
            'ancestor::tr[1]/preceding-sibling::tr[1]/td/span')[0]
        # .text is None when the span has no leading text; treat that as
        # "no phone" instead of raising AttributeError on .strip().
        capitol_phone = (phone_span.text or '').strip() or None
        address_lines = email.xpath(
            'ancestor::tr[1]/preceding-sibling::tr[2]/td/text()')
        capitol_address = '\n'.join(
            line.strip() for line in address_lines) or None

        # save legislator
        person = Person(name=full_name, district=district,
                        party=party, primary_org=chamber,
                        image=photo_url)
        person.extras['notice'] = notice
        person.add_link(link)
        person.add_source(link)

        contact_details = (
            ('address', address, 'District Office'),
            ('voice', phone, 'District Office'),
            ('address', capitol_address, 'Capitol Office'),
            ('voice', capitol_phone, 'Capitol Office'),
            ('email', capitol_email, 'Capitol Office'),
        )
        # Only record the details that were actually found on the page.
        for detail_type, value, note in contact_details:
            if value:
                person.add_contact_detail(type=detail_type, value=value,
                                          note=note)

        yield person