本文整理汇总了Python中pupa.scrape.Organization对象的extras['summary']字段的典型用法代码示例。如果您正苦于以下问题:Python Organization.extras['summary']的具体用法?Python Organization.extras['summary']怎么用?Python Organization.extras['summary']使用的例子?那么恭喜您,这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该字段所在类pupa.scrape.Organization的用法示例。
在下文中一共展示了Organization.extras['summary']字段的1个代码示例,示例默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: scrape
# 需要导入模块: from pupa.scrape import Organization [as 别名]
# 说明: extras 是 Organization 实例上的字典属性,无法单独导入;通过 committee.extras['summary'] 读写即可。示例还需要 import lxml.html
def scrape(self):
    """Scrape DC Council committees and yield one Organization each.

    Fetches the committee index page, follows every link whose href
    contains ``dccouncil.us/committees/`` and builds an ``Organization``
    from each committee page: an optional summary (stored under
    ``extras['summary']``), the chairperson, and the other councilmembers.

    A page that lacks a "Chairperson" or "Councilmembers" section no
    longer aborts the whole scrape with ``IndexError``; the missing part
    is skipped, and a committee that ends up with no members at all is
    reported through ``self.warning`` instead of being yielded.

    Yields:
        Organization: every committee for which at least one member
        was found.
    """
    com_url = 'http://dccouncil.us/committees'
    data = self.get(com_url).text
    doc = lxml.html.fromstring(data)
    doc.make_links_absolute(com_url)

    links = doc.xpath('//a[contains(@href, "dccouncil.us/committees/")]')

    # NOTE(review): lxml elements appear to hash by object identity, so a
    # set of elements never collapses repeated links. De-duplicate on the
    # (href, text) pair instead, which also keeps document order.
    seen = set()
    for link in links:
        url = link.attrib['href']
        name = link.text_content().strip()
        if (url, name) in seen:
            continue
        seen.add((url, name))

        comm_data = self.get(url).text
        comm_page = lxml.html.fromstring(comm_data)
        comm_page.make_links_absolute(url)

        # classify these as belonging to the legislature
        committee = Organization(name=name, classification='committee',
                                 chamber='legislature')

        summary_nodes = comm_page.xpath('//p[@class="page-summary"]')
        if summary_nodes:
            summary = summary_nodes[0].text_content().strip()
            committee.extras['summary'] = summary

        # The chair section may be absent; chair_name stays None then, so
        # the comparison below keeps every listed councilmember.
        chair_name = None
        chair = comm_page.xpath(
            "//h4[text()='Chairperson']/following-sibling::p")
        if chair:
            chair_name = self.remove_title(chair[0].text_content().strip())
            committee.add_member(chair_name, role="chair")

        member_lists = comm_page.xpath(
            "//h4[text()='Councilmembers']/following-sibling::ul")
        members = member_lists[0].xpath("./li") if member_lists else []

        for m in members:
            mem_name = self.remove_title(m.text_content().strip())
            # The chair was already added above with role="chair".
            if mem_name != chair_name:
                committee.add_member(mem_name)

        committee.add_source(url)
        committee.add_link(url, note='Official Website')

        if not committee._related:
            self.warning('empty committee: %s;', name)
        else:
            yield committee