本文整理汇总了Python中utils.CanadianLegislator.add_link方法的典型用法代码示例。如果您正苦于以下问题:Python CanadianLegislator.add_link方法的具体用法?Python CanadianLegislator.add_link怎么用?Python CanadianLegislator.add_link使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类utils.CanadianLegislator的用法示例。
在下文中一共展示了CanadianLegislator.add_link方法的3个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: get_people
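# 需要导入模块: from utils import CanadianLegislator as Legislator (下面的代码以 Legislator 作为别名)
# 注意: add_link 是 CanadianLegislator 类的方法, 需通过实例调用 (如 p.add_link(url, None)), 不能用 from utils.CanadianLegislator import add_link 导入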
def get_people(self):
    """Yield one Legislator per council member linked from COUNCIL_PAGE.

    The first link found in the navigation list is recorded as the Mayor
    of Ajax. For every other member the ward and role are parsed out of
    the ``<h1>`` heading on the member's own page.

    Each yielded Legislator carries both source URLs, a photo URL, and
    the contact rows of the first ``datatable`` on the member page:
    Phone/Fax/Email rows become contact details, anything else is added
    as a link.
    """
    index_page = lxmlize(COUNCIL_PAGE)
    member_links = index_page.xpath('//ul[@class="subNav top"]/li/ul//li/a')

    for link in member_links:
        name = link.text_content()
        url = link.attrib['href']
        page = lxmlize(url)

        if link == member_links[0]:
            district, role = 'Ajax', 'Mayor'
        else:
            # Ward and role both come from the same heading text.
            heading = page.xpath('//div[@id="printAreaContent"]//h1')[0].text_content()
            district = re.findall(r'Ward.*', heading)[0].strip()
            role = re.findall('((Regional)? ?(Councillor))', heading)[0][0]

        p = Legislator(name=name, post_id=district, role=role)
        for source in (COUNCIL_PAGE, url):
            p.add_source(source)

        p.image = page.xpath('//div[@class="intQuicklinksPhoto"]/img/@src')[0]

        # The first row of the table is skipped (presumably a header row).
        rows = page.xpath('//table[@class="datatable"][1]//tr')[1:]
        for row in rows:
            cells = row.xpath('./td')
            label = cells[0].text_content().strip()
            value = cells[1].text_content().strip()

            if not re.match(r'(Phone)|(Fax)|(Email)', label):
                p.add_link(value, None)
                continue

            kind = CONTACT_DETAIL_TYPE_MAP[label]
            if kind == 'email':
                note = None
            else:
                note = 'legislature'
            p.add_contact(kind, value, note)

        yield p
示例2: get_people
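# 需要导入模块: from utils import CanadianLegislator as Legislator (下面的代码以 Legislator 作为别名)
# 注意: add_link 是 CanadianLegislator 类的方法, 需通过实例调用 (如 p.add_link(url, None)), 不能用 from utils.CanadianLegislator import add_link 导入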
def get_people(self):
    """Yield the mayor, then one Legislator per councillor row on COUNCIL_PAGE.

    The mayor comes from ``scrape_mayor()``. Each remaining table row in
    ``centre_content`` supplies the district, name and detail-page URL of
    a councillor; the detail page supplies address, email, photo, phone
    numbers and (when more than two anchors are present) a trailing link.

    Phone numbers are read from the first paragraph containing a ``-``.
    Each number is the text preceding its ``tel`` / ``cell`` / ``fax``
    label on the same line, normalised by ``clean_number`` below.
    """
    # Label that follows a number on the page -> contact type to record.
    number_labels = (("tel", "voice"), ("cell", "cell"), ("fax", "fax"))

    def clean_number(raw):
        # The original chained .replace("\\xc2", "") / .replace("\\xa0", "-"),
        # which look for a literal backslash sequence and therefore never
        # match. Target the real characters instead: U+00A0 (no-break space)
        # becomes a separator like an ordinary space, and U+00C2 (the stray
        # character left when a UTF-8 no-break space is mis-decoded) is
        # dropped.
        return (
            raw.strip()
            .replace(" ", "-")
            .replace("\xc2", "")
            .replace("\xa0", "-")
        )

    listing = lxmlize(COUNCIL_PAGE)
    yield scrape_mayor()

    for row in listing.xpath('//div[@id="centre_content"]//tr'):
        # Rows mentioning "Position" are skipped (presumably the header row).
        if "Position" in row.text_content():
            continue

        cells = row.xpath("./td")
        district = cells[0].text_content().replace("Councillor", "")
        name = cells[1].text_content()
        url = row.xpath("./td/a")[0].attrib["href"]

        p = Legislator(name=name, post_id=district, role="Councillor")
        p.add_source(COUNCIL_PAGE)
        p.add_source(url)

        page = lxmlize(url)

        address = page.xpath('//div[@id="centre_content"]//p')[0].text_content().replace("\r\n", ", ")
        email = page.xpath('//a[contains(@href,"mailto:")]')[0].attrib["href"].replace("mailto:", "")
        p.add_contact("address", address, "legislature")
        p.add_contact("email", email, None)

        p.image = page.xpath('//div[@id="centre_content"]//img/@src')[0]

        numbers = page.xpath('//div[@id="centre_content"]//p[contains(text(),"-")]')[0].text_content()
        for label, contact_type in number_labels:
            if label not in numbers:
                continue
            # "." does not cross newlines, so this captures only the text
            # before the label on the label's own line.
            raw = re.findall(r"(.*)" + label, numbers)[0]
            p.add_contact(contact_type, clean_number(raw), "legislature")

        # The last anchor is recorded as a link only when the page has more
        # than two anchors in the content area.
        anchors = page.xpath('//div[@id="centre_content"]//a')
        if len(anchors) > 2:
            p.add_link(anchors[-1].attrib["href"], None)

        yield p
示例3: scrape_mayor
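# 需要导入模块: from utils import CanadianLegislator as Legislator (下面的代码以 Legislator 作为别名)
# 注意: add_link 是 CanadianLegislator 类的方法, 需通过实例调用 (如 p.add_link(url, None)), 不能用 from utils.CanadianLegislator import add_link 导入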
def scrape_mayor(self, div):
    """Build and return the Legislator record for the Mayor of Guelph.

    ``div`` is the element on the council page that holds the mayor's
    entry: its first anchor gives the name and profile URL, its third
    non-blank text node is used as the phone number, and its ``mailto:``
    anchor text is used as the email address. The profile page supplies
    the Facebook and Twitter links and the photo.
    """
    profile_anchor = div.xpath('.//a')[0]
    name = profile_anchor.text_content().replace('Mayor', '')
    url = profile_anchor.attrib['href']

    p = Legislator(name=name, post_id='Guelph', role='Mayor')
    for source in (COUNCIL_PAGE, url):
        p.add_source(source)

    text_nodes = div.xpath('.//text()[normalize-space()]')
    phone = text_nodes[2]
    mail_anchor = div.xpath('.//a[contains(@href,"mailto:")]')[0]
    email = mail_anchor.text_content()

    page = lxmlize(url)

    p.add_contact('voice', phone, 'legislature')
    p.add_contact('email', email, None)

    # Both social links are required: a missing one raises IndexError.
    social_queries = (
        '//div[@class="entry-content"]//a[contains(@href, "facebook")]',
        '//div[@class="entry-content"]//a[contains(@href, "twitter")]',
    )
    for query in social_queries:
        p.add_link(page.xpath(query)[0].attrib['href'], None)

    p.image = page.xpath('//header/img/@src')[0]
    return p