本文整理汇总了Python中pupa.scrape.Person.add_contact_deatil方法的典型用法代码示例。如果您正苦于以下问题:Python Person.add_contact_deatil方法的具体用法?Python Person.add_contact_deatil怎么用?Python Person.add_contact_deatil使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类pupa.scrape.Person
的用法示例。
在下文中一共展示了Person.add_contact_deatil方法的2个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: scrape_lower_chamber
# 需要导入模块: from pupa.scrape import Person [as 别名]
# 或者: from pupa.scrape.Person import add_contact_deatil [as 别名]
#.........这里部分代码省略.........
'.//span[@class="info"]')
name_node = self.get_node(
info_node,
'.//span[@class="name"]')
# Strip titles from legislator name.
if name_node is not None:
name_text = name_node.text.strip()
name_text = re.sub(r'^Hon\.[\s]*', '', name_text)
name_text = re.sub(r' - .*$', '', name_text)
name = ' '.join(name_text.split())
party_node = self.get_node(
info_node,
'.//span[@class="party"]/span')
if party_node is not None:
party_text = party_node.text.strip()
party = party_map[party_text]
district_node = self.get_node(
info_node,
'.//span[@class="district"]')
if district_node is not None:
district_text = district_node.text.strip()
try:
# district_number = re.search(r'0?(\d{1,2})',
# district_text).group(1)
district = re.sub(r'^Distrito[\s]*', '',
district_text).strip()
except AttributeError:
if "Distrito" not in district_text:
district = 'At-Large'
else:
warning = u'{} missing district number.'
self.warning(warning.format(name))
address_node = self.get_node(
info_node,
'.//span[@class="address"]')
if address_node is not None:
address_text = address_node.text
if address_text and not address_text.isspace():
address = address_text.strip()
# Only grabs the first validated phone number found.
# Typically, representatives have multiple phone numbers.
phone_nodes = self.get_nodes(
member_node,
'.//span[@class="two-columns"]//span[@class="data-type"'
'and contains(text(), "Tel:")]')
if phone_nodes is not None:
has_valid_phone = False
for phone_node in phone_nodes:
# Don't keep searching phone numbers if a good
# one is found.
if has_valid_phone:
break
phone_text = phone_node.text
phone_text = re.sub(r'^Tel:[\s]*', '', phone_text)\
.strip()
if self.validate_phone_number(phone_text):
phone = phone_text
has_valid_phone = True
fax_node = self.get_node(
member_node,
'.//span[@class="two-columns"]//span[@class="data-type"'
' and contains(text(), "Fax:")]')
if fax_node is not None:
fax_text = fax_node.text
fax_text = re.sub(r'^Fax:[\s]*', '', fax_text).strip()
if self.validate_phone_number(fax_text):
fax = fax_text
person = Person(primary_org='lower',
district=district,
name=name,
party=party,
image=photo_url)
person.add_link(url)
person.add_source(url)
if address:
person.add_contact_deatil(type='address',
value=address,
note='capitol Office')
if phone:
person.add_contact_deatil(type='voice',
value=phone,
note='capitol Office')
if fax:
person.add_contact_deatil(type='fax',
value=fax,
note='capitol Office')
yield person
示例2: scrape_upper_chamber
# 需要导入模块: from pupa.scrape import Person [as 别名]
# 或者: from pupa.scrape.Person import add_contact_deatil [as 别名]
def scrape_upper_chamber(self, term):
    """Scrape Puerto Rico Senate (upper chamber) members.

    Fetches each district roster page for the 2013-2016 Senate, parses
    the member table, guesses a headshot URL from the member's name, and
    yields one ``Person`` per senator.

    :param term: legislative term identifier (unused by this scraper,
        kept for interface compatibility with callers).
    :yields: ``pupa.scrape.Person`` objects with district, party, photo,
        e-mail and phone contact details attached.
    """
    urls = {
        'At-Large': 'http://www.senadopr.us/Pages/SenadoresporAcumulacion.aspx',
        'I': 'http://www.senadopr.us/Pages/Senadores%20Distrito%20I.aspx',
        'II': 'http://www.senadopr.us/Pages/Senadores%20Distrito%20II.aspx',
        'III': 'http://www.senadopr.us/Pages/Senadores%20Distrito%20III.aspx',
        'IV': 'http://www.senadopr.us/Pages/Senadores%20Distrito%20IV.aspx',
        'V': 'http://www.senadopr.us/Pages/Senadores%20Distrito%20V.aspx',
        'VI': 'http://www.senadopr.us/Pages/Senadores%20Distrito%20VI.aspx',
        'VII': 'http://www.senadopr.us/Pages/Senadores%20Distrito%20VII.aspx',
        'VIII': 'http://www.senadopr.us/Pages/Senadores%20Distrito%20VIII.aspx'
    }
    for district, url in urls.items():
        leg_page_html = self.get(url).text
        doc = lxml.html.fromstring(leg_page_html)
        doc.make_links_absolute(url)
        # Skip the SharePoint header row; keep only data rows.
        rows = doc.xpath('//table[@summary="Senadores 2013-2016"]'
                         '/tr[not(@class="ms-viewheadertr")]')
        for row in rows:
            tds = row.xpath('td')
            # Column layout: name | party | phone | email.
            name = tds[0].text_content().title().replace('Hon.', '', 1).strip()
            party = tds[1].text_content()
            phone = tds[2].text_content()
            email = tds[3].text_content()
            # Guess the picture filename from the name.
            # Middle-name abbreviations are sometimes weird, so pad the
            # dots with spaces before splitting.
            namefixed = name.replace(".", ". ")
            # Strip accents, then decode back to str: .encode() returns
            # bytes, and bytes.endswith('.') / bytes[0] + str below would
            # raise TypeError on Python 3 without the .decode().
            namefixed = unicodedata.normalize('NFKD', namefixed) \
                .encode('ascii', 'ignore').decode('ascii')
            nameparts = namefixed.split()
            # If the second token is an abbreviated middle name ("J."),
            # the surname is the third token.
            if nameparts[1].endswith('.'):
                lastname = nameparts[2]
            else:
                lastname = nameparts[1]
            # Construct the photo url: first initial + surname, lowercase.
            photo_url = ('http://www.senadopr.us/Fotos%20Senadores/sen_' +
                         (nameparts[0][0] + lastname).lower() + '.jpg')
            try:
                self.head(photo_url)  # Checking to see if the file is there
            except scrapelib.HTTPError:  # If not, leave out the photo_url
                photo_url = ''
            person = Person(primary_org='upper',
                            district=district,
                            name=name,
                            party=party,
                            image=photo_url)
            # NOTE(review): "add_contact_deatil" looks like a typo of
            # pupa's Person.add_contact_detail — left as-is because it may
            # match a project-local override; confirm against the Person
            # class actually imported here.
            if email:
                person.add_contact_deatil(type='email',
                                          value=email,
                                          note='Capitol Office')
            if phone:
                person.add_contact_deatil(type='voice',
                                          value=phone,
                                          note='Capitol Office')
            person.add_link(url)
            person.add_source(url)
            yield person