本文整理汇总了Python中pupa.scrape.Person.add_party方法的典型用法代码示例。如果您正苦于以下问题:Python Person.add_party方法的具体用法?Python Person.add_party怎么用?Python Person.add_party使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类pupa.scrape.Person的用法示例。
在下文中一共展示了Person.add_party方法的5个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: scrape_chamber
# 需要导入模块: from pupa.scrape import Person [as 别名]
# 或者: from pupa.scrape.Person import add_party [as 别名]
def scrape_chamber(self, chamber):
    """Scrape the member roster for one Arizona chamber.

    :param chamber: 'upper' (Senate) or 'lower' (House).
    :yields: a pupa ``Person`` per sitting member, with party, links,
        photo, and Capitol Office contact details attached.

    NOTE(review): indentation was lost in the source listing and has been
    reconstructed here.
    """
    body = {'lower': 'H', 'upper': 'S'}[chamber]
    url = 'http://www.azleg.gov/MemberRoster/?body=' + body
    page = self.get(url).text

    # There is a bad comment closing tag on this page; repair it so
    # lxml parses the full document.
    page = page.replace('--!>', '-->')
    root = html.fromstring(page)

    path = '//table//tr'
    roster = root.xpath(path)[1:]  # first row is the header
    for row in roster:
        position = ''
        name, district, party, email, room, phone, = row.xpath('td')

        if email.attrib.get('class') == 'vacantmember':
            continue  # Skip any vacant members.

        link = name.xpath('string(a/@href)')
        if len(name) == 1:
            name = name.text_content().strip()
        else:
            # Extra child elements mean the cell also carries a title
            # (e.g. "Speaker") in the tail text.
            position = name.tail.strip()
            name = name[0].text_content().strip()
        if '--' in name:
            name = name.split('--')[0].strip()

        linkpage = self.get(link).text
        linkpage = linkpage.replace('--!>', '-->')  # same bad comment tag
        linkroot = html.fromstring(linkpage)
        linkroot.make_links_absolute(link)

        photos = linkroot.xpath("//img[contains(@src, 'MemberPhoto')]")
        if len(photos) != 1:
            self.warning('no photo on ' + link)
            photo_url = ''
        else:
            photo_url = photos[0].attrib['src']

        district = district.text_content().strip()
        party = party.text_content().strip()
        email = email.text_content().strip()

        if email.startswith('Email: '):
            # The page only shows the local part; append the domain.
            email = email.replace('Email: ', '').lower() + '@azleg.gov'
        else:
            email = ''

        party = self.get_party(party)
        room = room.text_content().strip()
        if chamber == 'lower':
            address = "House of Representatives\n"
        else:
            address = "Senate\n"
        address = address + "1700 West Washington\n Room " + room \
            + "\nPhoenix, AZ 85007"

        phone = phone.text_content().strip()
        # Prepend the Phoenix area code when it is missing.
        if '602' not in re.findall(r'(\d+)', phone):
            phone = "602-" + phone

        leg = Person(primary_org=chamber, image=photo_url, name=name,
                     district=district, party=party)
        leg.add_contact_detail(type='address', value=address, note='Capitol Office')
        leg.add_contact_detail(type='voice', value=phone, note='Capitol Office')
        leg.add_party(party=party)
        leg.add_link(link)
        if email:
            leg.add_contact_detail(type='email', value=email)

        if position:
            leg.add_membership(name_or_org=party, role=position)
        # leg.add_role(position, term, chamber=chamber,
        #              district=district, party=party)

        leg.add_source(url)

        # Probably just get this from the committee scraper
        # self.scrape_member_page(link, session, chamber, leg)
        yield leg
示例2: scrape
# 需要导入模块: from pupa.scrape import Person [as 别名]
# 或者: from pupa.scrape.Person import add_party [as 别名]
def scrape(self):
    """Scrape Connecticut legislators from the CGA FTP CSV feed.

    :yields: each committee ``Organization`` (once, on first sight) and a
        pupa ``Person`` per CSV row.
    :raises AssertionError: if the CSV header row no longer matches
        ``HEADERS`` or a district value is non-numeric.

    NOTE(review): indentation was lost in the source listing and has been
    reconstructed here.
    """
    # chambers = [chamber] if chamber is not None else ['upper', 'lower']
    leg_url = "ftp://ftp.cga.ct.gov/pub/data/LegislatorDatabase.csv"
    page = self.get(leg_url)
    committees = {}

    # Ensure that the spreadsheet's structure hasn't generally changed
    _row_headers = page.text.split('\r\n')[0].replace('"', '').split(',')
    assert _row_headers == HEADERS, "Spreadsheet structure may have changed"

    page = open_csv(page)
    for row in page:
        chamber = {'H': 'lower', 'S': 'upper'}[row['office code']]

        district = row['dist'].lstrip('0')
        assert district.isdigit(), "Invalid district found: {}".format(district)

        name = row['first name']
        mid = row['middle initial'].strip()
        if mid:
            name += " %s" % mid
        name += " %s" % row['last name']
        suffix = row['suffix'].strip()
        if suffix:
            name += " %s" % suffix

        party = row['party']
        if party == 'Democrat':
            party = 'Democratic'

        leg = Person(primary_org=chamber,
                     name=name,
                     district=district,
                     party=party
                     )

        legislator_url = row['URL'].replace('\\', '//').strip()
        if legislator_url != '':
            if not legislator_url.startswith('http'):
                # Bug fix: the listing dropped the concatenation here,
                # which replaced the URL with a bare 'http://'. Prefix
                # the scheme instead of discarding the URL.
                legislator_url = 'http://' + legislator_url
            leg.add_link(legislator_url)

        leg.add_party(party=party)

        office_address = "%s\nRoom %s\nHartford, CT 06106" % (
            row['capitol street address'], row['room number'])
        # extra_office_fields = dict()

        email = row['email'].strip()
        if "@" not in email:
            if not email:
                email = None
            elif email.startswith('http://') or email.startswith('https://'):
                # Some members list a contact form instead of an address.
                # extra_office_fields['contact_form'] = email
                email = None
            else:
                raise ValueError("Problematic email found: {}".format(email))

        leg.add_contact_detail(type='address', value=office_address, note='Capitol Office')
        leg.add_contact_detail(type='voice', value=row['capitol phone'], note='Capitol Office')
        if email:
            leg.add_contact_detail(type='email', value=email)

        home_address = "{}\n{}, {} {}".format(
            row['home street address'],
            row['home city'],
            row['home state'],
            row['home zip code'],
        )
        # Members without a real home address list the LOB; skip those.
        if "Legislative Office Building" not in home_address:
            leg.add_contact_detail(type='address', value=home_address,
                                   note='District Office')
            if row['home phone'].strip():
                leg.add_contact_detail(type='voice',
                                       value=row['home phone'],
                                       note='District Office')

        leg.add_source(leg_url)

        for comm_name in row['committee member1'].split(';'):
            if ' (' in comm_name:
                # e.g. "Judiciary (Chair)" -> name + lowercased role
                comm_name, role = comm_name.split(' (')
                role = role.strip(')').lower()
            else:
                role = 'member'
            comm_name = comm_name.strip()
            if comm_name:
                if comm_name in committees:
                    com = committees[comm_name]
                else:
                    com = Organization(comm_name, classification='committee',
                                       chamber=chamber)
                    com.add_source(leg_url)
                    committees[comm_name] = com
                    yield com

                leg.add_membership(name_or_org=com, role=role)

        yield leg
示例3: scrape
# 需要导入模块: from pupa.scrape import Person [as 别名]
# 或者: from pupa.scrape.Person import add_party [as 别名]
def scrape(self):
    """Scrape NYC council members from the Legistar web listing.

    Groups roster entries by member URL, merges contiguous term spans,
    and yields a pupa ``Person`` per member with terms, party, contacts,
    and committee memberships attached.

    NOTE(review): indentation was lost in the source listing and has been
    reconstructed here; the original page also marked this example as
    partially elided, so trailing code may be missing.
    """
    noncommittees = {'Committee of the Whole'}
    committee_d = {}
    people_d = {}

    for councilman, committees in self.councilMembers():
        if 'url' in councilman['Person Name']:
            councilman_url = councilman['Person Name']['url']
            # Collect every roster entry for the same member URL so terms
            # can be merged below.
            if councilman_url in people_d:
                people_d[councilman_url][0].append(councilman)
            else:
                people_d[councilman_url] = [councilman], committees

    for person_entries, committees in people_d.values():
        councilman = person_entries[-1]
        p = Person(councilman['Person Name']['label'])

        if p.name == 'Letitia James':
            # Disambiguate from another person of the same name; keep the
            # common spelling as an alternate name.
            p.name = 'Letitia Ms. James'
            p.add_name('Letitia James')

        spans = [(self.toTime(entry['Start Date']).date(),
                  self.toTime(entry['End Date']).date(),
                  entry['District'])
                 for entry in person_entries]

        # Merge spans that are back-to-back (end + 1 day == next start)
        # within the same district into a single term.
        merged_spans = []
        last_end_date = None
        last_district = None
        for start_date, end_date, district in sorted(spans):
            if last_end_date is None:
                span = [start_date, end_date, district]
            elif (start_date - last_end_date) == datetime.timedelta(1) \
                    and district == last_district:
                span[1] = end_date
            else:
                merged_spans.append(span)
                span = [start_date, end_date, district]
            last_end_date = end_date
            last_district = district
        merged_spans.append(span)

        for start_date, end_date, district in merged_spans:
            district = councilman['District'].replace(' 0', ' ')
            if end_date == datetime.date(2017, 12, 31):
                # Sentinel date meaning "current" -> open-ended term.
                end_date = ''
            else:
                end_date = end_date.isoformat()
            # (removed a leftover debug print of the span dates)
            p.add_term('Council Member', 'legislature',
                       district=district,
                       start_date=start_date.isoformat(),
                       end_date=end_date)

        party = councilman['Political Party']
        if party == 'Democrat':
            party = 'Democratic'
        if party:
            p.add_party(party)

        if councilman['Photo']:
            p.image = councilman['Photo']

        if councilman["E-mail"]:
            p.add_contact_detail(type="email",
                                 value=councilman['E-mail']['url'],
                                 note='E-mail')

        if councilman['Web site']:
            p.add_link(councilman['Web site']['url'], note='web site')

        p.extras = {'Notes': councilman['Notes']}
        p.add_source(councilman['Person Name']['url'], note='web')

        for committee, _, _ in committees:
            committee_name = committee['Department Name']['label']
            if committee_name not in noncommittees \
                    and 'committee' in committee_name.lower():
                o = committee_d.get(committee_name, None)
                if o is None:
                    parent_id = PARENT_ORGS.get(committee_name,
                                                'New York City Council')
                    o = Organization(committee_name,
                                     classification='committee',
                                     parent_id={'name': parent_id})
                    o.add_source(committee['Department Name']['url'])
                    committee_d[committee_name] = o

                membership = o.add_member(p, role=committee["Title"])
                membership.start_date = self.mdY2Ymd(committee["Start Date"])

        yield p
#.........这里部分代码省略.........
示例4: scrape
# 需要导入模块: from pupa.scrape import Person [as 别名]
# 或者: from pupa.scrape.Person import add_party [as 别名]
def scrape(self):
    """Scrape NYC council members by joining the Legistar web roster with
    the Legistar API's office records.

    Builds one ``Person`` per member with terms, party, contact details,
    photo, and links.

    NOTE(review): indentation was lost in the source listing and has been
    reconstructed here; the original page marked this example as partially
    elided, so the tail of the function (e.g. yielding each person) is
    missing from this view.
    """
    web_scraper = LegistarPersonScraper(
        requests_per_minute=self.requests_per_minute)
    web_scraper.MEMBERLIST = ('http://legistar.council.nyc.gov/DepartmentDetail.aspx'
                              '?ID=6897&GUID=CDC6E691-8A8C-4F25-97CB-86F31EDAB081'
                              '&Mode=MainBody')

    if self.cache_storage:
        web_scraper.cache_storage = self.cache_storage
    if self.requests_per_minute == 0:
        web_scraper.cache_write_only = False

    # Index the web roster by (stripped) member name.
    web_info = {}
    for member, _ in web_scraper.councilMembers():
        name = member['Person Name']['label'].strip()
        web_info[name] = member

    city_council, = [body for body in self.bodies()
                     if body['BodyName'] == 'City Council']

    terms = collections.defaultdict(list)
    public_advocates = {  # Match casing to Bill De Blasio as council member
        'The Public Advocate (Mr. de Blasio)': 'Bill De Blasio',
        'The Public Advocate (Ms. James)': 'Letitia James',
    }
    for office in self.body_offices(city_council):
        name = office['OfficeRecordFullName']
        name = public_advocates.get(name, name).strip()
        terms[name].append(office)

        # Add past members (and public advocates) missing from the web
        # roster so the key sets below line up.
        if name not in web_info:
            web_info[name] = collections.defaultdict(lambda: None)

    # Check that we have everyone we expect, formatted consistently, in
    # both information arrays. For instance, this will fail if we forget to
    # strip trailing spaces from names on one side or the other (which has
    # the effect of omitting information, such as post, from the scrape).
    assert set(web_info.keys()) == set(terms.keys())

    members = {}
    for member, offices in terms.items():
        p = Person(member)
        web = web_info[member]

        for term in offices:
            role = term['OfficeRecordTitle']
            if role == 'Public Advocate':
                role = 'Non-Voting Council Member'
            else:
                role = 'Council Member'
            district = web.get('District', '').replace(' 0', ' ')
            p.add_term(role,
                       'legislature',
                       district=district,
                       start_date=self.toDate(term['OfficeRecordStartDate']),
                       end_date=self.toDate(term['OfficeRecordEndDate']))

        party = web.get('Political Party')
        if party == 'Democrat':
            party = 'Democratic'
        if party:
            p.add_party(party)

        if web.get('Photo'):
            p.image = web['Photo']

        contact_types = {
            "City Hall Office": ("address", "City Hall Office"),
            "City Hall Phone": ("voice", "City Hall Phone"),
            "Ward Office Phone": ("voice", "Ward Office Phone"),
            "Ward Office Address": ("address", "Ward Office Address"),
            "Fax": ("fax", "Fax"),
        }
        for contact_type, (type_, _note) in contact_types.items():
            # Bug fix: the listing had `web(contact_type)`, which calls a
            # dict; subscript lookup is intended.
            if web.get(contact_type) and web[contact_type] != 'N/A':
                p.add_contact_detail(type=type_,
                                     value=web[contact_type],
                                     note=_note)

        if web.get('E-mail'):
            p.add_contact_detail(type="email",
                                 value=web['E-mail']['url'],
                                 note='E-mail')

        if web.get('Web site'):
            p.add_link(web['Web site']['url'], note='web site')
        # NOTE(review): remainder of the function was omitted by the
        # example page.
#.........这里部分代码省略.........
示例5: test_person_add_party
# 需要导入模块: from pupa.scrape import Person [as 别名]
# 或者: from pupa.scrape.Person import add_party [as 别名]
def test_person_add_party():
    """add_party should create a related membership whose organization
    pseudo-id resolves to a party with the given name."""
    p = Person('Groot')
    p.add_party('Green')
    p._related[0].validate()
    assert get_pseudo_id(p._related[0].organization_id) == {
        'name': 'Green', 'classification': 'party'}