本文整理汇总了Python中pupa.scrape.Person.add_membership方法的典型用法代码示例。如果您正苦于以下问题:Python Person.add_membership方法的具体用法?Python Person.add_membership怎么用?Python Person.add_membership使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类pupa.scrape.Person
的用法示例。
在下文中一共展示了Person.add_membership方法的8个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_person_add_membership
# 需要导入模块: from pupa.scrape import Person [as 别名]
# 或者: from pupa.scrape.Person import add_membership [as 别名]
def test_person_add_membership():
    """A membership added via an Organization object links both ids."""
    person = Person('Bob B. Bear')
    person.add_source('http://example.com')
    org = Organization('test org', classification='unknown')
    person.add_membership(org, role='member', start_date='2007')

    # Exactly one related Membership object should have been created.
    assert len(person._related) == 1
    membership = person._related[0]
    membership.validate()
    assert membership.person_id == person._id
    assert membership.organization_id == org._id
    assert membership.start_date == '2007'
示例2: test_person_add_membership_name
# 需要导入模块: from pupa.scrape import Person [as 别名]
# 或者: from pupa.scrape.Person import add_membership [as 别名]
def test_person_add_membership_name():
    """Passing a bare name creates a pseudo-id reference to the org."""
    person = Person('Leonardo DiCaprio')
    org_name = 'Academy of Motion Picture Arts and Sciences'
    person.add_membership(org_name, role='winner', start_date='2016')

    membership = person._related[0]
    membership.validate()
    # The organization is stored as a pseudo-id dict keyed by name.
    assert get_pseudo_id(membership.organization_id) == {'name': org_name}
    assert membership.person_id == person._id
    assert membership.role == 'winner'
    assert membership.start_date == '2016'
示例3: scrape
# 需要导入模块: from pupa.scrape import Person [as 别名]
# 或者: from pupa.scrape.Person import add_membership [as 别名]
def scrape(self):
    """Yield the Temecula City Council organization and its members."""
    urls = Urls(dict(list=legislators_url), self)

    council = Organization(
        'Temecula City Council',
        classification='legislature')
    council.add_source(urls.list.url)
    yield council

    # Roster rows live in the second table; skip the header row.
    for row in urls.list.xpath('//table[2]//tr')[1:]:
        # Parse some attributes.
        name, role = row.xpath('td/p[1]//font/text()')
        image = row.xpath('td/img/@src').pop()

        # Create legislator and attach council membership.
        person = Person(name, image=image)
        membership = person.add_membership(council, role=role)

        # Add email address: first href is a mailto:, second is the
        # member detail page.
        email, detail_url = row.xpath('td//a/@href')
        email = email[7:]  # strip the leading 'mailto:' prefix
        membership.contact_details.append(
            dict(type='email', value=email, note='work'))

        # Add sources.
        person.add_source(urls.list.url)
        person.add_source(detail_url)
        yield person
示例4: scrape_chamber
# 需要导入模块: from pupa.scrape import Person [as 别名]
# 或者: from pupa.scrape.Person import add_membership [as 别名]
def scrape_chamber(self, chamber):
    """Scrape the Arizona legislature member roster for one chamber.

    :param chamber: 'lower' (House) or 'upper' (Senate); mapped to the
        ``body`` query parameter of the roster page.
    :yields: pupa ``Person`` objects, one per non-vacant member.
    """
    body = {'lower': 'H', 'upper': 'S'}[chamber]
    url = 'http://www.azleg.gov/MemberRoster/?body=' + body
    page = self.get(url).text
    # there is a bad comment closing tag on this page
    page = page.replace('--!>', '-->')
    root = html.fromstring(page)
    path = '//table//tr'
    # First table row is the header; skip it.
    roster = root.xpath(path)[1:]
    for row in roster:
        position = ''
        # Roster columns, in order (trailing comma tolerates exact count).
        name, district, party, email, room, phone, = row.xpath('td')
        if email.attrib.get('class') == 'vacantmember':
            continue  # Skip any vacant members.
        # Link to the member's detail page.
        link = name.xpath('string(a/@href)')
        if len(name) == 1:
            name = name.text_content().strip()
        else:
            # Extra child elements mean a leadership title follows the name.
            position = name.tail.strip()
            name = name[0].text_content().strip()
        if '--' in name:
            # Drop any annotation after a double dash.
            name = name.split('--')[0].strip()
        linkpage = self.get(link).text
        # Same malformed comment tag appears on the detail page.
        linkpage = linkpage.replace('--!>', '-->')
        linkroot = html.fromstring(linkpage)
        linkroot.make_links_absolute(link)
        photos = linkroot.xpath("//img[contains(@src, 'MemberPhoto')]")
        if len(photos) != 1:
            # Zero or ambiguous photos: log and proceed without an image.
            self.warning('no photo on ' + link)
            photo_url = ''
        else:
            photo_url = photos[0].attrib['src']
        district = district.text_content().strip()
        party = party.text_content().strip()
        email = email.text_content().strip()
        if email.startswith('Email: '):
            # Page shows only the local part; append the chamber domain.
            email = email.replace('Email: ', '').lower() + '@azleg.gov'
        else:
            email = ''
        # Normalize the party string via the scraper's helper.
        party = self.get_party(party)
        room = room.text_content().strip()
        if chamber == 'lower':
            address = "House of Representatives\n"
        else:
            address = "Senate\n"
        address = address + "1700 West Washington\n Room " + room \
            + "\nPhoenix, AZ 85007"
        phone = phone.text_content().strip()
        # Prepend the Phoenix area code when none of the digit groups is 602.
        if '602' not in re.findall(r'(\d+)', phone):
            phone = "602-" + phone
        leg = Person(primary_org=chamber, image=photo_url, name=name, district=district,
                     party=party)
        leg.add_contact_detail(type='address', value=address, note='Capitol Office')
        leg.add_contact_detail(type='voice', value=phone, note='Capitol Office')
        leg.add_party(party=party)
        leg.add_link(link)
        if email:
            leg.add_contact_detail(type='email', value=email)
        if position:
            # Leadership positions are recorded as a membership in the party.
            leg.add_membership(name_or_org=party, role=position)
            # leg.add_role(position, term, chamber=chamber,
            #              district=district, party=party)
        leg.add_source(url)
        # Probably just get this from the committee scraper
        # self.scrape_member_page(link, session, chamber, leg)
        yield leg
示例5: scrape
# 需要导入模块: from pupa.scrape import Person [as 别名]
# 或者: from pupa.scrape.Person import add_membership [as 别名]
def scrape(self):
    """Scrape Connecticut legislators from the CGA CSV export.

    Yields committee ``Organization`` objects (each exactly once) and a
    ``Person`` for every row in the spreadsheet.

    Raises:
        AssertionError: if the CSV header row no longer matches ``HEADERS``
            or a district value is not numeric.
        ValueError: if an email field is neither empty, an address, nor a
            contact-form URL.
    """
    # chambers = [chamber] if chamber is not None else ['upper', 'lower']
    leg_url = "ftp://ftp.cga.ct.gov/pub/data/LegislatorDatabase.csv"
    page = self.get(leg_url)

    committees = {}

    # Ensure that the spreadsheet's structure hasn't generally changed
    _row_headers = page.text.split('\r\n')[0].replace('"', '').split(',')
    assert _row_headers == HEADERS, "Spreadsheet structure may have changed"

    page = open_csv(page)
    for row in page:
        chamber = {'H': 'lower', 'S': 'upper'}[row['office code']]

        district = row['dist'].lstrip('0')
        assert district.isdigit(), "Invalid district found: {}".format(district)

        # Assemble "First [Middle] Last [Suffix]".
        name = row['first name']
        mid = row['middle initial'].strip()
        if mid:
            name += " %s" % mid
        name += " %s" % row['last name']
        suffix = row['suffix'].strip()
        if suffix:
            name += " %s" % suffix

        party = row['party']
        if party == 'Democrat':
            party = 'Democratic'

        leg = Person(primary_org=chamber,
                     name=name,
                     district=district,
                     party=party
                     )

        legislator_url = row['URL'].replace('\\', '//').strip()
        if legislator_url != '':
            # BUG FIX: previously this assigned the bare scheme
            # ('http://') and threw the URL away; prepend it instead.
            if not legislator_url.startswith('http'):
                legislator_url = 'http://' + legislator_url
            leg.add_link(legislator_url)

        leg.add_party(party=party)

        office_address = "%s\nRoom %s\nHartford, CT 06106" % (
            row['capitol street address'], row['room number'])
        # extra_office_fields = dict()

        email = row['email'].strip()
        if "@" not in email:
            if not email:
                email = None
            elif email.startswith('http://') or email.startswith('https://'):
                # A contact-form URL rather than an address; drop it.
                # extra_office_fields['contact_form'] = email
                email = None
            else:
                raise ValueError("Problematic email found: {}".format(email))

        leg.add_contact_detail(type='address', value=office_address, note='Capitol Office')
        leg.add_contact_detail(type='voice', value=row['capitol phone'], note='Capitol Office')
        if email:
            leg.add_contact_detail(type='email', value=email)

        home_address = "{}\n{}, {} {}".format(
            row['home street address'],
            row['home city'],
            row['home state'],
            row['home zip code'],
        )
        # Some members list the LOB as their "home" address; skip those.
        if "Legislative Office Building" not in home_address:
            leg.add_contact_detail(type='address', value=home_address, note='District Office')
            if row['home phone'].strip():
                leg.add_contact_detail(type='voice',
                                       value=row['home phone'],
                                       note='District Office')
        leg.add_source(leg_url)

        # Committee assignments come as a ';'-separated list, optionally
        # with a role in parentheses, e.g. "Appropriations (Chair)".
        for comm_name in row['committee member1'].split(';'):
            if ' (' in comm_name:
                comm_name, role = comm_name.split(' (')
                role = role.strip(')').lower()
            else:
                role = 'member'
            comm_name = comm_name.strip()
            if comm_name:
                # Create each committee once and cache it; yield only on
                # first sight so it is not emitted twice.
                if comm_name in committees:
                    com = committees[comm_name]
                else:
                    com = Organization(comm_name, classification='committee', chamber=chamber)
                    com.add_source(leg_url)
                    committees[comm_name] = com
                    yield com

                leg.add_membership(name_or_org=com, role=role)

        yield leg
示例6: scrape
# 需要导入模块: from pupa.scrape import Person [as 别名]
# 或者: from pupa.scrape.Person import add_membership [as 别名]
#.........这里部分代码省略.........
contact_types = {
"City Hall Office": ("address", "City Hall Office"),
"City Hall Phone": ("voice", "City Hall Phone"),
"Ward Office Phone": ("voice", "Ward Office Phone"),
"Ward Office Address": ("address", "Ward Office Address"),
"Fax": ("fax", "Fax")
}
for contact_type, (type_, _note) in contact_types.items():
if web.get(contact_type) and web(contact_type) != 'N/A':
p.add_contact_detail(type=type_,
value= web[contact_type],
note=_note)
if web.get('E-mail'):
p.add_contact_detail(type="email",
value=web['E-mail']['url'],
note='E-mail')
if web.get('Web site'):
p.add_link(web['Web site']['url'], note='web site')
if web.get('Notes'):
p.extras = {'Notes': web['Notes']}
if not p.sources: # Only add sources once
source_urls = self.person_sources_from_office(term)
person_api_url, person_web_url = source_urls
p.add_source(person_api_url, note='api')
p.add_source(person_web_url, note='web')
members[member] = p
committee_types = ['Committee',
'Inactive Committee',
'Select Committee',
'Subcommittee',
'Task Force',
'Land Use', # Committee on Land Use
]
body_types = {k: v for k, v in self.body_types().items()
if k in committee_types}
for body in self.bodies():
if body['BodyTypeName'] in body_types \
or body['BodyName'] in ('Legislative Documents Unit',
'Legal and Government Affairs Division'):
# Skip typo in API data
if body['BodyName'] == 'Committee on Mental Health, Developmental Disability, Alcoholism, Substance Abuse amd Disability Services':
continue
parent_org = PARENT_ORGS.get(body['BodyName'], 'New York City Council')
body_name = body['BodyName']
o = Organization(body_name,
classification='committee',
parent_id={'name': parent_org})
o.add_source(self.BASE_URL + '/bodies/{BodyId}'.format(**body), note='api')
o.add_source(self.WEB_URL + '/DepartmentDetail.aspx?ID={BodyId}&GUID={BodyGuid}'.format(**body), note='web')
for office in self.body_offices(body):
# Possible roles: 'Council Member', 'MEMBER', 'Ex-Officio',
# 'Committee Member', None, 'CHAIRPERSON'
role = office['OfficeRecordTitle']
if role and role.lower() == 'chairperson':
role = 'Chairperson'
else:
role = 'Member'
person = office['OfficeRecordFullName']
person = public_advocates.get(person, person).strip()
if person in members:
p = members[person]
else:
p = Person(person)
source_urls = self.person_sources_from_office(office)
person_api_url, person_web_url = source_urls
p.add_source(person_api_url, note='api')
p.add_source(person_web_url, note='web')
members[person] = p
p.add_membership(o,
role=role,
start_date=self.toDate(office['OfficeRecordStartDate']),
end_date=self.toDate(office['OfficeRecordEndDate']))
yield o
for p in members.values():
yield p
示例7: transform_parse
# 需要导入模块: from pupa.scrape import Person [as 别名]
# 或者: from pupa.scrape.Person import add_membership [as 别名]
def transform_parse(self, parsed_form, response):
_source = {
"url": response.url,
"note": "LDA Form LD-1"
}
# basic disclosure fields
_disclosure = Disclosure(
effective_date=datetime.strptime(
parsed_form['datetimes']['effective_date'],
'%Y-%m-%d %H:%M:%S').replace(tzinfo=UTC),
timezone='America/New_York',
submitted_date=datetime.strptime(
parsed_form['datetimes']['signature_date'],
'%Y-%m-%d %H:%M:%S').replace(tzinfo=UTC),
classification="lobbying"
)
_disclosure.add_authority(name=self.authority.name,
type=self.authority._type,
id=self.authority._id)
_disclosure.add_identifier(
identifier=parsed_form['_meta']['document_id'],
scheme="urn:sopr:filing"
)
# disclosure extras
_disclosure.extras = {}
_disclosure.extras['registrant'] = {
'self_employed_individual': parsed_form['registrant']['self_employed_individual'],
'general_description': parsed_form['registrant']['registrant_general_description'],
'signature': {
"signature_date": parsed_form['datetimes']['signature_date'],
"signature": parsed_form['signature']
}
}
_disclosure.extras['client'] = {
'same_as_registrant':
parsed_form['client']['client_self'],
'general_description':
parsed_form['client']['client_general_description']
}
_disclosure.extras['registration_type'] = {
'is_amendment':
parsed_form['registration_type']['is_amendment'],
'new_registrant':
parsed_form['registration_type']['new_registrant'],
'new_client_for_existing_registrant':
parsed_form['registration_type'][
'new_client_for_existing_registrant'],
}
# # Registrant
# build registrant
_registrant_self_employment = None
if parsed_form['registrant']['self_employed_individual']:
n = ' '.join([p for p in [
parsed_form['registrant']['registrant_individual_prefix'],
parsed_form['registrant']['registrant_individual_firstname'],
parsed_form['registrant']['registrant_individual_lastname']
] if len(p) > 0]).strip()
_registrant = Person(
name=n,
source_identified=True
)
_registrant_self_employment = Organization(
name='SELF-EMPLOYMENT of {n}'.format(n=n),
classification='company',
source_identified=True
)
_registrant.add_membership(
organization=_registrant_self_employment,
role='self_employed',
label='self-employment of {n}'.format(n=n),
start_date=_disclosure.effective_date.strftime('%Y-%m-%d')
)
else:
_registrant = Organization(
name=parsed_form['registrant']['registrant_org_name'],
classification='company',
source_identified=True
)
if len(parsed_form['registrant']['registrant_house_id']) > 0:
_registrant.add_identifier(
identifier=parsed_form['registrant']['registrant_house_id'],
scheme='urn:house_clerk:registrant'
)
if len(parsed_form['registrant']['registrant_senate_id']) > 0:
_registrant.add_identifier(
identifier=parsed_form['registrant']['registrant_senate_id'],
#.........这里部分代码省略.........
示例8: scrape_chamber
# 需要导入模块: from pupa.scrape import Person [as 别名]
# 或者: from pupa.scrape.Person import add_membership [as 别名]
#.........这里部分代码省略.........
if 'Resigned effective' in leg_html:
self.info('Resigned')
continue
party, district, _ = leg_doc.xpath('//p[@style="font-size: 17px;'
' margin: 0 0 0 0; padding: 0;"]/text()')
if 'Republican' in party:
party = 'Republican'
elif 'Democrat' in party:
party = 'Democratic'
# District # - County - Map
district = district.split()[1]
photo_url = leg_doc.xpath('//img[contains(@src,"/members/")]/@src')[0]
person = Person(name=full_name, district=district,
party=party, primary_org=chamber,
image=photo_url)
# office address / phone
try:
addr_div = leg_doc.xpath('//div[@style="float: left; width: 225px;'
' margin: 10px 5px 0 20px; padding: 0;"]')[0]
capitol_address = addr_div.xpath('p[@style="font-size: 13px;'
' margin: 0 0 10px 0; padding: 0;"]'
)[0].text_content()
phone = addr_div.xpath('p[@style="font-size: 13px;'
' margin: 0 0 0 0; padding: 0;"]/text()')[0]
capitol_phone = phone.strip()
if capitol_address:
person.add_contact_detail(type='address', value=capitol_address,
note='Capitol Office')
if capitol_phone:
person.add_contact_detail(type='voice', value=capitol_phone,
note='Capitol Office')
except IndexError:
self.warning('no capitol address for {0}'.format(full_name))
# home address / phone
try:
addr_div = leg_doc.xpath('//div[@style="float: left;'
' width: 225px; margin: 10px 0 0 20px;"]')[0]
addr = addr_div.xpath('p[@style="font-size: 13px;'
' margin: 0 0 10px 0; padding: 0;"]')[0].text_content()
phone = addr_div.xpath('p[@style="font-size: 13px;'
' margin: 0 0 0 0; padding: 0;"]/text()')[0]
phone = phone.strip()
if addr:
person.add_contact_detail(type='address', value=addr,
note='District Office')
if phone:
person.add_contact_detail(type='voice', value=phone,
note='District Office')
except IndexError:
self.warning('no district address for {0}'.format(full_name))
person.add_link(leg_url)
person.add_source(url)
person.add_source(leg_url)
# committees (skip first link)
for com in leg_doc.xpath('//a[contains(@href, "committee.php")]')[1:]:
if com.text.endswith(', '):
committee, role = com.text_content().rsplit(', ', 1)
# known roles
role = {'Treas.': 'treasurer',
'Secy.': 'secretary',
'Secy./Treas.': 'secretary/treasurer',
'V.C.': 'vice-chair',
'1st V.C.': 'first vice-chair',
'Co 1st V.C.': 'co-first vice-chair',
'2nd V.C.': 'second vice-chair',
'3rd V.C.': 'third vice-chair',
'Ex.Officio Member': 'ex-officio member',
'Chairman': 'chairman'}[role]
else:
committee = com.text
role = 'member'
# only yield each committee once
if committee not in seen_committees:
com = Organization(name=committee, classification='committee',
chamber=chamber)
com.add_source(url)
seen_committees[committee] = com
yield com
else:
com = seen_committees[committee]
person.add_membership(com, role=role)
yield person