当前位置: 首页>>代码示例>>Python>>正文


Python Person.extras方法代码示例

本文整理汇总了Python中pupa.scrape.Person.extras方法的典型用法代码示例。如果您正苦于以下问题:Python Person.extras方法的具体用法?Python Person.extras怎么用?Python Person.extras使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在pupa.scrape.Person的用法示例。


在下文中一共展示了Person.extras方法的8个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: get_member

# Required import: from pupa.scrape import Person
# Note: `extras` is an attribute assigned on a scraped object (obj.extras = {...}); it cannot be imported on its own.
    def get_member(self, session, chamber, kpid):
        """Scrape one Kansas legislator and yield it as a ``Person``.

        Fetches the member record from the API rooted at ``ksapi.url``,
        looks up the member's bio page on kslegislature.org for a portrait,
        and attaches the Capitol office contact details.

        Args:
            session: Session identifier; must be a key of the slug table
                below (for example ``'2019-2020'``).
            chamber: Passed through unchanged as the person's ``primary_org``.
            kpid: Member id used in both the API URL and the bio-page URL.

        Yields:
            A single populated ``Person``.

        Raises:
            KeyError: If ``session`` has no entry in the slug table.
            ValueError: If the bio page loads but does not contain exactly
                one ``profile-picture`` image.
        """
        url = '%smembers/%s' % (ksapi.url, kpid)
        content = json.loads(self.get(url).text)['content']

        # The API reports 'Democrat'; the record is stored as 'Democratic'.
        party = content['PARTY']
        if party == 'Democrat':
            party = 'Democratic'

        # Path segment under which each session's member pages are published.
        slug = {'2013-2014': 'b2013_14',
                '2015-2016': 'b2015_16',
                '2017-2018': 'b2017_18',
                '2019-2020': 'b2019_20',
                }[session]
        leg_url = 'http://www.kslegislature.org/li/%s/members/%s/' % (slug, kpid)

        try:
            legislator_page = self.lxmlize(leg_url)
            # Single-element unpacking: anything other than exactly one
            # matching image raises ValueError.
            photo_url, = legislator_page.xpath(
                '//img[@class="profile-picture"]/@src')
        except scrapelib.HTTPError:
            self.warning("{}'s legislator bio page not found".format(content['FULLNAME']))
            leg_url = ''
            photo_url = ''

        person = Person(
            name=content['FULLNAME'],
            district=str(content['DISTRICT']),
            primary_org=chamber,
            party=party,
            image=photo_url,
        )
        person.extras = {'occupation': content['OCCUPATION']}

        address = '\n'.join([
            'Room {}'.format(content['OFFICENUM']),
            'Kansas State Capitol Building',
            '300 SW 10th St.',
            'Topeka, KS 66612',
        ])

        note = 'Capitol Office'
        person.add_contact_detail(type='address', value=address, note=note)
        # Email and phone are optional in the API payload; never record an
        # empty contact value.
        if content['EMAIL']:
            person.add_contact_detail(type='email', value=content['EMAIL'], note=note)
        if content['OFFPH']:
            person.add_contact_detail(type='voice', value=content['OFFPH'], note=note)

        person.add_source(url)
        # ``leg_url`` is blanked above when the bio page is missing; an
        # empty link carries no information, so skip it in that case.
        if leg_url:
            person.add_link(leg_url)

        yield person
开发者ID:sunlightlabs,项目名称:openstates,代码行数:52,代码来源:people.py

示例2: _scrape_lower_chamber

# Required import: from pupa.scrape import Person
# Note: `extras` is an attribute assigned on a scraped object (obj.extras = {...}); it cannot be imported on its own.
    def _scrape_lower_chamber(self):
        """Scrape the lower-chamber roster and yield a ``Person`` per member.

        Walks the roster table fetched from ``self._reps_url``. Seated
        members are enriched from their per-district details page (email
        and photo) and yielded. Rows whose last name is ``'Vacant'`` are
        passed to ``self._save_vacant_legislator`` and are not yielded.

        Yields:
            ``Person`` objects for seated members.
        """
        self.info('Scraping lower chamber for legislators.')

        chamber = 'lower'
        # Parties for members whose party cell is blank on the roster,
        # keyed by the full name exactly as it is assembled below.
        party_override = {" Green": "Democratic",
                          " Sisco": "Republican"}

        roster_url = self._reps_url
        page = lxml.html.fromstring(self.get(roster_url).text)
        # This is the ASP.net table container
        table_xpath = ('id("ContentPlaceHolder1_'
                       'gridMembers_DXMainTable")')
        table = page.xpath(table_xpath)[0]
        # The first row is skipped (the slice drops it before iteration).
        for tr in table.xpath('tr')[1:]:
            # If a given term hasn't occurred yet, then ignore it
            # Eg, in 2017, the 2018 term page will have a blank table
            if tr.attrib.get('class') == 'dxgvEmptyDataRow':
                self.warning('No House members found')
                return

            tds = tr.xpath('td')
            last_name = tds[0].text_content().strip()
            first_name = tds[1].text_content().strip()
            full_name = '{} {}'.format(first_name, last_name)
            # int() round-trip drops any leading zeros from the district.
            district = str(int(tds[2].text_content().strip()))

            party = tds[3].text_content().strip()
            if party == 'Democrat':
                party = 'Democratic'
            elif party == '':
                # Try the per-member override first and only then fall back
                # to the generic "Other" workaround; applying the fallback
                # first would make the override unreachable.
                party = party_override.get(full_name, 'Other')

            phone = tds[4].text_content().strip()
            room = tds[5].text_content().strip()
            address = self._assumed_address_fmt.format(room)

            if last_name == 'Vacant':
                person = Person(
                    name=full_name,
                    primary_org=chamber,
                    district=district,
                    party=party,
                )
                person.extras = {
                    'first_name': first_name,
                    'last_name': last_name,
                }

                person.add_contact_detail(type='address', value=address, note='Capitol Office')
                if phone:
                    person.add_contact_detail(type='voice', value=phone, note='Capitol Office')

                person.add_source(roster_url)

                # Vacant seats are recorded separately rather than yielded.
                self._save_vacant_legislator(person)
            else:
                details_url = self._rep_details_url.format(district)
                details_page = lxml.html.fromstring(self.get(details_url).text)

                person = Person(
                    name=full_name,
                    primary_org=chamber,
                    district=district,
                    party=party,
                )
                person.extras = {
                    'first_name': first_name,
                    'last_name': last_name,
                }
                person.add_source(roster_url)
                person.add_source(details_url)
                person.add_link(details_url)

                email_hrefs = details_page.xpath(
                    '//*[@id="ContentPlaceHolder1_lblAddresses"]'
                    '/table/tr[4]/td/a/@href'
                )
                # A bare "mailto:" href means no address was published.
                if email_hrefs and email_hrefs[0].lower() != 'mailto:':
                    email = email_hrefs[0].split(':')[1]
                else:
                    email = None

                person.add_contact_detail(type='address', value=address, note='Capitol Office')
                if phone:
                    person.add_contact_detail(type='voice', value=phone, note='Capitol Office')
                if email:
                    person.add_contact_detail(type='email', value=email, note='Capitol Office')

                picture = details_page.xpath(
                    '//*[@id="ContentPlaceHolder1_imgPhoto"]/@src')
                if picture:
                    person.image = picture[0]

                yield person
开发者ID:cliftonmcintosh,项目名称:openstates,代码行数:101,代码来源:people.py

示例3: _parse_person

# Required import: from pupa.scrape import Person
# Note: `extras` is an attribute assigned on a scraped object (obj.extras = {...}); it cannot be imported on its own.
    def _parse_person(self, row, chamber, seat_map):
        """Build a ``Person`` from one roster row.

        Args:
            row: Mapping for a single legislator. The keys read here are
                ``FirstName``, ``MiddleName``, ``LastName``, ``County``,
                ``District``, ``party``, ``WorkEmail``, ``Address``,
                ``address2``, ``city``, ``State``, ``Zipcode``, ``Phone``
                and, for the lower chamber, ``seatno``.
            chamber: ``'lower'`` or ``'upper'``; selects the district
                format and which profile URL template is used.
            seat_map: Mapping from a row's ``seatno`` to the seat number
                substituted into ``self.house_profile_url``.

        Returns:
            The populated ``Person``.

        Raises:
            KeyError: If the row's party code is not in ``self.party_map``.
        """
        # Capture legislator vitals.
        first_name = row['FirstName']
        middle_name = row['MiddleName']
        last_name = row['LastName']
        full_name = '{} {} {}'.format(first_name, middle_name, last_name)
        # Collapse the double space left behind by an empty middle name.
        full_name = re.sub(r'[\s]{2,}', ' ', full_name)

        # Lower-chamber districts are "<County> <number>"; upper-chamber
        # districts are just the number.
        if chamber == 'lower':
            district = '{} {}'.format(row['County'], int(row['District'])).strip()
        else:
            district = str(int(row['District'])).strip()

        party = self.party_map[row['party'].upper()]
        email = row['WorkEmail']

        person = Person(primary_org=chamber,
                        district=district,
                        name=full_name,
                        party=party)

        person.extras = {
            'first_name': first_name,
            'middle_name': middle_name,
            'last_name': last_name
        }
        if email:
            person.add_contact_detail(type='email', value=email, note='District Office')

        # Capture legislator office contact information.
        district_address = '{}\n{}\n{}, {} {}'.format(row['Address'],
                                                      row['address2'],
                                                      row['city'], row['State'],
                                                      row['Zipcode']).strip()
        phone = row['Phone'].strip()

        if district_address:
            person.add_contact_detail(type='address', value=district_address, note='Home Office')
        if phone:
            person.add_contact_detail(type='voice', value=phone, note='Home Office')

        # Retrieve legislator portrait.
        profile_url = None
        if chamber == 'upper':
            profile_url = self.senate_profile_url.format(row['District'])
        elif chamber == 'lower':
            try:
                seat_number = seat_map[row['seatno']]
                profile_url = self.house_profile_url.format(seat_number)
            except KeyError:
                # Best effort: with no seat mapping there is no profile
                # page to fetch, so the person is returned without a photo.
                pass

        if profile_url:
            person.image = self._get_photo(profile_url, chamber)
            person.add_source(profile_url)

        return person
开发者ID:neelneelpurk,项目名称:openstates,代码行数:65,代码来源:people.py

示例4: scrape

# Required import: from pupa.scrape import Person
# Note: `extras` is an attribute assigned on a scraped object (obj.extras = {...}); it cannot be imported on its own.

#.........这里部分代码省略.........

                if party == 'Democrat':
                    party = 'Democratic'

                if party:
                    p.add_party(party)

                if web.get('Photo'):
                    p.image = web['Photo']

                contact_types = {
                    "City Hall Office": ("address", "City Hall Office"),
                    "City Hall Phone": ("voice", "City Hall Phone"),
                    "Ward Office Phone": ("voice", "Ward Office Phone"),
                    "Ward Office Address": ("address", "Ward Office Address"),
                    "Fax": ("fax", "Fax")
                }

                for contact_type, (type_, _note) in contact_types.items():
                    # Skip absent entries and the 'N/A' placeholder value.
                    # ``web`` is subscripted, as everywhere else in this
                    # block; calling it like a function raises TypeError.
                    if web.get(contact_type) and web[contact_type] != 'N/A':
                        p.add_contact_detail(type=type_,
                                             value=web[contact_type],
                                             note=_note)

                if web.get('E-mail'):
                    p.add_contact_detail(type="email",
                                         value=web['E-mail']['url'],
                                         note='E-mail')

                if web.get('Web site'):
                    p.add_link(web['Web site']['url'], note='web site')

                if web.get('Notes'):
                    p.extras = {'Notes': web['Notes']}

                if not p.sources:  # Only add sources once
                    source_urls = self.person_sources_from_office(term)
                    person_api_url, person_web_url = source_urls
                    p.add_source(person_api_url, note='api')
                    p.add_source(person_web_url, note='web')

            members[member] = p

        committee_types = ['Committee',
                           'Inactive Committee',
                           'Select Committee',
                           'Subcommittee',
                           'Task Force',
                           'Land Use', # Committee on Land Use
                          ]

        body_types = {k: v for k, v in self.body_types().items()
                      if k in committee_types}

        for body in self.bodies():
            if body['BodyTypeName'] in body_types \
                or body['BodyName'] in ('Legislative Documents Unit',
                                        'Legal and Government Affairs Division'):

                # Skip typo in API data
                if body['BodyName'] == 'Committee on Mental Health, Developmental Disability, Alcoholism, Substance Abuse amd Disability Services':
                    continue

                parent_org = PARENT_ORGS.get(body['BodyName'], 'New York City Council')

                body_name = body['BodyName']
开发者ID:datamade,项目名称:scrapers-us-municipal,代码行数:70,代码来源:people.py

示例5: scrape

# Required import: from pupa.scrape import Person
# Note: `extras` is an attribute assigned on a scraped object (obj.extras = {...}); it cannot be imported on its own.
    def scrape(self):
        """Yield a ``Person`` for each council member, with terms and committees.

        Rows from ``self.councilMembers()`` are grouped by the member's
        profile URL, consecutive service spans are merged into terms, and
        committee memberships are attached through ``Organization`` objects
        cached in ``committee_d``. The rest of this method is elided in
        this excerpt.
        """
        # Names that contain "committee" but must not become committees.
        noncommittees = {'Committee of the Whole'}
        # committee name -> Organization, so each committee is built once.
        committee_d = {}

        # profile URL -> ([row, ...], committees of the first row seen)
        people_d = {}

        for councilman, committees in self.councilMembers() :

            
            # Rows whose 'Person Name' has no 'url' key are dropped.
            if 'url' in councilman['Person Name'] :
                councilman_url = councilman['Person Name']['url']

                if councilman_url in people_d :
                    # Only the row is appended; the committees of later
                    # rows for the same URL are not kept.
                    people_d[councilman_url][0].append(councilman) 
                else :
                    people_d[councilman_url] = [councilman], committees

        for person_entries, committees in people_d.values() :

            # The last row collected supplies name, party, photo, etc.
            councilman = person_entries[-1]
            
            p = Person(councilman['Person Name']['label'])
            
            if p.name == 'Letitia James' :
                p.name = 'Letitia Ms. James'
                p.add_name('Letitia James')

            # (start, end, district) for every row of this person.
            spans = [(self.toTime(entry['Start Date']).date(), 
                      self.toTime(entry['End Date']).date(),
                      entry['District'])
                     for entry in person_entries]

            # Merge spans that begin exactly one day after the previous
            # span ended and are in the same district.
            merged_spans = []
            last_end_date = None
            last_district = None
            for start_date, end_date, district in sorted(spans) :
                if last_end_date is None :
                    span = [start_date, end_date, district]
                elif (start_date - last_end_date) == datetime.timedelta(1) and district == last_district :
                    span[1] = end_date
                else :
                    merged_spans.append(span)
                    span = [start_date, end_date, district]

                last_end_date = end_date
                last_district = district

            # Flush the span still being built when the loop ends.
            merged_spans.append(span)

            for start_date, end_date, district in merged_spans :
                # NOTE(review): the span's own district is overwritten with
                # the district of the last row, so every term gets the same
                # district even when spans differ -- confirm this is intended.
                district = councilman['District'].replace(' 0', ' ')
                # NOTE(review): 2017-12-31 is hard-coded and mapped to an
                # empty end date -- presumably "term still open"; confirm.
                if end_date == datetime.date(2017, 12, 31) :
                    end_date = ''
                else :
                    end_date = end_date.isoformat()
                # NOTE(review): looks like leftover debug output.
                print(start_date, end_date)
                p.add_term('Council Member', 'legislature', 
                           district=district, 
                           start_date=start_date.isoformat(),
                           end_date=end_date)

            party = councilman['Political Party']
            if party == 'Democrat' :
                party = 'Democratic'
            
            if party :
                p.add_party(party)

            if councilman['Photo'] :
                p.image = councilman['Photo']

            if councilman["E-mail"]:
                p.add_contact_detail(type="email",
                                     value=councilman['E-mail']['url'],
                                     note='E-mail')

            if councilman['Web site']:
                p.add_link(councilman['Web site']['url'], note='web site')

            p.extras = {'Notes' : councilman['Notes']}
                 
            p.add_source(councilman['Person Name']['url'], note='web')

            # Each committees item is a 3-tuple; only its first element is used.
            for committee, _, _ in committees:
                committee_name = committee['Department Name']['label']
                if committee_name not in noncommittees and 'committee' in committee_name.lower():
                    o = committee_d.get(committee_name, None)
                    if o is None:
                        # Parent defaults to the council itself when the
                        # committee has no entry in PARENT_ORGS.
                        parent_id = PARENT_ORGS.get(committee_name,
                                                    'New York City Council')
                        o = Organization(committee_name,
                                         classification='committee',
                                         parent_id={'name' : parent_id})
                        o.add_source(committee['Department Name']['url'])
                        committee_d[committee_name] = o

                    membership = o.add_member(p, role=committee["Title"])
                    membership.start_date = self.mdY2Ymd(committee["Start Date"])
            yield p
            
#.........这里部分代码省略.........
开发者ID:Code-for-Miami,项目名称:scrapers-us-municipal,代码行数:103,代码来源:people.py

示例6: scrape_session

# Required import: from pupa.scrape import Person
# Note: `extras` is an attribute assigned on a scraped object (obj.extras = {...}); it cannot be imported on its own.
    def scrape_session(self, session, chambers):
        """Build ``Person`` records for the active members of one session.

        Looks up the session's site id, fetches the member listing and each
        member's detail record through ``self.sservice``, and skips
        duplicate and vacated members. The rest of this method is elided
        in this excerpt.

        Args:
            session: Key into ``SESSION_SITE_IDS``.
            chambers: Not referenced in the portion of the method shown here.
        """
        sid = SESSION_SITE_IDS[session]
        # NOTE(review): ``backoff`` is defined elsewhere; presumably it
        # retries the service call -- confirm.
        members = backoff(
            self.sservice.GetMembersBySession,
            sid
        )['MemberListing']

        seen_guids = []
        for member in members:
            guid = member['Id']
            # Fetched before the duplicate check because the warning below
            # needs the member's last name.
            member_info = backoff(self.sservice.GetMember, guid)

            # If a member switches chambers during the session, they may
            # appear twice. Skip the duplicate record accordingly.
            if guid in seen_guids:
                self.warning('Skipping duplicate record of {}'.format(member_info['Name']['Last']))
                continue
            else:
                seen_guids.append(guid)

            # Check to see if the member has vacated; skip if so.
            # A member can have multiple services for a given session,
            # if they switched chambers. Filter these down to just the
            # active service.
            try:
                # Single-element unpacking raises ValueError unless exactly
                # one service matches this session and is not vacated.
                (legislative_service, ) = [
                    service for service
                    in member_info['SessionsInService']['LegislativeService']
                    if service['Session']['Id'] == sid and service['DateVacated'] is None
                ]
            except ValueError:
                self.info('Skipping retired member {}'.format(member_info['Name']['Last']))
                continue

            nick_name, first_name, middle_name, last_name = (
                member_info['Name'][x] for x in [
                    'Nickname', 'First', 'Middle', 'Last'
                ]
            )

            # A nickname, when present, replaces the first name.
            first_name = nick_name if nick_name else first_name

            if middle_name:
                full_name = "%s %s %s" % (first_name, middle_name, last_name)
            else:
                full_name = "%s %s" % (first_name, last_name)

            party = legislative_service['Party']

            if party == 'Democrat':
                party = 'Democratic'

            elif party.strip() == '':
                party = 'other'

            chamber, district = (
                legislative_service['District'][x] for x in [
                    'Type', 'Number'
                ]
            )

            # Translate the service's district type into the chamber key;
            # any other value raises KeyError.
            chamber = {
                "House": 'lower',
                "Senate": 'upper'
            }[chamber]

            url, photo = self.scrape_homepage(HOMEPAGE_URLS[chamber],
                                              {"code": guid, "sid": sid})

            legislator = Person(
                name=full_name,
                district=str(district),
                party=party,
                primary_org=chamber,
                image=photo,
            )
            legislator.extras = {
                'family_name': last_name,
                'given_name': first_name,
                'guid': guid,
            }

            if member_info['Address']['Street'] is not None and \
                    member_info['Address']['Street'].strip():
                # NOTE(review): only Street is checked for None; a None
                # City/State/Zip would fail on ``v.strip()`` -- confirm the
                # service always returns strings for those fields.
                capitol_address_info = {
                    k: v.strip() for k, v
                    in dict(member_info['Address']).items()
                    if k in ['Street', 'City', 'State', 'Zip']
                }
                capitol_address = '{Street}\n{City}, {State} {Zip}'.format(**capitol_address_info)
                legislator.add_contact_detail(
                    type='address', value=capitol_address, note='Capitol Address')
            else:
                self.warning('Could not find full capitol address for {}'.format(full_name))

            # Email, phone and fax in that order, passed through
            # ``self.clean_list`` (defined elsewhere).
            capitol_contact_info = self.clean_list([
                member_info['Address'][x] for x in [
                    'Email', 'Phone', 'Fax'
                ]
            ])
#.........这里部分代码省略.........
开发者ID:sunlightlabs,项目名称:openstates,代码行数:103,代码来源:people.py

示例7: transform_parse

# Required import: from pupa.scrape import Person
# Note: `extras` is an attribute assigned on a scraped object (obj.extras = {...}); it cannot be imported on its own.
    def transform_parse(self, parsed_form, response):
        """Turn a parsed LD-1 form into a disclosure and its registrant.

        Builds a ``Disclosure`` with its authority, identifier and extras,
        then builds the registrant as a ``Person`` (self-employed
        individual) or an ``Organization``. The rest of this method is
        elided in this excerpt.

        Args:
            parsed_form: Nested mapping of form fields; the sections read
                here are ``datetimes``, ``_meta``, ``registrant``,
                ``client``, ``signature`` and ``registration_type``.
            response: Object whose ``url`` is recorded as the source URL.
        """

        # Source record for this form; not used in the portion shown here.
        _source = {
            "url": response.url,
            "note": "LDA Form LD-1"
        }

        # basic disclosure fields
        # NOTE(review): both dates are stamped as UTC via replace() while
        # ``timezone`` says America/New_York -- confirm which zone the
        # parsed strings are actually in.
        _disclosure = Disclosure(
            effective_date=datetime.strptime(
                parsed_form['datetimes']['effective_date'],
                '%Y-%m-%d %H:%M:%S').replace(tzinfo=UTC),
            timezone='America/New_York',
            submitted_date=datetime.strptime(
                parsed_form['datetimes']['signature_date'],
                '%Y-%m-%d %H:%M:%S').replace(tzinfo=UTC),
            classification="lobbying"
        )

        _disclosure.add_authority(name=self.authority.name,
                                  type=self.authority._type,
                                  id=self.authority._id)

        _disclosure.add_identifier(
            identifier=parsed_form['_meta']['document_id'],
            scheme="urn:sopr:filing"
        )

        # disclosure extras
        _disclosure.extras = {}
        _disclosure.extras['registrant'] = {
            'self_employed_individual': parsed_form['registrant']['self_employed_individual'],
            'general_description': parsed_form['registrant']['registrant_general_description'],
            'signature': {
                "signature_date": parsed_form['datetimes']['signature_date'],
                "signature": parsed_form['signature']
            }
        }

        _disclosure.extras['client'] = {
            'same_as_registrant':
                parsed_form['client']['client_self'],
            'general_description':
                parsed_form['client']['client_general_description']
        }

        _disclosure.extras['registration_type'] = {
            'is_amendment':
                parsed_form['registration_type']['is_amendment'],
            'new_registrant':
                parsed_form['registration_type']['new_registrant'],
            'new_client_for_existing_registrant':
                parsed_form['registration_type'][
                    'new_client_for_existing_registrant'],
        }

        # # Registrant
        # build registrant
        # Stays None unless the registrant is a self-employed individual.
        _registrant_self_employment = None

        if parsed_form['registrant']['self_employed_individual']:
            # Join the non-empty name parts with single spaces.
            # NOTE(review): ``len(p)`` assumes every part is a string; a
            # None value would raise TypeError -- confirm against the parser.
            n = ' '.join([p for p in [
                parsed_form['registrant']['registrant_individual_prefix'],
                parsed_form['registrant']['registrant_individual_firstname'],
                parsed_form['registrant']['registrant_individual_lastname']
            ] if len(p) > 0]).strip()

            _registrant = Person(
                name=n,
                source_identified=True
            )

            # A self-employed individual is modelled as a Person plus a
            # companion Organization they are a member of.
            _registrant_self_employment = Organization(
                name='SELF-EMPLOYMENT of {n}'.format(n=n),
                classification='company',
                source_identified=True
            )

            _registrant.add_membership(
                organization=_registrant_self_employment,
                role='self_employed',
                label='self-employment of {n}'.format(n=n),
                start_date=_disclosure.effective_date.strftime('%Y-%m-%d')
            )
        else:
            _registrant = Organization(
                name=parsed_form['registrant']['registrant_org_name'],
                classification='company',
                source_identified=True
            )

        # The House id is attached only when the form supplies one.
        if len(parsed_form['registrant']['registrant_house_id']) > 0:
            _registrant.add_identifier(
                identifier=parsed_form['registrant']['registrant_house_id'],
                scheme='urn:house_clerk:registrant'
            )

        if len(parsed_form['registrant']['registrant_senate_id']) > 0:
            _registrant.add_identifier(
                identifier=parsed_form['registrant']['registrant_senate_id'],
#.........这里部分代码省略.........
开发者ID:influence-usa,项目名称:scrapers-us-federal,代码行数:103,代码来源:disclosures.py

示例8: scrape_session

# Required import: from pupa.scrape import Person
# Note: `extras` is an attribute assigned on a scraped object (obj.extras = {...}); it cannot be imported on its own.
    def scrape_session(self, session, chambers):
        """Build ``Person`` records for the seated members of one session.

        Looks up the session's site id, fetches the member listing and each
        member's detail record through ``self.sservice``, skips members who
        have vacated, and assembles name, party, chamber and Capitol contact
        data. The rest of this method is elided in this excerpt.

        Args:
            session: Key into ``SESSION_SITE_IDS``.
            chambers: Not referenced in the portion of the method shown here.

        Raises:
            Exception: If a member has no legislative-service record for
                the requested session.
        """
        sid = SESSION_SITE_IDS[session]
        members = backoff(
            self.sservice.GetMembersBySession,
            sid
        )['MemberListing']

        for member in members:
            guid = member['Id']
            member_info = backoff(self.sservice.GetMember, guid)

            # Check to see if the member has vacated; skip if so:
            try:
                legislative_service = next(
                    service for service
                    in member_info['SessionsInService']['LegislativeService']
                    if service['Session']['Id'] == sid
                )
            except StopIteration:
                # next() on an exhausted generator raises StopIteration
                # (never IndexError); translate it into the explicit error.
                raise Exception("Something very bad is going on with the "
                                "Legislative service")

            if legislative_service['DateVacated']:
                continue

            nick_name, first_name, middle_name, last_name = (
                member_info['Name'][x] for x in [
                    'Nickname', 'First', 'Middle', 'Last'
                ]
            )

            # A nickname, when present, replaces the first name.
            first_name = nick_name if nick_name else first_name

            if middle_name:
                full_name = "%s %s %s" % (first_name, middle_name, last_name)
            else:
                full_name = "%s %s" % (first_name, last_name)

            party = legislative_service['Party']

            if party == 'Democrat':
                party = 'Democratic'

            elif party.strip() == '':
                party = 'other'

            chamber, district = (
                legislative_service['District'][x] for x in [
                    'Type', 'Number'
                ]
            )

            # Translate the service's district type into the chamber key;
            # any other value raises KeyError.
            chamber = {
                "House": 'lower',
                "Senate": 'upper'
            }[chamber]

            url, photo = self.scrape_homepage(HOMEPAGE_URLS[chamber],
                                              {"code": guid, "sid": sid})

            legislator = Person(
                name=full_name,
                district=str(district),
                party=party,
                primary_org=chamber,
                image=photo,
            )
            legislator.extras = {
                'last_name': last_name,
                'first_name': first_name,
                'guid': guid,
            }

            capitol_address = self.clean_list([
                member_info['Address'][x] for x in [
                    'Street', 'City', 'State', 'Zip'
                ]
            ])

            # Join whichever address components are non-empty.
            capitol_address = " ".join(
                addr_component for addr_component
                in capitol_address if addr_component
            ).strip()

            # Email, phone and fax, in that order.
            capitol_contact_info = self.clean_list([
                member_info['Address'][x] for x in [
                    'Email', 'Phone', 'Fax'
                ]
            ])

            # Sometimes email is set to a long cryptic string.
            # If it doesn't have a @ character, simply set it to None
            # examples:
            # 01X5dvct3G1lV6RQ7I9o926Q==&c=xT8jBs5X4S7ZX2TOajTx2W7CBprTaVlpcvUvHEv78GI=
            # 01X5dvct3G1lV6RQ7I9o926Q==&c=eSH9vpfdy3XJ989Gpw4MOdUa3n55NTA8ev58RPJuzA8=

            if capitol_contact_info[0] and '@' not in capitol_contact_info[0]:
                capitol_contact_info[0] = None

            # if we have more than 2 chars (eg state)
#.........这里部分代码省略.........
开发者ID:neelneelpurk,项目名称:openstates,代码行数:103,代码来源:people.py


注:本文中的pupa.scrape.Person.extras方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。