当前位置: 首页>>代码示例>>Python>>正文


Python Person.extras['occupation'] 用法代码示例

本文整理汇总了 Python 中 pupa.scrape.Person 对象的 extras['occupation'] 字段的典型用法代码示例。需要注意:extras 是 Person 实例上的字典属性,'occupation' 只是其中的一个键,而不是一个方法。如果您想了解 Person.extras['occupation'] 的具体用法、如何为它赋值,或者需要可以参考的使用例子,这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解 pupa.scrape.Person 的其他用法示例。


下文一共展示了 4 个使用 Person.extras['occupation'] 的代码示例,这些例子默认按受欢迎程度排序。您可以为喜欢或者觉得有用的代码点赞,您的评价将有助于系统推荐出更好的 Python 代码示例。

示例1: scrape_legislator

# Required import: from pupa.scrape import Person [as alias]
# Note: `extras` is a dict attribute on a Person instance (person.extras['occupation']); it cannot be imported on its own.
    def scrape_legislator(self, name, chamber, url):
        """Scrape one legislator's detail page and yield the resulting objects.

        Fetches ``url``, reads party, district, occupation, photo, capitol
        phone, e-mail and home-address details from the page, and builds a
        ``Person`` from them.

        Args:
            name: Legislator's name, passed straight to ``Person``.
            chamber: Chamber identifier; used as ``primary_org`` and forwarded
                to ``self.scrape_committees``.
            url: Address of the legislator's detail page; also recorded as
                the person's source.

        Yields:
            Everything produced by ``self.scrape_committees``, followed by
            the ``Person`` itself.

        Raises:
            ValueError: If the page does not contain exactly one member photo.
            IndexError: If the page has no "Committees" link.
        """
        page = lxml.html.fromstring(self.get(url).text)
        page.make_links_absolute(url)

        party = page.xpath("string(//span[contains(@id, 'Party')])").strip()
        if party == 'Democrat':
            party = 'Democratic'

        district = page.xpath("string(//span[contains(@id, 'District')])")
        district = district.strip().lstrip('0')

        occupation = page.xpath(
            "string(//span[contains(@id, 'Occupation')])").strip()

        # Exactly one photo is expected; the unpacking fails loudly otherwise.
        (photo_url, ) = page.xpath('//img[contains(@id, "_imgMember")]/@src')

        office_phone = page.xpath(
            "string(//span[contains(@id, 'CapitolPhone')])").strip()

        email_link = page.xpath('//a[@id="lnkMail"]')

        legislator = Person(primary_org=chamber,
                            image=photo_url,
                            name=name,
                            party=party,
                            district=district
                            )
        legislator.extras['occupation'] = occupation
        if office_phone:
            legislator.add_contact_detail(type='voice', value=office_phone, note='Capitol Office')
        if email_link:
            # The href is split on ":" and the second piece is used, i.e. the
            # part after a "mailto:"-style prefix.
            email = email_link[0].attrib['href'].split(":")[1]
            legislator.add_contact_detail(type='email', value=email, note='Capitol Office')

        # SD is hiding their email addresses entirely in JS now, so
        # search through <script> blocks looking for them.
        # The pattern must describe a real address (local part + "@" + domain);
        # a literal "[email protected]" placeholder can never match one.
        # NOTE(review): the domain is reconstructed as sdlegislature.gov --
        # confirm against the live pages.
        email_pattern = re.compile(r'([\w.]+@sdlegislature\.gov)')
        for script in page.xpath('//script'):
            if not script.text:
                continue
            match = email_pattern.search(script.text)
            if match:
                legislator.add_contact_detail(type='email',
                                              value=match.group(0),
                                              note='Capitol Office')
                # Only the first address found is recorded.
                break

        home_address = [
            line.strip() for line in
            page.xpath('//td/span[contains(@id, "HomeAddress")]/text()')
            if line.strip()
        ]
        if home_address:
            home_phone = page.xpath(
                "string(//span[contains(@id, 'HomePhone')])").strip()
            legislator.add_contact_detail(type='address',
                                          value="\n".join(home_address),
                                          note='District Office')
            # The home phone is only recorded alongside a home address.
            if home_phone:
                legislator.add_contact_detail(type='voice',
                                              value=home_phone,
                                              note='District Office')

        legislator.add_source(url)

        comm_url = page.xpath("//a[. = 'Committees']")[0].attrib['href']
        yield from self.scrape_committees(legislator, comm_url, chamber)
        yield legislator
开发者ID:cliftonmcintosh,项目名称:openstates,代码行数:75,代码来源:people.py

示例2: scrape_legislator

# Required import: from pupa.scrape import Person [as alias]
# Note: `extras` is a dict attribute on a Person instance (person.extras['occupation']); it cannot be imported on its own.
    def scrape_legislator(self, name, chamber, url, contact_page):
        """Scrape one legislator's detail page and yield the ``Person``.

        Party, district, occupation, photo, capitol phone and home address
        come from the detail page at ``url``; the e-mail address is looked up
        in the shared ``contact_page``.

        Args:
            name: Legislator's name, passed straight to ``Person``.
            chamber: Chamber identifier; used as ``primary_org`` and forwarded
                to ``self.scrape_committee``.
            url: Address of the detail page. It must contain
                ``Member=<digits>``; that number is used to find the
                legislator's entry in ``contact_page``.
            contact_page: Parsed contact page (an object supporting
                ``.xpath``) shared by all legislators.

        Yields:
            The populated ``Person``.

        Raises:
            ValueError: If the page does not contain exactly one member photo.
            AttributeError: If ``url`` has no ``Member=<digits>`` component.
        """
        page = lxml.html.fromstring(self.get(url).text)
        page.make_links_absolute(url)

        party = page.xpath("string(//span[contains(@id, 'Party')])").strip()
        if party == 'Democrat':
            party = 'Democratic'

        district = page.xpath("string(//span[contains(@id, 'District')])")
        district = district.strip().lstrip('0')

        occupation = page.xpath(
            "string(//span[contains(@id, 'Occupation')])").strip()

        # Exactly one photo is expected; the unpacking fails loudly otherwise.
        (photo_url, ) = page.xpath('//img[contains(@id, "_imgMember")]/@src')

        office_phone = page.xpath(
            "string(//span[contains(@id, 'CapitolPhone')])").strip()

        legislator = Person(primary_org=chamber,
                            image=photo_url,
                            name=name,
                            party=party,
                            district=district
                            )
        legislator.extras['occupation'] = occupation
        if office_phone:
            legislator.add_contact_detail(
                type='voice', value=office_phone, note='Capitol Office')

        # SD removed email from the detail pages but it's still in the
        # contact page, shared for all congress people
        member_id = re.search(r'Member=(\d+)', url).group(1)

        # find the profile block by finding a link inside it to their
        # detail page
        candidate_links = contact_page.xpath(
            '//ul[@id="contact-list"]//a[contains(@href, "Member=%s")]' % (member_id,))
        # contains() is a substring test, so id 12 would also match
        # "Member=123"; keep only links whose id is not followed by a digit.
        exact_id = re.compile(r'Member=%s(?!\d)' % (member_id,))
        profile_link = [
            link for link in candidate_links
            if exact_id.search(link.get('href', ''))
        ]
        if profile_link:
            # look for the adjacent email mailto link
            profile_block = profile_link[0].getparent().getparent().getparent()
            email_link = profile_block.xpath(
                './span/span/a[@class="mail-break"]')
            if email_link:
                # .text is None when the anchor has no leading text node;
                # treat that the same as an empty address.
                email = (email_link[0].text or '').strip()
                if email:
                    legislator.add_contact_detail(type='email',
                                                  value=email,
                                                  note='Capitol Office')

        home_address = [
            line.strip() for line in
            page.xpath('//td/span[contains(@id, "HomeAddress")]/text()')
            if line.strip()
        ]
        if home_address:
            home_phone = page.xpath(
                "string(//span[contains(@id, 'HomePhone')])").strip()
            legislator.add_contact_detail(type='address',
                                          value="\n".join(home_address),
                                          note='District Office')
            # The home phone is only recorded alongside a home address.
            if home_phone:
                legislator.add_contact_detail(type='voice',
                                              value=home_phone,
                                              note='District Office')

        legislator.add_source(url)
        legislator.add_link(url)

        committees = page.xpath(
            '//div[@id="divCommittees"]/span/section/table/tbody/tr/td/a')
        for committee in committees:
            self.scrape_committee(legislator, url, committee, chamber)
        yield legislator
开发者ID:sunlightlabs,项目名称:openstates,代码行数:83,代码来源:people.py

示例3: scrape_member

# Required import: from pupa.scrape import Person [as alias]
# Note: `extras` is a dict attribute on a Person instance (person.extras['occupation']); it cannot be imported on its own.
    def scrape_member(self, chamber, member_url):
        """Scrape a single member page and yield the resulting ``Person``.

        The name, title and party come from the ``SiteNames`` cell; district,
        phone, e-mail and occupation are pulled out of the text of the
        ``InfoTable`` table with regular expressions.

        Args:
            chamber: Default chamber for the member. It is overridden with
                ``'lower'`` / ``'upper'`` when the page title starts with
                "Representative" / "Senator".
            member_url: Address of the member's page; recorded as both link
                and source.

        Yields:
            The populated ``Person``. Nothing is yielded when the page lists
            no district.

        Raises:
            AssertionError: If the party marker is not recognised.
            IndexError: If the ``SiteNames`` cell is empty.
        """
        page = self.get(member_url).text
        root = lxml.html.fromstring(page)

        name_and_party = root.xpath(
            'string(//td[@class="SiteNames"])').split()

        title = name_and_party[0]
        # Account for Representative-Elect and Senator-Elect, for incoming class
        if title.startswith('Representative'):
            chamber = 'lower'
        elif title.startswith('Senator'):
            chamber = 'upper'

        full_name = ' '.join(name_and_party[1:-1])

        party_names = {
            '(R)': 'Republican',
            '(D)': 'Democratic',
            '(G)': 'Green',
            '(I)': 'Independent',
        }
        party_token = name_and_party[-1]
        if party_token in party_names:
            party = party_names[party_token]
        elif '-Elect' in title and not party_token.startswith('('):
            # No party marker present, so the last token belongs to the name.
            self.warning('Member-elect is currently missing a party')
            full_name = ' '.join(name_and_party[1:])
            party = ''
        else:
            raise AssertionError(
                "Unknown party ({0}) for {1}".format(party_token, full_name))

        try:
            img = root.xpath('//img[@class="SitePhotos"]')[0]
            photo_url = img.attrib['src']
        except IndexError:
            self.warning("No member photo found")
            photo_url = ""

        # Need to figure out a cleaner method for this later
        info_box = root.xpath('string(//table[@class="InfoTable"])')

        def info_field(label):
            """Return the text between ``label`` and the next '\\r', or None."""
            found = re.search(label + r'(.+)\r', info_box)
            return found.group(1) if found else None

        district = info_field('District')
        if district is None:
            self.warning('Member has no district listed; skipping them')
            return

        person = Person(name=full_name, district=district,
                        party=party, primary_org=chamber, image=photo_url)

        person.add_link(member_url)
        person.add_source(member_url)

        # Same best-effort handling as the photo: warn instead of raising
        # IndexError when the page carries no address.
        address_lines = root.xpath('//nobr/text()')
        if address_lines:
            address = address_lines[0].replace(u'\xa0', ' ')
            person.add_contact_detail(type='address', value=address, note='District Office')
        else:
            self.warning("No member address found")

        # Only record contact details that were actually found; a missing
        # field must not be stored as a None-valued contact detail.
        phone = info_field('Phone')
        if phone and phone.strip():
            person.add_contact_detail(type='voice', value=phone, note='District Office')

        email = info_field('Email')
        if email and email.strip():
            person.add_contact_detail(type='email', value=email, note='District Office')

        occupation = info_field('Occupation')
        if occupation is not None:
            person.extras['occupation'] = occupation

        yield person
开发者ID:neelneelpurk,项目名称:openstates,代码行数:78,代码来源:people.py

示例4: scrape_chamber

# Required import: from pupa.scrape import Person [as alias]
# Note: `extras` is a dict attribute on a Person instance (person.extras['occupation']); it cannot be imported on its own.
    def scrape_chamber(self, chamber, session):
        """Yield a ``Person`` for every legislator in one chamber.

        Reads the chamber's legislator list from the wyoleg.gov JSON service,
        then makes one extra details request per legislator, because some
        fields exist only in the list and others only in the details.

        Args:
            chamber: ``'upper'`` or ``'lower'``.
            session: Session identifier; used as the first path segment of
                each legislator's public profile URL.

        Yields:
            One populated ``Person`` per row of the list response.

        Raises:
            KeyError: If ``chamber`` is not ``'upper'``/``'lower'``, if a
                row's party code is missing from ``self.party_map``, or if an
                expected JSON field is absent.
        """
        chamber_abbrev = {'upper': 'S', 'lower': 'H'}[chamber]

        # NOTE(review): the listing year is hard-coded to 2018 while the
        # profile URL below follows `session` -- confirm whether this
        # request should use `session` as well.
        url = "https://wyoleg.gov/LsoService/api/legislator/2018/{}".format(
            chamber_abbrev)

        response = self.get(url)
        people_json = json.loads(response.content.decode('utf-8'))

        for row in people_json:

            # some fields are only available in the list json, some only in the details call
            details_url = 'https://wyoleg.gov/LsoService/api/legislator/{}'.format(
                row['legID'])
            details_response = self.get(details_url)
            details = json.loads(details_response.content.decode('utf-8'))

            party = self.party_map[row['party']]

            if details['dob'] is not None:
                # Source format is e.g. "01/31/1960 12:00:00 AM"; only the
                # date part is kept, as YYYY-MM-DD.
                dob = datetime.datetime.strptime(
                    details['dob'], '%m/%d/%Y %I:%M:%S %p')
                dob_str = dob.strftime("%Y-%m-%d")
            else:
                dob_str = ''

            photo_url = 'http://wyoleg.gov/LegislatorSummary/Photos/{}'.format(
                details['legPhoto'])

            person = Person(
                name=row['name'],
                # Drops any leading 'S', 'H' and '0' characters.
                district=row['district'].lstrip('SH0'),
                party=party,
                primary_org=chamber,
                birth_date=dob_str,
                image=photo_url,
            )

            if details['address']:
                address = '{}, {} {} {}'.format(
                    details['address'],
                    details['city'],
                    details['state'],
                    details['zip']
                )
                person.add_contact_detail(type='address', value=address)

            if row['eMail']:
                person.add_contact_detail(type='email', value=row['eMail'])

            if row['phone']:
                person.add_contact_detail(type='voice', value=row['phone'])

            person.extras['wy_leg_id'] = row['legID']
            person.extras['county'] = row['county']
            person.extras['given_name'] = row['firstName']
            person.extras['family_name'] = row['lastName']
            person.extras['religion'] = details['religion']
            person.extras['number_children'] = details['noChildren']
            person.extras['spouse_given_name'] = details['spouseName']
            person.extras['place_of_birth'] = details['birthPlace']
            person.extras['occupation'] = details['occupationDesc']

            # Education and civic organisations are stored only when non-empty.
            if details['legEducation']:
                person.extras['education'] = details['legEducation']

            if details['civicOrgs']:
                person.extras['civic_organizations'] = details['civicOrgs']

            # http://wyoleg.gov/Legislators/2018/S/2032
            # The middle path segment is the chamber letter (S/H), as in the
            # example above -- not the legislator's party code.
            leg_url = 'http://wyoleg.gov/Legislators/{}/{}/{}'.format(
                session,
                chamber_abbrev,
                row['legID'])

            person.add_source(leg_url)
            person.add_link(leg_url)

            yield person
开发者ID:sunlightlabs,项目名称:openstates,代码行数:81,代码来源:people.py


注:本文中的pupa.scrape.Person.extras['occupation']方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。