当前位置: 首页>>代码示例>>Python>>正文


Python Person.image方法代码示例

本文整理汇总了Python中pupa.scrape.Person.image方法的典型用法代码示例。如果您正苦于以下问题:Python Person.image方法的具体用法?Python Person.image怎么用?Python Person.image使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在pupa.scrape.Person的用法示例。


在下文中一共展示了Person.image方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: test_same_name_people

# 需要导入模块: from pupa.scrape import Person [as 别名]
# 或者: from pupa.scrape.Person import image [as 别名]
def test_same_name_people():
    """Two scraped people sharing a name only import once birth dates differ."""
    jurisdiction = 'jurisdiction-id'
    wwe = Organization.objects.create(name='WWE', jurisdiction_id=jurisdiction)
    first = ScrapePerson('Dwayne Johnson', image='http://example.com/1')
    second = ScrapePerson('Dwayne Johnson', image='http://example.com/2')

    def import_both():
        # Each run uses a brand-new importer and re-serialises both people.
        importer = PersonImporter(jurisdiction)
        return importer.import_data([first.as_dict(), second.as_dict()])

    # Identical names and no birth dates: the importer refuses the batch.
    with pytest.raises(SameNameError):
        import_both()

    # Distinct birth dates disambiguate them, so both get inserted.
    first.birth_date = '1970'
    second.birth_date = '1930'
    counts = import_both()['person']
    assert counts['insert'] == 2
    assert counts['noop'] == 0
    assert counts['update'] == 0
    assert Person.objects.count() == 2

    # Future lookups rely on memberships, so attach everyone to the org.
    for stored in Person.objects.all():
        Membership.objects.create(person=stored, organization=wwe)

    # A changed image is an update for the first person; a changed birth date
    # makes the second one look like somebody new, which means an insert.
    first.image = 'http://example.com/1.jpg'
    second.birth_date = '1931'
    counts = import_both()['person']
    assert Person.objects.count() == 3
    assert counts['insert'] == 1
    assert counts['noop'] == 0
    assert counts['update'] == 1
开发者ID:johnfelipe,项目名称:pupa,代码行数:33,代码来源:test_people_importer.py

示例2: scrape_alderman

# 需要导入模块: from pupa.scrape import Person [as 别名]
# 或者: from pupa.scrape.Person import image [as 别名]
	def scrape_alderman(self, ward_num):
		"""Scrape the profile page of one ward's alderman into a Person.

		The ward page URL is derived from ``Utils.ALDERMEN_HOME`` and
		``ward_num``; the profile URL comes from ``self.alderman_url``.
		Returns the populated Person.
		"""
		ward_url = "{}/ward-{}".format(Utils.ALDERMEN_HOME, ward_num)
		alderman_url = self.alderman_url(ward_url)
		page = self.lxmlize(alderman_url)

		# The only <h1> on the page holds the person's name.
		headings = page.xpath("//h1/text()")

		# Passing district, role and primary_org up front lets pupa create the
		# membership linking this person to a post in the jurisdiction's
		# "Board of Aldermen" organization.
		person = Person(
			name=headings[0],
			district="Ward {} Alderman".format(ward_num),
			role="Alderman",
			primary_org="legislature",
		)

		# Additional fields: portrait and phone number.
		portraits = page.xpath("//div/img/@src")
		person.image = portraits[0]
		phone_texts = page.xpath("//strong[text()='Phone:']/../text()")
		person.add_contact_detail(type="voice", value=phone_texts[1].strip())

		# Record both pages as sources.
		for source_url, source_note in ((alderman_url, "profile"),
										(ward_url, "ward")):
			person.add_source(source_url, note=source_note)

		return person
开发者ID:rchrist,项目名称:scrapers-us-municipal,代码行数:27,代码来源:people.py

示例3: test_same_name_people

# 需要导入模块: from pupa.scrape import Person [as 别名]
# 或者: from pupa.scrape.Person import image [as 别名]
def test_same_name_people():
    """Walk the importer through every same-name scenario, with and without birth dates."""
    jurisdiction = 'jurisdiction-id'
    wwe = Organization.objects.create(name='WWE', jurisdiction_id=jurisdiction)

    def run_import(*scraped):
        # A fresh importer for every run, fed the people in the given order.
        importer = PersonImporter(jurisdiction)
        return importer.import_data([each.as_dict() for each in scraped])

    def add_memberships():
        # Fake memberships so that future lookups work on the imported people.
        for stored in Person.objects.all():
            Membership.objects.create(person=stored, organization=wwe)

    # Two same-name people going into a pristine database must be rejected.
    first = ScrapePerson('Dwayne Johnson', image='http://example.com/1')
    second = ScrapePerson('Dwayne Johnson', image='http://example.com/2')
    with pytest.raises(SameNameError):
        run_import(first, second)

    # A single person imports without trouble.
    run_import(first)
    add_memberships()

    # Bringing in the second same-name person still fails.
    with pytest.raises(SameNameError):
        run_import(first, second)

    # With birth dates set the import goes through.
    first.birth_date = '1970'
    second.birth_date = '1930'
    counts = run_import(first, second)['person']
    assert counts['insert'] == 1
    assert counts['noop'] == 0
    assert counts['update'] == 1
    assert Person.objects.count() == 2
    add_memberships()

    # A third same-name person lacking a birthday must be rejected.
    third = ScrapePerson('Dwayne Johnson', image='http://example.com/3')
    with pytest.raises(SameNameError):
        run_import(third)

    # Finally: an image change is an update, while a changed birth date is
    # treated as a new person and therefore inserted.
    first.image = 'http://example.com/1.jpg'
    second.birth_date = '1931'
    counts = run_import(first, second)['person']
    assert Person.objects.count() == 3
    assert counts['insert'] == 1
    assert counts['noop'] == 0
    assert counts['update'] == 1
开发者ID:anukat2015,项目名称:pupa,代码行数:47,代码来源:test_people_importer.py

示例4: scrape

# 需要导入模块: from pupa.scrape import Person [as 别名]
# 或者: from pupa.scrape.Person import image [as 别名]
    def scrape(self):
        """Yield people and committees for the Chicago City Council.

        Yields, in order: one Person per current alderman returned by
        ``self.councilMembers()``, one Person per entry in
        ``FORMER_ALDERMEN``, the committees discovered on the current
        members, and finally the committees named in ``FORMER_COMMITTEES``
        and ``JOINT_COMMITTEES``.
        """
        committee_d = {}
        non_committees = {'City Council', 'Office of the Mayor',
                          'Office of the City Clerk'}
        non_wards = {"Mayor", "Clerk"}

        # Listing column -> (contact detail type, note); loop-invariant.
        contact_types = {
            "City Hall Office": ("address", "City Hall Office"),
            "City Hall Phone": ("voice", "City Hall Phone"),
            "Ward Office Phone": ("voice", "Ward Office Phone"),
            "Ward Office Address": ("address", "Ward Office Address"),
            "Fax": ("fax", "Fax")
        }

        for councilman, committees in self.councilMembers():
            ward = councilman['Ward/Office']

            # Only ward rows describe aldermen.  Mayor/Clerk rows used to fall
            # through with `p` still bound to the previous alderman (or not
            # bound at all), so their photo and contact details were attached
            # to the wrong person, who was then yielded a second time.
            if ward == "" or ward in non_wards:
                continue

            p = Person(councilman['Person Name']['label'],
                       district="Ward {}".format(int(ward)),
                       primary_org="legislature",
                       role="Alderman")

            if councilman['Photo']:
                p.image = councilman['Photo']

            for contact_type, (type_, _note) in contact_types.items():
                if councilman[contact_type]:
                    p.add_contact_detail(type=type_,
                                         value=councilman[contact_type],
                                         note=_note)

            if councilman["E-mail"]:
                p.add_contact_detail(type="email",
                                     value=councilman['E-mail']['label'],
                                     note='E-mail')

            if councilman['Website']:
                p.add_link(councilman['Website']['url'])
            p.add_source(councilman['Person Name']['url'], note='web')

            for committee, _, _ in committees:
                committee_name = committee['Legislative Body']['label']
                if committee_name and committee_name not in non_committees:
                    # Create each committee once and reuse it for later members.
                    o = committee_d.get(committee_name, None)
                    if o is None:
                        o = Organization(committee_name,
                                         classification='committee',
                                         parent_id={'name': 'Chicago City Council'})
                        o.add_source(committee['Legislative Body']['url'],
                                     note='web')
                        committee_d[committee_name] = o

                    o.add_member(p, role=committee["Title"])

            yield p

        for name, term in FORMER_ALDERMEN.items():
            p = Person(name=name,
                       primary_org="legislature",
                       start_date=term['term'][0],
                       end_date=term['term'][1],
                       district="Ward {}".format(term['ward']),
                       role='Alderman')
            # This former alderman gets one additional, hard-coded term.
            if name == 'Chandler, Michael D.':
                p.add_term('Alderman',
                           "legislature",
                           district="Ward {}".format(term['ward']),
                           start_date=datetime.date(2011, 5, 16),
                           end_date=datetime.date(2015, 5, 18))

            p.add_source(term['source'], note='web')
            yield p

        for o in committee_d.values():
            yield o

        for committee_name in FORMER_COMMITTEES:
            o = Organization(committee_name,
                             classification='committee',
                             parent_id={'name': 'Chicago City Council'})
            o.add_source("https://chicago.legistar.com/Departments.aspx",
                         note='web')
            yield o

        for joint_committee in JOINT_COMMITTEES:
            o = Organization(joint_committee,
                             classification='committee',
                             parent_id={'name': 'Chicago City Council'})
            o.add_source("https://chicago.legistar.com/Departments.aspx",
                         note='web')
            yield o
开发者ID:ErnieAtLYD,项目名称:scrapers-us-municipal,代码行数:101,代码来源:people.py

示例5: _scrape_upper_chamber

# 需要导入模块: from pupa.scrape import Person [as 别名]
# 或者: from pupa.scrape.Person import image [as 别名]
    def _scrape_upper_chamber(self):
        """Yield a Person for each seated senator on the roster page.

        The roster at ``self._senators_url`` supplies name, party and
        district; each senator's detail page (``self._senator_details_url``)
        supplies the photo and Capitol Office contact details.  Rows whose
        name starts with 'Vacant', and districts whose detail page says
        'currently vacant', are skipped.
        """
        self.info('Scraping upper chamber for legislators.')

        chamber = 'upper'
        party_names = {'D': 'Democratic', 'R': 'Republican'}
        # Compiled once rather than once per senator.
        phone_pattern = re.compile(r'\(\d{3}\) \d{3}-\d{4}')

        source_url = self._senators_url
        roster = lxml.html.fromstring(self.get(source_url).text)
        rows = roster.xpath('//*[@id="content-2"]//table//tr')

        # The first two rows are headers, skip them.
        for tr in rows[2:]:
            tds = tr.xpath('td')
            full_name = tds[0].xpath('div/a')[0].text_content().strip()

            if full_name.startswith('Vacant'):
                continue

            party_and_district = tds[1].xpath('div')[0].text_content() \
                                       .strip().split('-')
            # Look the party up afresh for every row: with an if/elif chain an
            # unrecognised code silently reused the previous senator's party.
            party = party_names.get(party_and_district[0])
            if party is None:
                self.warning('Unrecognised party code {!r} for {}'.format(
                    party_and_district[0], full_name))

            district = party_and_district[1]
            url = self._senator_details_url.format(int(district))

            details_page = self.get(url).text
            if 'currently vacant' in details_page:
                continue

            person = Person(
                name=full_name,
                primary_org=chamber,
                district=district,
                party=party,
            )

            person.add_source(source_url)
            person.add_source(url)
            person.add_link(url)

            details = lxml.html.fromstring(details_page)
            photo_urls = details.xpath(
                '//*[@id="content-2"]//img[contains(@src, "uploads")]/@src')

            # Lines of the first text-widget paragraph, minus the heading line.
            contact_info = [
                line.strip()
                for line
                in details.xpath('//div[@class="textwidget"]/p[1]')[0]
                          .text_content().split('\n')
                if 'Capitol Office:' not in line
            ]
            address = '\n'.join(contact_info[:2])
            email = next((line for line in contact_info if '@' in line), None)
            phone_numbers = [line for line in contact_info
                             if phone_pattern.search(line) is not None]

            # The first phone-looking line is the voice number; a line that
            # mentions "fax" supplies the fax number.
            phone = (phone_pattern.search(phone_numbers[0]).group()
                     if phone_numbers else None)
            fax = next(
                (phone_pattern.search(phone_number).group()
                 for phone_number in phone_numbers
                 if 'fax' in phone_number.lower()),
                None
            )

            person.add_contact_detail(type='address', value=address, note='Capitol Office')
            if phone:
                person.add_contact_detail(type='voice', value=phone, note='Capitol Office')
            if fax:
                person.add_contact_detail(type='fax', value=fax, note='Capitol Office')
            if email:
                person.add_contact_detail(type='email', value=email, note='Capitol Office')

            # A missing portrait no longer aborts the whole scrape.
            if photo_urls:
                person.image = photo_urls[0]

            yield person
开发者ID:cliftonmcintosh,项目名称:openstates,代码行数:87,代码来源:people.py

示例6: _scrape_lower_chamber

# 需要导入模块: from pupa.scrape import Person [as 别名]
# 或者: from pupa.scrape.Person import image [as 别名]
    def _scrape_lower_chamber(self):
        """Yield a Person for every seated House member on the roster page.

        Vacant seats are not yielded; they are handed to
        ``self._save_vacant_legislator`` instead.  When an empty-data
        placeholder row is encountered a warning is logged and the generator
        ends.
        """
        self.info('Scraping lower chamber for legislators.')

        chamber = 'lower'
        # Parties for members whose roster row has a blank party cell, keyed
        # by the full name exactly as it is assembled below.
        party_override = {" Green": "Democratic",
                          " Sisco": "Republican"}

        roster_url = (self._reps_url)
        page = self.get(roster_url).text
        page = lxml.html.fromstring(page)
        # This is the ASP.net table container
        table_xpath = ('id("ContentPlaceHolder1_'
                       'gridMembers_DXMainTable")')
        table = page.xpath(table_xpath)[0]
        for tr in table.xpath('tr')[1:]:
            # If a given term hasn't occurred yet, then ignore it
            # Eg, in 2017, the 2018 term page will have a blank table
            if tr.attrib.get('class') == 'dxgvEmptyDataRow':
                self.warning('No House members found')
                return

            tds = tr.xpath('td')
            last_name = tds[0].text_content().strip()
            first_name = tds[1].text_content().strip()
            full_name = '{} {}'.format(first_name, last_name)
            district = str(int(tds[2].text_content().strip()))
            party = tds[3].text_content().strip()
            if party == 'Democrat':
                party = 'Democratic'

            # Consult the override table *before* falling back to "Other".
            # The fallback used to run first, which made the later
            # `party == ""` override test unreachable.
            if party == "":
                party = party_override.get(full_name, "Other")

            phone = tds[4].text_content().strip()
            room = tds[5].text_content().strip()
            address = self._assumed_address_fmt.format(room if room else '')

            if last_name == 'Vacant':
                person = Person(
                    name=full_name,
                    primary_org=chamber,
                    district=district,
                    party=party,
                )
                person.extras = {
                    'first_name': first_name,
                    'last_name': last_name,
                }

                person.add_contact_detail(type='address', value=address, note='Capitol Office')
                if phone:
                    person.add_contact_detail(type='voice', value=phone, note='Capitol Office')

                person.add_source(roster_url)

                self._save_vacant_legislator(person)
            else:
                details_url = self._rep_details_url.format(district)
                details_page = lxml.html.fromstring(self.get(details_url).text)

                person = Person(
                    name=full_name,
                    primary_org=chamber,
                    district=district,
                    party=party,
                )
                person.extras = {
                    'first_name': first_name,
                    'last_name': last_name,
                }
                person.add_source(roster_url)
                person.add_source(details_url)
                person.add_link(details_url)

                email = details_page.xpath(
                    '//*[@id="ContentPlaceHolder1_lblAddresses"]'
                    '/table/tr[4]/td/a/@href'
                )
                # A bare "mailto:" href carries no address.
                if len(email) > 0 and email[0].lower() != 'mailto:':
                    email = email[0].split(':')[1]
                else:
                    email = None

                person.add_contact_detail(type='address', value=address, note='Capitol Office')
                if phone:
                    person.add_contact_detail(type='voice', value=phone, note='Capitol Office')
                if email:
                    person.add_contact_detail(type='email', value=email, note='Capitol Office')

                picture = details_page.xpath(
                    '//*[@id="ContentPlaceHolder1_imgPhoto"]/@src')
                if len(picture) > 0:
                    person.image = picture[0]

                yield person
开发者ID:cliftonmcintosh,项目名称:openstates,代码行数:101,代码来源:people.py

示例7: _parse_person

# 需要导入模块: from pupa.scrape import Person [as 别名]
# 或者: from pupa.scrape.Person import image [as 别名]
    def _parse_person(self, row, chamber, seat_map):
        """Build a Person from one legislator row.

        Args:
            row: mapping with the legislator's fields (names, 'District',
                'County', 'party', 'WorkEmail', address parts, 'Phone',
                'seatno').
            chamber: 'lower' or 'upper'; selects the district format and the
                profile URL template.
            seat_map: mapping from ``row['seatno']`` to the seat number used
                in the House profile URL.

        Returns:
            The populated Person.  The portrait and profile source are only
            set when a profile URL could be determined.
        """
        # Capture legislator vitals.
        first_name = row['FirstName']
        middle_name = row['MiddleName']
        last_name = row['LastName']
        full_name = '{} {} {}'.format(first_name, middle_name, last_name)
        # Collapse the double space left behind by an empty middle name.
        full_name = re.sub(r'[\s]{2,}', ' ', full_name)

        if chamber == 'lower':
            district = '{} {}'.format(row['County'], int(row['District'])).strip()
        else:
            district = str(int(row['District'])).strip()

        party = self.party_map[row['party'].upper()]
        email = row['WorkEmail']

        # (A leftover debugging print of the district was removed here.)
        person = Person(primary_org=chamber,
                        district=district,
                        name=full_name,
                        party=party)

        person.extras = {
            'first_name': first_name,
            'middle_name': middle_name,
            'last_name': last_name
        }
        if email:
            person.add_contact_detail(type='email', value=email, note='District Office')

        # Capture legislator office contact information.
        # NOTE(review): the template's literal ", " keeps this string
        # non-empty even when every address field is blank, so the address
        # check below always passes.
        district_address = '{}\n{}\n{}, {} {}'.format(row['Address'],
                                                      row['address2'],
                                                      row['city'], row['State'],
                                                      row['Zipcode']).strip()

        phone = row['Phone'].strip() or None

        if district_address:
            person.add_contact_detail(type='address', value=district_address, note='Home Office')
        if phone:
            person.add_contact_detail(type='voice', value=phone, note='Home Office')

        # Retrieve legislator portrait.
        profile_url = None
        if chamber == 'upper':
            profile_url = self.senate_profile_url.format(row['District'])
        elif chamber == 'lower':
            try:
                seat_number = seat_map[row['seatno']]
            except KeyError:
                # No seat mapping for this row: leave the profile URL unset.
                pass
            else:
                profile_url = self.house_profile_url.format(seat_number)

        if profile_url:
            person.image = self._get_photo(profile_url, chamber)
            person.add_source(profile_url)

        return person
开发者ID:neelneelpurk,项目名称:openstates,代码行数:65,代码来源:people.py

示例8: scrape

# 需要导入模块: from pupa.scrape import Person [as 别名]
# 或者: from pupa.scrape.Person import image [as 别名]
    def scrape(self):
        """Yield people and committees for the Chicago City Council.

        Yields one Person per alderman returned by ``self.councilMembers()``,
        then the committees discovered on those members, then two committees
        that are declared here by name.
        """
        committee_d = {}
        non_committees = {'City Council', 'Office of the Mayor',
                          'Office of the City Clerk'}
        non_wards = {"Mayor", "Clerk"}
        departments_url = "https://chicago.legistar.com/Departments.aspx"

        # Listing column -> (contact detail type, note); loop-invariant.
        contact_types = {
            "City Hall Office": ("address", "City Hall Office"),
            "City Hall Phone": ("voice", "City Hall Phone"),
            "Ward Office Phone": ("voice", "Ward Office Phone"),
            "Ward Office Address": ("address", "Ward Office Address"),
            "Fax": ("fax", "Fax")
        }

        for councilman, committees in self.councilMembers():
            ward = councilman['Ward/Office']

            # Only ward rows describe aldermen.  Mayor/Clerk rows used to fall
            # through with `p` still bound to the previous alderman (or not
            # bound at all), so their photo and contact details were attached
            # to the wrong person, who was then yielded a second time.
            if ward == "" or ward in non_wards:
                continue

            p = Person(councilman['Person Name']['label'],
                       district="Ward {}".format(int(ward)),
                       primary_org="legislature",
                       role="Alderman")

            if councilman['Photo']:
                p.image = councilman['Photo']

            for contact_type, (type_, _note) in contact_types.items():
                if councilman[contact_type]:
                    p.add_contact_detail(type=type_,
                                         value=councilman[contact_type],
                                         note=_note)

            if councilman["E-mail"]:
                p.add_contact_detail(type="email",
                                     value=councilman['E-mail']['label'],
                                     note='E-mail')

            if councilman['Website']:
                p.add_link(councilman['Website']['url'])
            p.add_source(MEMBERLIST)

            for committee, _, _ in committees:
                committee_name = committee['Legislative Body']['label']
                if committee_name and committee_name not in non_committees:
                    # Create each committee once and reuse it for later members.
                    o = committee_d.get(committee_name, None)
                    if o is None:
                        o = Organization(committee_name,
                                         classification='committee',
                                         parent_id={'name': 'Chicago City Council'})
                        o.add_source(departments_url)
                        committee_d[committee_name] = o

                    o.add_member(p, role=committee["Title"])

            yield p

        for o in committee_d.values():
            yield o

        # Two committees are always emitted, independent of the member loop.
        o = Organization('Council Office of Financial Analysis Oversight Committee',
                         classification='committee',
                         parent_id={'name': 'Chicago City Council'})
        o.add_source(departments_url)
        yield o

        o = Organization('Committee on Parks and Recreation',
                         classification='committee',
                         parent_id={'name': 'Chicago City Council'})
        o.add_source(departments_url)
        yield o
开发者ID:a2civictech,项目名称:scrapers-us-municipal,代码行数:83,代码来源:people.py

示例9: scrape

# 需要导入模块: from pupa.scrape import Person [as 别名]
# 或者: from pupa.scrape.Person import image [as 别名]
    def scrape(self):
        # Combines two data sources per council member: the Legistar web
        # listing (photo, party, contact details) and the offices/terms
        # returned by self.body_offices() for the 'City Council' body.
        web_scraper = LegistarPersonScraper(requests_per_minute = self.requests_per_minute)
        web_scraper.MEMBERLIST = 'http://legistar.council.nyc.gov/DepartmentDetail.aspx?ID=6897&GUID=CDC6E691-8A8C-4F25-97CB-86F31EDAB081&Mode=MainBody'

        if self.cache_storage:
            web_scraper.cache_storage = self.cache_storage

        if self.requests_per_minute == 0:
            web_scraper.cache_write_only = False

        # Web listing rows keyed by stripped member name.
        web_info = {}

        for member, _ in web_scraper.councilMembers():
            name = member['Person Name']['label'].strip()
            web_info[name] = member

        # Unpacking into a one-element target fails unless exactly one body
        # is named 'City Council'.
        city_council, = [body for body in self.bodies()
                         if body['BodyName'] == 'City Council']

        # Office records grouped by member name.
        terms = collections.defaultdict(list)

        public_advocates = {  # Match casing to Bill De Blasio as council member
            'The Public Advocate (Mr. de Blasio)': 'Bill De Blasio',
            'The Public Advocate (Ms. James)': 'Letitia James',
        }

        for office in self.body_offices(city_council):
            name = office['OfficeRecordFullName']
            name = public_advocates.get(name, name).strip()

            terms[name].append(office)

            # Add past members (and advocates public)
            if name not in web_info:
                web_info[name] = collections.defaultdict(lambda: None)

        # Check that we have everyone we expect, formatted consistently, in
        # both information arrays. For instance, this will fail if we forget to
        # strip trailing spaces from names on one side or the other (which has
        # the effect of omitting information, such as post, from the scrape).

        assert set(web_info.keys()) == set(terms.keys())

        members = {}

        for member, offices in terms.items():

            p = Person(member)

            web = web_info[member]

            for term in offices:
                role = term['OfficeRecordTitle']

                if role == 'Public Advocate':
                    role = 'Non-Voting Council Member'
                else:
                    role = 'Council Member'

                district = web.get('District', '').replace(' 0', ' ')

                p.add_term(role,
                           'legislature',
                           district=district,
                           start_date=self.toDate(term['OfficeRecordStartDate']),
                           end_date=self.toDate(term['OfficeRecordEndDate']))

                party = web.get('Political Party')

                if party == 'Democrat':
                    party = 'Democratic'

                if party:
                    p.add_party(party)

                if web.get('Photo'):
                    p.image = web['Photo']

                contact_types = {
                    "City Hall Office": ("address", "City Hall Office"),
                    "City Hall Phone": ("voice", "City Hall Phone"),
                    "Ward Office Phone": ("voice", "Ward Office Phone"),
                    "Ward Office Address": ("address", "Ward Office Address"),
                    "Fax": ("fax", "Fax")
                }

                for contact_type, (type_, _note) in contact_types.items():
                    # Index the mapping; it was previously *called*
                    # (`web(contact_type)`), which raises TypeError as soon
                    # as a contact value is present.
                    if web.get(contact_type) and web[contact_type] != 'N/A':
                        p.add_contact_detail(type=type_,
                                             value= web[contact_type],
                                             note=_note)

                if web.get('E-mail'):
                    p.add_contact_detail(type="email",
                                         value=web['E-mail']['url'],
                                         note='E-mail')

                if web.get('Web site'):
                    p.add_link(web['Web site']['url'], note='web site')

#.........这里部分代码省略.........
开发者ID:datamade,项目名称:scrapers-us-municipal,代码行数:103,代码来源:people.py

示例10: scrape_current_legislators

# 需要导入模块: from pupa.scrape import Person [as 别名]
# 或者: from pupa.scrape.Person import image [as 别名]
    def scrape_current_legislators(self, repos):
        """Yield posts, memberships and people parsed from legislator YAML.

        For every entry in ``repos`` the URL from ``self.get_url`` is loaded
        with ``self.yamlize``.  For each listed person this yields a Post the
        first time a division is seen, a chamber Membership per term, a party
        Membership per term that names a party, and finally the Person itself
        when at least one term was present.
        """
        for repo in repos:
            CURRENT_LEGISLATORS = self.get_url(repo)

            people = self.yamlize(CURRENT_LEGISLATORS)
            posts = {}
            # NOTE(review): nothing in this method ever stores into this
            # cache, so the lookup below always yields None.
            person_cache = defaultdict(lambda: defaultdict(lambda: None))

            for person in people:
                name = person['name'].get('official_full')
                if name is None:
                    name = "{name[first]} {name[last]}".format(**person)

                # Always rebind: previously a person without a birthday kept
                # the previous person's birth date (or hit an unbound name on
                # the first entry).
                birth_date = person['bio'].get('birthday', '')

                who = person_cache[name][birth_date]
                has_term = False

                if who is None:
                    who = Person(name=name, birth_date=birth_date)
                    who.add_source(url=CURRENT_LEGISLATORS, note="unitedstates project on GitHub")

                for term in person.get('terms', []):
                    has_term = True
                    start_date = term['start']
                    end_date = term['end']
                    state = term['state']
                    type_ = term['type']
                    district = term.get('district', None)
                    party = term.get('party', None)

                    chamber = {'rep': self.house,
                               'sen': self.senate}[type_]

                    role = {'rep': 'Representative',
                            'sen': 'Senator'}[type_]

                    # NOTE(review): a 'rep' term without a district leaves
                    # `label`/`division_id` from the previous term in place.
                    if type_ == "rep" and district is not None:
                        label = "%s for District %s in %s" % (role, district, state)

                        division_id = ("ocd-division/country:us/state:{state}".format(state=state.lower()))

                        # District 0 gets no "/cd:" suffix, i.e. the
                        # state-level division.
                        if district != 0:
                            division_id += "/cd:{district}".format(district=district)

                    if type_ == "sen":
                        label = "Senator for %s" % state

                        division_id = ("ocd-division/country:us/state:{state}".format(state=state.lower()))

                    # One Post per division, created on first sight.
                    post = posts.get(division_id)
                    if post is None:
                        post = Post(organization_id=chamber._id,
                                    division_id=division_id,
                                    label=label, role=role)
                        posts[division_id] = post
                        yield post

                    membership = Membership(
                        post_id=post._id,
                        role=role,
                        label=label,
                        start_date=start_date,
                        end_date=end_date,
                        person_id=who._id,
                        organization_id=chamber._id)
                    yield membership

                    if party == "Democrat":
                        party = "Democratic"

                    if party:
                        membership = Membership(
                            role='member',
                            start_date=start_date,
                            end_date=end_date,
                            person_id=who._id,
                            organization_id=make_pseudo_id(
                                classification="party",
                                name=party))
                        yield membership

                for key, value in person.get('id', {}).items():
                    if isinstance(value, list):
                        for v in value:
                            who.add_identifier(str(v), scheme=key)
                    else:
                        who.add_identifier(str(value), scheme=key)
                        # The bioguide id doubles as the key for the portrait.
                        if key == 'bioguide':
                            who.image = self.get_image_url(str(value))

                if has_term:
                    yield who
开发者ID:crdunwel,项目名称:scrapers-us-federal,代码行数:97,代码来源:legislative.py

示例11: _scrape_representative

# 需要导入模块: from pupa.scrape import Person [as 别名]
# 或者: from pupa.scrape.Person import image [as 别名]
    def _scrape_representative(self, url, parties):
        # logger.info(f'Generating representative person object from {url}')
        """
        Returns a Person object representing a member of the lower
        legislative chamber.
        """
        # url = self.get(url).text.replace('<br>', '')
        member_page = self.lxmlize(url)

        photo_url = member_page.xpath('//img[@class="member-photo"]/@src')[0]
        if photo_url.endswith('/.jpg'):
            photo_url = None

        scraped_name, district_text = member_page.xpath(
            '//div[@class="member-info"]/h2')
        scraped_name = scraped_name.text_content().strip().replace('Rep. ', '')
        scraped_name = ' '.join(scraped_name.split())

        name = ' '.join(scraped_name.split(', ')[::-1])

        district_text = district_text.text_content().strip()
        district = str(self.district_re.search(district_text).group(1))

        # Vacant house "members" are named after their district numbers:
        if re.match(r'^District \d+$', scraped_name):
            return None

        party = parties[district]

        person = Person(name=name, district=district, party=party,
                        primary_org='lower')

        if photo_url is not None:
            person.image = photo_url

        person.add_link(url)
        person.add_source(url)

        def office_name(element):
            """Returns the office address type."""
            return element.xpath('preceding-sibling::h4[1]/text()')[0] \
                .rstrip(':')

        offices_text = [{
            'name': office_name(p_tag),
            'type': office_name(p_tag).replace(' Address', '').lower(),
            'details': p_tag.text_content()
        } for p_tag in member_page.xpath(
            '//h4/following-sibling::p[@class="double-space"]')]

        for office_text in offices_text:
            details = office_text['details'].strip()

            # A few member pages have blank office listings:
            if details == '':
                continue

            # At the time of writing, this case of multiple district
            # offices occurs exactly once, for the representative at
            # District 43:
            if details.count('Office') > 1:
                district_offices = [
                    district_office.strip()
                    for district_office
                    in re.findall(r'(\w+ Office.+?(?=\w+ Office|$))',
                                  details, flags=re.DOTALL)
                ]
                offices_text += [{
                    'name': re.match(r'\w+ Office', office).group(),
                    'type': 'district',
                    'details': re.search(
                        r'(?<=Office).+(?=\w+ Office|$)?', office,
                        re.DOTALL).group()
                } for office in district_offices]

            match = self.address_re.search(details)
            if match is not None:
                address = re.sub(
                    ' +$', '',
                    match.group().replace('\r', '').replace('\n\n', '\n'),
                    flags=re.MULTILINE
                )
            else:
                # No valid address found in the details.
                continue

            phone_number = extract_phone(details)
            fax_number = extract_fax(details)

            if address:
                person.add_contact_detail(type='address', value=address,
                                          note=office_text['name'])
            if phone_number:
                person.add_contact_detail(type='voice', value=phone_number,
                                          note=office_text['name'])
            if fax_number:
                person.add_contact_detail(type='fax', value=fax_number,
                                          note=office_text['name'])

        yield person
开发者ID:sunlightlabs,项目名称:openstates,代码行数:102,代码来源:people.py

示例12: _scrape_senator

# 需要导入模块: from pupa.scrape import Person [as 别名]
# 或者: from pupa.scrape.Person import image [as 别名]
    def _scrape_senator(self, url, parties):
        # logger.info(f'Generating senator person object from {url}')
        """
        Returns a Person object representing a member of the upper
        legislative chamber.
        """
        # Scrape legislator information from roster URL
        # Example: view-source:https://senate.texas.gov/member.php?d=1
        member_page = self.lxmlize(url)

        photo_url = member_page.xpath('//img[@id="memhead"]/@src')[0]
        scraped_name_district_text = member_page.xpath(
            '//div[@class="pgtitle"]/text()')[0]
        scraped_name, district_text = scraped_name_district_text.split(':')
        name = ' '.join(scraped_name.replace('Senator ', '').split()).strip()
        district = str(district_text.split()[1]).strip()
        # Vacant house "members" are named after their district numbers:
        if re.match(r'^District \d+$', name):
            return None
        bio = ' '.join(member_page.xpath('//div[@class="bio"]/text()'))
        party = parties[district]

        person = Person(name=name,
                        district=district,
                        party=party,
                        primary_org='upper',
                        biography=bio)

        if photo_url is not None:
            person.image = photo_url
        person.add_link(url)
        person.add_source(url)

        office_ids = []
        # Get offices based on table headers
        for th_tag in member_page.xpath('//table[@class="memdir"]/tr/th'):
            # logger.warn([th_tag.xpath('text()'),th_tag.xpath('@id')])
            id = th_tag.xpath('@id')[0] if th_tag.xpath('@id') else ''
            label = th_tag.xpath('text()')[0].strip() if th_tag.xpath('text()') else ''
            if id != '' and label != '':
                office_ids.append({'id': id, 'label': label})

        # logger.warn(office_ids)
        for office in office_ids:
            # logger.warn(office)
            row = member_page.xpath(
                f'//table[@class="memdir"]/tr/td[@headers="{office["id"]}"]')
            # A few member pages have broken ids for office listings:
            if len(row) == 0:
                row = member_page.xpath(
                    f'//table[@class="memdir"]/tr/td[@headers="dDA1"]')
            if len(row) > 0:
                details = " ".join(row[0].xpath('text()')).strip()
                details = details.replace('\r', '').replace('\n', '')
            # logger.warn(details)
            # A few member pages have blank office listings:
            if details == '':
                continue

            match = self.address_re.search(details)
            if match is not None:
                address = re.sub(
                    ' +$', '',
                    match.group().replace('\r', '').replace('\n', ''),
                    flags=re.MULTILINE
                )
            else:
                # No valid address found in the details.
                continue

            phone_number = extract_phone(details)
            fax_number = extract_fax(details)

            if address:
                person.add_contact_detail(type='address', value=address,
                                          note=office['label'])
            if phone_number:
                person.add_contact_detail(type='voice', value=phone_number,
                                          note=office['label'])
            if fax_number:
                person.add_contact_detail(type='fax', value=fax_number,
                                          note=office['label'])

        yield person
开发者ID:sunlightlabs,项目名称:openstates,代码行数:86,代码来源:people.py

示例13: scrape

# 需要导入模块: from pupa.scrape import Person [as 别名]
# 或者: from pupa.scrape.Person import image [as 别名]
    def scrape(self):
        """Yield a ``Person`` for each council member that has a detail URL.

        Rows from ``self.councilMembers()`` are grouped by the URL found in
        their ``'Person Name'`` field; rows without a URL are ignored.  The
        last row of each group supplies the name, district, party, photo,
        e-mail, web site and notes, while every row contributes a
        (start, end, district) span that becomes a 'Council Member' term
        once adjacent spans have been merged.  Committee ``Organization``
        objects are created on demand and cached in a local dict so that
        memberships can be attached to them.
        """
        noncommittees = {'Committee of the Whole'}
        committee_d = {}

        people_d = {}

        for councilman, committees in self.councilMembers():
            if 'url' in councilman['Person Name']:
                councilman_url = councilman['Person Name']['url']

                if councilman_url in people_d:
                    people_d[councilman_url][0].append(councilman)
                else:
                    # Only the committees of the first row seen for a URL
                    # are kept for that person.
                    people_d[councilman_url] = [councilman], committees

        for person_entries, committees in people_d.values():
            councilman = person_entries[-1]

            p = Person(councilman['Person Name']['label'])

            if p.name == 'Letitia James':
                p.name = 'Letitia Ms. James'
                p.add_name('Letitia James')

            spans = [(self.toTime(entry['Start Date']).date(),
                      self.toTime(entry['End Date']).date(),
                      entry['District'])
                     for entry in person_entries]

            # Merge spans that follow each other by exactly one day within
            # the same district into a single span.
            merged_spans = []
            last_end_date = None
            last_district = None
            for start_date, end_date, district in sorted(spans):
                if last_end_date is None:
                    span = [start_date, end_date, district]
                elif ((start_date - last_end_date) == datetime.timedelta(1)
                        and district == last_district):
                    span[1] = end_date
                else:
                    merged_spans.append(span)
                    span = [start_date, end_date, district]

                last_end_date = end_date
                last_district = district

            merged_spans.append(span)

            # NOTE(review): every term is filed under the district of the
            # latest row, not the district stored in the span itself --
            # confirm this is intended.
            term_district = councilman['District'].replace(' 0', ' ')
            for start_date, end_date, _ in merged_spans:
                # A span ending on 2017-12-31 is recorded with an empty
                # end date.
                if end_date == datetime.date(2017, 12, 31):
                    end_date = ''
                else:
                    end_date = end_date.isoformat()
                p.add_term('Council Member', 'legislature',
                           district=term_district,
                           start_date=start_date.isoformat(),
                           end_date=end_date)

            party = councilman['Political Party']
            if party == 'Democrat':
                party = 'Democratic'

            if party:
                p.add_party(party)

            if councilman['Photo']:
                p.image = councilman['Photo']

            if councilman["E-mail"]:
                p.add_contact_detail(type="email",
                                     value=councilman['E-mail']['url'],
                                     note='E-mail')

            if councilman['Web site']:
                p.add_link(councilman['Web site']['url'], note='web site')

            p.extras = {'Notes': councilman['Notes']}

            p.add_source(councilman['Person Name']['url'], note='web')

            for committee, _, _ in committees:
                committee_name = committee['Department Name']['label']
                if (committee_name not in noncommittees
                        and 'committee' in committee_name.lower()):
                    o = committee_d.get(committee_name, None)
                    if o is None:
                        parent_id = PARENT_ORGS.get(committee_name,
                                                    'New York City Council')
                        o = Organization(committee_name,
                                         classification='committee',
                                         parent_id={'name': parent_id})
                        o.add_source(committee['Department Name']['url'])
                        committee_d[committee_name] = o

                    membership = o.add_member(p, role=committee["Title"])
                    membership.start_date = self.mdY2Ymd(committee["Start Date"])
            yield p
            
#.........这里部分代码省略.........
开发者ID:Code-for-Miami,项目名称:scrapers-us-municipal,代码行数:103,代码来源:people.py

示例14: legislators

# 需要导入模块: from pupa.scrape import Person [as 别名]
# 或者: from pupa.scrape.Person import image [as 别名]
    def legislators(self, latest_only):
        """Collect a ``Person`` for every active legislator.

        Each row produced by ``self._memberships(latest_only)`` is read for
        name, district and party; rows with a blank party cell or a name
        ending in ``*`` (inactive) are skipped.  The member's detail page
        is then fetched to fill in photo, e-mail and office contact
        details.

        :param latest_only: passed through unchanged to
            ``self._memberships``.
        :returns: dict mapping legislator name to a ``(Person, terms)``
            tuple, where ``terms`` is a list of
            ``(chamber, district, term, party)`` tuples.
        """
        legs = {}

        for member, chamber, term, url in self._memberships(latest_only):
            name, _, _, district, party = member.xpath('td')
            district = district.text
            detail_url = name.xpath('a/@href')[0]

            # Validate and look up the *same* normalised text, so padded or
            # nested cell content does not slip past the blank check and
            # then fail the lookup.
            party_code = party.text_content().strip()
            if party_code == "":
                self.warning("Garbage party: Skipping!")
                continue

            party = {'D': 'Democratic', 'R': 'Republican', 'I': 'Independent'}[party_code]
            name = name.text_content().strip()

            # inactive legislator, skip them for now
            if name.endswith('*'):
                continue

            name = AKA.get(name, name)

            if name in legs:
                p, terms = legs[name]
                terms.append((chamber, district, term, party))
            else:
                p = Person(name, party=party)
                legs[name] = p, [(chamber, district, term, party)]

            p.add_source(url)
            p.add_source(detail_url)
            p.add_link(detail_url)

            birth_date = BIRTH_DATES.get(name, None)
            if birth_date:
                p.birth_date = birth_date

            leg_html = self.get(detail_url).text
            leg_doc = lxml.html.fromstring(leg_html)
            leg_doc.make_links_absolute(detail_url)

            hotgarbage = (
                'Senate Biography Information for the 98th General '
                'Assembly is not currently available.')

            if hotgarbage in leg_html:
                # The legislator's bio isn't available yet.  The person
                # stays in `legs`, just without photo or contact details.
                self.logger.warning('No legislator bio available for ' + name)
                continue

            photo_url = leg_doc.xpath('//img[contains(@src, "/members/")]/@src')[0]
            p.image = photo_url

            # Start from a clean slate so a legislator seen in several
            # memberships does not accumulate the same details repeatedly.
            p.contact_details = []
            # email
            email = leg_doc.xpath('//b[text()="Email: "]')
            if email:
                p.add_contact_detail(type='email', value=email[0].tail.strip(), note='capitol')

            offices = {'capitol': '//table[contains(string(), "Springfield Office")]',
                       'district': '//table[contains(string(), "District Office")]'}

            for location, xpath in offices.items():
                table = leg_doc.xpath(xpath)
                if table:
                    # NOTE(review): the fourth matching table is used,
                    # presumably because the enclosing layout tables match
                    # too -- confirm against the page markup.
                    for kind, value in self._table_to_office(table[3]):
                        if kind in ('fax', 'voice') and not validate_phone_number(value):
                            continue

                        p.add_contact_detail(type=kind, value=value, note=location)

        return legs
开发者ID:sunlightlabs,项目名称:openstates,代码行数:74,代码来源:people.py

示例15: scrape_chamber

# 需要导入模块: from pupa.scrape import Person [as 别名]
# 或者: from pupa.scrape.Person import image [as 别名]
    def scrape_chamber(self, chamber, session):
        """Yield a ``Person`` for every non-vacant seat of one chamber.

        The chamber roster page is fetched, each district box that has a
        link and is not marked 'Vacant' is followed to its 'Details' page,
        and name, party, district, photo and Capitol Office contact
        details are read from there.

        :param chamber: ``'upper'`` (senate) or ``'lower'`` (assembly); any
            other value raises ``KeyError``.
        :param session: value interpolated into the roster URL.
        :raises AssertionError: if no party can be found for a member.
        """
        url = 'https://docs.legis.wisconsin.gov/{}/legislators/{}'.format(
            session,
            {'upper': 'senate', 'lower': 'assembly'}[chamber],
        )

        body = self.get(url).text
        page = lxml.html.fromstring(body)
        page.make_links_absolute(url)

        for row in page.xpath(".//div[@class='box-content']/div[starts-with(@id,'district')]"):
            if row.xpath(".//a/@href") and not row.xpath(".//a[text()='Vacant']"):
                details_href = row.xpath(".//a[text()='Details']/@href")[0]
                # Force the https scheme.  str.strip("https://") would strip
                # a *set of characters* from both ends of the URL rather
                # than removing the prefix, so rewrite the scheme with an
                # anchored pattern instead.
                rep_url = re.sub(r'^(?:https?://)?', 'https://', details_href, count=1)
                rep_doc = lxml.html.fromstring(self.get(rep_url).text)
                rep_doc.make_links_absolute(rep_url)

                full_name = rep_doc.xpath(
                    './/div[@id="district"]/h1/text()'
                )[0].replace("Senator ", "").replace("Representative ", "")

                party = rep_doc.xpath('.//div[@id="district"]//small/text()')
                if len(party) > 0:
                    # The text before the first '-' (minus a leading '(')
                    # is the key into PARTY_DICT.
                    party = PARTY_DICT[party[0].split("-")[0].strip("(").strip()]
                else:
                    party = None
                # The last path segment of the second link under the h3 is
                # the district number; int() drops any zero padding.
                district = rep_doc.xpath('.//div[@id="district"]/h3/a/@href')[1]
                district = district.split("/")[-1]
                district = str(int(district))

                # email
                email = rep_doc.xpath("//span[@class='info email']/a/text()")
                if email:
                    email = email[0]
                else:
                    email = ''

                assert party is not None, "{} is missing party".format(full_name)

                person = Person(
                    name=full_name,
                    district=district,
                    primary_org=chamber,
                    party=party,
                )

                img = rep_doc.xpath('.//div[@id="district"]/img/@src')
                if img:
                    person.image = img[0]

                # office ####
                address_lines = rep_doc.xpath('.//span[@class="info office"]/text()')
                address = '\n'.join([line.strip() for line in address_lines if line.strip() != ""])
                person.add_contact_detail(type='address', value=address, note='Capitol Office')

                # NOTE(review): the second text node is used for phone and
                # fax; a span with a single text node would raise
                # IndexError -- confirm against the page markup.
                phone = rep_doc.xpath('.//span[@class="info telephone"]/text()')
                if phone:
                    phone = re.sub(r'\s+', ' ', phone[1]).strip()
                    person.add_contact_detail(type='voice', value=phone, note='Capitol Office')

                fax = rep_doc.xpath('.//span[@class="info fax"]/text()')
                if fax:
                    fax = re.sub(r'\s+', ' ', fax[1]).strip()
                    person.add_contact_detail(type='fax', value=fax, note='Capitol Office')

                if email:
                    person.add_contact_detail(type='email', value=email, note='Capitol Office')

                person.add_link(rep_url)
                person.add_source(rep_url)

                yield person
开发者ID:sunlightlabs,项目名称:openstates,代码行数:74,代码来源:people.py


注:本文中的pupa.scrape.Person.image方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。