当前位置: 首页>>代码示例>>Python>>正文


Python Person.add_party方法代码示例

本文整理汇总了Python中pupa.scrape.Person.add_party方法的典型用法代码示例。如果您正苦于以下问题:Python Person.add_party方法的具体用法?Python Person.add_party怎么用?Python Person.add_party使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在pupa.scrape.Person的用法示例。


在下文中一共展示了Person.add_party方法的5个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: scrape_chamber

# 需要导入模块: from pupa.scrape import Person [as 别名]
# 或者: from pupa.scrape.Person import add_party [as 别名]
    def scrape_chamber(self, chamber):
        """Scrape the azleg.gov member roster for one chamber.

        Fetches the roster page, then each member's linked page (for the
        photo), and yields one ``Person`` per non-vacant roster row.

        :param chamber: ``'lower'`` or ``'upper'``; any other value raises
            ``KeyError`` from the chamber-code lookup.
        """
        chamber_code = {'lower': 'H', 'upper': 'S'}[chamber]
        url = 'http://www.azleg.gov/MemberRoster/?body=' + chamber_code

        # The page contains a malformed comment terminator ('--!>'); repair
        # it before handing the markup to the parser.
        root = html.fromstring(self.get(url).text.replace('--!>', '-->'))

        # The first table row is skipped.
        for row in root.xpath('//table//tr')[1:]:
            # Each row must have exactly six cells, otherwise unpacking fails.
            (name_cell, district_cell, party_cell,
             email_cell, room_cell, phone_cell) = row.xpath('td')

            # Vacant seats are marked on the e-mail cell; nothing to emit.
            if email_cell.attrib.get('class') == 'vacantmember':
                continue

            link = name_cell.xpath('string(a/@href)')

            position = ''
            if len(name_cell) == 1:
                name = name_cell.text_content().strip()
            else:
                # NOTE(review): this reads the tail text of the <td> itself,
                # not of its first child -- confirm that is what is intended.
                position = name_cell.tail.strip()
                name = name_cell[0].text_content().strip()
            # Keep only the part before a '--' separator, if there is one.
            name = name.partition('--')[0].strip()

            # The member page gets the same comment repair as the roster.
            member_root = html.fromstring(
                self.get(link).text.replace('--!>', '-->'))
            member_root.make_links_absolute(link)

            photos = member_root.xpath("//img[contains(@src, 'MemberPhoto')]")
            if len(photos) == 1:
                photo_url = photos[0].attrib['src']
            else:
                self.warning('no photo on ' + link)
                photo_url = ''

            district = district_cell.text_content().strip()

            email_text = email_cell.text_content().strip()
            email = (
                email_text.replace('Email: ', '').lower() + '@azleg.gov'
                if email_text.startswith('Email: ')
                else ''
            )

            party = self.get_party(party_cell.text_content().strip())

            room = room_cell.text_content().strip()
            heading = ("House of Representatives\n" if chamber == 'lower'
                       else "Senate\n")
            address = (heading + "1700 West Washington\n Room " + room
                       + "\nPhoenix, AZ 85007")

            phone = phone_cell.text_content().strip()
            # Prepend the 602 area code unless one of the digit groups in the
            # number already is '602'.
            if '602' not in re.findall(r'(\d+)', phone):
                phone = "602-" + phone

            leg = Person(primary_org=chamber, image=photo_url, name=name,
                         district=district, party=party)
            leg.add_contact_detail(type='address', value=address,
                                   note='Capitol Office')
            leg.add_contact_detail(type='voice', value=phone,
                                   note='Capitol Office')
            leg.add_party(party=party)
            leg.add_link(link)

            if email:
                leg.add_contact_detail(type='email', value=email)
            if position:
                leg.add_membership(name_or_org=party, role=position)

            leg.add_source(url)

            yield leg
开发者ID:sunlightlabs,项目名称:openstates,代码行数:84,代码来源:people.py

示例2: scrape

# 需要导入模块: from pupa.scrape import Person [as 别名]
# 或者: from pupa.scrape.Person import add_party [as 别名]
    def scrape(self):
        """Scrape legislators and their committees from the legislator CSV.

        Downloads the CSV at ``leg_url``, checks its header row against
        ``HEADERS``, and then for every data row yields:

        * an ``Organization`` the first time a committee name is seen, and
        * a ``Person`` for the legislator, with memberships in each of the
          committees listed on the row.

        :raises AssertionError: if the header row differs from ``HEADERS`` or
            a district is not purely numeric.
        :raises KeyError: if a row's office code is neither ``'H'`` nor ``'S'``.
        :raises ValueError: if an e-mail value is non-empty, has no ``@`` and
            is not an http(s) URL.
        """
        leg_url = "ftp://ftp.cga.ct.gov/pub/data/LegislatorDatabase.csv"
        page = self.get(leg_url)

        # Committee name -> Organization, so each committee is created and
        # yielded only once.
        committees = {}

        # Ensure that the spreadsheet's structure hasn't generally changed
        _row_headers = page.text.split('\r\n')[0].replace('"', '').split(',')
        assert _row_headers == HEADERS, "Spreadsheet structure may have changed"

        page = open_csv(page)
        for row in page:

            chamber = {'H': 'lower', 'S': 'upper'}[row['office code']]

            district = row['dist'].lstrip('0')
            assert district.isdigit(), "Invalid district found: {}".format(district)

            # Full name: first [middle initial] last [suffix]
            name = row['first name']
            mid = row['middle initial'].strip()
            if mid:
                name += " %s" % mid
            name += " %s" % row['last name']
            suffix = row['suffix'].strip()
            if suffix:
                name += " %s" % suffix

            party = row['party']
            if party == 'Democrat':
                party = 'Democratic'

            leg = Person(primary_org=chamber,
                         name=name,
                         district=district,
                         party=party
                         )

            legislator_url = row['URL'].replace('\\', '//').strip()
            if legislator_url != '':
                if not legislator_url.startswith('http'):
                    # Prefix the missing scheme. Previously the whole URL was
                    # replaced by the bare string 'http://', losing the link.
                    legislator_url = 'http://' + legislator_url
                leg.add_link(legislator_url)

            leg.add_party(party=party)

            office_address = "%s\nRoom %s\nHartford, CT 06106" % (
                row['capitol street address'], row['room number'])
            email = row['email'].strip()
            if "@" not in email:
                if not email:
                    email = None
                elif email.startswith('http://') or email.startswith('https://'):
                    # The column holds a web address rather than an e-mail
                    # address; it is not recorded.
                    email = None
                else:
                    raise ValueError("Problematic email found: {}".format(email))
            leg.add_contact_detail(type='address', value=office_address, note='Capitol Office')
            leg.add_contact_detail(type='voice', value=row['capitol phone'], note='Capitol Office')
            if email:
                leg.add_contact_detail(type='email', value=email)

            home_address = "{}\n{}, {} {}".format(
                row['home street address'],
                row['home city'],
                row['home state'],
                row['home zip code'],
            )
            # A "home" address that mentions the Legislative Office Building
            # is not recorded as a district office.
            if "Legislative Office Building" not in home_address:
                leg.add_contact_detail(type='address', value=home_address, note='District Office')
                if row['home phone'].strip():
                    leg.add_contact_detail(type='voice',
                                           value=row['home phone'],
                                           note='District Office')
            leg.add_source(leg_url)

            # Committees are ';'-separated, each optionally followed by a
            # role in parentheses, e.g. "Name (Chair)".
            for comm_name in row['committee member1'].split(';'):
                if ' (' in comm_name:
                    comm_name, role = comm_name.split(' (')
                    role = role.strip(')').lower()
                else:
                    role = 'member'
                comm_name = comm_name.strip()
                if comm_name:
                    if comm_name in committees:
                        com = committees[comm_name]
                    else:
                        com = Organization(comm_name, classification='committee', chamber=chamber)
                        com.add_source(leg_url)
                        committees[comm_name] = com
                        yield com

                    leg.add_membership(name_or_org=com, role=role)

            yield leg
开发者ID:sunlightlabs,项目名称:openstates,代码行数:98,代码来源:people.py

示例3: scrape

# 需要导入模块: from pupa.scrape import Person [as 别名]
# 或者: from pupa.scrape.Person import add_party [as 别名]
    def scrape(self):
        """Yield a ``Person`` for each council member from ``self.councilMembers()``.

        Roster entries are grouped by the member's detail-page URL. The
        entries for one person are turned into terms: consecutive entries
        for the same district whose dates are exactly one day apart are
        merged into a single term. Party, photo, e-mail, web site, notes and
        committee memberships are taken from the person's last entry.

        Committee ``Organization`` objects are created on first use and kept
        in ``committee_d``.
        """
        noncommittees = {'Committee of the Whole'}
        committee_d = {}

        # URL -> ([entries for that person], committees of the first entry)
        people_d = {}

        for councilman, committees in self.councilMembers():
            # Entries without a detail-page URL cannot be grouped; skip them.
            if 'url' in councilman['Person Name']:
                councilman_url = councilman['Person Name']['url']

                if councilman_url in people_d:
                    people_d[councilman_url][0].append(councilman)
                else:
                    people_d[councilman_url] = [councilman], committees

        for person_entries, committees in people_d.values():

            # The last entry supplies the person-level attributes.
            councilman = person_entries[-1]

            p = Person(councilman['Person Name']['label'])

            if p.name == 'Letitia James':
                p.name = 'Letitia Ms. James'
                p.add_name('Letitia James')

            spans = [(self.toTime(entry['Start Date']).date(),
                      self.toTime(entry['End Date']).date(),
                      entry['District'])
                     for entry in person_entries]

            # Merge spans that are back-to-back (next start is exactly one
            # day after the previous end) and in the same district.
            merged_spans = []
            last_end_date = None
            last_district = None
            for start_date, end_date, district in sorted(spans):
                if last_end_date is None:
                    span = [start_date, end_date, district]
                elif ((start_date - last_end_date) == datetime.timedelta(1)
                      and district == last_district):
                    span[1] = end_date
                else:
                    merged_spans.append(span)
                    span = [start_date, end_date, district]

                last_end_date = end_date
                last_district = district

            merged_spans.append(span)

            for start_date, end_date, district in merged_spans:
                # Use the district that belongs to this span. Previously the
                # latest entry's district was applied to every term, which
                # discarded the per-span district tracked above.
                district = district.replace(' 0', ' ')
                # An end date of 2017-12-31 is recorded as open-ended.
                if end_date == datetime.date(2017, 12, 31):
                    end_date = ''
                else:
                    end_date = end_date.isoformat()
                p.add_term('Council Member', 'legislature',
                           district=district,
                           start_date=start_date.isoformat(),
                           end_date=end_date)

            party = councilman['Political Party']
            if party == 'Democrat':
                party = 'Democratic'

            if party:
                p.add_party(party)

            if councilman['Photo']:
                p.image = councilman['Photo']

            if councilman["E-mail"]:
                p.add_contact_detail(type="email",
                                     value=councilman['E-mail']['url'],
                                     note='E-mail')

            if councilman['Web site']:
                p.add_link(councilman['Web site']['url'], note='web site')

            p.extras = {'Notes': councilman['Notes']}

            p.add_source(councilman['Person Name']['url'], note='web')

            for committee, _, _ in committees:
                committee_name = committee['Department Name']['label']
                # Only bodies whose name contains "committee" are recorded,
                # and never the ones listed in noncommittees.
                if committee_name not in noncommittees and 'committee' in committee_name.lower():
                    o = committee_d.get(committee_name, None)
                    if o is None:
                        parent_id = PARENT_ORGS.get(committee_name,
                                                    'New York City Council')
                        o = Organization(committee_name,
                                         classification='committee',
                                         parent_id={'name': parent_id})
                        o.add_source(committee['Department Name']['url'])
                        committee_d[committee_name] = o

                    membership = o.add_member(p, role=committee["Title"])
                    membership.start_date = self.mdY2Ymd(committee["Start Date"])
            yield p
            
#.........这里部分代码省略.........
开发者ID:Code-for-Miami,项目名称:scrapers-us-municipal,代码行数:103,代码来源:people.py

示例4: scrape

# 需要导入模块: from pupa.scrape import Person [as 别名]
# 或者: from pupa.scrape.Person import add_party [as 别名]
    def scrape(self):
        """Build ``Person`` objects for City Council office holders.

        Two sources are combined, keyed by person name:

        * ``web_info`` -- member details scraped from the Legistar web site
          through ``LegistarPersonScraper.councilMembers()``;
        * ``terms`` -- office records returned by ``self.body_offices()``
          for the body named 'City Council'.

        For each person, every office record becomes a term, and party,
        photo, contact details, e-mail and web site are added from the web
        details when present.

        :raises AssertionError: if the two sources do not contain exactly
            the same set of names.
        """
        web_scraper = LegistarPersonScraper(requests_per_minute = self.requests_per_minute)
        web_scraper.MEMBERLIST = 'http://legistar.council.nyc.gov/DepartmentDetail.aspx?ID=6897&GUID=CDC6E691-8A8C-4F25-97CB-86F31EDAB081&Mode=MainBody'

        if self.cache_storage:
            web_scraper.cache_storage = self.cache_storage

        if self.requests_per_minute == 0:
            web_scraper.cache_write_only = False

        # Name -> details scraped from the web site.
        web_info = {}

        for member, _ in web_scraper.councilMembers():
            name = member['Person Name']['label'].strip()
            web_info[name] = member

        # Exactly one body must be named 'City Council'; unpacking fails
        # otherwise.
        city_council, = [body for body in self.bodies()
                         if body['BodyName'] == 'City Council']

        # Name -> list of office records.
        terms = collections.defaultdict(list)

        public_advocates = {  # Match casing to Bill De Blasio as council member
            'The Public Advocate (Mr. de Blasio)': 'Bill De Blasio',
            'The Public Advocate (Ms. James)': 'Letitia James',
        }

        for office in self.body_offices(city_council):
            name = office['OfficeRecordFullName']
            name = public_advocates.get(name, name).strip()

            terms[name].append(office)

            # Add past members (and public advocates), who have no entry on
            # the web site; every lookup on their record yields None.
            if name not in web_info:
                web_info[name] = collections.defaultdict(lambda: None)

        # Check that we have everyone we expect, formatted consistently, in
        # both information arrays. For instance, this will fail if we forget to
        # strip trailing spaces from names on one side or the other (which has
        # the effect of omitting information, such as post, from the scrape).

        assert set(web_info.keys()) == set(terms.keys())

        members = {}

        for member, offices in terms.items():

            p = Person(member)

            web = web_info[member]

            for term in offices:
                role = term['OfficeRecordTitle']

                if role == 'Public Advocate':
                    role = 'Non-Voting Council Member'
                else:
                    role = 'Council Member'

                district = web.get('District', '').replace(' 0', ' ')

                p.add_term(role,
                           'legislature',
                           district=district,
                           start_date=self.toDate(term['OfficeRecordStartDate']),
                           end_date=self.toDate(term['OfficeRecordEndDate']))

                party = web.get('Political Party')

                if party == 'Democrat':
                    party = 'Democratic'

                if party:
                    p.add_party(party)

                if web.get('Photo'):
                    p.image = web['Photo']

                # Web-site field name -> (contact detail type, note)
                contact_types = {
                    "City Hall Office": ("address", "City Hall Office"),
                    "City Hall Phone": ("voice", "City Hall Phone"),
                    "Ward Office Phone": ("voice", "Ward Office Phone"),
                    "Ward Office Address": ("address", "Ward Office Address"),
                    "Fax": ("fax", "Fax")
                }

                for contact_type, (type_, _note) in contact_types.items():
                    # Index the mapping; the previous code called it like a
                    # function (web(contact_type)), which raises TypeError
                    # whenever the field is present.
                    if web.get(contact_type) and web[contact_type] != 'N/A':
                        p.add_contact_detail(type=type_,
                                             value=web[contact_type],
                                             note=_note)

                if web.get('E-mail'):
                    p.add_contact_detail(type="email",
                                         value=web['E-mail']['url'],
                                         note='E-mail')

                if web.get('Web site'):
                    p.add_link(web['Web site']['url'], note='web site')

#.........这里部分代码省略.........
开发者ID:datamade,项目名称:scrapers-us-municipal,代码行数:103,代码来源:people.py

示例5: test_person_add_party

# 需要导入模块: from pupa.scrape import Person [as 别名]
# 或者: from pupa.scrape.Person import add_party [as 别名]
def test_person_add_party():
    """add_party() records a valid membership pointing at the party's pseudo-id."""
    person = Person('Groot')
    person.add_party('Green')

    # The party membership is the first related object on the person.
    membership = person._related[0]
    membership.validate()

    expected = {'name': 'Green', 'classification': 'party'}
    assert get_pseudo_id(membership.organization_id) == expected


注:本文中的pupa.scrape.Person.add_party方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。