当前位置: 首页>>代码示例>>Python>>正文


Python scrape.Person类代码示例

本文整理汇总了Python中pupa.scrape.Person的典型用法代码示例。如果您正苦于以下问题:Python Person类的具体用法?Python Person怎么用?Python Person使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。


在下文中一共展示了Person类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: scrape_csv

 def scrape_csv(self, reader):
     """Yield a Person for every row produced by ``reader``.

     Each row must be a mapping providing the "Contact First Name" and
     "Contact Last Name" keys; the two values are joined with a single
     space to form the person's name. Every person is sourced to
     SEARCH_URL.
     """
     name_template = "{Contact First Name} {Contact Last Name}"
     for record in reader:
         person = Person(name=name_template.format(**record))
         person.add_source(SEARCH_URL)
         yield person
开发者ID:AshleyTemple,项目名称:scrapers-us-state,代码行数:7,代码来源:contributions.py

示例2: test_deduplication_no_name_overlap

def test_deduplication_no_name_overlap():
    """Importing 'CM Punk' after create_person() must leave two Person rows."""
    create_person()
    # make sure we're not just being ridiculous and avoiding importing anything in the same org
    payload = [ScrapePerson('CM Punk').as_dict()]
    importer = PersonImporter('jurisdiction-id')
    importer.import_data(payload)
    total = Person.objects.all().count()
    assert total == 2
开发者ID:johnfelipe,项目名称:pupa,代码行数:7,代码来源:test_people_importer.py

示例3: scrape_upper_chamber

    def scrape_upper_chamber(self, term):
        """Yield a Person for every occupied seat on the senators index page.

        Fetches the oksenate.gov senators page, walks each biography link
        inside the first ``table[@summary]`` and builds a Person from the
        link text, which is expected to look like ``Name (X)`` where ``X``
        is a key of ``self._parties``. Seats whose link text is missing or
        reads "Vacant" are reported via ``self.warning`` and skipped.

        ``term`` is accepted but not used by this method.
        """
        index_url = "http://oksenate.gov/Senators/Default.aspx"
        doc = lxml.html.fromstring(self.get(index_url).text)
        doc.make_links_absolute(index_url)

        roster = doc.xpath('//table[@summary]')[0]
        bio_links = roster.xpath('.//td//a[contains(@href, "biographies")]')

        for anchor in bio_links:
            # The district number is the second word of either the text that
            # trails the link's parent or, failing that, a nearby <span>.
            parent_tail = anchor.xpath('..')[0].tail
            if parent_tail:
                district = parent_tail.split()[1]
            else:
                district = anchor.xpath('../../span')[1].text.split()[1]

            label = anchor.text
            if label is None or label.strip() == 'Vacant':
                self.warning("District {} appears to be empty".format(district))
                continue

            parsed = re.match(r'(.+) \(([A-Z])\)', label.strip())
            name = parsed.group(1)
            party = self._parties[parsed.group(2)]

            bio_url = anchor.get('href')

            person = Person(
                primary_org='upper',
                district=district,
                name=name.strip(),
                party=party,
            )
            person.add_link(bio_url)
            person.add_source(bio_url)
            self.scrape_upper_offices(person, bio_url)
            yield person
开发者ID:neelneelpurk,项目名称:openstates,代码行数:32,代码来源:people.py

示例4: get_council

    def get_council(self):
        """Yield a Person for each member linked from ``self.COUNCIL_URL``.

        Every directory link in the "City Directory" table is followed; the
        member's name, title (used as the role) and optional portrait are
        read from the linked page. A page without exactly one portrait
        image yields an empty ``image`` value.
        """
        directory = self.lxmlize(self.COUNCIL_URL)
        profile_links = directory.xpath(
            '//table[@summary="City Directory"]/tr//'
            'a[contains(@href, "/directory.aspx?EID=")]/@href')

        for profile_url in profile_links:
            profile = self.lxmlize(profile_url)

            # Exactly one heading is expected; strip any honorific prefix.
            (raw_name, ) = profile.xpath('//h1[@class="BioName"]/text()')
            (name, ) = re.findall(r'^(?:Mr\.|Mrs\.|Hon\.)?\s*(.*?)\s*$', raw_name)

            # The number of text nodes varies with whether the person has an
            # email, so collect them all and pick the title by position.
            bio_text = profile.xpath(
                '//a[@class="BioLink"]/parent::div/text()')
            raw_title = bio_text[1].strip()
            (role, ) = re.findall(
                r'^Title: (Council Member,?(?: Ward \d)|Mayor)\s*$', raw_title)

            portraits = profile.xpath('//span[@class="BioText"]//img/@src')
            image_url = portraits[0] if len(portraits) == 1 else ''

            councillor = Person(
                name=name,
                image=image_url,
                primary_org='legislature',
                role=role,
            )
            councillor.add_source(profile_url)

            yield councillor
开发者ID:datamade,项目名称:scrapers-us-municipal,代码行数:34,代码来源:people.py

示例5: test_bill_sponsor_by_identifier

def test_bill_sponsor_by_identifier():
    """A sponsorship added by identifier should resolve to the person holding it."""
    create_jurisdiction()
    organization = create_org()

    scraped_bill = ScrapeBill(
        'HB 1', '1900', 'Axe & Tack Tax Act',
        classification='tax bill', chamber='lower',
    )
    scraped_bill.add_sponsorship_by_identifier(
        name="SNODGRASS",
        classification='sponsor',
        entity_type='person',
        primary=True,
        identifier="TOTALLY_REAL_ID",
        scheme="TOTALLY_REAL_SCHEME",
    )

    org_importer = OrganizationImporter('jid')
    person_importer = PersonImporter('jid')

    # Import a person carrying the same identifier the sponsorship refers to.
    scraped_person = ScrapePerson(name='Zadock Snodgrass')
    scraped_person.add_identifier(
        identifier='TOTALLY_REAL_ID', scheme='TOTALLY_REAL_SCHEME')
    person_importer.import_data([scraped_person.as_dict()])

    stored_person = Person.objects.get()
    Membership.objects.create(
        person_id=stored_person.id, organization_id=organization.id)

    bill_importer = BillImporter('jid', org_importer, person_importer)
    bill_importer.import_data([scraped_bill.as_dict()])

    stored_bill = Bill.objects.get()
    (sponsorship,) = stored_bill.sponsorships.all()
    assert sponsorship.person.name == "Zadock Snodgrass"
开发者ID:rshorey,项目名称:pupa,代码行数:29,代码来源:test_bill_importer.py

示例6: scrape_counciler

    def scrape_counciler(self, url):
        """Yield one Person scraped from the council member page at ``url``.

        Name, district and portrait are each expected to match exactly one
        node on the page. Every non-empty cell of the centred table is read
        as ``"<label>:<value>"`` and stored as a contact detail; a label
        outside the known set raises ``KeyError``.
        """
        # Maps the label shown on the page to the contact-detail type.
        contact_types = {
            "Home Phone": "voice",
            "Address": "address",
            "Email": "email",
            "Cell Phone": "voice",
        }

        page = self.lxmlize(url)
        (who,) = page.xpath("//h3[@class='subtitle']/text()")
        (district,) = page.xpath("//div[@class='right-bar']//h2/text()")
        (portrait,) = page.xpath(
            "//div[@class='left-bar']//a[@class='image lightbox']//img"
        )

        member = Person(
            primary_org='legislature',
            name=who,
            district=district,
            image=portrait.attrib['src'],
        )
        member.add_source(url)

        for cell in page.xpath("//table[@align='center']//td"):
            text = cell.text_content().strip()
            if not text:
                continue

            # Split on the first colon only, so values may contain colons.
            label, value = text.split(":", 1)
            member.add_contact_detail(
                type=contact_types[label],
                note=label,
                value=value,
            )

        yield member
开发者ID:ChaelCodes,项目名称:scrapers-us-municipal,代码行数:33,代码来源:people.py

示例7: scrape_legislator

    def scrape_legislator(self, chamber, name, url):
        """Yield one Person built from the legislator page at ``url``.

        The district is the second word of the last "DISTRICT" heading with
        leading zeros removed; the party letter (R, D or I) is read from the
        last ``<h2>`` on the page and expanded to its full name.
        """
        page = lxml.html.fromstring(self.get(url).text)
        page.make_links_absolute(url)

        district_heading = page.xpath('//h1[contains(., "DISTRICT")]/text()')[-1]
        district = district_heading.split()[1].strip().lstrip('0')

        party_heading = page.xpath('//h2')[-1].text_content()
        party_code = re.search(r'\((R|D|I)[ \-\]]', party_heading).group(1)
        # The pattern only admits these three letters, so the lookup cannot miss.
        party = {
            'D': 'Democratic',
            'R': 'Republican',
            'I': 'Independent',
        }[party_code]

        portrait = page.xpath("//img[contains(@src, 'images/members/')]")[0]
        photo_url = portrait.attrib['src']

        legislator = Person(
            name,
            district=district,
            party=party,
            image=photo_url,
            primary_org=chamber,
        )
        legislator.add_link(url)
        legislator.add_source(url)
        self.scrape_offices(legislator, page)

        yield legislator
开发者ID:neelneelpurk,项目名称:openstates,代码行数:27,代码来源:people.py

示例8: test_deduplication_same_name

def test_deduplication_same_name():
    """Importing 'Dwayne Johnson' after create_person() must leave one Person row."""
    create_person()
    # simplest case: the scraped name is just the same name
    payload = [ScrapePerson('Dwayne Johnson').as_dict()]
    importer = PersonImporter('jurisdiction-id')
    importer.import_data(payload)
    total = Person.objects.all().count()
    assert total == 1
开发者ID:johnfelipe,项目名称:pupa,代码行数:7,代码来源:test_people_importer.py

示例9: test_deduplication_other_name_exists

def test_deduplication_other_name_exists():
    """Importing 'Rocky' after create_person() must leave one Person row."""
    create_person()
    # Rocky is already saved in other_names
    payload = [ScrapePerson('Rocky').as_dict()]
    importer = PersonImporter('jurisdiction-id')
    importer.import_data(payload)
    total = Person.objects.all().count()
    assert total == 1
开发者ID:johnfelipe,项目名称:pupa,代码行数:7,代码来源:test_people_importer.py

示例10: test_multiple_orgs_of_same_class

def test_multiple_orgs_of_same_class():
    """
    Memberships must attach to the right organization even when several
    organizations in one jurisdiction share the same classification.
    """
    for org_id, org_name in (("fnd", "Foundation"), ("fdr", "Federation")):
        Organization.objects.create(
            id=org_id,
            name=org_name,
            classification="foundation",
            jurisdiction_id="fnd-jid",
        )

    hari = ScrapePerson(
        'Hari Seldon',
        primary_org='foundation',
        role='founder',
        primary_org_name='Foundation',
    )
    picard = ScrapePerson(
        'Jean Luc Picard',
        primary_org='foundation',
        role='founder',
        primary_org_name='Federation',
    )

    person_importer = PersonImporter('fnd-jid')
    person_importer.import_data([hari.as_dict()])
    person_importer.import_data([picard.as_dict()])

    # try to import a membership
    org_importer = OrganizationImporter('fnd-jid')
    mock_importer = DumbMockImporter()
    membership_importer = MembershipImporter(
        'fnd-jid', person_importer, org_importer, mock_importer)

    membership_importer.import_data([
        hari._related[0].as_dict(),
        picard._related[0].as_dict(),
    ])

    expected_orgs = (
        ('Hari Seldon', 'Foundation'),
        ('Jean Luc Picard', 'Federation'),
    )
    for person_name, org_name in expected_orgs:
        membership = Person.objects.get(name=person_name).memberships.get()
        assert membership.organization.name == org_name
开发者ID:tor-councilmatic,项目名称:pupa,代码行数:34,代码来源:test_membership_importer.py

示例11: scrape_member

    def scrape_member(self, chamber, link):
        """Yield one Person built from a roster ``link`` and its member page.

        District and party come from the third and fourth cells of the row
        containing ``link``; the photo URL is derived from the ``personID``
        query parameter of the link and ``self.latest_session()``. The
        member page is then fetched and handed to ``scrape_member_page``.
        """
        name = link.text.strip()
        leg_url = link.get('href')
        district = link.xpath("string(../../td[3])")
        listed_party = link.xpath("string(../../td[4])")

        # Email is no longer read from the roster row; it comes from the
        # member page fetched below.

        # Normalise the two labels that differ from the stored party names.
        party = {
            'Democrat': 'Democratic',
            'No Party Specified': 'Independent',
        }.get(listed_party, listed_party)

        href = link.attrib['href']
        person_id = re.search(r"personID=(\d+)", href).group(1)
        photo_url = (
            "https://www.legis.iowa.gov/photo"
            "?action=getPhoto&ga=%s&pid=%s"
        ) % (self.latest_session(), person_id)

        legislator = Person(
            name=name,
            primary_org=chamber,
            district=district,
            party=party,
            image=photo_url,
        )
        legislator.add_link(leg_url)
        legislator.add_source(leg_url)

        member_html = self.get(href).text
        self.scrape_member_page(legislator, lxml.html.fromstring(member_html))
        yield legislator
开发者ID:sunlightlabs,项目名称:openstates,代码行数:31,代码来源:people.py

示例12: test_deduplication_no_jurisdiction_overlap

def test_deduplication_no_jurisdiction_overlap():
    """Importing 'Dwayne Johnson' under another jurisdiction id must leave two Person rows."""
    create_person()
    # make sure we get a new person if we're in a different org
    payload = [ScrapePerson('Dwayne Johnson').as_dict()]
    importer = PersonImporter('new-jurisdiction-id')
    importer.import_data(payload)
    total = Person.objects.all().count()
    assert total == 2
开发者ID:johnfelipe,项目名称:pupa,代码行数:7,代码来源:test_people_importer.py

示例13: get_council

    def get_council(self):
        """Yield a Person for each member linked from ``self.COUNCIL_URL``.

        Every directory link in the "City Directory" table is followed; the
        member's name, title (used as the role) and optional portrait are
        read from the linked page. A page without exactly one portrait
        image yields an empty ``image`` value.
        """
        directory = self.lxmlize(self.COUNCIL_URL)
        profile_links = directory.xpath(
            '//table[@summary="City Directory"]/tr//'
            'a[contains(@href, "/directory.aspx?EID=")]/@href'
        )

        for profile_url in profile_links:
            profile = self.lxmlize(profile_url)

            # Exactly one name node is expected; strip any honorific prefix.
            (raw_name,) = profile.xpath('//span[@class="BioName"]/span/text()')
            (name,) = re.findall(r"^(?:Mr\.|Mrs\.|Hon\.)?\s*(.*?)\s*$", raw_name)

            (raw_title,) = profile.xpath(
                '//a[@class="BioLink"]/following-sibling::text()')
            (role,) = re.findall(
                r"^Title: (Council Member(?: Ward \d)|Mayor)\s*$", raw_title)

            portraits = profile.xpath('//span[@class="BioText"]//img/@src')
            image_url = portraits[0] if len(portraits) == 1 else ""

            councillor = Person(
                name=name,
                image=image_url,
                primary_org="legislature",
                role=role,
            )
            councillor.add_source(profile_url)

            yield councillor
开发者ID:dtpeters,项目名称:scrapers-us-municipal,代码行数:25,代码来源:people.py

示例14: table_row_to_legislator_and_profile_url

def table_row_to_legislator_and_profile_url(table_row_element, chamber):
    """Build a Person from a legislator table row and return it with the profile link.

    Args:
        table_row_element: lxml element for a table row. It must have exactly
            six ``td`` children, in the order role, name, district, party,
            phone, email.
        chamber: value passed through as the Person's ``primary_org``.

    Returns:
        A ``(legislator, profile_url)`` tuple, where ``profile_url`` is the
        ``href`` of the single ``<a>`` child of the name cell.

    Raises:
        ValueError: if the row does not have exactly six cells, or the name
            cell does not have exactly one ``<a>`` child carrying an ``href``.
    """
    (role_element, name_element, district_element, party_element,
     phone_element, email_element) = table_row_element.xpath('td')

    # The name is used exactly as it appears in the cell.
    full_name = name_element.text_content().strip()
    district = district_element.text_content().strip()
    party = party_element.text_content().strip()
    if party == 'Democrat':
        party = 'Democratic'

    role = role_element.text_content().strip()
    address = co_address_from_role(role)
    phone = phone_element.text_content().strip()
    email = email_element.text_content().strip()

    (profile_url, ) = name_element.xpath('a/@href')

    legislator = Person(primary_org=chamber,
                        name=full_name,
                        district=district,
                        party=party)
    # All three contact details are filed under the same office note.
    legislator.add_contact_detail(type='address', value=address, note='Capitol Office')
    legislator.add_contact_detail(type='voice', value=phone, note='Capitol Office')
    legislator.add_contact_detail(type='email', value=email, note='Capitol Office')

    return legislator, profile_url
开发者ID:neelneelpurk,项目名称:openstates,代码行数:31,代码来源:people.py

示例15: scrape_chamber

    def scrape_chamber(self, chamber):
        """Yield a Person for each member listed on the chamber's home page.

        ``chamber == 'upper'`` reads senate.legis.state.ak.us; any other
        value reads house.legis.state.ak.us. Entries carrying a label that
        starts with "Deceased" are reported via ``self.warning`` and
        skipped. As a side effect the party-name mapping is stored on
        ``self._party_map``.
        """
        self._party_map = {
            'Democrat': 'Democratic',
            'Republican': 'Republican',
            'Non Affiliated': 'Independent',
            'Not Affiliated': 'Independent',
        }

        url = (
            'http://senate.legis.state.ak.us/'
            if chamber == 'upper'
            else 'http://house.legis.state.ak.us/'
        )
        page = self.lxmlize(url)

        # The second <ul class="item"> on the page holds the member entries.
        member_list = page.xpath('//ul[@class="item"]')[1]

        for entry in member_list.getchildren():
            photo_url = entry.xpath('.//img/@src')[0]
            name = entry.xpath('.//strong/text()')[0]
            leg_url = entry.xpath('.//a/@href')[0]

            # NOTE: when no "Email Me" link exists, ``email`` stays an empty
            # list and is passed on to _scrape_offices in that form.
            email = entry.xpath('.//a[text()="Email Me"]/@href')
            if not email:
                self.warning('no email for ' + name)
            else:
                email = email[0].replace('mailto:', '')

            party = None
            district = None

            for term in entry.xpath('.//dt'):
                value = term.xpath('following-sibling::dd')[0].text_content()
                label = term.text.strip()
                if label == 'Party:':
                    party = value
                elif label == 'District:':
                    district = value
                elif label.startswith('Deceased'):
                    self.warning('skipping deceased ' + name)
                    break
            else:
                # Reached only when no "Deceased" label interrupted the loop.
                person = Person(
                    primary_org=chamber,
                    district=district,
                    name=name,
                    party=self._party_map[party],
                    image=photo_url,
                )
                person.add_source(leg_url)
                person.add_link(leg_url)

                # scrape offices
                self._scrape_offices(person, leg_url, email)

                yield person
开发者ID:neelneelpurk,项目名称:openstates,代码行数:59,代码来源:people.py


注:本文中的pupa.scrape.Person类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。