当前位置: 首页>>代码示例>>Python>>正文


Python Person.add_source方法代码示例

本文整理汇总了Python中pupa.scrape.Person.add_source方法的典型用法代码示例。如果您正苦于以下问题:Python Person.add_source方法的具体用法?Python Person.add_source怎么用?Python Person.add_source使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在pupa.scrape.Person的用法示例。


在下文中一共展示了Person.add_source方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: scrape

# 需要导入模块: from pupa.scrape import Person [as 别名]
# 或者: from pupa.scrape.Person import add_source [as 别名]
    def scrape(self):
        """Yield the Temecula City Council organization and its members."""
        urls = Urls(dict(list=legislators_url), self)

        # Emit the council organization first so memberships can refer to it.
        council = Organization(
            'Temecula City Council',
            classification='legislature')
        council.add_source(urls.list.url)
        yield council

        # Skip the header row of the roster table.
        for row in urls.list.xpath('//table[2]//tr')[1:]:

            # Name and role come from the first cell; portrait from an <img>.
            name, role = row.xpath('td/p[1]//font/text()')
            image = row.xpath('td/img/@src').pop()

            person = Person(name, image=image)

            # Link this person to the council with their stated role.
            membership = person.add_membership(council, role=role)

            # The first link is a "mailto:" URL; the second is the member's
            # detail page. Strip the 7-character "mailto:" prefix.
            email, detail_url = row.xpath('td//a/@href')
            membership.contact_details.append(
                dict(type='email', value=email[7:], note='work'))

            person.add_source(urls.list.url)
            person.add_source(detail_url)

            yield person
开发者ID:ChaelCodes,项目名称:scrapers-us-municipal,代码行数:34,代码来源:people.py

示例2: bos_scrape_people

# 需要导入模块: from pupa.scrape import Person [as 别名]
# 或者: from pupa.scrape.Person import add_source [as 别名]
    def bos_scrape_people(self):
        """Yield a Person for each councillor on the member listing page."""
        page = self.lxmlize(MEMBER_LIST)
        cells = page.xpath(
            "//table[@width='100%']//td[@style='TEXT-ALIGN: center']")

        for cell in cells:
            portrait = self.get_one(cell, ".//img")
            link = self.get_one(
                cell,
                ".//a[contains(@href, 'councillors') and (text()!='')]")
            role = cell.xpath(".//br")[0].tail.strip()

            # Listing-page portrait is a fallback; the homepage portrait
            # (when present) replaces it below.
            image = portrait.attrib['src']
            homepage = link.attrib['href']
            name = clean_name(link.text)

            info = self.scrape_homepage(homepage)
            if info.get('image', None):
                image = info['image']

            p = Person(name=name, district=role, image=image,
                       primary_org="legislature", biography=info['bio'])
            p.add_link(url=homepage, note='homepage')
            p.add_source(homepage)
            p.add_source(MEMBER_LIST)
            yield p
开发者ID:Code-for-Miami,项目名称:scrapers-us-municipal,代码行数:27,代码来源:people.py

示例3: scrape_alderman

# 需要导入模块: from pupa.scrape import Person [as 别名]
# 或者: from pupa.scrape.Person import add_source [as 别名]
	def scrape_alderman(self, ward_num):
		"""Scrape a single ward's alderman page and return a Person."""
		ward_url = "{}/ward-{}".format(Utils.ALDERMEN_HOME, ward_num)
		alderman_url = self.alderman_url(ward_url)
		page = self.lxmlize(alderman_url)

		# The alderman's name is the only <h1> on the page.
		name = page.xpath("//h1/text()")[0]

		# Seeding district/role/primary_org lets pupa auto-create the
		# membership linking this person to the corresponding post in the
		# jurisdiction's "Board of Aldermen" organization.
		person = Person(
			name=name,
			district="Ward {} Alderman".format(ward_num),
			role="Alderman",
			primary_org="legislature")

		# Additional profile fields.
		person.image = page.xpath("//div/img/@src")[0]
		phone = page.xpath("//strong[text()='Phone:']/../text()")[1].strip()
		person.add_contact_detail(type="voice", value=phone)

		# Record both the profile page and the ward landing page.
		person.add_source(alderman_url, note="profile")
		person.add_source(ward_url, note="ward")

		return person
开发者ID:rchrist,项目名称:scrapers-us-municipal,代码行数:27,代码来源:people.py

示例4: get_council

# 需要导入模块: from pupa.scrape import Person [as 别名]
# 或者: from pupa.scrape.Person import add_source [as 别名]
    def get_council(self):
        """Yield a Person for each member in the city directory table."""
        council_doc = self.lxmlize(self.COUNCIL_URL)

        profile_urls = council_doc.xpath(
            '//table[@summary="City Directory"]/tr//'
            'a[contains(@href, "/directory.aspx?EID=")]/@href')
        for profile_url in profile_urls:
            profile = self.lxmlize(profile_url)

            # Strip any courtesy title and surrounding whitespace.
            (raw_name, ) = profile.xpath('//h1[@class="BioName"]/text()')
            (name, ) = re.findall(r'^(?:Mr\.|Mrs\.|Hon\.)?\s*(.*?)\s*$',
                                  raw_name)

            # Grab every text node because the count varies depending on
            # whether the member lists an email address.
            text_list = profile.xpath(
                '//a[@class="BioLink"]/parent::div/text()')
            (title, ) = re.findall(
                r'^Title: (Council Member,?(?: Ward \d)|Mayor)\s*$',
                text_list[1].strip())

            try:
                (image_url, ) = profile.xpath(
                    '//span[@class="BioText"]//img/@src')
            except ValueError:
                # No portrait on this profile page.
                image_url = ''

            member = Person(name=name,
                            image=image_url,
                            primary_org='legislature',
                            role=title)
            member.add_source(profile_url)

            yield member
开发者ID:datamade,项目名称:scrapers-us-municipal,代码行数:36,代码来源:people.py

示例5: scrape_counciler

# 需要导入模块: from pupa.scrape import Person [as 别名]
# 或者: from pupa.scrape.Person import add_source [as 别名]
    def scrape_counciler(self, url):
        """Scrape one council member's profile page and yield a Person."""
        page = self.lxmlize(url)
        name, = page.xpath("//h3[@class='subtitle']/text()")
        district, = page.xpath("//div[@class='right-bar']//h2/text()")
        portrait, = page.xpath(
            "//div[@class='left-bar']//a[@class='image lightbox']//img"
        )

        member = Person(
            primary_org='legislature',
            name=name, district=district,
            image=portrait.attrib['src']
        )
        member.add_source(url)

        # Map the page's "Label: value" rows onto pupa contact-detail types.
        label_to_type = {
            "Home Phone": "voice",
            "Address": "address",
            "Email": "email",
            "Cell Phone": "voice",
        }
        for cell in page.xpath("//table[@align='center']//td"):
            text = cell.text_content().strip()
            if not text:
                continue

            label, value = text.split(":", 1)
            member.add_contact_detail(type=label_to_type[label],
                                      note=label,
                                      value=value)

        yield member
开发者ID:ChaelCodes,项目名称:scrapers-us-municipal,代码行数:35,代码来源:people.py

示例6: scrape_legislator

# 需要导入模块: from pupa.scrape import Person [as 别名]
# 或者: from pupa.scrape.Person import add_source [as 别名]
    def scrape_legislator(self, chamber, name, url):
        """Scrape a legislator's detail page and yield a Person."""
        page = lxml.html.fromstring(self.get(url).text)
        page.make_links_absolute(url)

        # District number follows "DISTRICT" in the heading; drop any
        # leading zeros.
        heading = page.xpath('//h1[contains(., "DISTRICT")]/text()').pop()
        district = heading.split()[1].strip().lstrip('0')

        # The party initial appears parenthesized in the last <h2>.
        party_text = page.xpath('//h2').pop().text_content()
        initial = re.search(r'\((R|D|I)[ \-\]]', party_text).group(1)
        party = {
            'D': 'Democratic',
            'R': 'Republican',
            'I': 'Independent',
        }[initial]

        photo_url = page.xpath(
            "//img[contains(@src, 'images/members/')]")[0].attrib['src']

        leg = Person(name, district=district, party=party,
                     image=photo_url, primary_org=chamber)
        leg.add_link(url)
        leg.add_source(url)
        self.scrape_offices(leg, page)

        yield leg
开发者ID:neelneelpurk,项目名称:openstates,代码行数:29,代码来源:people.py

示例7: scrape_chamber

# 需要导入模块: from pupa.scrape import Person [as 别名]
# 或者: from pupa.scrape.Person import add_source [as 别名]
    def scrape_chamber(self, session):
        """Yield a Person for every legislator in the given session."""
        session_key = SESSION_KEYS[session]
        legislators = self.api_client.get('legislators', session=session_key)

        for leg in legislators:
            # Member photos are keyed by the final path segment of the
            # member's website URL.
            url_name = leg['WebSiteUrl'].split('/')[-1]
            chamber_name = 'house' if leg['Chamber'] == 'H' else 'senate'
            img = 'https://www.oregonlegislature.gov/{}/MemberPhotos/{}.jpg'.format(
                chamber_name, url_name
            )

            # Normalize the API's party spelling to pupa's.
            party = leg['Party']
            if party == 'Democrat':
                party = 'Democratic'

            full_name = '{} {}'.format(leg['FirstName'], leg['LastName'])
            person = Person(name=full_name,
                            primary_org={'S': 'upper', 'H': 'lower'}[leg['Chamber']],
                            party=party,
                            district=leg['DistrictNumber'],
                            image=img)
            person.add_link(leg['WebSiteUrl'])
            person.add_source(leg['WebSiteUrl'])

            # Capitol address/phone are optional; email is always present.
            if leg['CapitolAddress']:
                person.add_contact_detail(type='address',
                                          value=leg['CapitolAddress'],
                                          note='Capitol Office')

            if leg['CapitolPhone']:
                person.add_contact_detail(type='voice',
                                          value=leg['CapitolPhone'],
                                          note='Capitol Office')

            person.add_contact_detail(type='email',
                                      value=leg['EmailAddress'],
                                      note='Capitol Office')

            yield person
开发者ID:cliftonmcintosh,项目名称:openstates,代码行数:37,代码来源:people.py

示例8: scrape_member

# 需要导入模块: from pupa.scrape import Person [as 别名]
# 或者: from pupa.scrape.Person import add_source [as 别名]
    def scrape_member(self, chamber, link):
        """Build a Person from one roster-table row link and yield it."""
        name = link.text.strip()
        leg_url = link.get('href')
        district = link.xpath("string(../../td[3])")

        # Normalize the site's party labels to pupa's spellings.
        # (Email now lives on the member's own page, not the roster row.)
        party = link.xpath("string(../../td[4])")
        if party == 'Democrat':
            party = 'Democratic'
        elif party == 'No Party Specified':
            party = 'Independent'

        # Photo URLs are keyed by general-assembly number plus person id.
        pid = re.search(r"personID=(\d+)", link.attrib['href']).group(1)
        photo_url = ("https://www.legis.iowa.gov/photo"
                     "?action=getPhoto&ga=%s&pid=%s" % (self.latest_session(), pid))

        leg = Person(
            name=name,
            primary_org=chamber,
            district=district,
            party=party,
            image=photo_url)

        leg.add_link(leg_url)
        leg.add_source(leg_url)

        # Contact details are scraped from the member's detail page.
        detail_page = lxml.html.fromstring(self.get(link.attrib['href']).text)
        self.scrape_member_page(leg, detail_page)
        yield leg
开发者ID:sunlightlabs,项目名称:openstates,代码行数:33,代码来源:people.py

示例9: scrape_upper_chamber

# 需要导入模块: from pupa.scrape import Person [as 别名]
# 或者: from pupa.scrape.Person import add_source [as 别名]
    def scrape_upper_chamber(self, term):
        """Yield a Person for each sitting state senator."""
        url = "http://oksenate.gov/Senators/Default.aspx"
        doc = lxml.html.fromstring(self.get(url).text)
        doc.make_links_absolute(url)

        table = doc.xpath('//table[@summary]')[0]
        for anchor in table.xpath('.//td//a[contains(@href, "biographies")]'):
            # District number trails the link text; fall back to the
            # sibling <span> when the tail is empty.
            tail = anchor.xpath('..')[0].tail
            if tail:
                district = tail.split()[1]
            else:
                district = anchor.xpath('../../span')[1].text.split()[1]

            # Skip vacant seats entirely.
            if anchor.text is None or anchor.text.strip() == 'Vacant':
                self.warning("District {} appears to be empty".format(district))
                continue

            # Names render as "Full Name (P)" where P is the party initial.
            match = re.match(r'(.+) \(([A-Z])\)', anchor.text.strip())
            name, party = match.group(1), self._parties[match.group(2)]

            url = anchor.get('href')

            person = Person(primary_org='upper',
                            district=district,
                            name=name.strip(),
                            party=party,
                            )
            person.add_link(url)
            person.add_source(url)
            self.scrape_upper_offices(person, url)
            yield person
开发者ID:neelneelpurk,项目名称:openstates,代码行数:34,代码来源:people.py

示例10: test_full_person

# 需要导入模块: from pupa.scrape import Person [as 别名]
# 或者: from pupa.scrape.Person import add_source [as 别名]
def test_full_person():
    """Round-trip a fully-populated scraped person through the importer."""
    person = ScrapePerson('Tom Sawyer')
    person.add_identifier('1')
    person.add_name('Tommy', start_date='1880')
    person.add_contact_detail(type='phone', value='555-555-1234', note='this is fake')
    person.add_link('http://example.com/link')
    person.add_source('http://example.com/source')

    # Run the scraped record through the importer.
    PersonImporter('jurisdiction-id').import_data([person.as_dict()])

    # Fetch the imported row and verify every field survived intact.
    p = Person.objects.get()
    assert 'ocd-person' in p.id
    assert p.name == person.name

    identifier = p.identifiers.all()[0]
    assert identifier.identifier == '1'
    assert identifier.scheme == ''

    other_name = p.other_names.all()[0]
    assert other_name.name == 'Tommy'
    assert other_name.start_date == '1880'

    contact = p.contact_details.all()[0]
    assert contact.type == 'phone'
    assert contact.value == '555-555-1234'
    assert contact.note == 'this is fake'

    assert p.links.all()[0].url == 'http://example.com/link'
    assert p.sources.all()[0].url == 'http://example.com/source'
开发者ID:johnfelipe,项目名称:pupa,代码行数:31,代码来源:test_people_importer.py

示例11: handle_list_item

# 需要导入模块: from pupa.scrape import Person [as 别名]
# 或者: from pupa.scrape.Person import add_source [as 别名]
    def handle_list_item(self, item):
        """Build a Representative from one roster list item and yield it.

        Parses the "Name (01A, PARTY)" heading, the address block, and the
        phone/email contact info from a single list entry.
        """
        photo_url = item.xpath('./img/@src')[0]
        url = item.xpath('.//h5/a/@href')[0]
        name_text = item.xpath('.//h5/a/b/text()')[0]

        # Heading format: "Name (NNA, PARTY)" — e.g. "Jane Doe (01A, DFL)".
        name_match = re.match(r'^(.+)\(([0-9]{2}[AB]), ([A-Z]+)\)$', name_text)
        name = name_match.group(1).strip()
        district = name_match.group(2).lstrip('0').upper()
        party_text = name_match.group(3)
        party = PARTIES[party_text]

        info_texts = [x.strip() for x in item.xpath(
            './div/text()[normalize-space()]'
        ) if x.strip()]
        address = '\n'.join((info_texts[0], info_texts[1]))

        # BUG FIX: `phone` and `email` were previously left unbound when
        # validation failed, raising NameError at the add_contact_detail
        # calls below. Default to None and only attach values that
        # actually validated.
        phone = None
        phone_text = info_texts[2]
        if validate_phone_number(phone_text):
            phone = phone_text

        email = None
        email_text = item.xpath('.//a/@href')[1].replace('mailto:', '').strip()
        if validate_email_address(email_text):
            email = email_text

        rep = Person(name=name, district=district, party=party,
                     primary_org='lower', role='Representative',
                     image=photo_url)
        rep.add_link(url)
        rep.add_contact_detail(type='address', value=address, note='capitol')
        if phone:
            rep.add_contact_detail(type='voice', value=phone, note='capitol')
        if email:
            rep.add_contact_detail(type='email', value=email, note='capitol')
        rep.add_source(self.url)

        yield rep
开发者ID:sunlightlabs,项目名称:openstates,代码行数:36,代码来源:people.py

示例12: handle_list_item

# 需要导入模块: from pupa.scrape import Person [as 别名]
# 或者: from pupa.scrape.Person import add_source [as 别名]
    def handle_list_item(self, item):
        """Build a Representative from one Florida House roster row."""
        link = item.xpath('.//div[@class="rep_style"]/a')[0]
        name = link.text_content().strip()

        # Skip placeholder rows for seats without a sitting member.
        for marker in ('Vacant', 'Resigned', 'Pending'):
            if marker in name:
                return

        party_code = item.xpath('.//div[@class="party_style"]/text()')[0].strip()
        party = {'D': 'Democratic', 'R': 'Republican'}[party_code]

        district = item.xpath('.//div[@class="district_style"]/text()')[0].strip()

        # Member photos are keyed by the MemberId query parameter.
        leg_url = link.get('href')
        member_id = parse.parse_qs(parse.urlsplit(leg_url).query)['MemberId'][0]
        image = "http://www.flhouse.gov/FileStores/Web/Imaging/Member/{}.jpg".format(member_id)

        rep = Person(name=name, district=district, party=party, primary_org='lower',
                     role='Representative', image=image)
        rep.add_link(leg_url)
        rep.add_source(leg_url)
        rep.add_source(self.url)

        # Fill in contact details from the member's own page.
        self.scrape_page(RepDetail, leg_url, obj=rep)

        return rep
开发者ID:tgrauer,项目名称:tot,代码行数:28,代码来源:people.py

示例13: scrape_member

# 需要导入模块: from pupa.scrape import Person [as 别名]
# 或者: from pupa.scrape.Person import add_source [as 别名]
    def scrape_member(self, chamber, member_url):
        """Scrape a Kentucky legislator's bio page and yield a Person."""
        doc = lxml.html.fromstring(self.get(member_url).text)

        photo_url = doc.xpath('//div[@id="bioImage"]/img/@src')[0]

        # The name heading is "Title First ... Last (P)": drop the leading
        # courtesy-title token and the trailing party token.
        name_pieces = doc.xpath('//span[@id="name"]/text()')[0].split()
        full_name = ' '.join(name_pieces[1:-1]).strip()
        party = {
            '(R)': 'Republican',
            '(D)': 'Democratic',
            '(I)': 'Independent',
        }.get(name_pieces[-1], name_pieces[-1])

        district = doc.xpath('//span[@id="districtHeader"]/text()')[0].split()[-1]

        person = Person(name=full_name, district=district, party=party,
                        primary_org=chamber, image=photo_url)
        person.add_source(member_url)
        person.add_link(member_url)

        address = '\n'.join(doc.xpath('//div[@id="FrankfortAddresses"]//'
                                      'span[@class="bioText"]/text()'))

        # Only "Annex:" numbers are Capitol Office contacts; a trailing
        # " (fax)" marks the fax line, anything else is the voice line.
        phone = None
        fax = None
        for num in doc.xpath('//div[@id="PhoneNumbers"]//span[@class="bioText"]/text()'):
            if not num.startswith('Annex: '):
                continue
            num = num.replace('Annex: ', '')
            if num.endswith(' (fax)'):
                fax = num.replace(' (fax)', '')
            else:
                phone = num

        # Keep the last listed @lrc.ky.gov address, if any.
        email = None
        for candidate in doc.xpath(
                '//div[@id="EmailAddresses"]//span[@class="bioText"]//a/text()'):
            if '@lrc.ky.gov' in str(candidate):
                email = candidate

        if phone:
            person.add_contact_detail(type='voice', value=phone, note='Capitol Office')

        if fax:
            person.add_contact_detail(type='fax', value=fax, note='Capitol Office')

        if email:
            person.add_contact_detail(type='email', value=email, note='Capitol Office')

        if address.strip() == "":
            self.warning("Missing Capitol Office!!")
        else:
            person.add_contact_detail(type='address', value=address, note='Capitol Office')

        yield person
开发者ID:cliftonmcintosh,项目名称:openstates,代码行数:62,代码来源:people.py

示例14: get_council

# 需要导入模块: from pupa.scrape import Person [as 别名]
# 或者: from pupa.scrape.Person import add_source [as 别名]
    def get_council(self):
        """Yield a Person for each member listed in the city directory."""
        directory = self.lxmlize(self.COUNCIL_URL)

        profile_urls = directory.xpath(
            '//table[@summary="City Directory"]/tr//' 'a[contains(@href, "/directory.aspx?EID=")]/@href'
        )
        for profile_url in profile_urls:
            profile = self.lxmlize(profile_url)

            # Strip any courtesy title and surrounding whitespace.
            (raw_name,) = profile.xpath('//span[@class="BioName"]/span/text()')
            (name,) = re.findall(r"^(?:Mr\.|Mrs\.|Hon\.)?\s*(.*?)\s*$", raw_name)

            (title_text,) = profile.xpath('//a[@class="BioLink"]/following-sibling::text()')
            (title,) = re.findall(r"^Title: (Council Member(?: Ward \d)|Mayor)\s*$", title_text)

            try:
                (image_url,) = profile.xpath('//span[@class="BioText"]//img/@src')
            except ValueError:
                # No portrait on this profile page.
                image_url = ""

            member = Person(name=name, image=image_url, primary_org="legislature", role=title)
            member.add_source(profile_url)

            yield member
开发者ID:dtpeters,项目名称:scrapers-us-municipal,代码行数:27,代码来源:people.py

示例15: scrape_csv

# 需要导入模块: from pupa.scrape import Person [as 别名]
# 或者: from pupa.scrape.Person import add_source [as 别名]
 def scrape_csv(self, reader):
     """Yield a Person for each contributor row in the CSV."""
     for record in reader:
         full_name = "{Contact First Name} {Contact Last Name}".format(**record)
         contributor = Person(name=full_name)
         contributor.add_source(SEARCH_URL)
         yield contributor
开发者ID:AshleyTemple,项目名称:scrapers-us-state,代码行数:9,代码来源:contributions.py


注:本文中的pupa.scrape.Person.add_source方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。