当前位置: 首页>>代码示例>>Python>>正文


Python Organization.add_source方法代码示例

本文整理汇总了Python中pupa.scrape.Organization.add_source方法的典型用法代码示例。如果您正苦于以下问题：Python Organization.add_source方法的具体用法？Python Organization.add_source怎么用？Python Organization.add_source使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类pupa.scrape.Organization的用法示例。


在下文中一共展示了Organization.add_source方法的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: scrape

# 需要导入模块: from pupa.scrape import Organization [as 别名]
# 或者: from pupa.scrape.Organization import add_source [as 别名]
    def scrape(self):
        """Yield one committee Organization per committee code.

        Committee records are collected for the jurisdiction's legislative
        sessions (passed in reverse order) and grouped by their 'code' key.
        The first record of a group creates the Organization; every record in
        the group contributes a source URL and a ``{term: decision_body_id}``
        entry in ``extras``, and a record whose name differs from the first
        record's name is registered as an alternate name.
        """
        term_instances = committees_from_sessions(
            self, reversed(self.jurisdiction.legislative_sessions))
        grouped = build_lookup_dict(self, data_list=term_instances, index_key='code')

        for code, instances in grouped.items():
            # TODO: Figure out how to edit city council org.
            if code == 'CC':
                continue

            extras = {'tmmis_decision_body_ids': []}
            # TODO: Ensure this survives addition of new term (2017)
            #       so specific year always creates
            canonical_i = 0
            for position, record in enumerate(instances):
                if position == canonical_i:
                    # The canonical record defines the organization itself.
                    o = Organization(name=record['name'], classification='committee')
                    extras['description'] = record['info']
                    o.add_identifier(record['code'], scheme=TWO_LETTER_ORG_CODE_SCHEME)

                decision_body = {record['term']: record['decision_body_id']}
                extras['tmmis_decision_body_ids'].append(decision_body)
                o.extras = extras
                o.add_source(record['source_url'])

                if record['name'] != instances[canonical_i]['name']:
                    # TODO: Add start_date and end_date
                    o.add_name(record['name'])

            yield o
开发者ID:anukat2015,项目名称:scrapers-ca,代码行数:29,代码来源:committees.py

示例2: scrape

# 需要导入模块: from pupa.scrape import Organization [as 别名]
# 或者: from pupa.scrape.Organization import add_source [as 别名]
    def scrape(self):
        """Yield the council Organization, then one Person per roster row.

        The roster is read from the second table on the legislators page; its
        first row is skipped.  Each remaining row must provide exactly two
        text nodes (name, role) and exactly two hrefs (e-mail link, detail
        page), otherwise tuple unpacking raises ValueError.
        """
        urls = Urls(dict(list=legislators_url), self)

        council = Organization('Temecula City Council', classification='legislature')
        council.add_source(urls.list.url)
        yield council

        rows = urls.list.xpath('//table[2]//tr')
        for row in rows[1:]:
            # Name/role text and the portrait URL.
            name, role = row.xpath('td/p[1]//font/text()')
            portrait = row.xpath('td/img/@src').pop()

            person = Person(name, image=portrait)
            membership = person.add_membership(council, role=role)

            # First href is the e-mail link, second the detail page.  The
            # first 7 characters are dropped -- presumably the "mailto:"
            # prefix; verify against the page.
            mail_href, detail_url = row.xpath('td//a/@href')
            membership.contact_details.append(
                {'type': 'email', 'value': mail_href[7:], 'note': 'work'})

            person.add_source(urls.list.url)
            person.add_source(detail_url)

            yield person
开发者ID:ChaelCodes,项目名称:scrapers-us-municipal,代码行数:34,代码来源:people.py

示例3: scrape_approp_subcommittees

# 需要导入模块: from pupa.scrape import Organization [as 别名]
# 或者: from pupa.scrape.Organization import add_source [as 别名]
    def scrape_approp_subcommittees(self, url):
        """Yield one subcommittee Organization per ``<strong>`` element at *url*.

        Each ``<strong>`` supplies the subcommittee name; the tail text of the
        element following it supplies the member list, split on ", " and
        " and ".  A trailing "(C)", "(VC)" or "(MVC)" marker selects the role
        and is stripped from the name together with the one character before it.
        """
        doc = lxml.html.fromstring(self.get(url).text)

        # Trailing marker -> role; the markers cannot match one another.
        role_markers = (
            ('(C)', 'chairman'),
            ('(VC)', 'vice chairman'),
            ('(MVC)', 'minority vice chairman'),
        )

        for heading in doc.xpath('//strong'):
            com = Organization(
                name=heading.text.strip(),
                parent_id={
                    'name': 'Appropriations',
                    'classification': 'committee',
                },
                classification='committee',
            )
            com.add_source(url)

            roster = heading.getnext().tail.replace('Senators', '').strip()
            for leg in re.split(', | and ', roster):
                role = 'member'
                for marker, marked_role in role_markers:
                    if leg.endswith(marker):
                        role = marked_role
                        # Drop the marker plus the character preceding it.
                        leg = leg[:-(len(marker) + 1)]
                        break
                com.add_member(leg, role=role)

            yield com
开发者ID:neelneelpurk,项目名称:openstates,代码行数:33,代码来源:committees.py

示例4: scrape_senate_committee

# 需要导入模块: from pupa.scrape import Organization [as 别名]
# 或者: from pupa.scrape.Organization import add_source [as 别名]
    def scrape_senate_committee(self, url):
        """Yield the upper-chamber committee described by the page at *url*.

        The committee name comes from the single ``<h1>`` in the second
        ``div.row`` (everything from " Committee" onward is removed).  Members
        are the list items of the first list in the third ``div.row``; the role
        is chosen from keywords in the item's text.
        """
        doc = lxml.html.fromstring(self.get(url).text)

        headers = doc.xpath('(//div[@class="row"])[2]//h1')
        assert len(headers) == 1
        title = ' '.join(headers[0].xpath('./text()'))

        com = Organization(
            chamber='upper',
            name=re.sub(r'\s+Committee.*$', '', title),
            classification='committee',
        )

        # Checked in order: 'Vice' alone must come after 'Minority Vice'.
        role_keywords = (
            ('Committee Chair', 'chair'),
            ('Minority Vice', 'minority vice chair'),
            ('Vice', 'majority vice chair'),
        )

        for item in doc.xpath('(//div[@class="row"])[3]/div[1]/ul[1]/li'):
            text = item.text_content()
            member_name = item.xpath('./a/text()')[0].replace('Representative ', '')
            role = next(
                (matched for keyword, matched in role_keywords if keyword in text),
                'member',
            )
            com.add_member(member_name, role=role)

        com.add_source(url)
        yield com
开发者ID:neelneelpurk,项目名称:openstates,代码行数:29,代码来源:committees.py

示例5: scrape_committee

# 需要导入模块: from pupa.scrape import Organization [as 别名]
# 或者: from pupa.scrape.Organization import add_source [as 别名]
    def scrape_committee(self, term, href, name):
        """Yield the committee found at *href*, or nothing if its chamber is unknown.

        The chamber is derived from the URL path ('/joint/', '/senate/',
        '/house/'); any other URL is logged and skipped.  *term* is accepted
        but not used by this method.  A pane title that is not one of the
        three known headings raises KeyError.
        """
        page = lxml.html.fromstring(self.get(href).text)
        page.make_links_absolute(href)
        members = page.xpath("//div[@class='view-content']"
                             "//a[contains(@href, 'members')]")

        chamber_by_fragment = (
            ('/joint/', 'legislature'),
            ('/senate/', 'upper'),
            ('/house/', 'lower'),
        )
        for fragment, chamber in chamber_by_fragment:
            if fragment in href:
                break
        else:
            # interim committees and others were causing duplicate committee issues, skipping
            self.warning('Failed to identify chamber for {}; skipping'.format(href))
            return

        role_by_heading = {
            "Legislative Members": "member",
            "Chairman": "chair",
            "Vice Chairman": "member",
        }

        cttie = Organization(name, chamber=chamber, classification='committee')
        for anchor in members:
            member = anchor.text
            # The role lookup happens before the skip check below, so an
            # unknown heading raises even for links that would be skipped.
            heading = anchor.xpath("ancestor::div/h2[@class='pane-title']/text()")[0].strip()
            role = role_by_heading[heading]

            if member is None or member.startswith("District"):
                continue

            cleaned = member.replace('Senator ', '').replace('Representative ', '')
            cttie.add_member(cleaned, role=role)

        cttie.add_source(href)
        yield cttie
开发者ID:sunlightlabs,项目名称:openstates,代码行数:37,代码来源:committees.py

示例6: scrape_approp_subcommittees

# 需要导入模块: from pupa.scrape import Organization [as 别名]
# 或者: from pupa.scrape.Organization import add_source [as 别名]
    def scrape_approp_subcommittees(self):
        """Yield the Senate appropriations subcommittees as Organizations.

        Every ``<strong>`` element on the subcommittee page names one
        subcommittee, parented to ``self._senate_appropriations``.  The tail
        text of the following element lists its members; a trailing "(C)",
        "(VC)" or "(MVC)" marker sets the role and is removed from the name
        along with the character before it.
        """
        source_url = 'http://www.senate.michigan.gov/committee/appropssubcommittee.html'
        doc = lxml.html.fromstring(self.get(source_url).text)

        suffix_roles = {
            '(C)': 'chairman',
            '(VC)': 'vice chairman',
            '(MVC)': 'minority vice chairman',
        }

        for heading in doc.xpath('//strong'):
            com = Organization(
                name=heading.text.strip(),
                parent_id=self._senate_appropriations,
                classification='committee',
            )
            com.add_source(source_url)

            member_text = heading.getnext().tail.replace('Senators', '').strip()
            for leg in re.split(', | and ', member_text):
                suffix = next((s for s in suffix_roles if leg.endswith(s)), None)
                if suffix is None:
                    role = 'member'
                else:
                    role = suffix_roles[suffix]
                    # Remove the marker and the single character before it.
                    leg = leg[:-len(suffix) - 1]
                com.add_member(leg, role=role)

            yield com
开发者ID:sunlightlabs,项目名称:openstates,代码行数:31,代码来源:committees.py

示例7: scrape_committee

# 需要导入模块: from pupa.scrape import Organization [as 别名]
# 或者: from pupa.scrape.Organization import add_source [as 别名]
    def scrape_committee(self, chamber, name, url):
        """Yield the (sub)committee at *url* unless it has no members.

        A page containing a "Joint Committee" heading overrides *chamber*
        with 'joint'.  When the centered heading mentions "Subcommittee", the
        organization is named after that heading and parented to *name*;
        otherwise it is created directly under *chamber* with *name*.
        """
        page = lxml.html.fromstring(self.get(url).text)

        if page.xpath("//h3[. = 'Joint Committee']"):
            chamber = 'joint'

        heading = page.xpath("//h3[@align='center']/text()")[0]
        if "Subcommittee" in heading:
            comm = Organization(
                name=heading, classification='committee',
                parent_id={'classification': chamber, 'name': name})
        else:
            comm = Organization(
                chamber=chamber, name=name, classification='committee')

        comm.add_source(url)

        for link in page.xpath("//a[contains(@href, 'member=')]"):
            member = link.text.strip()
            # The membership type is the text of the cell before the link's cell.
            raw_type = link.xpath("string(../preceding-sibling::td[1])")
            comm.add_member(member, raw_type.strip(": \r\n\t").lower())

        if not comm._related:
            self.warning('not saving %s, appears to be empty' % name)
            return
        yield comm
开发者ID:neelneelpurk,项目名称:openstates,代码行数:32,代码来源:committees.py

示例8: scrape_lower_committee

# 需要导入模块: from pupa.scrape import Organization [as 别名]
# 或者: from pupa.scrape.Organization import add_source [as 别名]
    def scrape_lower_committee(self, name, url):
        """Build and return the lower-chamber committee *name* from *url*.

        Members are taken from links inside ``div.mod-inner`` whose href
        contains "mem".  The first link in the list is given the 'chair' role
        and all others 'member'.  Links without text are ignored, and each
        member name is added at most once.

        Returns:
            The populated Organization (returned, not yielded).
        """
        page = self.lxmlize(url)

        committee = Organization(chamber='lower', name=name,
                                 classification="committee")
        committee.add_source(url)

        seen = set()

        member_links = self.get_nodes(
            page,
            '//div[@class="mod-inner"]//a[contains(@href, "mem")]')

        for member_link in member_links:
            member_name = member_link.text
            if member_name is None:
                continue

            # Figure out if this person is the chair.
            member_role = 'chair' if member_link == member_links[0] else 'member'

            # De-duplicate on the member's name.  The previous check compared
            # the committee name against the set of member names, so it never
            # filtered anything.
            if member_name not in seen:
                committee.add_member(member_name, member_role)
                seen.add(member_name)

        return committee
开发者ID:sunlightlabs,项目名称:openstates,代码行数:34,代码来源:committees.py

示例9: scrape

# 需要导入模块: from pupa.scrape import Organization [as 别名]
# 或者: from pupa.scrape.Organization import add_source [as 别名]
    def scrape(self):
        """Yield a municipal council Organization and a Person for each councillor.

        Councillors are the ``<strong>`` elements inside paragraphs of the
        ``entry-content`` div on COUNCIL_PAGE.  For every councillor that is
        kept, the Organization is yielded first and the Person afterwards.
        """
        page = self.lxmlize(COUNCIL_PAGE)

        councillors = page.xpath('//div[@class="entry-content"]//p/strong')
        for councillor in councillors:
            # District: text of the nearest preceding <h2>, up to the first dash.
            # NOTE(review): calling .decode('utf-8') on a string literal only
            # works on Python 2 byte strings -- confirm the target interpreter.
            district = councillor.xpath('./ancestor::p/preceding-sibling::h2')[-1].text_content().split('–'.decode('utf-8'))[0]
            # Name: the last two whitespace-separated words of the bold text.
            # NOTE(review): the literal removed here looks like a mis-encoded
            # character sequence -- verify against the live page.
            name = ' '.join(councillor.text_content().split()[-2:]).replace('-Â'.decode('utf-8'), '')
            # Role: whatever remains before the first '-' once the name is removed.
            role = councillor.text_content().replace(name, '').split('-')[0]
            # Skip entries with an empty role or a role containing 'SAO'.
            if 'SAO' in role or not role:
                continue

            org = Organization(name=district + ' Municipal Council', classification='legislature', jurisdiction_id=self.jurisdiction.jurisdiction_id)
            org.add_source(COUNCIL_PAGE)
            yield org

            p = Person(primary_org='legislature', name=name, district=district)
            p.add_source(COUNCIL_PAGE)
            membership = p.add_membership(org, role=role, district=district)

            # Contact details come from the bare text nodes of the enclosing
            # paragraph; one text node may match several of the checks below.
            info = councillor.xpath('./ancestor::p/text()')
            for contact in info:
                # Text containing 'NT' is stored as an address.
                if 'NT' in contact:
                    membership.add_contact_detail('address', contact.strip(), 'legislature')
                if 'Tel' in contact:
                    # Strip the label and rewrite "(xxx) yyy" as "xxx-yyy".
                    contact = contact.replace('Tel. ', '').replace('(', '').replace(') ', '-').strip()
                    membership.add_contact_detail('voice', contact, 'legislature')
                if 'Fax' in contact:
                    # Same rewrite as for the telephone number.
                    contact = contact.replace('Fax ', '').replace('(', '').replace(') ', '-').strip()
                    membership.add_contact_detail('fax', contact, 'legislature')
            email = self.get_email(councillor, './parent::p')
            membership.add_contact_detail('email', email)

            # When the paragraph mentions a website, the second link in the
            # paragraph is added as the person's link.
            if 'Website' in councillor.xpath('./parent::p')[0].text_content():
                p.add_link(councillor.xpath('./parent::p//a')[1].attrib['href'])
            yield p
开发者ID:anukat2015,项目名称:scrapers-ca,代码行数:37,代码来源:people.py

示例10: test_full_organization

# 需要导入模块: from pupa.scrape import Organization [as 别名]
# 或者: from pupa.scrape.Organization import add_source [as 别名]
def test_full_organization():
    """Import a fully populated scraped organization and check every field."""
    org = ScrapeOrganization('United Nations', classification='international')
    org.add_identifier('un')
    org.add_name('UN', start_date='1945')
    org.add_contact_detail(type='phone', value='555-555-1234', note='this is fake')
    org.add_link('http://example.com/link')
    org.add_source('http://example.com/source')

    # Run the scraped organization through the importer.
    OrganizationImporter('jurisdiction-id').import_data([org.as_dict()])

    # Fetch the organization from the db and assert it imported correctly.
    o = Organization.objects.get()
    assert 'ocd-organization' in o.id
    assert o.name == org.name

    identifier = o.identifiers.all()[0]
    assert identifier.identifier == 'un'
    assert identifier.scheme == ''

    other_name = o.other_names.all()[0]
    assert other_name.name == 'UN'
    assert other_name.start_date == '1945'

    contact = o.contact_details.all()[0]
    assert contact.type == 'phone'
    assert contact.value == '555-555-1234'
    assert contact.note == 'this is fake'

    assert o.links.all()[0].url == 'http://example.com/link'
    assert o.sources.all()[0].url == 'http://example.com/source'
开发者ID:Vanuan,项目名称:pupa,代码行数:31,代码来源:test_organization_importer.py

示例11: _scrape_lower_special_committees

# 需要导入模块: from pupa.scrape import Organization [as 别名]
# 或者: from pupa.scrape.Organization import add_source [as 别名]
    def _scrape_lower_special_committees(self):
        """Yield the special committees listed on the House special-committees page.

        Each ``<h3>`` directly inside the first ``div.accordion`` names one
        committee.  A normalized name starting with "Joint" is filed under the
        'legislature' chamber, anything else under 'lower'.
        """
        url = 'http://house.louisiana.gov/H_Cmtes/SpecialCommittees.aspx'
        page = self.lxmlize(url)

        accordion = page.xpath('//div[@class="accordion"]')[0]

        for header in accordion.xpath('./h3'):
            committee_name = self._normalize_committee_name(
                header.xpath('string()').strip())

            if committee_name.startswith('Joint'):
                chamber = 'legislature'
            else:
                chamber = 'lower'

            committee = Organization(committee_name, chamber=chamber,
                                     classification='committee')
            committee.add_source(url)

            # NOTE(review): this selects rows from every following-sibling
            # pane, not only the one right after the header -- confirm
            # against the page structure.
            member_rows = header.xpath('./following-sibling::div[@class="pane"]'
                                       '//tr[@class="linkStyle2"]')

            for row in member_rows:
                member_name = self._normalize_member_name(
                    row.xpath('normalize-space(string(./th[1]))'))
                member_role = self._normalize_member_role(
                    row.xpath('normalize-space(string(./th[2]))'))
                committee.add_member(member_name, member_role)

            yield committee
开发者ID:neelneelpurk,项目名称:openstates,代码行数:33,代码来源:committees.py

示例12: scrape_page

# 需要导入模块: from pupa.scrape import Organization [as 别名]
# 或者: from pupa.scrape.Organization import add_source [as 别名]
    def scrape_page(self, link, chamber=None):
        """Yield the committee that *link* points to.

        The committee is named after the link text and sourced from its href.
        Roster items with an empty name are skipped.  An item with a
        ``member-role`` span must carry one of the known role labels
        (otherwise KeyError); items without the span are plain members.
        """
        href = link.attrib['href']
        page = self.lxmlize(href)

        roles = {
            "Chair": "chair",
            "Vice Chair": "vice-chair",
            "Vice-Chair": "vice-chair",
        }

        committee = Organization(link.text,
                                 chamber=chamber,
                                 classification='committee')
        committee.add_source(href)

        roster = page.xpath('//div[@class="members"]/' +
                            'div[@class="roster-item"]')
        for entry in roster:
            details = entry.xpath('.//div[@class="member-details"]')[0]
            raw_name = details.xpath('./h4')[0].text_content()
            # This page does random weird things with whitespace in names
            person = ' '.join(raw_name.strip().split())
            if not person:
                continue

            role_nodes = details.xpath('./span[@class="member-role"]')
            role = roles[role_nodes[0].text] if role_nodes else 'member'
            committee.add_member(person, role=role)

        yield committee
开发者ID:sunlightlabs,项目名称:openstates,代码行数:30,代码来源:committees.py

示例13: scrape_chamber

# 需要导入模块: from pupa.scrape import Organization [as 别名]
# 或者: from pupa.scrape.Organization import add_source [as 别名]
    def scrape_chamber(self, chamber, session):
        """Yield the active committees of *chamber* for the biennium *session*.

        Committees whose agency maps to a different chamber are skipped; an
        agency other than 'House' or 'Senate' raises KeyError.  Committees
        left without related objects after ``scrape_members`` are logged and
        not yielded.
        """
        url = "%s/GetActiveCommittees?biennium=%s" % (self._base_url, session)
        page = lxml.etree.fromstring(self.get(url).content)

        chamber_by_agency = {'House': 'lower', 'Senate': 'upper'}

        for node in xpath(page, "//wa:Committee"):
            agency = xpath(node, "string(wa:Agency)")
            if chamber_by_agency[agency] != chamber:
                continue

            name = xpath(node, "string(wa:Name)")
            phone = xpath(node, "string(wa:Phone)")

            committee = Organization(name, chamber=chamber, classification='committee')
            committee.extras['phone'] = phone
            self.scrape_members(committee, agency)
            committee.add_source(url)

            if committee._related:
                yield committee
            else:
                self.warning('empty committee: %s', name)
开发者ID:cliftonmcintosh,项目名称:openstates,代码行数:27,代码来源:committees.py

示例14: _scrape_upper_committee

# 需要导入模块: from pupa.scrape import Organization [as 别名]
# 或者: from pupa.scrape.Organization import add_source [as 别名]
    def _scrape_upper_committee(self, name, url2):
        """Yield the upper-chamber committee *name* with its member roster.

        The roster is read from the "Assignments.asp" page obtained by
        replacing "default.asp" in *url2*; *url2* itself is recorded as the
        source.  Roles are positional: the first listed member is the
        "Chairman", the second the "Vice-Chairman", all others "member".
        """
        url3 = url2.replace("default.asp", "Assignments.asp")

        committee = Organization(name,
                                 chamber="upper",
                                 classification="committee"
                                 )
        committee.add_source(url2)

        page = self.lxmlize(url3)

        members = page.xpath('//table[@id="table38"]//font/a/b')

        # Index-based role assignment: a roster with a single member no
        # longer raises IndexError, as the old ``members[1]`` comparison did.
        positional_roles = ("Chairman", "Vice-Chairman")

        for index, link in enumerate(members):
            if index < len(positional_roles):
                role = positional_roles[index]
            else:
                role = "member"

            # A separate local avoids rebinding the ``name`` parameter.
            member_name = link.xpath('string()').replace('Senator ', '')
            # Raw string: '\s' in a plain literal is an invalid escape sequence.
            member_name = re.sub(r'\s{2,}', ' ', member_name).strip()

            committee.add_member(member_name, role)

        yield committee
开发者ID:neelneelpurk,项目名称:openstates,代码行数:30,代码来源:committees.py

示例15: scrape_comm

# 需要导入模块: from pupa.scrape import Organization [as 别名]
# 或者: from pupa.scrape.Organization import add_source [as 别名]
    def scrape_comm(self, chamber):
        """Yield every committee listed in the membership XML for *chamber*.

        *chamber* is the code used in the feed URL; 'h' is mapped to the
        "lower" chamber and any other value to "upper".  The CHAIR and
        VICE_CHAIR elements, when non-empty, are added with the roles
        "Chairman" and "Vice-Chairman".  MEMBERS is a ';'-separated list whose
        entries are stripped and added with the default role.
        """
        url = 'http://billstatus.ls.state.ms.us/htms/%s_cmtememb.xml' % chamber
        comm_page = self.get(url)
        root = lxml.etree.fromstring(comm_page.content)
        chamber = "lower" if chamber == 'h' else "upper"

        for mr in root.xpath('//COMMITTEE'):
            name = mr.xpath('string(NAME)')
            comm = Organization(name,
                                chamber=chamber,
                                classification='committee'
                                )

            chair = mr.xpath('string(CHAIR)').replace(", Chairman", "")
            if chair:
                comm.add_member(chair, role="Chairman")

            vice_chair = mr.xpath('string(VICE_CHAIR)').replace(", Vice-Chairman", "")
            if vice_chair:
                comm.add_member(vice_chair, role="Vice-Chairman")

            for leg in mr.xpath('string(MEMBERS)').split(";"):
                leg = leg.strip()
                # Skip every blank entry.  Previously only the first
                # exactly-empty entry was removed, so whitespace-only or
                # repeated empty entries were added as nameless members.
                if not leg:
                    continue
                comm.add_member(leg)

            comm.add_source(url)
            yield comm
开发者ID:cliftonmcintosh,项目名称:openstates,代码行数:36,代码来源:committees.py


注:本文中的pupa.scrape.Organization.add_source方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。