

Python Organization.add_link Method Code Examples

This article collects typical usage examples of the pupa.scrape.Organization.add_link method in Python. If you are wondering what Organization.add_link does, how to call it, or what real-world uses look like, the curated examples below should help. You can also explore further usage examples of the containing class, pupa.scrape.Organization.


Seven code examples of the Organization.add_link method are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
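
As a quick orientation before the examples, here is a minimal sketch of the typical call pattern: construct a pupa Organization, attach one or more links with add_link, and record a source. The name, classification, and URLs are placeholder values rather than data taken from any of the projects quoted below.

from pupa.scrape import Organization

# Minimal sketch with placeholder values: build an organization, then attach a link and a source.
org = Organization(name="Example Committee", classification="committee")
org.add_link("http://example.com/committee", note="Official website")  # user-facing link
org.add_source("http://example.com/committee")  # where the data was scraped from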

Example 1: test_full_organization

# Required import: from pupa.scrape import Organization [as alias]
# Or: from pupa.scrape.Organization import add_link [as alias]
def test_full_organization():
    org = ScrapeOrganization('United Nations', classification='international')
    org.add_identifier('un')
    org.add_name('UN', start_date='1945')
    org.add_contact_detail(type='phone', value='555-555-1234', note='this is fake')
    org.add_link('http://example.com/link')
    org.add_source('http://example.com/source')

    # import org
    od = org.as_dict()
    OrganizationImporter('jurisdiction-id').import_data([od])

    # get the organization from the db and assert it imported correctly
    o = Organization.objects.get()
    assert 'ocd-organization' in o.id
    assert o.name == org.name

    assert o.identifiers.all()[0].identifier == 'un'
    assert o.identifiers.all()[0].scheme == ''

    assert o.other_names.all()[0].name == 'UN'
    assert o.other_names.all()[0].start_date == '1945'

    assert o.contact_details.all()[0].type == 'phone'
    assert o.contact_details.all()[0].value == '555-555-1234'
    assert o.contact_details.all()[0].note == 'this is fake'

    assert o.links.all()[0].url == 'http://example.com/link'
    assert o.sources.all()[0].url == 'http://example.com/source'
Developer: Vanuan, Project: pupa, Lines of code: 31, Source: test_organization_importer.py

Example 2: scrape_committees

# Required import: from pupa.scrape import Organization [as alias]
# Or: from pupa.scrape.Organization import add_link [as alias]
    def scrape_committees(self, repos):
        for repo in repos:
            source = "https://raw.githubusercontent.com/unitedstates/congress-legislators/master/{0}".format(repo)
            committees = self.fetch_yaml(source)
            for committee in committees:
                org = Organization(committee["name"], classification="committee")

                org.add_source(source)

                for key in committee.keys() & {"url", "rss_url"}:
                    org.add_link(committee[key])

                for key in committee.keys() & {"phone", "address"}:
                    if key == "phone":
                        org.add_contact_detail(type="voice", value=committee[key])
                    else:
                        org.add_contact_detail(type=key, value=committee[key])

                for key in committee.keys() & {"senate_committee_id", "house_committee_id", "thomas_id"}:
                    org.add_identifier(committee[key], scheme=key)

                if "subcommittees" in committee:
                    for subcommittee in committee["subcommittees"]:
                        sub_org = Organization(subcommittee["name"], classification="committee", parent_id=org._id)

                        sub_org.add_identifier(subcommittee["thomas_id"], scheme="thomas")
                        sub_org.add_source(source)

                        for key in subcommittee.keys() & {"phone", "address"}:
                            if key == "phone":
                                sub_org.add_contact_detail(type="voice", value=subcommittee[key])
                            else:
                                sub_org.add_contact_detail(type=key, value=subcommittee[key])

                        yield sub_org

                yield org
Developer: influence-usa, Project: scrapers-us-federal, Lines of code: 37, Source: committee.py

Example 3: _scrape_committee

# Required import: from pupa.scrape import Organization [as alias]
# Or: from pupa.scrape.Organization import add_link [as alias]
    def _scrape_committee(self, committee_name, link, chamber):
        """Scrape individual committee page and add members"""

        page = self.get(link).text
        page = lxml.html.fromstring(page)
        page.make_links_absolute(link)

        is_subcommittee = bool(page.xpath('//li/a[text()="Committee"]'))
        if is_subcommittee:
            # All TN subcommittees are just the name of the parent committee with " Subcommittee"
            # at the end
            parent_committee_name = re.sub(r'\s*(Study )?Subcommittee\s*', '', committee_name)
            com = Organization(
                    committee_name,
                    classification='committee',
                    parent_id=self.parents[parent_committee_name]
                    )
        else:
            com = Organization(
                committee_name,
                chamber=chamber,
                classification='committee',
            )
            self.parents[committee_name] = com._id

        OFFICER_SEARCH = '//h2[contains(text(), "Committee Officers")]/' \
                         'following-sibling::div/ul/li/a'
        MEMBER_SEARCH = '//h2[contains(text(), "Committee Members")]/' \
                        'following-sibling::div/ul/li/a'
        for a in (page.xpath(OFFICER_SEARCH) + page.xpath(MEMBER_SEARCH)):

            member_name = ' '.join([
                    x.strip() for x in
                    a.xpath('text()') + a.xpath('span/text()')
                    if x.strip()
                    ])
            role = a.xpath('small')
            if role:
                role = role[0].xpath('text()')[0].strip()
            else:
                role = 'member'
            if '(Vacant)' in role:
                continue

            com.add_member(member_name, role)

        com.add_link(link)
        com.add_source(link)
        return com
Developer: sunlightlabs, Project: openstates, Lines of code: 51, Source: committees.py

Example 4: scrape

# Required import: from pupa.scrape import Organization [as alias]
# Or: from pupa.scrape.Organization import add_link [as alias]
    def scrape(self):
        com_url = 'http://dccouncil.us/committees'
        data = self.get(com_url).text
        doc = lxml.html.fromstring(data)
        doc.make_links_absolute(com_url)

        comms = set(
            doc.xpath('//a[contains(@href, "dccouncil.us/committees/")]'))

        for committee in comms:
            url = committee.attrib['href']
            name = committee.text_content().strip()
            comm_data = self.get(url).text
            comm_page = lxml.html.fromstring(comm_data)
            comm_page.make_links_absolute(url)

            # classify these as belonging to the legislature
            committee = Organization(name=name, classification='committee',
                                     chamber='legislature')

            if comm_page.xpath('//p[@class="page-summary"]'):
                summary = comm_page.xpath(
                    '//p[@class="page-summary"]')[0].text_content().strip()
                committee.extras['summary'] = summary

            chair = comm_page.xpath(
                "//h4[text()='Chairperson']/following-sibling::p")
            chair_name = chair[0].text_content().strip()
            chair_name = self.remove_title(chair_name)
            committee.add_member(chair_name, role="chair")

            members = comm_page.xpath(
                "//h4[text()='Councilmembers']/following-sibling::ul")
            members = members[0].xpath("./li")

            for m in members:
                mem_name = m.text_content().strip()
                mem_name = self.remove_title(mem_name)
                if mem_name != chair_name:
                    committee.add_member(mem_name)

            committee.add_source(url)
            committee.add_link(url, note='Official Website')

            if not committee._related:
                self.warning('empty committee: %s;', name)
            else:
                yield committee
Developer: sunlightlabs, Project: openstates, Lines of code: 50, Source: committees.py

Example 5: get_organizations

# Required import: from pupa.scrape import Organization [as alias]
# Or: from pupa.scrape.Organization import add_link [as alias]
    def get_organizations(self):

        secretary_of_the_commonwealth = Organization(
            name="Office of the Secretary of the Commonwealth, Commonwealth of Virginia",
            classification="office"
        )
        secretary_of_the_commonwealth.add_contact_detail(
            type="voice",
            value="804-786-2441"
        )
        secretary_of_the_commonwealth.add_contact_detail(
            type="address",
            value="1111 East Broad Street, 4th Floor, Richmond, Virginia 23219"
        )
        secretary_of_the_commonwealth.add_link(
            url="https://commonwealth.virginia.gov/",
            note="Home page"
        )

        self._secretary_of_the_commonwealth = secretary_of_the_commonwealth

        yield secretary_of_the_commonwealth
Developer: TomBaxter, Project: scrapers-us-state, Lines of code: 24, Source: __init__.py

Example 6: scrape_joint_committee

# Required import: from pupa.scrape import Organization [as alias]
# Or: from pupa.scrape.Organization import add_link [as alias]
    def scrape_joint_committee(self, committee_name, url):
        if 'state.tn.us' in url:
            com = Organization(
                committee_name,
                chamber='joint',
                classification='committee',
            )
            try:
                page = self.get(url).text
            except requests.exceptions.ConnectionError:
                self.logger.warning("Committee link is broken, skipping")
                return

            page = lxml.html.fromstring(page)

            for el in page.xpath(
                "//div[@class='Blurb']/table//tr[2 <= position() and  position() < 10]/td[1]"
            ):
                if el.xpath('text()') == ['Vacant']:
                    continue

                (member_name, ) = el.xpath('a/text()')
                if el.xpath('text()'):
                    role = el.xpath('text()')[0].strip(' ,')
                else:
                    role = 'member'

                member_name = member_name.replace('Senator', '')
                member_name = member_name.replace('Representative', '')
                member_name = member_name.strip()
                com.add_member(member_name, role)

            com.add_link(url)
            com.add_source(url)
            return com

        elif 'gov-opps' in url:
            com = Organization(
                committee_name,
                chamber='joint',
                classification='committee',
            )
            page = self.get(url).text
            page = lxml.html.fromstring(page)

            links = ['senate', 'house']
            for link in links:
                chamber_link = self.base_href + '/' + link + '/committees/gov-opps.html'
                chamber_page = self.get(chamber_link).text
                chamber_page = lxml.html.fromstring(chamber_page)

                OFFICER_SEARCH = '//h2[contains(text(), "Committee Officers")]/' \
                                 'following-sibling::div/ul/li/a'
                MEMBER_SEARCH = '//h2[contains(text(), "Committee Members")]/' \
                                'following-sibling::div/ul/li/a'
                for a in (
                        chamber_page.xpath(OFFICER_SEARCH) +
                        chamber_page.xpath(MEMBER_SEARCH)
                        ):
                    member_name = ' '.join([
                            x.strip() for x in
                            a.xpath('.//text()')
                            if x.strip()
                            ])
                    role = a.xpath('small')
                    if role:
                        role = role[0].xpath('text()')[0].strip()
                        member_name = member_name.replace(role, '').strip()
                    else:
                        role = 'member'
                    com.add_member(member_name, role)

                com.add_source(chamber_link)

            com.add_link(url)
            com.add_source(url)
            return com

        else:
            return self._scrape_committee(committee_name, url, 'joint')
Developer: neelneelpurk, Project: openstates, Lines of code: 82, Source: committees.py

Example 7: get_organizations

# Required import: from pupa.scrape import Organization [as alias]
# Or: from pupa.scrape.Organization import add_link [as alias]
    def get_organizations(self):
        legislature = Organization("United States Congress",
                                   classification='legislature')

        self._legislature = legislature

        yield legislature

        senate = Organization(
            name="United States Senate",
            classification='upper',
            parent_id=legislature._id,
        )

        self._senate = senate

        yield senate

        house = Organization(
            name="United States House",
            classification='lower',
            parent_id=legislature._id,
        )

        self._house = house

        yield house

        sopr = Organization(
            name="Office of Public Record, US Senate",
            classification="office",
            parent_id=senate._id,
        )

        sopr.add_contact_detail(type="voice",
                                value="202-224-0322")

        sopr.add_source(url="http://www.senate.gov/pagelayout/legislative/"
                            "one_item_and_teasers/opr.htm",
                        note="Profile page")

        sopr.add_source(url="http://www.senate.gov/pagelayout/legislative/"
                            "g_three_sections_with_teasers/lobbyingdisc.htm"
                            "#lobbyingdisc=lda",
                        note="Disclosure Home")

        sopr.add_link(url="http://soprweb.senate.gov/index.cfm"
                          "?event=selectfields",
                      note="Disclosure Search Portal")

        sopr.add_link(url="http://soprweb.senate.gov/",
                      note="Disclosure Electronic Filing System")

        self._sopr = sopr

        yield sopr

        house_clerk = Organization(
            name="Office of the Clerk, US House",
            classification="office",
            parent_id=house._id,
        )

        house_clerk.add_contact_detail(type="voice",
                                       value="202-225-7000")

        house_clerk.add_source(url="http://clerk.house.gov/",
                               note="Home page")

        self._house_clerk = house_clerk

        yield house_clerk
        yield legislature
Developer: influence-usa, Project: scrapers-us-federal, Lines of code: 75, Source: __init__.py


Note: The pupa.scrape.Organization.add_link method examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets are drawn from open-source projects contributed by their respective developers, and copyright of the source code remains with the original authors. Please consult each project's license before distributing or using the code, and do not republish without permission.