当前位置: 首页>>代码示例>>Python>>正文


Python Event.add_agenda_item方法代码示例

本文整理汇总了Python中pupa.scrape.Event.add_agenda_item方法的典型用法代码示例。如果您正苦于以下问题:Python Event.add_agenda_item方法的具体用法?Python Event.add_agenda_item怎么用?Python Event.add_agenda_item使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在pupa.scrape.Event的用法示例。


在下文中一共展示了Event.add_agenda_item方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: scrape_meeting_notice

# 需要导入模块: from pupa.scrape import Event [as 别名]
# 或者: from pupa.scrape.Event import add_agenda_item [as 别名]
    def scrape_meeting_notice(self, chamber, item, url):
        """Turn one Delaware committee meeting-notice JSON item into an Event.

        Arguments:
        chamber -- legislative chamber (unused, kept for caller compatibility)
        item -- dict decoded from the MeetingNotice JSON feed
        url -- URL the item was fetched from (recorded as a source)
        """
        # An event name is not provided for all meetings, so fall back to
        # the committee name.
        event_name = str(item['CommitteeName'])
        # Timestamps look like "04/25/2012 03:00:00 PM": a four-digit year
        # and a seconds field, so the format needs %Y and :%S (the previous
        # "%m/%d/%y %I:%M %p" pattern raised ValueError on such values).
        fmt = "%m/%d/%Y %I:%M:%S %p"
        start_time = dt.datetime.strptime(str(item['MeetingDateTime']), fmt)
        location_name = str(item['AddressAliasNickname'])
        event = Event(location_name=location_name,
                      start_date=self._tz.localize(start_time),
                      name=event_name,
                      description='Committee Meeting Status: {}'
                      .format(item['CommitteeMeetingStatusName'])
                      )

        event.add_source(url)
        event.add_committee(name=str(item['CommitteeName']), id=item['CommitteeId'])

        page_url = ("http://legis.delaware.gov/json/MeetingNotice/"
                    "GetCommitteeMeetingItems?committeeMeetingId={}".format(
                        item['CommitteeMeetingId'])
                    )

        event.add_source(page_url)
        page_data = self.post(page_url).json()['Data']
        # Distinct loop name so the `item` parameter is not shadowed while
        # iterating the agenda rows.
        for agenda_row in page_data:
            event.add_agenda_item(description=str(agenda_row['ItemDescription']))
            event.add_person(name=str(agenda_row['PrimarySponsorShortName']),
                             id=str(agenda_row['PrimarySponsorPersonId']),
                             note='Sponsor')

        yield event
开发者ID:sunlightlabs,项目名称:openstates,代码行数:33,代码来源:events.py

示例2: scrape_meetings

# 需要导入模块: from pupa.scrape import Event [as 别名]
# 或者: from pupa.scrape.Event import add_agenda_item [as 别名]
    def scrape_meetings(self, meetings, group):
        """
        Scrape and save event data from a list of meetings.

        Arguments:
        meetings -- A list of lxml elements containing event information
        group -- The type of meeting. The legislature site applies
                 different formatting to events based on which group
                 they correspond to.  `group` should be one of the
                 following strings: 'house', 'senate', or 'commission'.

        """
        for meeting in meetings:
            when = self.get_date(meeting)
            description = self.get_description(meeting)
            location = self.get_location(meeting)

            # Only emit an event when all three pieces were extracted.
            if when and description and location:
                event = Event(name=description, start_date=when.replace(tzinfo=self.tz),
                              description=description,
                              location_name=location)
                agenda = self.get_agenda(meeting)
                if agenda:
                    event.add_agenda_item(agenda)
                # NOTE(review): `url` is not defined anywhere in this method;
                # unless it is a module-level global, this line raises
                # NameError. It presumably should be the page URL the
                # meetings were scraped from -- confirm against the caller.
                event.add_source(url)
                yield event
开发者ID:sunlightlabs,项目名称:openstates,代码行数:28,代码来源:events.py

示例3: parse_div

# 需要导入模块: from pupa.scrape import Event [as 别名]
# 或者: from pupa.scrape.Event import add_agenda_item [as 别名]
    def parse_div(self, row, chamber, com):
        """Parse one Maryland hearing-schedule row into an Event.

        Arguments:
        row -- lxml element for one hearing row
        chamber -- legislative chamber (unused, kept for caller compatibility)
        com -- committee name used as host when the title is not a subcommittee
        """
        cal_link = row.xpath('.//a[.//span[@id="calendarmarker"]]/@href')[0]
        # event_date = row.xpath('string(.//div[contains(@class,"ItemDate")])').strip()
        title, location, start_date, end_date = self.parse_gcal(cal_link)

        event = Event(
            start_date=start_date,
            end_date=end_date,
            name=title,
            location_name=location,
        )

        event.add_source('http://mgaleg.maryland.gov/webmga/frmHearingSchedule.aspx')

        # Plain agenda items with no attached document or bill.
        for item in row.xpath('.//div[@class="col-xs-12a Item"]'):
            description = item.xpath('string(.)').strip()
            event.add_agenda_item(description=description)

        # Agenda items carrying a linked PDF document.
        for item in row.xpath('.//div[contains(@class,"ItemContainer")]/a'):
            description = item.xpath('string(.)').strip()
            event.add_agenda_item(description=description)

            event.add_document(
                description,
                item.xpath('@href')[0],
                media_type="application/pdf",
                on_duplicate="ignore"
            )

        # Agenda items that reference a bill; only here is the returned
        # agenda-item object actually needed, to attach the bill id.
        for item in row.xpath('.//div[contains(@class,"ItemContainer")]'
                              '[./div[@class="col-xs-1 Item"]]'):
            description = item.xpath('string(.)').strip()
            agenda = event.add_agenda_item(description=description)

            bill = item.xpath('.//div[@class="col-xs-1 Item"]/a/text()')[0].strip()
            agenda.add_bill(bill)

        video = row.xpath('.//a[./span[@class="OnDemand"]]')
        if video:
            event.add_media_link(
                'Video of Hearing',
                video[0].xpath('@href')[0],
                'text/html'
            )

        # Subcommittee hearings are hosted by the subcommittee named in the
        # title; otherwise the parent committee (`com`) hosts.
        if 'subcommittee' in title.lower():
            subcom = title.split('-')[0].strip()
            event.add_participant(
                subcom,
                type='committee',
                note='host',
            )
        else:
            event.add_participant(
                com,
                type='committee',
                note='host',
            )
        yield event
开发者ID:sunlightlabs,项目名称:openstates,代码行数:61,代码来源:events.py

示例4: scrape_upper

# 需要导入模块: from pupa.scrape import Event [as 别名]
# 或者: from pupa.scrape.Event import add_agenda_item [as 别名]
    def scrape_upper(self):
        """Yield Events for upcoming Missouri Senate committee hearings."""
        listing_url = 'https://www.senate.mo.gov/hearingsschedule/hrings.htm'

        html = self.get(listing_url).text

        # The page has no per-event container element, which makes xpath
        # awkward. Split the raw markup on <hr /> and parse each hearing's
        # fragment independently for cleaner results.
        fragments = html.split('<hr />')[1:]
        for fragment in fragments:
            page = lxml.html.fromstring(fragment)

            hearing_date = self.row_content(page, 'Date:')
            hearing_time = self.row_content(page, 'Time:')
            room = self.row_content(page, 'Room:')

            location = '{}, {}'.format(
                room,
                '201 W Capitol Ave, Jefferson City, MO 65101'
            )

            # com = self.row_content(page, 'Committee:')
            com = page.xpath('//td[descendant::b[contains(text(),"Committee")]]/a/text()')[0]
            com = com.split(', Senator')[0].strip()

            start_date = self._TZ.localize(
                dateutil.parser.parse('{} {}'.format(hearing_date, hearing_time))
            )

            event = Event(
                name=com,
                start_date=start_date,
                location_name=location,
            )
            event.add_source(listing_url)
            event.add_participant(
                com,
                type='committee',
                note='host',
            )

            # Each agenda entry sits in its own small table; entries with a
            # bill link get that bill attached to the agenda item.
            for bill_table in page.xpath('//table[@width="85%" and @border="0"]'):
                linked_bills = bill_table.xpath(self.bill_link_xpath)
                if linked_bills:
                    agenda_line = bill_table.xpath('string(tr[2])').strip()
                    agenda_item = event.add_agenda_item(description=agenda_line)
                    agenda_item.add_bill(linked_bills[0].strip())
                else:
                    agenda_line = bill_table.xpath('string(tr[1])').strip()
                    event.add_agenda_item(description=agenda_line)

            yield event
开发者ID:sunlightlabs,项目名称:openstates,代码行数:57,代码来源:events.py

示例5: scrape_event_page

# 需要导入模块: from pupa.scrape import Event [as 别名]
# 或者: from pupa.scrape.Event import add_agenda_item [as 别名]
    def scrape_event_page(self, session, chamber, url, datetime):
        """Scrape a single Texas hearing-notice page into an Event.

        Arguments:
        session -- legislative session (unused, kept for caller compatibility)
        chamber -- chamber the hearing belongs to (unused after parsing)
        url -- hearing notice URL
        datetime -- naive datetime of the hearing, localized via self._tz
        """
        page = self.lxmlize(url)
        info = page.xpath("//p")
        metainfo = {}
        plaintext = ""
        # Collapse the notice's paragraphs into "KEY: value" metadata plus
        # a flattened plain-text body used for bill-number extraction.
        # (raw strings for the regexes -- "\s" in a plain string is an
        # invalid escape sequence)
        for p in info:
            content = re.sub(r"\s+", " ", p.text_content())
            plaintext += content + "\n"
            if ":" in content:
                key, val = content.split(":", 1)
                metainfo[key.strip()] = val.strip()
        committee = metainfo['COMMITTEE']
        where = metainfo['PLACE']
        # The chair's name is sometimes appended to the PLACE field.
        if "CHAIR" in where:
            where, chair = where.split("CHAIR:")
            metainfo['PLACE'] = where.strip()
            metainfo['CHAIR'] = chair.strip()

        chair = None
        if "CHAIR" in metainfo:
            chair = metainfo['CHAIR']

        plaintext = re.sub(r"\s+", " ", plaintext).strip()
        # Bill ids look like "SB 123", "HJR 45", etc.
        regexp = r"(S|J|H)(B|M|R) (\d+)"
        bills = re.findall(regexp, plaintext)

        event = Event(
            name=committee,
            start_date=self._tz.localize(datetime),
            location_name=where
        )

        event.add_source(url)
        event.add_participant(committee, type='committee', note='host')
        if chair is not None:
            event.add_participant(chair, type='legislator', note='chair')

        # Loop names chosen so the `chamber` parameter and the builtin
        # `type` are not shadowed.
        for bill in bills:
            bill_chamber, bill_type, number = bill
            bill_id = "%s%s %s" % (bill_chamber, bill_type, number)
            item = event.add_agenda_item('Bill up for discussion')
            item.add_bill(bill_id)

        event.add_agenda_item(plaintext)

        yield event
开发者ID:neelneelpurk,项目名称:openstates,代码行数:48,代码来源:events.py

示例6: scrape_event_page

# 需要导入模块: from pupa.scrape import Event [as 别名]
# 或者: from pupa.scrape.Event import add_agenda_item [as 别名]
    def scrape_event_page(self, url, event_type):
        """Build an Event from one MA hearing/special-event detail page."""
        page = self.lxmlize(url)
        page.make_links_absolute('https://malegislature.gov/')

        raw_title = page.xpath('string(//div[contains(@class,"followable")]/h1)')
        raw_title = raw_title.replace('Hearing Details', '').strip()
        title = raw_title.replace('Special Event Details', '')

        # The event-information <dl> lists day, time, location, and
        # description in fixed <dd> positions.
        start_day = page.xpath('string(//dl[contains(@class,"eventInformation")]/dd[2])').strip()
        start_time = page.xpath('string(//dl[contains(@class,"eventInformation")]/dd[3])').strip()

        location = page.xpath('string(//dl[contains(@class,"eventInformation")]/dd[4]//a)').strip()

        description = page.xpath('string(//dl[contains(@class,"eventInformation")]/dd[5])').strip()

        # Combine the separate day and time fields into one aware datetime.
        start_date = self._TZ.localize(
            dateutil.parser.parse(
                '{} {}'.format(start_day, start_time),
            )
        )

        event = Event(
            name=title,
            start_date=start_date,
            description=description,
            location_name=location,
        )
        event.add_source(url)

        agenda_rows = page.xpath(
            '//div[contains(@class,"col-sm-8") and .//h2[contains(@class,"agendaHeader")]]'
            '/div/div/div[contains(@class,"panel-default")]')

        for row in agenda_rows:
            # Select only the heading's own text node, not its child spans.
            heading = row.xpath('string(.//h4/a/text()[normalize-space()])').strip()
            if not heading:
                heading = row.xpath('string(.//h4/text()[normalize-space()])').strip()

            agenda = event.add_agenda_item(description=heading)

            for bill in row.xpath('.//tbody/tr/td[1]/a/text()'):
                agenda.add_bill(bill.strip().replace('.', ' '))

        # Hearings are hosted by the committee named in the title.
        if event_type == 'Hearing':
            event.add_participant(
                title,
                type='committee',
                note='host',
            )

        yield event
开发者ID:sunlightlabs,项目名称:openstates,代码行数:58,代码来源:events.py

示例7: scrape

# 需要导入模块: from pupa.scrape import Event [as 别名]
# 或者: from pupa.scrape.Event import add_agenda_item [as 别名]
    def scrape(self):
        """Yield Events from the DC Council public calendar."""
        calendar_url = "http://dccouncil.us/calendar"
        data = self.get(calendar_url).text
        doc = lxml.html.fromstring(data)

        committee_regex = re.compile("(Committee .*?)will")

        for node in doc.xpath("//div[@class='event-description-dev']"):
            place_and_time = node.xpath(".//div[@class='event-description-dev-metabox']/p/text()")
            when = " ".join([place_and_time[0].strip(), place_and_time[1].strip()])
            # A third metabox line, when present, is the location.
            location = place_and_time[2] if len(place_and_time) > 2 else "unknown"
            # `when` is now of the following format:
            # Wednesday, 2/25/2015 9:30am
            when = datetime.datetime.strptime(when, "%A, %m/%d/%Y %I:%M%p")
            description_content = node.xpath(".//div[@class='event-description-content-dev']")[0]
            description_lines = description_content.xpath("./*")
            name = description_lines[0].text_content()
            desc_without_title = " ".join(d.text_content() for d in description_lines[1:])
            description = re.sub(r'\s+', " ", description_content.text_content()).strip()
            potential_bills = description_content.xpath(".//li")

            # A "Committee ... will" phrase marks a committee meeting.
            match = committee_regex.search(desc_without_title)
            committee = match.group(1).strip() if match is not None else None
            event_type = 'committee:meeting' if committee is not None else 'other'

            e = Event(name=name,
                      description=description,
                      start_date=self._tz.localize(when),
                      location_name=location,
                      classification=event_type,
                      )

            for b in potential_bills:
                links = b.xpath("./a/text()")
                if not links:
                    continue
                bill = links[0]
                bill_desc = b.text_content().replace(bill, "").strip(", ").strip()
                # Rebuild the id as "<session>-<zero-padded number>".
                ses, num = bill.split("-")
                bill = ses.replace(" ", "") + "-" + num.zfill(4)
                item = e.add_agenda_item(bill_desc)
                item.add_bill(bill)

            e.add_source(calendar_url)

            if committee:
                e.add_participant(committee, type='organization', note='host')

            yield e
开发者ID:sunlightlabs,项目名称:openstates,代码行数:57,代码来源:events.py

示例8: scrape

# 需要导入模块: from pupa.scrape import Event [as 别名]
# 或者: from pupa.scrape.Event import add_agenda_item [as 别名]
    def scrape(self, chamber=None):
        """Yield Events from the Utah legislature's Granicus agenda RSS feed."""
        URL = 'http://utahlegislature.granicus.com/ViewPublisherRSS.php?view_id=2&mode=agendas'
        doc = self.lxmlize(URL)

        for info in doc.xpath('//item'):
            # Feed titles look like "<name> - <date>".
            title_and_date = info.xpath('title/text()')[0].split(" - ")
            title = title_and_date[0]
            when = title_and_date[-1]
            # if not when.endswith(session[ :len("20XX")]):
            #    continue

            when_dt = datetime.datetime.strptime(when, '%b %d, %Y')
            event = Event(name=title,
                          start_date=self._tz.localize(when_dt),
                          location_name='State Capitol')
            event.add_source(URL)

            url = re.search(r'(http://.*?)\s', info.text_content()).group(1)
            try:
                agenda_page = self.lxmlize(url)
            except HTTPError:
                self.logger.warning("Page missing, skipping")
                continue
            event.add_source(url)

            committee = agenda_page.xpath('//a[text()="View committee page"]/@href')
            if committee:
                committee_doc = self.lxmlize(committee[0])
                committee_name = committee_doc.xpath(
                        '//h3[@class="heading committee"]/text()')[0].strip()
                event.add_participant(committee_name, type='committee',
                                      note='host')

            # Each agenda cell links (via onclick) to a PDF document; any
            # static bill links inside become agenda items.
            for document in agenda_page.xpath('.//td'):
                pdf_match = re.search(r'(http://.*?pdf)', document.xpath('@onclick')[0])
                if pdf_match is None:
                    continue
                event.add_document(
                        note=document.xpath('text()')[0],
                        url=pdf_match.group(1),
                        media_type='application/pdf'
                        )
                for bill in document.xpath('@onclick'):
                    if "bills/static" in bill:
                        bill_name = bill.split("/")[-1].split(".")[0]
                        item = event.add_agenda_item('Bill up for discussion')
                        item.add_bill(bill_name)
            yield event
开发者ID:neelneelpurk,项目名称:openstates,代码行数:55,代码来源:events.py

示例9: scrape

# 需要导入模块: from pupa.scrape import Event [as 别名]
# 或者: from pupa.scrape.Event import add_agenda_item [as 别名]
    def scrape(self):
        """Yield Events for Hawaii committee hearings."""
        tz = pytz.timezone("US/Eastern")
        get_short_codes(self)
        page = self.lxmlize(URL)
        table = page.xpath(
            "//table[@id='ctl00_ContentPlaceHolderCol1_GridView1']")[0]

        for event in table.xpath(".//tr")[1:]:
            tds = event.xpath("./td")
            committee = tds[0].text_content().strip()
            # The description cell is expected to hold exactly one span.
            descr = [x.text_content() for x in tds[1].xpath(".//span")]
            if len(descr) != 1:
                raise Exception
            descr = descr[0].replace('.', '').strip()
            when = tds[2].text_content().strip()
            where = tds[3].text_content().strip()
            notice = tds[4].xpath(".//a")[0]
            notice_href = notice.attrib['href']
            notice_name = notice.text
            when = dt.datetime.strptime(when, "%m/%d/%Y %I:%M %p")
            when = pytz.utc.localize(when)
            event = Event(name=descr, start_time=when, classification='committee-meeting',
                          description=descr, location_name=where, timezone=tz.zone)

            # Joint hearings list several committees separated by "/".
            if "/" in committee:
                committees = committee.split("/")
            else:
                committees = [committee]

            for committee in committees:
                if "INFO" not in committee:
                    # Look the committee up by its own short code. The
                    # previous code passed the literal string "committee" as
                    # the key, so the lookup always fell through to the
                    # default dict.
                    committee = self.short_ids.get(committee, {"chamber": "unknown",
                                                               "name": committee})

                else:
                    committee = {
                        "chamber": "joint",
                        "name": committee,
                    }
                event.add_committee(committee['name'], note='host')

            event.add_source(URL)
            event.add_document(notice_name,
                               notice_href,
                               media_type='text/html')
            for bill in self.get_related_bills(notice_href):
                a = event.add_agenda_item(description=bill['descr'])
                a.add_bill(
                    bill['bill_id'],
                    note=bill['type']
                )
            yield event
开发者ID:cliftonmcintosh,项目名称:openstates,代码行数:54,代码来源:events.py

示例10: scrape_page

# 需要导入模块: from pupa.scrape import Event [as 别名]
# 或者: from pupa.scrape.Event import add_agenda_item [as 别名]
    def scrape_page(self, url, session, chamber):
        """Scrape one Illinois hearing page and return (not yield) its Event.

        Arguments:
        url -- hearing page URL
        session -- legislative session (unused, kept for caller compatibility)
        chamber -- chamber (unused, kept for caller compatibility)
        """
        html = self.get(url).text
        doc = lxml.html.fromstring(html)
        doc.make_links_absolute(url)

        ctty_name = doc.xpath("//span[@class='heading']")[0].text_content()

        tables = doc.xpath("//table[@cellpadding='3']")
        info = tables[0]
        rows = info.xpath(".//tr")
        # First table holds "Key: value" metadata rows.
        metainf = {}
        for row in rows:
            tds = row.xpath(".//td")
            key = tds[0].text_content().strip()
            value = tds[1].text_content().strip()
            metainf[key] = value

        where = metainf['Location:']
        subject_matter = metainf['Subject Matter:']
        description = "{}, {}".format(ctty_name, subject_matter)

        datetime = metainf['Scheduled Date:']
        datetime = re.sub(r"\s+", " ", datetime)
        repl = {
            "AM": " AM",
            "PM": " PM"  # Space shim so strptime can split the AM/PM marker.
        }
        for r in repl:
            datetime = datetime.replace(r, repl[r])
        datetime = self.localize(dt.datetime.strptime(datetime, "%b %d, %Y %I:%M %p"))

        event = Event(description,
                      start_date=datetime,
                      location_name=where)
        event.add_source(url)

        # str.replace returns a new string; the previous code discarded the
        # result, so the prefix was never actually stripped.
        if ctty_name.startswith('Hearing Notice For'):
            ctty_name = ctty_name.replace('Hearing Notice For', '')
        event.add_participant(ctty_name, 'organization')

        # Second table lists the bills on the agenda.
        bills = tables[1]
        for bill in bills.xpath(".//tr")[1:]:
            tds = bill.xpath(".//td")
            if len(tds) < 4:
                continue
            # First, let's get the bill ID:
            bill_id = tds[0].text_content()
            agenda_item = event.add_agenda_item(bill_id)
            agenda_item.add_bill(bill_id)

        return event
开发者ID:neelneelpurk,项目名称:openstates,代码行数:53,代码来源:events.py

示例11: parse_event

# 需要导入模块: from pupa.scrape import Event [as 别名]
# 或者: from pupa.scrape.Event import add_agenda_item [as 别名]
    def parse_event(self, row, chamber):
        """Convert one Alaska committee-schedule XML row into an Event."""
        # sample event available at http://www.akleg.gov/apptester.html
        code = row.xpath('string(Sponsor)').strip()
        committee_name = '{} {}'.format(
                self.COMMITTEES_PRETTY[chamber],
                self.COMMITTEES[chamber][code]['name']
            )

        name = '{} {}'.format(
            self.COMMITTEES_PRETTY[chamber],
            row.xpath('string(Title)').strip()
        )

        # If name is missing, make it "<CHAMBER> <COMMITTEE NAME>"
        if name == '':
            name = committee_name

        location = row.xpath('string(Location)').strip()

        # events with no location all seem to be committee hearings
        if location == '':
            location = 'Alaska State Capitol, 120 4th St, Juneau, AK 99801'

        start_date = dateutil.parser.parse(row.xpath('string(Schedule)'))
        # todo: do i need to self._TZ.localize() ?

        event = Event(
            name=name,
            start_date=start_date,
            location_name=location,
        )

        event.add_source('http://w3.akleg.gov/index.php#tab4')

        event.add_participant(
            committee_name,
            type='committee',
            note='host',
        )

        # Attach each non-empty agenda entry, plus its bill when one is named.
        for entry in row.xpath('Agenda/Item'):
            text = entry.xpath('string(Text)').strip()
            if not text:
                continue
            agenda_item = event.add_agenda_item(description=text)
            if entry.xpath('BillRoot'):
                raw_bill = entry.xpath('string(BillRoot)')
                # AK Bill ids have a bunch of extra spaces
                agenda_item.add_bill(re.sub(r'\s+', ' ', raw_bill))

        yield event
开发者ID:sunlightlabs,项目名称:openstates,代码行数:53,代码来源:events.py

示例12: scrape_lower_item

# 需要导入模块: from pupa.scrape import Event [as 别名]
# 或者: from pupa.scrape.Event import add_agenda_item [as 别名]
    def scrape_lower_item(self, page):
        """Yield an Event for one Missouri House hearing listing."""
        # print(lxml.etree.tostring(page, pretty_print=True))
        committee = self.table_row_content(page, 'Committee:')
        hearing_date = self.table_row_content(page, 'Date:')
        hearing_time = self.table_row_content(page, 'Time:')
        location = self.table_row_content(page, 'Location:')

        if 'house hearing room' in location.lower():
            location = '{}, {}'.format(
                location,
                '201 W Capitol Ave, Jefferson City, MO 65101'
            )

        # fix some broken times, e.g. '12 :00'
        hearing_time = hearing_time.replace(' :', ':')

        # some times have extra info after the AM/PM
        if 'upon' in hearing_time:
            hearing_time = hearing_time.split('AM', 1)[0]
            hearing_time = hearing_time.split('PM', 1)[0]

        start_date = self._TZ.localize(
            dateutil.parser.parse('{} {}'.format(hearing_date, hearing_time))
        )

        event = Event(
            name=committee,
            start_date=start_date,
            location_name=location,
        )
        event.add_source('https://house.mo.gov/HearingsTimeOrder.aspx')
        event.add_participant(
            committee,
            type='committee',
            note='host',
        )

        # different from general MO link xpath due to the <b>
        house_link_xpath = './/a[contains(@href, "Bill.aspx") ' \
            'or contains(@href, "bill.aspx")]/b/text()'

        for bill_title in page.xpath(house_link_xpath):
            bill_no = bill_title.split('--')[0].strip()
            bill_no = bill_no.replace('HCS', '').strip()

            agenda_item = event.add_agenda_item(description=bill_title)
            agenda_item.add_bill(bill_no)

        yield event
开发者ID:sunlightlabs,项目名称:openstates,代码行数:53,代码来源:events.py

示例13: scrape_chamber

# 需要导入模块: from pupa.scrape import Event [as 别名]
# 或者: from pupa.scrape.Event import add_agenda_item [as 别名]
    def scrape_chamber(self, chamber):
        """Yield Events for Pennsylvania committee meetings in `chamber`."""
        url = utils.urls['events'][chamber]
        page = self.get(url).text
        page = lxml.html.fromstring(page)
        page.make_links_absolute(url)

        for table in page.xpath('//table[@class="CMS-MeetingDetail-CurrMeeting"]'):
            # The date lives on an anchor name in the enclosing detail <div>.
            date_string = table.xpath('ancestor::div[@class="CMS-MeetingDetail"]/div/a/@name')[0]
            for row in table.xpath('tr'):
                time_string = row.xpath('td[@class="CMS-MeetingDetail-Time"]/text()')[0].strip()
                description = row.xpath(
                    'td[@class="CMS-MeetingDetail-Agenda"]/div/div'
                )[-1].text_content().strip()
                location = row.xpath(
                    'td[@class="CMS-MeetingDetail-Location"]'
                )[0].text_content().strip()
                committees = row.xpath('.//div[@class="CMS-MeetingDetail-Agenda-CommitteeName"]/a')
                bills = row.xpath('.//a[contains(@href, "billinfo")]')

                try:
                    start_time = datetime.datetime.strptime(
                        '{} {}'.format(date_string, time_string),
                        '%m/%d/%Y %I:%M %p',
                    )
                except ValueError:
                    break

                event = Event(
                    name=description,
                    start_time=self._tz.localize(start_time),
                    location_name=location,
                    timezone=self._tz.zone,
                )
                event.add_source(url)

                if bills or committees:
                    item = event.add_agenda_item(description)
                    for bill in bills:
                        parsed = urllib.parse.urlparse(bill.get('href'))
                        qs = urllib.parse.parse_qs(parsed.query)
                        # parse_qs maps each key to a *list* of values, so
                        # take element [0]; formatting the lists directly
                        # produced ids like "['S']['B'] ['123']".
                        item.add_bill('{}{} {}'.format(
                            qs['body'][0], qs['type'][0], qs['bn'][0]))
                    for committee in committees:
                        parsed = urllib.parse.urlparse(committee.get('href'))
                        qs = urllib.parse.parse_qs(parsed.query)
                        # Same list-valued quirk applies to the committee
                        # code; default to None when 'Code' is absent.
                        item.add_committee(
                            re.sub(r' \([S|H]\)$', '', committee.text),
                            id=qs.get('Code', [None])[0],
                        )

                yield event
开发者ID:cliftonmcintosh,项目名称:openstates,代码行数:52,代码来源:events.py

示例14: scrape_chamber

# 需要导入模块: from pupa.scrape import Event [as 别名]
# 或者: from pupa.scrape.Event import add_agenda_item [as 别名]
    def scrape_chamber(self, chamber):
        """Yield Events for California committee hearings in `chamber`."""
        grouped_hearings = defaultdict(list)

        # Bucket hearings by (location, date), keeping only this chamber's.
        for hearing in self.session.query(CACommitteeHearing):
            location = self.session.query(CALocation).filter_by(
                location_code=hearing.location_code)[0].description

            date = self._tz.localize(hearing.hearing_date)

            # The location's first three characters encode the chamber.
            chamber_abbr = location[0:3]
            event_chamber = {'Asm': 'lower', 'Sen': 'upper'}[chamber_abbr]
            if event_chamber != chamber:
                continue

            grouped_hearings[(location, date)].append(hearing)

        for (location, date), hearings in grouped_hearings.items():

            # Get list of bill_ids from the database.
            bill_ids = [hearing.bill_id for hearing in hearings]
            bills = ["%s %s" % re.match(r'\d+([^\d]+)(\d+)', bill).groups()
                     for bill in bill_ids]

            # Dereference the committee_nr number and get display name.
            msg = 'More than one committee meeting at (location, date) %r'
            msg = msg % ((location, date),)
            assert len(set(hearing.committee_nr for hearing in hearings)) == 1, msg
            committee_name = _committee_nr[hearings.pop().committee_nr]

            desc = 'Committee Meeting: ' + committee_name
            event = Event(
                name=desc,
                start_date=date,
                location_name=committee_name,
            )
            for bill_id in bills:
                note = 'bill' if 'B' in bill_id else 'resolution'
                item = event.add_agenda_item('consideration')
                item.add_bill(bill_id, note=note)

            event.add_person(committee_name + ' Committee', note='host')
            event.add_source('ftp://www.leginfo.ca.gov/pub/bill/')

            yield event
开发者ID:neelneelpurk,项目名称:openstates,代码行数:50,代码来源:events.py

示例15: scrape

# 需要导入模块: from pupa.scrape import Event [as 别名]
# 或者: from pupa.scrape.Event import add_agenda_item [as 别名]
    def scrape(self):
        """Yield Events for committee meetings in a window from 10 days ago
        to 20 days ahead."""
        start = dt.datetime.utcnow()
        start = start - dt.timedelta(days=10)
        end = start + dt.timedelta(days=30)

        url = URL.format(**{"from": start.strftime("%Y/%m/%d"), "til": end.strftime("%Y/%m/%d")})

        page = self.lxmlize(url)
        events = page.xpath("//ul[contains(@class, 'committee-events')]//li")

        for event in events:
            # (dropped an unused `string = event.text_content()` local)
            po = CLICK_INFO.match(event.xpath(".//span")[0].attrib["onclick"])
            if po is None:
                continue

            poid = po.groupdict()["info_id"]  # This is used to get more deetz on

            popage = self.popOverUrl(poid)
            when = dt.datetime.strptime(popage.xpath("//strong")[0].text, "%B %d, %Y @ %I:%M %p")
            who = popage.xpath("//h1")[0].text
            related = []

            # Collect ordinance numbers mentioned anywhere in the popover
            # body; each match keeps the full line as its description.
            for item in popage.xpath("//div"):
                t = item.text
                if t is None:
                    continue

                t = t.strip()
                for related_entity in ORD_INFO.findall(t):
                    related.append({"ord_no": related_entity, "what": t})

            e = Event(name=who, when=when, location="unknown")
            e.add_source(url)

            for o in related:
                i = e.add_agenda_item(o["what"])
                i.add_bill(o["ord_no"], note="consideration")

            yield e
开发者ID:dtpeters,项目名称:scrapers-us-municipal,代码行数:43,代码来源:events.py


注:本文中的pupa.scrape.Event.add_agenda_item方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。