当前位置: 首页>>代码示例>>Python>>正文


Python Event.extras方法代码示例

本文整理汇总了Python中pupa.scrape.Event.extras方法的典型用法代码示例。如果您正苦于以下问题：Python Event.extras方法的具体用法？Python Event.extras怎么用？Python Event.extras使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类pupa.scrape.Event的用法示例。


在下文中一共展示了Event.extras方法的6个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: scrape_events_range

# 需要导入模块: from pupa.scrape import Event [as 别名]
# 或者: from pupa.scrape.Event import extras [as 别名]
    def scrape_events_range(self, start_date, end_date):
        """Yield one ``Event`` per meeting listed between two dates.

        Walks the calendar one day at a time over the half-open range
        ``[start_date, end_date)``, fetches that day's listing through
        ``self.extract_events_by_url`` and, when the meeting's agenda or
        minutes have been published, attaches every agenda item together with
        the matching full bill identifier.

        Args:
            start_date: First day to scrape (inclusive). It is passed through
                ``replace(hour=..., minute=...)`` and ``tz.localize``, so it is
                presumably a naive ``datetime.datetime`` -- confirm against
                the caller.
            end_date: Day at which scraping stops (exclusive).

        Yields:
            Event: One event per meeting found in the listings.

        Raises:
            ValueError: If an agenda item does not correspond to exactly one
                of the meeting's full identifiers.
            AttributeError: If a full identifier does not fit the expected
                ``YYYY.<short identifier>`` pattern.
        """
        # Loop-invariant values are built once instead of once per event/item.
        tz = pytz.timezone("America/Toronto")
        # Matches strings such as "2014.EX1.2" and captures the part after the
        # four-digit prefix, which is what agenda items carry as 'identifier'.
        identifier_regex = re.compile(r'^[0-9]{4}\.([A-Z]{2}[0-9]+\.[0-9]+)$')
        published_statuses = ('Agenda Published', 'Minutes Published')

        number_of_days = (end_date - start_date).days
        for day_offset in range(number_of_days):
            date = start_date + datetime.timedelta(days=day_offset)
            # NOTE(review): the month is passed zero-based here; presumably the
            # calendar endpoint expects that -- confirm against the template.
            calendar_day_url = CALENDAR_DAY_TEMPLATE.format(date.year, date.month - 1, date.day)
            for event in self.extract_events_by_url(calendar_day_url):
                parsed_time = datetime.datetime.strptime(event['time'], '%I:%M %p')
                start = tz.localize(
                    date.replace(hour=parsed_time.hour, minute=parsed_time.minute, second=0, microsecond=0)
                )
                org_name = event['meeting']
                e = Event(
                    name=org_name,
                    start_time=start,
                    timezone=tz.zone,
                    location_name=event['location'],
                    status=STATUS_DICT.get(event['meeting_status']),
                )
                e.extras = {
                    'meeting_number': event['no'],
                    'tmmis_meeting_id': event['meeting_id'],
                }
                e.add_source(calendar_day_url)
                e.add_participant(
                    name=org_name,
                    type='organization',
                )

                if event['publishing_status'] in published_statuses:
                    # Council meetings use a different agenda URL template and
                    # identifier lookup than every other meeting.
                    is_council = event['meeting'] == self.jurisdiction.name
                    agenda_url_template = (
                        AGENDA_FULL_COUNCIL_TEMPLATE if is_council else AGENDA_FULL_STANDARD_TEMPLATE
                    )
                    agenda_url = agenda_url_template.format(event['meeting_id'])
                    full_identifiers = list(self.full_identifiers(event['meeting_id'], is_council))

                    e.add_source(agenda_url)
                    for order, item in enumerate(self.agenda_from_url(agenda_url)):
                        a = e.add_agenda_item(item['title'])
                        a.add_classification(item['type'].lower())
                        a['order'] = str(order)

                        # Single-element unpacking doubles as an assertion that
                        # exactly one full identifier matches this item.
                        [full_identifier] = [
                            full_id for full_id in full_identifiers
                            if identifier_regex.match(full_id).group(1) == item['identifier']
                        ]
                        a.add_bill(full_identifier)

                yield e
开发者ID:opencivicdata,项目名称:scrapers-ca,代码行数:66,代码来源:events-incremental.py

示例2: scrape

# 需要导入模块: from pupa.scrape import Event [as 别名]
# 或者: from pupa.scrape.Event import extras [as 别名]
    def scrape(self, window=None) :
        if window:
            n_days_ago = datetime.datetime.utcnow() - datetime.timedelta(float(window))
        else:
            n_days_ago = None

        events = self.events(n_days_ago)

        for event, web_event in self._merge_events(events):
            body_name = event["EventBodyName"]

            if 'Board of Directors -' in body_name:
                body_name, event_name = [part.strip()
                                         for part
                                         in body_name.split('-')]
            else:
                event_name = body_name

            # Events can have an EventAgendaStatusName of "Final", "Final Revised", 
            # and "Final 2nd Revised."
            # We classify these events as "passed."
            status_name = event['EventAgendaStatusName']
            if status_name.startswith('Final'):
                status = 'passed'
            elif status_name == 'Draft':
                status = 'confirmed'
            elif status_name == 'Canceled':
                status = 'cancelled'
            else:
                status = 'tentative'

            location = event["EventLocation"]

            if not location:
                # We expect some events to have no location. LA Metro would
                # like these displayed in the Councilmatic interface. However,
                # OCD requires a value for this field. Add a sane default.
                location = 'Not available'

            e = Event(event_name,
                      start_date=event["start"],
                      description='',
                      location_name=location,
                      status=status)

            e.pupa_id = str(event['EventId'])

            # Metro requires the EventGuid to build out MediaPlayer links.
            # Add both the English event GUID, and the Spanish event GUID if
            # it exists, to the extras dict.
            e.extras = {'guid': event['EventGuid']}

            legistar_api_url = self.BASE_URL + '/events/{0}'.format(event['EventId'])
            e.add_source(legistar_api_url, note='api')

            if event.get('SAPEventGuid'):
                e.extras['sap_guid'] = event['SAPEventGuid']

            if 'event_details' in event:
                # if there is not a meeting detail page on legistar
                # don't capture the agenda data from the API
                for item in self.agenda(event):
                    agenda_item = e.add_agenda_item(item["EventItemTitle"])
                    if item["EventItemMatterFile"]:
                        identifier = item["EventItemMatterFile"]
                        agenda_item.add_bill(identifier)

                    if item["EventItemAgendaNumber"]:
                        # To the notes field, add the item number as given in the agenda minutes
                        note = "Agenda number, {}".format(item["EventItemAgendaNumber"])
                        agenda_item['notes'].append(note)

                    # The EventItemAgendaSequence provides 
                    # the line number of the Legistar agenda grid.
                    agenda_item['extras']['item_agenda_sequence'] = item['EventItemAgendaSequence']

                # Historically, the Legistar system has duplicated the EventItemAgendaSequence,
                # resulting in data inaccuracies. The scrape should fail in such cases, until Metro
                # cleans the data.
                item_agenda_sequences = [item['extras']['item_agenda_sequence'] for item in e.agenda]
                if len(item_agenda_sequences) != len(set(item_agenda_sequences)):
                    error_msg = 'An agenda has duplicate agenda items on the Legistar grid: \
                        {event_name} on {event_date} ({legistar_api_url}). \
                        Contact Metro, and ask them to remove the duplicate EventItemAgendaSequence.'

                    raise ValueError(error_msg.format(event_name=e.name, 
                                                      event_date=e.start_date.strftime("%B %d, %Y"),
                                                      legistar_api_url=legistar_api_url))

            e.add_participant(name=body_name,
                              type="organization")

            if event.get('SAPEventId'):
                e.add_source(self.BASE_URL + '/events/{0}'.format(event['SAPEventId']),
                             note='api (sap)')

            if event['EventAgendaFile']:
                e.add_document(note= 'Agenda',
                               url = event['EventAgendaFile'],
                               media_type="application/pdf")
#.........这里部分代码省略.........
开发者ID:datamade,项目名称:scrapers-us-municipal,代码行数:103,代码来源:events.py

示例3: scrape_events_range

# 需要导入模块: from pupa.scrape import Event [as 别名]
# 或者: from pupa.scrape.Event import extras [as 别名]
    def scrape_events_range(self, start_date, end_date):
        """Yield the events, and any newly seen bills, between two dates.

        Walks the calendar one day at a time over the half-open range
        ``[start_date, end_date)`` and fetches each day's meetings through
        ``self.extract_events_by_day``. When a meeting's agenda or minutes
        have been published, every agenda item is attached to the event with
        its full bill identifier. The first time an identifier is encountered
        (tracked in ``self.seen_agenda_items``) a ``Bill`` is yielded for it,
        before the event that references it.

        Args:
            start_date: First day to scrape (inclusive). It is passed through
                ``replace(hour=..., minute=...)`` and ``tz.localize``, so it is
                presumably a naive ``datetime.datetime`` -- confirm against
                the caller.
            end_date: Day at which scraping stops (exclusive).

        Yields:
            Bill or Event: New bills as they are discovered, then the event.

        Raises:
            ValueError: If an agenda item does not correspond to exactly one
                of the meeting's full identifiers.
            AttributeError: If a full identifier does not fit the expected
                ``YYYY.<short identifier>`` pattern.
        """
        def normalize_wards(raw):
            """Return ``'all'`` for an empty or ``'All'`` value, else the split list."""
            if not raw or raw == 'All':
                return 'all'
            return raw.split(', ')

        # Loop-invariant values are built once instead of once per event/item.
        tz = pytz.timezone("America/Toronto")
        # Matches strings such as "2014.EX1.2" and captures the part after the
        # four-digit prefix, which is what agenda items carry as 'identifier'.
        identifier_regex = re.compile(r'^[0-9]{4}\.([A-Z]{2}[0-9]+\.[0-9]+)$')
        published_statuses = ('Agenda Published', 'Minutes Published')

        number_of_days = (end_date - start_date).days
        for day_offset in range(number_of_days):
            date = start_date + dt.timedelta(days=day_offset)
            for event in self.extract_events_by_day(date):
                parsed_time = dt.datetime.strptime(event['time'], '%I:%M %p')
                start = tz.localize(
                    date.replace(hour=parsed_time.hour, minute=parsed_time.minute, second=0, microsecond=0)
                )
                source_url = CALENDAR_DAY_TEMPLATE.format(start.year, start.month, start.day)
                org_name = event['meeting']
                e = Event(
                    name=org_name,
                    start_time=start,
                    timezone=tz.zone,
                    location_name=event['location'],
                    status=STATUS_DICT.get(event['meeting_status']),
                )
                e.add_source(source_url)
                e.extras = {
                    'meeting_number': event['no'],
                    'tmmis_meeting_id': event['meeting_id'],
                }
                e.add_participant(
                    name=org_name,
                    type='organization',
                )

                if event['publishing_status'] in published_statuses:
                    # Council meetings use a different agenda URL template and
                    # identifier lookup than every other meeting.
                    is_council = event['meeting'] == self.jurisdiction.name
                    template = AGENDA_FULL_COUNCIL_TEMPLATE if is_council else AGENDA_FULL_STANDARD_TEMPLATE
                    agenda_url = template.format(event['meeting_id'])
                    full_identifiers = list(self.full_identifiers(event['meeting_id'], is_council))

                    e.add_source(agenda_url)
                    for order, item in enumerate(self.agenda_from_url(agenda_url)):
                        a = e.add_agenda_item(item['title'])
                        a.add_classification(item['type'].lower())
                        a['order'] = str(order)

                        wards = normalize_wards(item['wards'])
                        # Single-element unpacking doubles as an assertion that
                        # exactly one full identifier matches this item.
                        [full_identifier] = [
                            full_id for full_id in full_identifiers
                            if identifier_regex.match(full_id).group(1) == item['identifier']
                        ]
                        a.add_bill(full_identifier)

                        # Each bill is emitted only once per scraper instance.
                        if full_identifier in self.seen_agenda_items:
                            continue

                        b = Bill(
                            # TODO: Fix this hardcode
                            legislative_session='2014-2018',
                            identifier=full_identifier,
                            title=item['title'],
                            from_organization={'name': self.jurisdiction.name},
                        )
                        b.add_source(agenda_url)
                        b.add_document_link(
                            note='canonical',
                            media_type='text/html',
                            url=AGENDA_ITEM_TEMPLATE.format(full_identifier),
                        )
                        b.extras = {
                            'wards': wards,
                        }

                        self.seen_agenda_items.append(full_identifier)

                        yield b

                yield e

示例4: scrape

# 需要导入模块: from pupa.scrape import Event [as 别名]
# 或者: from pupa.scrape.Event import extras [as 别名]
    def scrape(self):
        last_events = deque(maxlen=10)
        for event, agenda in self.events(since=2011) :
            other_orgs = ''
            extras = []

            if '--em--' in event[u'Meeting Location'] :
                location_string, note = event[u'Meeting Location'].split('--em--')[:2]
                for each in note.split(' - ') :
                    if each.startswith('Join') :
                        other_orgs = each
                    else :
                        extras.append(each)
            else :
                location_string = event[u'Meeting Location'] 
            
            location_list = location_string.split('-', 2)
            location = ', '.join([each.strip() for each in location_list[0:2]])
            if not location :
                continue

            when = self.toTime(event[u'Meeting Date'])

            event_time = event['iCalendar'].subcomponents[0]['DTSTART'].dt
            when = when.replace(hour=event_time.hour,
                                minute=event_time.minute)

            time_string = event['Meeting Time']
            if time_string in ('Deferred',) :
                status = 'cancelled'
            elif self.now() < when :
                status = 'confirmed'
            else :
                status = 'passed'

            description = event['Meeting\xa0Topic']
            if any(each in description 
                   for each 
                   in ('Multiple meeting items',
                       'AGENDA TO BE ANNOUNCED')) :
                description = ''

            event_name = event['Name']

            event_id = (event_name, when)

            if event_id in last_events :
                continue
            else :
                last_events.append(event_id)

            e = Event(name=event_name,
                      start_time=when,
                      timezone=self.TIMEZONE,
                      description=description,
                      location_name=location,
                      status=status)

            if extras :
                e.extras = {'location note' : ' '.join(extras)}

            if event['Multimedia'] != 'Not\xa0available' : 
                e.add_media_link(note='Recording',
                                 url = event['Multimedia']['url'],
                                 type="recording",
                                 media_type = 'text/html')

            self.addDocs(e, event, 'Agenda')
            self.addDocs(e, event, 'Minutes')

            if event['Name'] == 'City Council Stated Meeting' :
                participating_orgs = ['New York City Council']
            elif 'committee' in event['Name'].lower() :
                participating_orgs = [event["Name"]]
            else :
                participating_orgs = []

            if other_orgs : 
                other_orgs = re.sub('Jointl*y with the ', '', other_orgs)
                participating_orgs += re.split(' and the |, the ', other_orgs)
 
            for org in participating_orgs :
                e.add_committee(name=org)

            if agenda :
                e.add_source(event["Meeting Details"]['url'])

                
                for item, _, _ in agenda :
                    if item["Name"] :
                        agenda_item = e.add_agenda_item(item["Name"])
                        if item["File\xa0#"] :
                            if item['Action'] :
                                note = item['Action']
                            else :
                                note = 'consideration'
                            agenda_item.add_bill(item["File\xa0#"]['label'],
                                                 note=note)
            else :
                e.add_source(self.EVENTSPAGE)
#.........这里部分代码省略.........
开发者ID:dtpeters,项目名称:scrapers-us-municipal,代码行数:103,代码来源:events.py

示例5: lower_parse_page

# 需要导入模块: from pupa.scrape import Event [as 别名]
# 或者: from pupa.scrape.Event import extras [as 别名]

#.........这里部分代码省略.........
                    date = re.sub(r"\s+", " ", date)
                    date = re.sub(".*POSTPONED NEW DATE", "", date).strip()

                # Due to the html structure this shouldn't be an elif
                # It needs to fire twice in the same loop iteration
                if value.tag == 'th' and value.get("class") == 'commtitle':
                    coms = value.xpath('.//div[contains(@class,"comm-txt")]/text()')

                elif key.tag == 'td':
                    key = key.text_content().strip()
                    value = value.text_content().strip()
                    value = value.replace(u'\x96', '-')
                    value = re.sub(r"\s+", " ", value)
                    metainf[key] = value

            time = metainf['Time:']
            repl = {
                "A.M.": "AM",
                "P.M.": "PM",
            }
            drepl = {
                "Sept": "Sep"
            }
            for r in repl:
                time = time.replace(r, repl[r])

            for r in drepl:
                date = date.replace(r, drepl[r])

            time = re.sub("-.*", "", time)
            time = time.strip()

            year = dt.datetime.now().year

            date = "%s %s %s" % (
                date,
                year,
                time
            )

            if "tbd" in date.lower():
                continue

            date = date.replace(' PLEASE NOTE NEW TIME', '')

            # Check if the event has been postponed.
            postponed = 'POSTPONED' in date
            if postponed:
                date = date.replace(' POSTPONED', '')

            date_formats = ["%B %d %Y %I:%M %p", "%b. %d %Y %I:%M %p"]
            datetime = None
            for fmt in date_formats:
                try:
                    datetime = dt.datetime.strptime(date, fmt)
                except ValueError:
                    pass

            # If the datetime can't be parsed, bail.
            if datetime is None:
                return

            title_key = set(metainf) & set([
                'Public Hearing:', 'Summit:', 'Roundtable:',
                'Public Roundtable:', 'Public Meeting:', 'Public Forum:',
                'Meeting:'])
            assert len(title_key) == 1, "Couldn't determine event title."
            title_key = list(title_key).pop()
            title = metainf[title_key]

            title = re.sub(
                r"\*\*Click here to view public hearing notice\*\*",
                "",
                title
            )

            # If event was postponed, add a warning to the title.
            if postponed:
                title = 'POSTPONED: %s' % title

            event = Event(
                name=title,
                start_date=self._tz.localize(datetime),
                location_name=metainf['Place:'],
            )
            event.extras = {'contact': metainf['Contact:']}
            if 'Media Contact:' in metainf:
                event.extras.update(media_contact=metainf['Media Contact:'])
            event.add_source(url)

            for com in coms:
                event.add_participant(
                    com.strip(),
                    type='committee',
                    note='host',
                )
                participant = event.participants[-1]
                participant['extras'] = {'chamber': self.classify_committee(com)},

            yield event
开发者ID:neelneelpurk,项目名称:openstates,代码行数:104,代码来源:events.py

示例6: scrape_events_range

# 需要导入模块: from pupa.scrape import Event [as 别名]
# 或者: from pupa.scrape.Event import extras [as 别名]
    def scrape_events_range(self, start_date, end_date):
        """Yield the events, and any newly seen bills, between two dates.

        Walks the calendar one day at a time over the half-open range
        ``[start_date, end_date)`` and fetches each day's listing through
        ``self.extract_events_by_url``. When a meeting's agenda or minutes
        have been published, every agenda item is attached to the event with
        its full bill identifier. The first time an identifier is encountered
        (tracked in ``self.seen_agenda_items``) a ``Bill`` is yielded for it,
        carrying its wards and, when ``self.addressesByAgendaId`` returns any
        for that identifier, its addresses.

        Args:
            start_date: First day to scrape (inclusive). It is passed through
                ``replace(hour=..., minute=...)`` and ``tz.localize``, so it is
                presumably a naive ``datetime.datetime`` -- confirm against
                the caller.
            end_date: Day at which scraping stops (exclusive).

        Yields:
            Bill or Event: New bills as they are discovered, then the event.

        Raises:
            ValueError: If an agenda item does not correspond to exactly one
                of the meeting's full identifiers.
            AttributeError: If a full identifier does not fit the expected
                ``YYYY.<short identifier>`` pattern.
        """
        def normalize_wards(raw):
            """Return ``"all"`` for an empty or ``"All"`` value, else the split list."""
            if not raw or raw == "All":
                return "all"
            return raw.split(", ")

        # Loop-invariant values are built once instead of once per event/item.
        tz = pytz.timezone("America/Toronto")
        # Matches strings such as "2014.EX1.2" and captures the part after the
        # four-digit prefix, which is what agenda items carry as "identifier".
        identifier_regex = re.compile(r"^[0-9]{4}\.([A-Z]{2}[0-9]+\.[0-9]+)$")
        published_statuses = ("Agenda Published", "Minutes Published")
        event_map_url_template = (
            "http://app.toronto.ca/tmmis/getAddressList.do?function=getMeetingAddressList&meetingId={}"
        )

        number_of_days = (end_date - start_date).days
        for day_offset in range(number_of_days):
            date = start_date + dt.timedelta(days=day_offset)
            # NOTE(review): the month is passed zero-based here; presumably the
            # calendar endpoint expects that -- confirm against the template.
            calendar_day_url = CALENDAR_DAY_TEMPLATE.format(date.year, date.month - 1, date.day)
            for event in self.extract_events_by_url(calendar_day_url):
                parsed_time = dt.datetime.strptime(event["time"], "%I:%M %p")
                start = tz.localize(
                    date.replace(hour=parsed_time.hour, minute=parsed_time.minute, second=0, microsecond=0)
                )
                org_name = event["meeting"]
                e = Event(
                    name=org_name,
                    start_time=start,
                    timezone=tz.zone,
                    location_name=event["location"],
                    status=STATUS_DICT.get(event["meeting_status"]),
                )
                e.extras = {"meeting_number": event["no"], "tmmis_meeting_id": event["meeting_id"]}
                e.add_source(calendar_day_url)
                e.add_participant(name=org_name, type="organization")

                if event["publishing_status"] in published_statuses:
                    # Council meetings use a different agenda URL template and
                    # identifier lookup than every other meeting.
                    is_council = event["meeting"] == self.jurisdiction.name
                    agenda_url_template = (
                        AGENDA_FULL_COUNCIL_TEMPLATE if is_council else AGENDA_FULL_STANDARD_TEMPLATE
                    )
                    agenda_url = agenda_url_template.format(event["meeting_id"])
                    full_identifiers = list(self.full_identifiers(event["meeting_id"], is_council))

                    event_map_url = event_map_url_template.format(event["meeting_id"])
                    addresses_d = self.addressesByAgendaId(event_map_url)

                    e.add_source(agenda_url)
                    for order, item in enumerate(self.agenda_from_url(agenda_url)):
                        a = e.add_agenda_item(item["title"])
                        a.add_classification(item["type"].lower())
                        a["order"] = str(order)

                        wards = normalize_wards(item["wards"])
                        # Single-element unpacking doubles as an assertion that
                        # exactly one full identifier matches this item.
                        [full_identifier] = [
                            full_id
                            for full_id in full_identifiers
                            if identifier_regex.match(full_id).group(1) == item["identifier"]
                        ]
                        a.add_bill(full_identifier)

                        # Each bill is emitted only once per scraper instance.
                        if full_identifier in self.seen_agenda_items:
                            continue

                        b = Bill(
                            # TODO: Fix this hardcode
                            legislative_session="2014-2018",
                            identifier=full_identifier,
                            title=item["title"],
                            from_organization={"name": self.jurisdiction.name},
                        )
                        b.add_source(agenda_url)
                        b.add_document_link(
                            note="canonical",
                            media_type="text/html",
                            url=AGENDA_ITEM_TEMPLATE.format(full_identifier),
                        )
                        b.extras["wards"] = wards

                        addresses = addresses_d.get(full_identifier)
                        if addresses:
                            b.extras["locations"] = [
                                {"address": {"full_address": address}} for address in addresses
                            ]

                        self.seen_agenda_items.append(full_identifier)

                        yield b

                yield e
开发者ID:tor-councilmatic,项目名称:scrapers-ca,代码行数:96,代码来源:events-incremental.py


注:本文中的pupa.scrape.Event.extras方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。