当前位置: 首页>>代码示例>>Python>>正文


Python Event.add_media_link方法代码示例

本文整理汇总了Python中pupa.scrape.Event.add_media_link方法的典型用法代码示例。如果您正苦于以下问题:Python Event.add_media_link方法的具体用法?Python Event.add_media_link怎么用?Python Event.add_media_link使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在pupa.scrape.Event的用法示例。


在下文中一共展示了Event.add_media_link方法的7个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: parse_div

# 需要导入模块: from pupa.scrape import Event [as 别名]
# 或者: from pupa.scrape.Event import add_media_link [as 别名]
    def parse_div(self, row, chamber, com):
        """Turn one hearing-schedule row into an ``Event`` and yield it.

        The calendar link found in ``row`` is handed to ``self.parse_gcal`` to
        obtain the title, location and start/end dates.  Agenda items,
        linked PDF documents, bills and an optional on-demand video link are
        then attached.  The host committee is ``com`` unless the title
        mentions a subcommittee, in which case the text before the first
        ``-`` in the title is used.

        Args:
            row: element exposing ``xpath`` for a single schedule entry.
            chamber: not used by this method.
            com: committee name used as the host when the title does not
                mention a subcommittee.

        Yields:
            The populated ``Event``.
        """
        calendar_href = row.xpath('.//a[.//span[@id="calendarmarker"]]/@href')[0]
        title, location, start_date, end_date = self.parse_gcal(calendar_href)

        event = Event(
            name=title,
            location_name=location,
            start_date=start_date,
            end_date=end_date,
        )
        event.add_source('http://mgaleg.maryland.gov/webmga/frmHearingSchedule.aspx')

        # Free-text items become bare agenda entries.
        for text_node in row.xpath('.//div[@class="col-xs-12a Item"]'):
            event.add_agenda_item(description=text_node.xpath('string(.)').strip())

        # Linked items become an agenda entry plus a PDF document.
        for anchor in row.xpath('.//div[contains(@class,"ItemContainer")]/a'):
            label = anchor.xpath('string(.)').strip()
            event.add_agenda_item(description=label)
            event.add_document(
                label,
                anchor.xpath('@href')[0],
                media_type="application/pdf",
                on_duplicate="ignore",
            )

        # Containers holding a "col-xs-1 Item" cell carry a bill reference.
        bill_containers = row.xpath(
            './/div[contains(@class,"ItemContainer")]'
            '[./div[@class="col-xs-1 Item"]]'
        )
        for container in bill_containers:
            agenda_entry = event.add_agenda_item(
                description=container.xpath('string(.)').strip()
            )
            bill_id = container.xpath('.//div[@class="col-xs-1 Item"]/a/text()')[0].strip()
            agenda_entry.add_bill(bill_id)

        video_anchors = row.xpath('.//a[./span[@class="OnDemand"]]')
        if video_anchors:
            video_href = video_anchors[0].xpath('@href')[0]
            event.add_media_link('Video of Hearing', video_href, 'text/html')

        if 'subcommittee' in title.lower():
            host = title.split('-')[0].strip()
        else:
            host = com
        event.add_participant(host, type='committee', note='host')

        yield event
开发者ID:sunlightlabs,项目名称:openstates,代码行数:61,代码来源:events.py

示例2: test_full_event

# 需要导入模块: from pupa.scrape import Event [as 别名]
# 或者: from pupa.scrape.Event import add_media_link [as 别名]
def test_full_event():
    """Import an all-day event carrying a person and a media link.

    The test makes no assertions of its own; it passes as long as the
    importer accepts the serialized event without raising.
    """
    Jurisdiction.objects.create(id='jid', division_id='did')

    event = ScrapeEvent(
        name="America's Birthday",
        start_time="2014-07-04",
        location="America",
        all_day=True,
    )
    event.add_person("George Washington")
    event.add_media_link("fireworks", "http://example.com/fireworks.mov")

    importer = EventImporter('jid')
    importer.import_data([event.as_dict()])
开发者ID:Vanuan,项目名称:pupa,代码行数:10,代码来源:test_event_importer.py

示例3: scrape

# 需要导入模块: from pupa.scrape import Event [as 别名]
# 或者: from pupa.scrape.Event import add_media_link [as 别名]

#.........这里部分代码省略.........

            if not location:
                # We expect some events to have no location. LA Metro would
                # like these displayed in the Councilmatic interface. However,
                # OCD requires a value for this field. Add a sane default.
                location = 'Not available'

            e = Event(event_name,
                      start_date=event["start"],
                      description='',
                      location_name=location,
                      status=status)

            e.pupa_id = str(event['EventId'])

            # Metro requires the EventGuid to build out MediaPlayer links.
            # Add both the English event GUID, and the Spanish event GUID if
            # it exists, to the extras dict.
            e.extras = {'guid': event['EventGuid']}

            legistar_api_url = self.BASE_URL + '/events/{0}'.format(event['EventId'])
            e.add_source(legistar_api_url, note='api')

            if event.get('SAPEventGuid'):
                e.extras['sap_guid'] = event['SAPEventGuid']

            if 'event_details' in event:
                # if there is not a meeting detail page on legistar
                # don't capture the agenda data from the API
                for item in self.agenda(event):
                    agenda_item = e.add_agenda_item(item["EventItemTitle"])
                    if item["EventItemMatterFile"]:
                        identifier = item["EventItemMatterFile"]
                        agenda_item.add_bill(identifier)

                    if item["EventItemAgendaNumber"]:
                        # To the notes field, add the item number as given in the agenda minutes
                        note = "Agenda number, {}".format(item["EventItemAgendaNumber"])
                        agenda_item['notes'].append(note)

                    # The EventItemAgendaSequence provides 
                    # the line number of the Legistar agenda grid.
                    agenda_item['extras']['item_agenda_sequence'] = item['EventItemAgendaSequence']

                # Historically, the Legistar system has duplicated the EventItemAgendaSequence,
                # resulting in data inaccuracies. The scrape should fail in such cases, until Metro
                # cleans the data.
                item_agenda_sequences = [item['extras']['item_agenda_sequence'] for item in e.agenda]
                if len(item_agenda_sequences) != len(set(item_agenda_sequences)):
                    error_msg = 'An agenda has duplicate agenda items on the Legistar grid: \
                        {event_name} on {event_date} ({legistar_api_url}). \
                        Contact Metro, and ask them to remove the duplicate EventItemAgendaSequence.'

                    raise ValueError(error_msg.format(event_name=e.name, 
                                                      event_date=e.start_date.strftime("%B %d, %Y"),
                                                      legistar_api_url=legistar_api_url))

            e.add_participant(name=body_name,
                              type="organization")

            if event.get('SAPEventId'):
                e.add_source(self.BASE_URL + '/events/{0}'.format(event['SAPEventId']),
                             note='api (sap)')

            if event['EventAgendaFile']:
                e.add_document(note= 'Agenda',
                               url = event['EventAgendaFile'],
                               media_type="application/pdf")

            if event['EventMinutesFile']:
                e.add_document(note= 'Minutes',
                               url = event['EventMinutesFile'],
                               media_type="application/pdf")

            for audio in event['audio']:
                try:
                    redirect_url = self.head(audio['url']).headers['Location']

                except KeyError:
                    # In some cases, the redirect URL does not yet
                    # contain the location of the audio file. Skip
                    # these events, and retry on next scrape.
                    continue

                e.add_media_link(note=audio['label'],
                                 url=redirect_url,
                                 media_type='text/html')

            if web_event['Recap/Minutes'] != 'Not\xa0available':
                e.add_document(note=web_event['Recap/Minutes']['label'],
                               url=web_event['Recap/Minutes']['url'],
                               media_type="application/pdf")

            if event['event_details']:
                for link in event['event_details']:
                    e.add_source(**link)
            else:
                e.add_source('https://metro.legistar.com/Calendar.aspx', note='web')

            yield e
开发者ID:datamade,项目名称:scrapers-us-municipal,代码行数:104,代码来源:events.py

示例4: scrape

# 需要导入模块: from pupa.scrape import Event [as 别名]
# 或者: from pupa.scrape.Event import add_media_link [as 别名]
    def scrape(self):
        """Build an ``Event`` for each ``(event, agenda)`` pair from ``self.events()``.

        For every row the location, start time and status are derived from
        the 'Meeting Location', 'Meeting Date' and iCalendar fields, a
        recording link and documents are attached, and the hosting body is
        added as a participant.

        NOTE(review): the remainder of this function is elided in this
        excerpt, so only the steps visible below are described.
        """
        for event, agenda in self.events() :

            description = None

            location_string = event[u'Meeting Location']

            # Split on '--' at most twice; the first two pieces form the
            # location, the last piece may carry a trailing status note.
            location_list = location_string.split('--', 2)
            location = ', '.join(location_list[0:2])
            # Rows without any location text are skipped entirely.
            if not location :
                continue

            when = self.toTime(event[u'Meeting Date'])

            # Hour and minute come from the first iCalendar subcomponent's DTSTART.
            event_time = event['iCalendar'].subcomponents[0]['DTSTART'].dt
            when = when.replace(hour=event_time.hour,
                                minute=event_time.minute)

            # Anything following 'Chicago, Illinois' in the last location
            # piece is treated as free-text status information.
            status_string = location_list[-1].split('Chicago, Illinois')
            if len(status_string) > 1 and status_string[1] :
                status_text = status_string[1].lower()
                # Substring match: any of these phrases marks the event cancelled.
                # NOTE(review): 'rescheduled to' and 'postponed to' are each
                # listed twice in the tuple below.
                if any(phrase in status_text 
                       for phrase in ('rescheduled to',
                                      'postponed to',
                                      'reconvened to',
                                      'rescheduled to',
                                      'meeting recessed',
                                      'recessed meeting',
                                      'postponed to',
                                      'recessed until',
                                      'deferred',
                                      'time change',
                                      'date change',
                                      'recessed meeting - reconvene',
                                      'cancelled',
                                      'new date and time',
                                      'rescheduled indefinitely',
                                      'rescheduled for',)) :
                    status = 'cancelled'
                # Exact-match checks from here on.
                elif status_text in ('rescheduled', 'recessed') :
                    status = 'cancelled'
                # NOTE(review): 'recessed meeting' in this tuple cannot match,
                # because it is already caught by the substring list above.
                # confirmedOrPassed is defined outside this block; presumably
                # it picks 'confirmed' or 'passed' from the date -- verify.
                elif status_text in ('meeting reconvened',
                                     'reconvened meeting',
                                     'recessed meeting',
                                     'reconvene meeting',
                                     'rescheduled hearing',
                                     'rescheduled meeting',) :
                    status = confirmedOrPassed(when)
                elif status_text in ('amended notice of meeting',
                                     'room change',
                                     'amended notice',
                                     'change of location',
                                     'revised - meeting date and time') :
                    status = confirmedOrPassed(when)
                # A note mentioning a room is folded into the location.
                # NOTE(review): this branch does not assign `status`, yet the
                # Event construction below reads it -- confirm this is intended.
                elif 'room' in status_text :
                    location = status_string[1] + ', ' + location
                elif status_text in ('wrong meeting date',) :
                    continue
                else :
                    # Unrecognised note: print it and keep it as the
                    # description with the '--em--' marker removed.
                    print(status_text)
                    description = status_string[1].replace('--em--', '').strip()
                    status = confirmedOrPassed(when)
            else :
                status = confirmedOrPassed(when)


            # `description` is passed only when one was set above.
            if description :
                e = Event(name=event["Name"]["label"],
                          start_time=when,
                          description=description,
                          timezone='US/Central',
                          location_name=location,
                          status=status)
            else :
                e = Event(name=event["Name"]["label"],
                          start_time=when,
                          timezone='US/Central',
                          location_name=location,
                          status=status)


            # The Video field is compared against the literal placeholder
            # string; otherwise it is indexed as a mapping with a 'url' key.
            if event['Video'] != 'Not\xa0available' : 
                e.add_media_link(note='Recording',
                                 url = event['Video']['url'],
                                 type="recording",
                                 media_type = 'text/html')

            self.addDocs(e, event, 'Agenda')
            self.addDocs(e, event, 'Notice')
            self.addDocs(e, event, 'Transcript')
            self.addDocs(e, event, 'Summary')

            # Rewrite two body names before recording the participant.
            participant = event["Name"]["label"]
            if participant == 'City Council' :
                participant = 'Chicago City Council'
            elif participant == 'Committee on Energy, Environmental Protection and Public Utilities (inactive)' :
                participant = 'Committee on Energy, Environmental Protection and Public Utilities'

            e.add_participant(name=participant,
                              type="organization")
#.........这里部分代码省略.........
开发者ID:ErnieAtLYD,项目名称:scrapers-us-municipal,代码行数:103,代码来源:events.py

示例5: scrape

# 需要导入模块: from pupa.scrape import Event [as 别名]
# 或者: from pupa.scrape.Event import add_media_link [as 别名]
    def scrape(self):
        """Build an ``Event`` for each ``(event, agenda)`` pair from ``self.events(since=2011)``.

        Each row is parsed into a location, start time, status and
        description; a recording link, documents, participating committees,
        a source and agenda items (with bills) are then attached.

        NOTE(review): the remainder of this function is elided in this
        excerpt, so only the steps visible below are described.
        """
        # Remembers the (name, when) keys of the last 10 accepted events so
        # that a repeat among them can be skipped.
        last_events = deque(maxlen=10)
        for event, agenda in self.events(since=2011) :
            other_orgs = ''
            extras = []

            # Text after '--em--' is a note: ' - '-separated parts starting
            # with 'Join' name co-hosting bodies, all other parts are kept
            # as location extras.
            if '--em--' in event[u'Meeting Location'] :
                location_string, note = event[u'Meeting Location'].split('--em--')[:2]
                for each in note.split(' - ') :
                    if each.startswith('Join') :
                        other_orgs = each
                    else :
                        extras.append(each)
            else :
                location_string = event[u'Meeting Location'] 
            
            # Split on '-' at most twice and join the first two stripped pieces.
            location_list = location_string.split('-', 2)
            location = ', '.join([each.strip() for each in location_list[0:2]])
            # Rows without any location text are skipped entirely.
            if not location :
                continue

            when = self.toTime(event[u'Meeting Date'])

            # Hour and minute come from the first iCalendar subcomponent's DTSTART.
            event_time = event['iCalendar'].subcomponents[0]['DTSTART'].dt
            when = when.replace(hour=event_time.hour,
                                minute=event_time.minute)

            # 'Deferred' in the time column means cancelled; otherwise the
            # status depends on whether `when` is still ahead of self.now().
            time_string = event['Meeting Time']
            if time_string in ('Deferred',) :
                status = 'cancelled'
            elif self.now() < when :
                status = 'confirmed'
            else :
                status = 'passed'

            # Topics containing either placeholder phrase are blanked out.
            description = event['Meeting\xa0Topic']
            if any(each in description 
                   for each 
                   in ('Multiple meeting items',
                       'AGENDA TO BE ANNOUNCED')) :
                description = ''

            event_name = event['Name']

            event_id = (event_name, when)

            # Skip an event whose name and start time were seen recently.
            if event_id in last_events :
                continue
            else :
                last_events.append(event_id)

            e = Event(name=event_name,
                      start_time=when,
                      timezone=self.TIMEZONE,
                      description=description,
                      location_name=location,
                      status=status)

            if extras :
                e.extras = {'location note' : ' '.join(extras)}

            # The Multimedia field is compared against the literal placeholder
            # string; otherwise it is indexed as a mapping with a 'url' key.
            if event['Multimedia'] != 'Not\xa0available' : 
                e.add_media_link(note='Recording',
                                 url = event['Multimedia']['url'],
                                 type="recording",
                                 media_type = 'text/html')

            self.addDocs(e, event, 'Agenda')
            self.addDocs(e, event, 'Minutes')

            # The stated meeting maps to the council itself; names containing
            # 'committee' host their own meeting; anything else has no host.
            if event['Name'] == 'City Council Stated Meeting' :
                participating_orgs = ['New York City Council']
            elif 'committee' in event['Name'].lower() :
                participating_orgs = [event["Name"]]
            else :
                participating_orgs = []

            # Strip the "Jointly with the " prefix (the pattern allows zero or
            # more 'l' before 'y') and split the rest into individual bodies.
            if other_orgs : 
                other_orgs = re.sub('Jointl*y with the ', '', other_orgs)
                participating_orgs += re.split(' and the |, the ', other_orgs)
 
            for org in participating_orgs :
                e.add_committee(name=org)

            # With an agenda, the meeting-details URL is the source; otherwise
            # the general events page is used.
            if agenda :
                e.add_source(event["Meeting Details"]['url'])

                
                for item, _, _ in agenda :
                    if item["Name"] :
                        agenda_item = e.add_agenda_item(item["Name"])
                        if item["File\xa0#"] :
                            # The bill note is the recorded action, or
                            # 'consideration' when no action is given.
                            if item['Action'] :
                                note = item['Action']
                            else :
                                note = 'consideration'
                            agenda_item.add_bill(item["File\xa0#"]['label'],
                                                 note=note)
            else :
                e.add_source(self.EVENTSPAGE)
#.........这里部分代码省略.........
开发者ID:dtpeters,项目名称:scrapers-us-municipal,代码行数:103,代码来源:events.py

示例6: scrape

# 需要导入模块: from pupa.scrape import Event [as 别名]
# 或者: from pupa.scrape.Event import add_media_link [as 别名]
    def scrape(self):
        """Scrape the Arlington meetings page and yield one ``Event`` per row.

        Both the ``#archive`` and ``#upcoming`` tables are walked.  Every
        meeting gets the listing page as a source and, when the agenda cell
        holds a link, the agenda page itself plus every ``metaviewer.php``
        link found on it as documents.  Rows from the archive table
        additionally get video, audio and minutes media links when the
        matching cells contain a link.

        Rows whose cells cannot be mapped by ``self._organize_cells`` are
        skipped.

        Yields:
            Event: one event per table row that could be mapped, for both
            archived and upcoming meetings.
        """
        meetings_html = self.urlopen(self.ARLINGTON_MEETING_PAGE)
        meetings_lxml = lxml.html.fromstring(meetings_html)

        for meeting_type in ('archive', 'upcoming'):
            for meeting in meetings_lxml.cssselect('#%s tbody tr' % meeting_type):

                # Attempt to map the cells across table types.  If the sizes
                # mismatch, ignore this row (it's an "empty" message).
                # ``Exception`` rather than a bare ``except`` so that
                # KeyboardInterrupt / SystemExit still propagate.
                try:
                    cell_mapping = self._organize_cells(meeting_type, meeting.cssselect('td'))
                except Exception:
                    continue

                meeting_title = cell_mapping['title'].text
                # The date cell carries a timestamp as text in its first <span>.
                meeting_date = datetime.datetime.fromtimestamp(
                    int(cell_mapping['date'].cssselect('span')[0].text))

                e = Event(name=meeting_title, when=meeting_date, location='unknown')
                e.add_source(self.ARLINGTON_MEETING_PAGE)

                # Detect agenda url, if present.
                agenda_links = cell_mapping['agenda'].cssselect('a')
                meeting_agenda_url = agenda_links[0].attrib.get('href') if agenda_links else None

                # Follow the agenda URL and attempt to extract associated documents.
                if meeting_agenda_url is not None:
                    e.add_link(meeting_agenda_url)
                    e.add_document(name='Agenda', url=meeting_agenda_url, mimetype='text/html')

                    meeting_agenda_html = self.urlopen(meeting_agenda_url)
                    meeting_agenda_lxml = lxml.html.fromstring(meeting_agenda_html)
                    for link in meeting_agenda_lxml.cssselect('a'):
                        link_url = link.attrib.get('href', '')
                        if not link_url:
                            continue
                        # NOTE: application/pdf is a guess, may not always be correct
                        if 'metaviewer.php' in link_url.lower() and link.text is not None:
                            e.add_document(name=link.text, url=link_url, mimetype='application/pdf')

                # Media cells are only read for the archive table.  The
                # previous version used ``continue`` here, which also skipped
                # the ``yield`` and silently dropped every upcoming meeting.
                if meeting_type != 'upcoming':
                    # Detect video.
                    # TODO: extract actual mp4 files
                    video_cell = cell_mapping['video'].cssselect('a')
                    if video_cell:
                        # Capture only the URL: the closing quote that ends it
                        # inside the onclick handler must not be part of the link.
                        video_url_match = re.search(
                            r"(http://.*?)'", video_cell[0].attrib.get('onclick', ''))
                        if video_url_match is not None:
                            e.add_media_link(name="Video", url=video_url_match.group(1),
                                             mimetype='text/html')

                    # Detect audio.
                    audio_cell = cell_mapping['audio'].cssselect('a')
                    if audio_cell:
                        e.add_media_link(name="Audio", url=audio_cell[0].attrib.get('href', ''),
                                         mimetype='audio/mpeg')

                    # Detect minutes.
                    minutes_cell = cell_mapping['minutes'].cssselect('a')
                    if minutes_cell:
                        e.add_media_link(name="Minutes", url=minutes_cell[0].attrib.get('href', ''),
                                         mimetype='text/html')

                yield e
开发者ID:ChaelCodes,项目名称:scrapers-us-municipal,代码行数:66,代码来源:events.py

示例7: scrape

# 需要导入模块: from pupa.scrape import Event [as 别名]
# 或者: from pupa.scrape.Event import add_media_link [as 别名]
    def scrape(self, follow_links=True):
        """Yield an ``Event`` for every row of every events page.

        Args:
            follow_links: when true and the row has a meeting-details link,
                that page is fetched and its agenda table is turned into
                agenda items with bills.  Otherwise the event gets no agenda
                items and ``EVENTSPAGE`` is recorded as its source.

        Yields:
            Event: the populated event, with status ``'cancelled'`` when the
            text after 'Chicago, Illinois' in the location says so, else
            ``'passed'`` or ``'confirmed'`` depending on whether the start
            time is already behind the current UTC time.
        """
        cancelled_phrases = ('rescheduled to',
                             'postponed to',
                             'reconvened to',
                             'recessed',
                             'cancelled',
                             'new date and time',
                             'rescheduled indefinitely',
                             'rescheduled for')

        for page in self.eventPages(EVENTSPAGE):
            events_table = page.xpath("//table[@class='rgMasterTable']")[0]
            for events, headers, rows in self.parseDataTable(events_table):
                # Reset per row so a row without details can neither raise
                # NameError nor inherit the previous row's URL and agenda.
                detail_url = None
                agenda = []
                if follow_links and isinstance(events['Meeting\xa0Details'], dict):
                    detail_url = events['Meeting\xa0Details']['url']
                    meeting_details = self.lxmlize(detail_url)

                    agenda_table = meeting_details.xpath(
                        "//table[@id='ctl00_ContentPlaceHolder1_gridMain_ctl00']")[0]
                    agenda = self.parseDataTable(agenda_table)

                location_string = events[u'Meeting\xa0Location']
                location_list = location_string.split('--')
                location = ', '.join(location_list[0:2])

                when = events[u'Meeting\xa0Date']
                time_string = events[u'Meeting\xa0Time']
                event_time = datetime.datetime.strptime(time_string,
                                                        "%I:%M %p")
                # Carry over the minutes as well as the hour.
                when = when.replace(hour=event_time.hour,
                                    minute=event_time.minute)

                # Text after 'Chicago, Illinois' in the last location piece
                # is a free-text status note, if present.
                status_string = location_list[-1].split('Chicago, Illinois')
                status_text = ''
                if len(status_string) > 1 and status_string[1]:
                    status_text = status_string[1].lower()

                if (status_text == 'rescheduled'
                        or any(phrase in status_text for phrase in cancelled_phrases)):
                    status = 'cancelled'
                else:
                    if status_text:
                        # Unrecognised note: surface it, then fall back to the
                        # date-based status instead of leaving it unset.
                        print(status_text)
                    now = datetime.datetime.utcnow().replace(tzinfo=pytz.utc)
                    # A start time that is already behind us has passed.
                    status = 'passed' if now > when else 'confirmed'

                e = Event(name=events["Name"]["label"],
                          start_time=when,
                          timezone='US/Central',
                          location=location,
                          status=status)
                # Fall back to the listing page when no detail page was followed.
                e.add_source(detail_url if detail_url is not None else EVENTSPAGE)

                if events['Video'] != 'Not\xa0available':
                    e.add_media_link(note='Recording',
                                     url=events['Video']['url'],
                                     type="recording",
                                     media_type='text/html')

                addDocs(e, events, 'Agenda')
                addDocs(e, events, 'Notice')
                addDocs(e, events, 'Transcript')
                addDocs(e, events, 'Summary')

                for item, _, _ in agenda:
                    agenda_item = e.add_agenda_item(item["Title"])
                    agenda_item.add_bill(item["Record #"]['label'])

                e.add_participant(name=events["Name"]["label"],
                                  type="organization")

                yield e
开发者ID:rshorey,项目名称:scrapers-us-municipal,代码行数:74,代码来源:events.py


注:本文中的pupa.scrape.Event.add_media_link方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。