This page collects representative code examples of the Python method legistar.scraper.LegistarScraper.searchLegislation. If you are unsure exactly what LegistarScraper.searchLegislation does, how to call it, or want to see it used in practice, the hand-picked samples below should help. You can also read more about its containing class, legistar.scraper.LegistarScraper.
Ten code examples of LegistarScraper.searchLegislation are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python samples.
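Before diving into the examples, here is a minimal usage sketch assembled from the samples below. The config keys ('hostname', 'fulltext') and the keyword filters (year=..., created_before=...) all appear in the examples on this page; treat anything beyond that as an assumption rather than documented API. The examples target Python 2, so the sketch does too.

from legistar.scraper import LegistarScraper

# Build a scraper from a plain config dict (some examples below use a
# Config helper with .defaults(DEFAULT_CONFIG) as an alternative).
config = {'hostname': 'phila.legistar.com', 'fulltext': True}
scraper = LegistarScraper(config)

# searchLegislation returns a generator of summary dicts; an empty
# query string matches all legislation.
for summary in scraper.searchLegislation('pub'):
    print summary['Record #']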
Example 1: recognize_dates
# Required import: from legistar.scraper import LegistarScraper [as alias]
# Or: from legistar.scraper.LegistarScraper import searchLegislation [as alias]
def recognize_dates():
    # Config, DEFAULT_CONFIG and assert_is_instance are provided by the
    # surrounding test module (not shown in this excerpt).
    config = Config(hostname="phila.legistar.com",
                    sponsor_links=False,
                    date_format="%m/%d/%Y").defaults(DEFAULT_CONFIG)
    scraper = LegistarScraper(config)
    summaries = scraper.searchLegislation("")
    summary = summaries.next()

    import datetime
    assert_is_instance(summary["File Created"], datetime.datetime)
Example 2: recognize_dates
# Required import: from legistar.scraper import LegistarScraper [as alias]
# Or: from legistar.scraper.LegistarScraper import searchLegislation [as alias]
def recognize_dates():
    config = {"hostname": "phila.legistar.com", "date_format": "%m/%d/%Y", "fulltext": True}
    scraper = LegistarScraper(config)
    summaries = scraper.searchLegislation("")
    summary = summaries.next()

    import datetime
    assert_is_instance(summary["File Created"], datetime.datetime)
Example 3: supports_simple_initial_search_form
# Required import: from legistar.scraper import LegistarScraper [as alias]
# Or: from legistar.scraper.LegistarScraper import searchLegislation [as alias]
def supports_simple_initial_search_form():
    config = {"hostname": "phila.legistar.com", "fulltext": True}
    scraper = LegistarScraper(config)
    summaries = scraper.searchLegislation("")
    try:
        summaries.next()
    except StopIteration:
        fail("no legislation found")
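A side note on the pattern above (an editorial aside, not part of the original source): these Python 2 examples advance the generator with summaries.next(). The next() builtin, available since Python 2.6 and the only spelling that survives in Python 3, does the same and also accepts a default, which removes the need for the try/except:

# Version-neutral equivalent of the check above:
first = next(summaries, None)
assert first is not None, 'no legislation found'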
Example 4: supports_simple_initial_search_form
# Required import: from legistar.scraper import LegistarScraper [as alias]
# Or: from legistar.scraper.LegistarScraper import searchLegislation [as alias]
def supports_simple_initial_search_form():
    config = Config(hostname="phila.legistar.com", fulltext=True).defaults(DEFAULT_CONFIG)
    scraper = LegistarScraper(config)
    summaries = scraper.searchLegislation("")
    try:
        summaries.next()
    except StopIteration:
        fail("no legislation found")
Example 5: paging_through_legislation
# Required import: from legistar.scraper import LegistarScraper [as alias]
# Or: from legistar.scraper.LegistarScraper import searchLegislation [as alias]
def paging_through_legislation():
    config = Config(hostname="chicago.legistar.com", fulltext=True).defaults(DEFAULT_CONFIG)
    scraper = LegistarScraper(config)
    # Materializing the generator as a list forces the scraper to page
    # all the way through the search results.
    summaries = list(scraper.searchLegislation("pub"))
    for s in summaries:
        print s["Record #"]
    assert_greater(len(summaries), 100)
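If you only want to verify that paging works without downloading every results page, bounding the generator is cheaper than materializing it all. A sketch under the same config as the example above (itertools.islice is standard library; the rest follows the example):

from itertools import islice

scraper = LegistarScraper({"hostname": "chicago.legistar.com", "fulltext": True})
# Pull at most 120 summaries: the scraper still has to page past the
# first results page, but stops fetching once the slice is exhausted.
first_batch = list(islice(scraper.searchLegislation("pub"), 120))
assert len(first_batch) > 100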
Example 6: supports_advanced_initial_search_form
# Required import: from legistar.scraper import LegistarScraper [as alias]
# Or: from legistar.scraper.LegistarScraper import searchLegislation [as alias]
def supports_advanced_initial_search_form():
    config = Config(hostname="chicago.legistar.com", fulltext=True).defaults(DEFAULT_CONFIG)
    scraper = LegistarScraper(config)
    summaries = scraper.searchLegislation("")
    try:
        summaries.next()
    except StopIteration:
        assert False, "no legislation found"
Example 7: paging_through_results
# Required import: from legistar.scraper import LegistarScraper [as alias]
# Or: from legistar.scraper.LegistarScraper import searchLegislation [as alias]
def paging_through_results():
    config = {"hostname": "chicago.legistar.com", "fulltext": True}
    scraper = LegistarScraper(config)
    # Materializing the generator as a list forces the scraper to page
    # all the way through the search results.
    summaries = list(scraper.searchLegislation("pub"))
    for s in summaries:
        print s["Record #"]
    assert_greater(len(summaries), 100)
Example 8: HostedLegistarSiteWrapper
# Required import: from legistar.scraper import LegistarScraper [as alias]
# Or: from legistar.scraper.LegistarScraper import searchLegislation [as alias]
class HostedLegistarSiteWrapper(object):
    """
    A generic facade over the hosted Legistar site scraper. It is
    responsible for interpreting the data that LegistarScraper pulls out
    of the site. The main external point of interaction is
    scrape_legis_file.

    NOTE that this is a superclass that will not run by itself and isn't
    meant to; you are expected to run a subclass that implements the
    methods whose names start with "pluck".

    requires: BeautifulSoup, mechanize
    """
    # Excerpt: the full module also imports urllib2, urlparse, time and
    # datetime, and configures a module-level `log`.

    def __init__(self, cmdline_options, **options):
        self.scraper = LegistarScraper(options)
        if cmdline_options['year']:
            self.legislation_summaries = self.scraper.searchLegislation('', year=cmdline_options['year'])
        else:
            self.legislation_summaries = self.scraper.searchLegislation('')

    def scrape_legis_file(self, key, summary):
        '''Extract a record from the given document (soup). The key is for
        the sake of record-keeping. It is the key passed to the site URL.'''
        while True:
            try:
                legislation_attrs, legislation_history = self.scraper.expandLegislationSummary(summary)
                break
            except urllib2.URLError as e:
                print e
                print 'skipping to next leg record'
            except AttributeError as e:
                print e
                print 'skipping to next leg record'

            while True:
                try:
                    summary = self.legislation_summaries.next()
                    break
                except urllib2.URLError as e:
                    print e
                    print 'sleeping for five minutes'
                    # was time.sleep('360'): a string argument raises
                    # TypeError, and 360 seconds is six minutes anyway
                    time.sleep(300)

        parsed_url = urlparse.urlparse(summary['URL'])
        key = urlparse.parse_qs(parsed_url.query)['ID'][0]

        record = self.pluck_record(key, summary, legislation_attrs)
        attachments = self.pluck_attachments(key, legislation_attrs)

        actions = []
        for act in legislation_history:
            # was `act_details = act_votes = []`, which binds both names
            # to the same list object
            act_details, act_votes = [], []
            try:
                act_details, act_votes = self.scraper.expandHistorySummary(act)
            except (KeyError, AttributeError) as e:
                log.debug('LegAction has no url')
            else:
                if act_votes:
                    print "act_votes", act_votes
            try:
                action = self.pluck_action(key, act, act_details, act_votes)
            except TypeError as e:
                print e
                print summary
                continue
            except KeyError as e:
                print act
                print e
                print summary
                raise
            actions.append(action)

        # we should probably remove this from the model since the hosted
        # legistar does not have minutes
        minutes = []

        log.info('Scraped legfile with key %r' % (key,))
        log.debug("%r %r %r %r" % (record, attachments, actions, minutes))
        return record, attachments, actions, minutes

    def convert_date(self, orig_date):
        if orig_date:
            return datetime.datetime.strptime(orig_date, '%m/%d/%Y').date()
        else:
            return ''

    def check_for_new_content(self, last_key):
        '''Grab the next legislation summary row. Doesn't use the last_key
        parameter; it just starts at the beginning for each instance of
        the scraper.
        '''
        try:
            print 'next leg record'
            next_summary = self.legislation_summaries.next()
            return 0, next_summary
        except StopIteration:
#......... remainder of this method omitted .........
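The docstring above says subclasses must supply the "pluck" methods, but none appears in this excerpt. A hypothetical minimal subclass might look like the sketch below; the field names are illustrative guesses drawn from the summary keys used in Examples 9 and 10, not the real project's mapping.

class SimpleSiteWrapper(HostedLegistarSiteWrapper):
    # Hypothetical sketch only: map scraped rows into plain dicts.
    def pluck_record(self, key, summary, attrs):
        return {'key': key,
                'id': summary['Record #'],
                'title': summary['Title']}

    def pluck_attachments(self, key, attrs):
        return attrs.get('Attachments', [])

    def pluck_action(self, key, act, act_details, act_votes):
        return {'key': key,
                'date_taken': self.convert_date(act['Date']),
                'description': act['Action']}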
Example 9: HostedLegistarSiteWrapper
# Required import: from legistar.scraper import LegistarScraper [as alias]
# Or: from legistar.scraper.LegistarScraper import searchLegislation [as alias]
class HostedLegistarSiteWrapper(object):
    """
    A facade over the Philadelphia city council Legistar site data. It is
    responsible for scraping data out of the site. The main external point
    of interaction is scrape_legis_file.

    requires: BeautifulSoup, mechanize
    """
    # Excerpt: the full module also imports urllib2, urlparse, time and
    # datetime, configures a module-level `log`, and defines ScrapeError.

    def __init__(self, **options):
        self.id_label = options.pop('id_label', 'Record #')
        self.url_label = options.pop('url_label', 'URL')
        self.type_label = options.pop('type_label', 'Type')
        self.status_label = options.pop('status_label', 'Status')
        self.title_label = options.pop('title_label', 'Title')
        self.indexes_label = options.pop('indexes_label', 'Indexes')
        self.intro_date_label = options.pop('intro_date_label', 'Intro Date')
        self.final_date_label = options.pop('final_date_label', 'Final Date')
        self.controlling_body_label = options.pop('controlling_body_label', 'Current Controlling Legislative Body')
        self.version_label = options.pop('version_label', 'Version')

        self.scraper = LegistarScraper(options)
        self.legislation_summaries = self.scraper.searchLegislation('', created_before='2012-10-5')

    def scrape_legis_file(self, key, summary):
        '''Extract a record from the given document (soup). The key is for
        the sake of record-keeping. It is the key passed to the site URL.'''
        while True:
            try:
                legislation_attrs, legislation_history = self.scraper.expandLegislationSummary(summary)
                break
            except urllib2.URLError as e:
                log.warning(e)
                log.warning('skipping to next leg record')
            except AttributeError as e:
                log.warning(e)
                log.warning('skipping to next leg record')

            while True:
                try:
                    summary = self.legislation_summaries.next()
                    break
                except urllib2.URLError as e:
                    log.warning(e)
                    log.warning('sleeping for five minutes')
                    # was time.sleep('360'); see the note in Example 8
                    time.sleep(300)

        parsed_url = urlparse.urlparse(summary['URL'])
        key = urlparse.parse_qs(parsed_url.query)['ID'][0]

        # Re-order each sponsor name as '[First] [Last]' instead of
        # '[Last], [First]'
        sponsors = legislation_attrs['Sponsors']
        first_name_first_sponsors = []
        for sponsor in sponsors:
            if ',' in sponsor:
                name_list = sponsor.split(',')
                name_list.reverse()
                sponsor = ' '.join(name_list).strip()
            first_name_first_sponsors.append(sponsor)

        try:
            record = {
                'key': key,
                'id': summary[self.id_label],
                'url': summary[self.url_label],
                'type': summary[self.type_label],
                'status': summary[self.status_label],
                'title': summary[self.title_label],
                'indexes': legislation_attrs[self.indexes_label],
                'controlling_body': legislation_attrs[self.controlling_body_label],
                'intro_date': self.convert_date(summary[self.intro_date_label]),
                'final_date': self.convert_date(summary.setdefault(self.final_date_label, '')),
                'version': summary.setdefault(self.version_label, ''),
                # 'contact': None,
                'sponsors': first_name_first_sponsors,
                # probably remove this from the model as well
                'minutes_url': None,
            }
        except KeyError as e:
            raise ScrapeError('Failed to find key %s in either summary keys '
                              '(%r) or attrs (%r)' % (e, summary.keys(),
                                                      legislation_attrs.keys()))

        try:
            attachments = legislation_attrs['Attachments']
            for attachment in attachments:
                attachment['key'] = key
                attachment['file'] = attachment['label']
                attachment['description'] = attachment['label']
                del attachment['label']
        except KeyError:
            attachments = []

        actions = []
        for act in legislation_history:
            try:
                act_details, act_votes = self.scraper.expandHistorySummary(act)
#......... remainder of this method omitted .........
Example 10: HostedLegistarSiteWrapper
# Required import: from legistar.scraper import LegistarScraper [as alias]
# Or: from legistar.scraper.LegistarScraper import searchLegislation [as alias]
class HostedLegistarSiteWrapper(object):
    """
    A facade over the Philadelphia city council Legistar site data. It is
    responsible for scraping data out of the site. The main external point
    of interaction is scrape_legis_file.

    requires: BeautifulSoup, mechanize
    """
    # Excerpt: the full module also imports urllib2, urlparse, time and
    # datetime, and configures a module-level `log`.

    def __init__(self, **options):
        self.scraper = LegistarScraper(options)
        self.legislation_summaries = self.scraper.searchLegislation('', created_before='2012-10-5')

    def scrape_legis_file(self, key, summary):
        '''Extract a record from the given document (soup). The key is for
        the sake of record-keeping. It is the key passed to the site URL.'''
        while True:
            try:
                legislation_attrs, legislation_history = self.scraper.expandLegislationSummary(summary)
                break
            except urllib2.URLError as e:
                print e
                print 'skipping to next leg record'
            except AttributeError as e:
                print e
                print 'skipping to next leg record'

            while True:
                try:
                    summary = self.legislation_summaries.next()
                    break
                except urllib2.URLError as e:
                    print e
                    print 'sleeping for five minutes'
                    # was time.sleep('360'); see the note in Example 8
                    time.sleep(300)

        parsed_url = urlparse.urlparse(summary['URL'])
        key = urlparse.parse_qs(parsed_url.query)['ID'][0]

        # Re-order each sponsor name as '[First] [Last]' instead of
        # '[Last], [First]'; rsplit keeps suffixes like 'Jr.' attached
        sponsors = legislation_attrs['Sponsors']
        first_name_first_sponsors = []
        for sponsor in sponsors:
            if ',' in sponsor:
                name_list = sponsor.rsplit(',', 1)
                name_list.reverse()
                sponsor = ' '.join(name_list).strip().replace(',', '')
            first_name_first_sponsors.append(sponsor)

        record = {
            'key': key,
            'id': summary['Record #'],
            'url': summary['URL'],
            'type': summary['Type'],
            'status': summary['Status'],
            'title': summary['Title'],
            'controlling_body': legislation_attrs['Current Controlling Legislative Body'],
            'intro_date': self.convert_date(summary['Intro Date']),
            'final_date': self.convert_date(summary.setdefault('Final Date', '')),
            'version': summary.setdefault('Version', ''),
            # 'contact': None,
            'sponsors': first_name_first_sponsors,
            # probably remove this from the model as well
            'minutes_url': None,
        }

        try:
            attachments = legislation_attrs['Attachments']
            for attachment in attachments:
                attachment['key'] = key
                attachment['file'] = attachment['label']
                attachment['description'] = attachment['label']
                del attachment['label']
        except KeyError:
            attachments = []

        actions = []
        for act in legislation_history:
            try:
                act_details, act_votes = self.scraper.expandHistorySummary(act)
            except (KeyError, AttributeError) as e:
                print e
                print summary
                continue
            try:
                action = {
                    'key': key,
                    'date_taken': self.convert_date(act['Date']),
                    'acting_body': act['Action By']['label'],
                    'motion': act['Result'],
                    'description': act['Action'],
                    'notes': '',
                }
            except TypeError as e:
                print e
                print summary
                continue
#......... remainder of this method omitted .........
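One closing note on the two sponsor-reordering variants above: Example 9 splits at every comma, while Example 10 uses rsplit(',', 1) and strips leftover commas, which keeps names with an embedded suffix such as 'Jr.' intact. A quick illustration (the sample name is invented):

name = 'Jones, Jr., Paul'

# Example 9's approach: every comma is a split point.
parts = name.split(',')                          # ['Jones', ' Jr.', ' Paul']
parts.reverse()
print ' '.join(parts).strip()                    # 'Paul  Jr. Jones' (mangled)

# Example 10's approach: split only at the last comma.
parts = name.rsplit(',', 1)                      # ['Jones, Jr.', ' Paul']
parts.reverse()
print ' '.join(parts).strip().replace(',', '')   # 'Paul Jones Jr.'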