本文整理汇总了Python中history.History.check_recent方法的典型用法代码示例。如果您正苦于以下问题：Python History.check_recent方法的具体用法？Python History.check_recent怎么用？Python History.check_recent使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类history.History的用法示例。
在下文中一共展示了History.check_recent方法的2个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: update_playlist
# 需要导入模块: from history import History [as 别名]
# 或者: from history.History import check_recent [as 别名]
def update_playlist(playlist, logger=None):
    """
    Look up release data for each song in a playlist and save it.

    Every song yielded by ``read_playlist(playlist)`` is handled as follows:

    * it is skipped when ``History.check_recent`` reports a truthy value
      for the song's ``loc``;
    * it is skipped when ``song.meta.tag.artist`` / ``song.meta.tag.title``
      cannot be read (``AttributeError``);
    * otherwise the first release returned by ``Discogs.get_first_release``
      is, if truthy, turned into an update via ``build_update`` and
      written with ``song.update_info`` followed by ``song.save``.

    The song's ``loc`` is then recorded through ``History.store``.

    ``logger`` is forwarded to the ``Discogs`` client only.
    """
    history = History('history.p')
    discogs = Discogs(logger=logger)

    for track in read_playlist(playlist):
        if history.check_recent(track.loc):
            continue

        # Tracks without usable tag metadata are silently skipped.
        try:
            artist, title = track.meta.tag.artist, track.meta.tag.title
        except AttributeError:
            continue

        match = discogs.get_first_release(artist, title)
        if match:
            track.update_info(build_update(title, match))
            track.save()

        # NOTE(review): the source listing lost its indentation; recording
        # the location for every processed track (not only matched ones) is
        # the assumed original placement -- confirm against upstream.
        history.store(track.loc)
示例2: Gears
# 需要导入模块: from history import History [as 别名]
# 或者: from history.History import check_recent [as 别名]
class Gears(object):
"""Gears for scrapers"""
def __init__(self, logger=None, hist_file='history.p'):
    """
    Set up the logger and the request history.

    ``logger`` is used when truthy; otherwise the logger named
    ``'temp.log'`` is fetched from the ``logging`` module.
    ``hist_file`` is handed to ``History`` together with the chosen
    logger.
    """
    self.logger = logger or logging.getLogger('temp.log')
    self.history = History(hist_file, logger=self.logger)
def get(self, url, referer=None, agent=None, delay=True, check_hist=True):
    """
    Fetch ``url`` with ``requests.get`` and return the response.

    * ``agent``: User-Agent header value; when falsy one is obtained
      from ``ragent()``.
    * ``delay``: when truthy, ``rdelay()`` is called before the request
      (presumably a randomized pause -- confirm in its definition).
    * ``check_hist``: when truthy, the request is skipped and ``None``
      is returned unless ``self.history.check_recent(url)`` is exactly
      ``False``.
    * ``referer``: sent as the ``referer`` header.

    Returns the response object (also for non-200 statuses, which are
    only logged), or ``None`` when the history check short-circuits or
    a ``ConnectionError`` occurs.
    """
    agent = agent or ragent()
    if delay:
        rdelay()

    if check_hist and self.history.check_recent(url) is not False:
        return None

    headers = {'User-Agent': agent, 'referer': referer}
    self.logger.debug('Making request to %s\nwith headers:%s', url, headers)

    try:
        response = requests.get(url, headers=headers)
    except requests.exceptions.ConnectionError:
        self.logger.error('ConnectionError', exc_info=True)
        return None

    if response.status_code != 200:  # pragma: no cover
        self.logger.error(
            'Request != 200: status_code = %s', response.status_code
        )
        self.logger.error(response.text)
    return response
def parse_page(self, url, xpath=None, text=None, suffix='">',
               referer=None, delay=True, check_hist=True):
    """
    Download a page and pull something out of it.

    The page is fetched through ``self.get`` (``referer``, ``delay`` and
    ``check_hist`` are passed straight through).  A falsy response
    yields ``[]``.

    * With ``xpath``: the response text is parsed by ``html.fromstring``
      and the XPath result is returned, or ``[]`` when it is falsy.
    * Else with ``text``: returns
      ``self.find_string(response.text, text, suffix)``.
    * With neither: returns ``None``.
    """
    self.logger.debug('Parsing page %s', url)
    page = self.get(url,
                    referer=referer, delay=delay, check_hist=check_hist)
    if not page:
        return []

    if not xpath:
        if text:  # pragma: no cover
            return self.find_string(page.text, text, suffix)
        # Neither selector supplied: nothing to extract.
        return None

    self.logger.debug('with xpath=%s', xpath)
    matches = html.fromstring(page.text).xpath(xpath)
    if not matches:  # pragma: no cover
        self.logger.debug('Found nothing')
        return []
    self.logger.debug('Found %d elements', len(matches))  # pragma: no cover
    return matches  # pragma: no cover
def find_string(self, raw_text, prefix, suffix='">'):
"""Finds a string from raw HTML text"""
self.logger.debug('Finding string between %s\nand\n%s', prefix, suffix)
try:
idx = raw_text.index(prefix)+len(prefix)
found = raw_text[idx:].split(suffix)[0]
except (ValueError, IndexError, AttributeError), err:
self.logger.error(
'String not found due to error: %s', err, exc_info=True
)
self.logger.debug('Raw text: %s', raw_text)
return None
else: