本文整理汇总了Python中reporter.Reporter.update方法的典型用法代码示例。如果您正苦于以下问题:Python Reporter.update方法的具体用法?Python Reporter.update怎么用?Python Reporter.update使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 reporter.Reporter 的用法示例。
在下文中一共展示了Reporter.update方法的1个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: Crawler
# 需要导入模块: from reporter import Reporter [as 别名]
# 或者: from reporter.Reporter import update [as 别名]
class Crawler():
    """Crawl the configured RSS feeds and report articles that mention VIPs.

    Feed URL lists live in text files under the ``rss`` directory next to
    this module; each file holds one feed URL per line.  Matched articles
    are forwarded to ``Reporter.update``.
    """

    def __init__(self):
        self.vip_info = VipInfo()                # VIP matcher (project type)
        self.rss_links = self._load_rss_links()  # paths of URL-list files
        self.reporter = Reporter()               # sink for matched articles

    def _load_rss_links(self):
        """Return the path of every file under the sibling ``rss`` directory."""
        links = []
        directory = os.path.join(os.path.dirname(os.path.abspath(__file__)), "rss")
        for root, dirs, fs in os.walk(directory):
            for f in fs:
                links.append(os.path.join(root, f))
        return links

    def is_article_scanned(self, article):
        """Return True if *article* was already seen in a previous run.

        Assuming that all article updated date-times are in GMT, the epoch
        comparison below is valid.
        """
        epoch_article_time = mktime(article.updated_parsed)
        # New only if it post-dates both the last run and the report window.
        if (epoch_article_time >= CONF.last_script_run_date_time
                and epoch_article_time >= CONF.REPORT_START_DATE_TIME):
            return False
        return True

    def crawl(self):
        """Scan every feed once, report VIP articles, print run statistics."""
        new_article_scanned = 0
        old_article_scanned = 0
        vip_article_found = 0
        # Note the start time for crawling for this scheduled run; it is
        # stored back into CONF below so the next run skips older articles.
        crawl_start_time = time()
        for path in self.rss_links:
            # BUGFIX: the original leaked the file handle and split on
            # os.linesep, which misparses files written on another OS.
            with open(path, "r") as fh:
                urls = fh.read().splitlines()
            for url in urls:
                # BUGFIX: skip blank lines instead of feedparser.parse("").
                if not url.strip():
                    continue
                feed = feedparser.parse(url)
                for article in feed.entries:
                    if not self.is_article_scanned(article):
                        new_article_scanned += 1
                        if self.vip_info.is_there_vip_news(article):
                            vip_article_found += 1
                            self.reporter.update(article)
                    else:
                        old_article_scanned += 1
        # Update the crawl start time in config for the next run.
        CONF.last_script_run_date_time = crawl_start_time
        # Single parenthesized %-formatted argument keeps the output
        # byte-identical on Python 2 while also being valid Python 3.
        print("new articles scanned: %d" % new_article_scanned)
        print("old articles skipped: %d" % old_article_scanned)
        print("vip articles found: %d" % vip_article_found)