本文整理汇总了Python中scraper.Scraper.connect方法的典型用法代码示例。如果您正苦于以下问题:Python Scraper.connect方法的具体用法?Python Scraper.connect怎么用?Python Scraper.connect使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类scraper.Scraper
的用法示例。
在下文中一共展示了Scraper.connect方法的5个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: update
# 需要导入模块: from scraper import Scraper [as 别名]
# 或者: from scraper.Scraper import connect [as 别名]
def update(from_id, to_id, backsort):
    """
    Update items between the given ids.

    Use this command to fetch older data.

    from_id, to_id: numeric strings; items in range(from_id, to_id) are
        fetched (to_id itself excluded, matching range() semantics).
    backsort: '0' to fetch ascending, '1' to fetch in descending id order.
    Returns None; prints an error and bails out on invalid arguments.
    """
    # Validate the CLI-style string arguments before doing any network work.
    if from_id is None or not from_id.isdigit() or to_id is None or not to_id.isdigit() or backsort not in ('0', '1'):
        # Single-argument print(...) form is valid on both Python 2 and 3.
        print("Error. Expected parameters from_hn_id to_hn_id backsort (0 or 1)")
        return
    s = Scraper()
    s.connect()
    item_ids = range(int(from_id), int(to_id))
    if backsort == '1':
        # Fetch newest-first when backsort is requested.
        item_ids = reversed(item_ids)
    # Pass the bound method directly instead of wrapping it in a
    # lambda assigned to a name (PEP 8 E731); behavior is identical.
    s.fetch_items(item_ids, callback=s.save_item)
示例2: fix_ask_items
# 需要导入模块: from scraper import Scraper [as 别名]
# 或者: from scraper.Scraper import connect [as 别名]
def fix_ask_items():
    """Temporary command to fix type:ask items appearing as links with no URL.

    Re-fetches every item stored with subkind 'ask' (newest first) and
    re-saves it so the stored record is corrected.
    """
    s = Scraper()
    s.connect()
    # Only the ids are needed; with_entities keeps the query light.
    items = (db.session.query(Item)
        .with_entities(Item.id)
        .filter(Item.subkind == 'ask')
        # .filter(Item.raw_body == None)
        .order_by(sqlalchemy.desc(Item.id))
        .all()
        )
    # Re-fetch each item and persist it, logging progress per item.
    def save(item_data):
        s.save_item(item_data)
        # Single-argument print(...) form works on both Python 2 and 3.
        print('fixed '+str(item_data['id']))
    item_ids = [item.id for item in items]
    s.fetch_items(item_ids, callback=save)
示例3: test
# 需要导入模块: from scraper import Scraper [as 别名]
# 或者: from scraper.Scraper import connect [as 别名]
def test():
    """Ad-hoc scratch command: connects a Scraper and returns.

    The commented-out lines below are kept as handy snippets for manually
    exercising item fetching and Search token-tree parsing.
    """
    from scraper import Scraper
    scraper = Scraper()
    scraper.connect()
    # item_data = scraper.fetch_item(8549539)
    # print(item_data)
    # print('<<<')
    # item = scraper.save_item(item_data)
    # from search import Search
    # # print(Search.token_tree('hello world'.split()))
    # print(Search.token_tree('| | AA | BB | CC | | | | DD | EE |'.split()))
    # print(Search.token_tree('hello the -world host:cnn.com | a | the author:raquo'.split()))
    # token_tree = Search.token_tree('| | | hello -world - a host:cnn.com | | host:techcrunch.com author:raquo | | |'.split())
    # print(token_tree)
    # token_tree.filter()
    return
示例4: every_1_min
# 需要导入模块: from scraper import Scraper [as 别名]
# 或者: from scraper.Scraper import connect [as 别名]
def every_1_min():
    """Cron entry point, expected to run once per minute.

    Dispatches scraping tasks based on the current UTC minute:
      every 2 min  - refresh scores of the 30 newest existing stories
      every 10 min - refresh newest stories 30-90 and front-page pages 2-3
      every 5 min  - discover brand-new items and refresh the front page
    """
    current_minute = datetime.utcnow().minute
    scraper = Scraper()
    scraper.connect()
    if current_minute % 2 == 0:
        # Refresh scores of the newest existing stories.
        scraper.save_newest_existing_stories(count=30, min_delay=90)
    if current_minute % 10 == 0:
        # Refresh scores of newest stories beyond the first 30...
        scraper.save_newest_existing_stories(start_from=30, count=60, min_delay=5*60)
        # ...and of front-page stories on pages 2 and 3.
        scraper.save_top_stories(front_page=False, start_from=30, count=60, min_delay=5*60)
    if current_minute % 5 == 0:
        # Discover the latest items...
        scraper.save_newest_items()
        # ...and refresh front-page story scores.
        scraper.save_top_stories(front_page=True, count=30, min_delay=3*60)
示例5: init
# 需要导入模块: from scraper import Scraper [as 别名]
# 或者: from scraper.Scraper import connect [as 别名]
def init():
    """Bootstrap command: connect the scraper and pull in the newest items."""
    scraper = Scraper()
    scraper.connect()
    scraper.save_newest_items()