This article collects typical usage examples of the scraper.Scraper method in Python. If you have been wondering how scraper.Scraper works, how to call it, or what real code that uses it looks like, the curated examples here may help. You can also explore further usage examples from the scraper module where the method is defined.
Five code examples of the scraper.Scraper method are shown below, sorted by popularity by default.
Example 1: __init__
# Required import: import scraper [as alias]
# Or: from scraper import Scraper [as alias]
def __init__(self, driver=None, driver_path=None):
    self.scraper = Scraper(driver_name=driver, driver_path=driver_path)
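A minimal, self-contained sketch of how this constructor might be exercised; the Scraper stub and the BrowserWrapper name are stand-ins for illustration (the real scraper module is not shown on this page):

class Scraper(object):
    # Stand-in for the real scraper.Scraper (hypothetical).
    def __init__(self, driver_name=None, driver_path=None):
        self.driver_name = driver_name
        self.driver_path = driver_path

class BrowserWrapper(object):
    # Mirrors Example 1: the wrapper owns a Scraper configured per driver.
    def __init__(self, driver=None, driver_path=None):
        self.scraper = Scraper(driver_name=driver, driver_path=driver_path)

wrapper = BrowserWrapper(driver="chrome", driver_path="/usr/local/bin/chromedriver")
print(wrapper.scraper.driver_name)  # -> chrome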
Example 2: scrape_league
# Required import: import scraper [as alias]
# Or: from scraper import Scraper [as alias]
def scrape_league(league_id=None):
    details_only = flask.request.args.get('detailsOnly')
    details_list = flask.request.args.get('details', '')
    start_date = flask.request.args.get('startDate')
    end_date = flask.request.args.get('endDate')
    url_rule = flask.request.url_rule
    if url_rule.rule.startswith('/scrape-all'):
        # /scrape-all: scrape every known league with all details.
        league_list = constants.LEAGUE_SPORT_MAP.keys()
        details_list = True
    else:
        if league_id in constants.LEAGUE_SPORT_MAP:
            league_list = [league_id]
        else:
            return '404 Not Found', 404
        details_list = [x.lower() for x in details_list.split(',')]
    for league_key in league_list:
        scraper_object = scraper.Scraper(league_key)
        try:
            if not details_only:
                scraper_object.fill_game_list(start_date, end_date)
            if details_list is True or 'odds' in details_list:
                scraper_object.fill_game_odds()
            if details_list is True or 'pitchers' in details_list:
                scraper_object.fill_pitchers()
        except DeadlineExceededError as e:
            # One of the sites is probably temporarily down.
            logging.exception(e)
    return 'Success'
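scrape_league reads everything from flask.request and branches on flask.request.url_rule, so it is evidently registered as a Flask view under both a per-league rule and a /scrape-all rule (the DeadlineExceededError handler suggests an App Engine deployment). A minimal sketch of that wiring, with the route paths being assumptions chosen to match the startswith('/scrape-all') check above:

import flask

app = flask.Flask(__name__)

# Both rules dispatch to the same view; scrape_league inspects
# flask.request.url_rule to tell them apart.
app.add_url_rule('/scrape/<league_id>', 'scrape_league', scrape_league)
app.add_url_rule('/scrape-all', 'scrape_all', scrape_league)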
Example 3: main
# Required import: import scraper [as alias]
# Or: from scraper import Scraper [as alias]
def main(cls):
    parser = ArgumentParser(description='Scrapes "Who Wants to be Hired?" HN Posts.')
    parser.add_argument("-s", "--source", help="The source url to scrape.")
    parser.add_argument("-t", "--technologies", nargs="*", help="The technology(ies) to filter on.")
    parser.add_argument("-l", "--location", nargs="*", help="The location(s) to filter on.")
    parser.add_argument("-rel", "--relocate", action='store_true', help="Applies a filter of 'willing to relocate' = Yes.")
    parser.add_argument("-rem", "--remote", action='store_true', help="Applies a filter of 'willing to work remotely' = Yes.")
    args = parser.parse_args()
    filters = {}
    if args.technologies is not None:
        filters[Candidate.META_TECHNOLOGIES] = args.technologies
    if args.location is not None:
        filters[Candidate.META_LOCATION] = args.location
    if args.relocate:
        filters[Candidate.META_RELOCATE] = "Yes"
    if args.remote:
        filters[Candidate.META_REMOTE] = "Yes"
    url = args.source
    url = cls.getDefaultSourceUrl() if url is None else url
    print "\nParsing Source: " + url
    html = Scraper(url).get()
    data = HackernewsParser(html, filters)
    title = data.getTitle()
    candidates = data.getCandidates()
    print "\n" + json.dumps(candidates, indent=4, sort_keys=True)
    print "\nParsed Source: " + title
    print "\nTotal Matches Found: " + str(len(candidates))
Example 4: getDefaultSourceUrl
# Required import: import scraper [as alias]
# Or: from scraper import Scraper [as alias]
def getDefaultSourceUrl():
    month = datetime.now().strftime("%B %Y")
    url = "https://www.google.com/search" \
        + "?as_qdr=all&complete=0" \
        + "&q=hackernews%20who%20wants%20to%20be%20hired%20" + month
    html = Scraper(url).get()
    return GoogleParser(html).getFirstResultUrl()
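getDefaultSourceUrl builds a Google query for the current month's thread and hands the result page to GoogleParser. A small Python 3 variation of just the URL construction, using urllib.parse.quote rather than hand-written %20 escapes (a sketch, not the original code):

from datetime import datetime
from urllib.parse import quote

month = datetime.now().strftime("%B %Y")  # e.g. "March 2016"
query = "hackernews who wants to be hired " + month
url = "https://www.google.com/search?as_qdr=all&complete=0&q=" + quote(query)
print(url)

Unlike the original, quote() also escapes the space inside month, which the example above appends to the URL unencoded.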
Example 5: run_once
# Required import: import scraper [as alias]
# Or: from scraper import Scraper [as alias]
def run_once(self):
    for location, office_id in LOCATIONS.items():
        scraper = Scraper()
        self.logger.log("Checking appointment for %s" % location)
        appt = scraper.i_want_an_appointment_at(office_id)
        if appt:
            self.logger.log("Appointment retrieved from web page")
            if not self.db.appt_exists(location, appt):
                self.logger.log("New appointment found. Added to DB.")
                msg = "*{}*\n{}".format(location, appt)
                self.bot.post_message(msg)
            else:
                self.logger.log("Appointment already exists in DB.")
        else:
            self.logger.log("Invalid appointment object returned")