本文整理汇总了Python中selenium.webdriver.PhantomJS.set_script_timeout方法的典型用法代码示例。如果您正苦于以下问题：Python PhantomJS.set_script_timeout方法的具体用法？Python PhantomJS.set_script_timeout怎么用？Python PhantomJS.set_script_timeout使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类selenium.webdriver.PhantomJS的用法示例。
在下文中一共展示了PhantomJS.set_script_timeout方法的1个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: PagesCrawler
# 需要导入模块: from selenium.webdriver import PhantomJS [as 别名]
# 或者: from selenium.webdriver.PhantomJS import set_script_timeout [as 别名]
class PagesCrawler(BaseSpider):
name = 'pages'
link_extractor = RegexpLinkExtractor(canonicalize=False, deny_extensions=[])
ignored_exts = set(['.' + e for e in IGNORED_EXTENSIONS])
def __init__(self, **kw):
    """Configure the spider from DEFAULT_INPUT overridden by ``kw``.

    The merged options are kept on ``self.args``.  URL and prefix options
    are normalised through ``to_list``; every discover prefix is stored
    twice, once with an ``http://`` scheme and once with ``https://``.
    PhantomJS timeouts are only read when the ``phantom`` option is set
    to something other than a falsy value or the string "false".
    """
    settings = DEFAULT_INPUT.copy()
    settings.update(kw)
    self.args = settings

    self.start_urls = to_list(settings['start_urls'])
    self.maxdepth = int(settings['maxdepth'])
    self.follow_prefixes = to_list(settings['follow_prefixes'])
    self.nofollow_prefixes = to_list(settings['nofollow_prefixes'])

    # Strip any scheme from each prefix, then register it as http then https.
    discover = []
    for prefix in to_list(settings['discover_prefixes']):
        bare = prefix.replace('http://', '').replace('https://', '')
        for secure in ['', 's']:
            discover.append(url_to_lru_clean("http%s://%s" % (secure, bare)))
    self.discover_prefixes = discover

    self.resolved_links = {}
    self.user_agent = settings['user_agent']

    # Until init_phantom() runs, self.phantom is only an on/off flag.
    if 'phantom' not in settings:
        self.phantom = False
    else:
        requested = settings['phantom']
        self.phantom = requested and requested.lower() != "false"

    if self.phantom:
        self.ph_timeout = int(
            settings.get('phantom_timeout', PHANTOM['TIMEOUT']))
        self.ph_idle_timeout = int(
            settings.get('phantom_idle_timeout', PHANTOM['IDLE_TIMEOUT']))
        self.ph_ajax_timeout = int(
            settings.get('phantom_ajax_timeout', PHANTOM['AJAX_TIMEOUT']))

    self.errors = 0
    dispatcher.connect(self.closed, spider_closed)
    dispatcher.connect(self.crashed, spider_error)
def start_requests(self):
    """Log the job context, start PhantomJS if enabled, and yield the seeds.

    Yields one request, built by ``self._request``, per entry of
    ``self.start_urls``.
    """
    job_id = self.crawler.settings['JOBID']
    self.log("Starting crawl task - jobid: %s" % job_id, log.INFO)
    self.log("ARGUMENTS : %s" % (self.args,), log.INFO)

    if self.phantom:
        self.init_phantom()

    for seed in self.start_urls:
        yield self._request(seed)
def init_phantom(self):
    """Start the PhantomJS driver and store it on ``self.phantom``.

    Also sets ``self.prefixfiles`` (the path prefix used for the PhantomJS
    cookie and log files of this job) and ``self.capabilities`` (the
    desired capabilities handed to the driver).
    """
    logs_dir = scrapyd_config().get('logs_dir')
    job_id = self.crawler.settings['JOBID']
    self.prefixfiles = os.path.join(logs_dir, HYPHE_PROJECT, self.name, job_id)
    self.log("Using path %s for PhantomJS crawl" % self.prefixfiles, log.INFO)

    # Command-line switches for the phantomjs process.
    service_args = []
    if PROXY and not PROXY.startswith(':'):
        service_args.append('--proxy=%s' % PROXY)
    service_args.extend([
        '--cookies-file=%s-phantomjs-cookie.txt' % self.prefixfiles,
        '--ignore-ssl-errors=true',
        '--load-images=false',
    ])

    # Start from selenium's PhantomJS defaults and override page settings.
    caps = dict(DesiredCapabilities.PHANTOMJS)
    caps['phantomjs.page.settings.userAgent'] = self.user_agent
    caps['takesScreenshot'] = False
    caps['phantomjs.page.settings.javascriptCanCloseWindows'] = False
    caps['phantomjs.page.settings.javascriptCanOpenWindows'] = False
    self.capabilities = caps

    # From here on self.phantom holds the driver instead of the on/off flag.
    driver = PhantomJS(
        executable_path=PHANTOM['PATH'],
        service_args=service_args,
        desired_capabilities=self.capabilities,
        service_log_path="%s-phantomjs.log" % self.prefixfiles
    )
    self.phantom = driver
    driver.implicitly_wait(10)
    driver.set_page_load_timeout(60)
    # The script timeout is the configured phantom timeout plus a 15 s margin.
    driver.set_script_timeout(self.ph_timeout + 15)
def crashed(self, spider):
    """Count one more error and run ``closed`` with the reason "CRASH".

    The ``spider`` argument is accepted but not used.
    """
    self.errors = self.errors + 1
    self.closed("CRASH")
def closed(self, reason):
    """Report crawl errors and release PhantomJS resources.

    Logs the number of errors counted in ``self.errors``, if any.  When
    PhantomJS is enabled, quits the driver and, if the crawl had no
    errors, removes the job's PhantomJS cookie and log files.

    ``reason`` is accepted but not used.
    """
    if self.errors:
        self.log("%s error%s encountered during the crawl." %
                 (self.errors, 's' if self.errors > 1 else ''), log.ERROR)

    if not self.phantom:
        return

    # self.phantom is only a truthy flag until init_phantom() replaces it
    # with the driver; if we get here before that happened there is
    # nothing to quit, and calling .quit() on the flag would raise.
    quit_driver = getattr(self.phantom, 'quit', None)
    if callable(quit_driver):
        quit_driver()

    # Keep the cookie and log files around when something went wrong.
    if self.errors:
        return

    # prefixfiles is only set by init_phantom(); without it no file exists.
    prefix = getattr(self, 'prefixfiles', None)
    if prefix is None:
        return

    for suffix in ("phantomjs-cookie.txt", "phantomjs.log"):
        path = "%s-%s" % (prefix, suffix)
        if os.path.exists(path):
            os.remove(path)
def handle_response(self, response):
lru = url_to_lru_clean(response.url)
if self.phantom:
self.phantom.get(response.url)
# Collect whole DOM of the webpage including embedded iframes
with open(os.path.join(PHANTOM["JS_PATH"], "get_iframes_content.js")) as js:
get_bod_w_iframes = js.read()
bod_w_iframes = self.phantom.execute_script(get_bod_w_iframes)
response._set_body(bod_w_iframes.encode('utf-8'))
# Try to scroll and unfold page
self.log("Start PhantomJS scrolling and unfolding", log.INFO)
with open(os.path.join(PHANTOM["JS_PATH"], "scrolldown_and_unfold.js")) as js:
try:
signal.signal(signal.SIGALRM, timeout_alarm)
signal.alarm(self.ph_timeout + 30)
timedout = self.phantom.execute_async_script(
js.read(), self.ph_timeout,
self.ph_idle_timeout, self.ph_ajax_timeout)
#.........这里部分代码省略.........