本文整理汇总了Python中selenium.webdriver.PhantomJS.execute_script方法的典型用法代码示例。如果您正苦于以下问题:Python PhantomJS.execute_script方法的具体用法?Python PhantomJS.execute_script怎么用?Python PhantomJS.execute_script使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类selenium.webdriver.PhantomJS的用法示例。
在下文中一共展示了PhantomJS.execute_script方法的5个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: generate_image
# 需要导入模块: from selenium.webdriver import PhantomJS [as 别名]
# 或者: from selenium.webdriver.PhantomJS import execute_script [as 别名]
def generate_image(structure):
    """Render ``structure`` in ``okc.html`` with PhantomJS and return a PNG path.

    The page's ``setText`` JavaScript function is called with the JSON
    encoding of ``structure``. Depending on up to two ``random()`` draws the
    form is hidden, unchecked, or left untouched. The window is then resized
    to whatever the page's ``getSize()`` returns and a screenshot is saved
    into a fresh temporary directory.

    Returns:
        Path of the optimised PNG screenshot.

    Raises:
        subprocess.CalledProcessError: if ``optipng`` exits with an error.
    """
    image_path = os.path.join(mkdtemp(), 'okc.png')
    html_path = os.path.join(
        os.path.dirname(os.path.realpath(__file__)),
        'okc.html',
    )
    url = 'file://{}'.format(html_path)

    # mkstemp() hands back an open descriptor along with the path; only the
    # path is needed for the service log, so close the descriptor right away
    # instead of leaking it.
    log_fd, log_path = mkstemp()
    os.close(log_fd)

    driver = PhantomJS(service_log_path=log_path)
    try:
        driver.set_window_size(2000, 500)
        driver.get(url)
        driver.execute_script('setText({});'.format(json.dumps(structure)))
        if random() > 0.4:
            driver.execute_script('hideForm();')
        elif random() > 0.5:
            driver.execute_script('uncheckForm();')
        driver.set_window_size(*driver.execute_script('return getSize();'))
        driver.save_screenshot(image_path)
    finally:
        # Shut the PhantomJS process down whether or not rendering succeeded.
        driver.quit()

    # twitter's gonna make our beautiful screenshot a jpeg unless we make it
    # think that we're using transparency for a reason, so,,
    img = Image.open(image_path)
    origin = img.getpixel((0, 0))
    new_origin = origin[:3] + (254,)
    img.putpixel((0, 0), new_origin)
    img.save(image_path)

    subprocess.check_call(['optipng', '-quiet', image_path])
    return image_path
示例2: PagesCrawler
# 需要导入模块: from selenium.webdriver import PhantomJS [as 别名]
# 或者: from selenium.webdriver.PhantomJS import execute_script [as 别名]
# Spider that crawls pages and can optionally render them through PhantomJS
# (enabled by the ``phantom`` argument read in ``__init__``).
# NOTE(review): BaseSpider is presumably Scrapy's legacy spider base class;
# confirm against the file's imports.
class PagesCrawler(BaseSpider):
# Spider name.
name = 'pages'
# Link extractor created once at class level, with canonicalization disabled
# and an empty extension deny-list.
link_extractor = RegexpLinkExtractor(canonicalize=False, deny_extensions=[])
# Every entry of IGNORED_EXTENSIONS with a leading dot prepended.
ignored_exts = set(['.' + e for e in IGNORED_EXTENSIONS])
def __init__(self, **kw):
    """Configure the spider from ``DEFAULT_INPUT`` overridden by ``kw``.

    Stores the merged arguments, derives the start URLs, maximum depth and
    the follow / nofollow / discover prefixes, decides whether PhantomJS is
    enabled (reading its timeouts if so), and connects ``closed`` and
    ``crashed`` to the ``spider_closed`` and ``spider_error`` signals.
    """
    args = DEFAULT_INPUT.copy()
    args.update(kw)
    self.args = args

    self.start_urls = to_list(args['start_urls'])
    self.maxdepth = int(args['maxdepth'])
    self.follow_prefixes = to_list(args['follow_prefixes'])
    self.nofollow_prefixes = to_list(args['nofollow_prefixes'])

    # Each discover prefix is registered under both the http and the https
    # scheme, whichever scheme (if any) it was supplied with.
    discover = []
    for prefix in to_list(args['discover_prefixes']):
        bare = prefix.replace('http://', '').replace('https://', '')
        for secure in ('', 's'):
            discover.append(url_to_lru_clean("http%s://%s" % (secure, bare)))
    self.discover_prefixes = discover

    self.resolved_links = {}
    self.user_agent = args['user_agent']

    # Truthy only when a non-empty 'phantom' argument other than "false"
    # (case-insensitive) was supplied.
    self.phantom = 'phantom' in args and args['phantom'] and args['phantom'].lower() != "false"
    if self.phantom:
        default_timeout = PHANTOM['TIMEOUT']
        self.ph_timeout = int(args.get('phantom_timeout', default_timeout))
        default_idle = PHANTOM['IDLE_TIMEOUT']
        self.ph_idle_timeout = int(args.get('phantom_idle_timeout', default_idle))
        default_ajax = PHANTOM['AJAX_TIMEOUT']
        self.ph_ajax_timeout = int(args.get('phantom_ajax_timeout', default_ajax))

    self.errors = 0
    for handler, event in ((self.closed, spider_closed), (self.crashed, spider_error)):
        dispatcher.connect(handler, event)
def start_requests(self):
    """Log the job id and arguments, start PhantomJS if enabled, then yield
    one request per start URL."""
    job_id = self.crawler.settings['JOBID']
    self.log("Starting crawl task - jobid: %s" % job_id, log.INFO)
    self.log("ARGUMENTS : "+str(self.args), log.INFO)

    if self.phantom:
        self.init_phantom()

    for start_url in self.start_urls:
        yield self._request(start_url)
def init_phantom(self):
    """Start the PhantomJS driver and store it in ``self.phantom``.

    The cookie file and service log are both named from
    ``self.prefixfiles``, a path built from the scrapyd logs directory, the
    project, the spider name and the job id.
    """
    self.prefixfiles = os.path.join(
        scrapyd_config().get('logs_dir'),
        HYPHE_PROJECT,
        self.name,
        self.crawler.settings['JOBID']
    )
    self.log("Using path %s for PhantomJS crawl" % self.prefixfiles, log.INFO)

    # Command-line switches for the PhantomJS binary; the proxy switch is
    # only added when PROXY is set and does not start with ':'.
    phantom_args = ['--proxy=%s' % PROXY] if PROXY and not PROXY.startswith(':') else []
    phantom_args += [
        '--cookies-file=%s-phantomjs-cookie.txt' % self.prefixfiles,
        '--ignore-ssl-errors=true',
        '--load-images=false',
    ]

    self.capabilities = capabilities = dict(DesiredCapabilities.PHANTOMJS)
    capabilities['phantomjs.page.settings.userAgent'] = self.user_agent
    capabilities.update({
        'takesScreenshot': False,
        'phantomjs.page.settings.javascriptCanCloseWindows': False,
        'phantomjs.page.settings.javascriptCanOpenWindows': False,
    })

    # From here on self.phantom holds the driver rather than the boolean
    # flag set in __init__.
    self.phantom = PhantomJS(
        executable_path=PHANTOM['PATH'],
        service_args=phantom_args,
        desired_capabilities=capabilities,
        service_log_path="%s-phantomjs.log" % self.prefixfiles
    )
    driver = self.phantom
    driver.implicitly_wait(10)
    driver.set_page_load_timeout(60)
    driver.set_script_timeout(self.ph_timeout + 15)
def crashed(self, spider):
    """Count one more error, then run the shutdown routine with reason "CRASH"."""
    self.errors = self.errors + 1
    self.closed("CRASH")
def closed(self, reason):
    """Report accumulated errors and shut PhantomJS down.

    When a PhantomJS driver was started it is quit, and, provided no error
    was recorded, its cookie and log files are removed. With errors those
    files are left in place.

    Args:
        reason: why the spider is closing; not used by this method.
    """
    if self.errors:
        self.log("%s error%s encountered during the crawl." %
                 (self.errors, 's' if self.errors > 1 else ''), log.ERROR)

    # self.phantom starts out as the boolean flag computed in __init__ and is
    # only replaced by a driver in init_phantom(). If the spider closes
    # before that happens there is no driver to quit and no files to remove.
    quit_driver = getattr(self.phantom, 'quit', None) if self.phantom else None
    if quit_driver is None:
        return
    quit_driver()

    if self.errors:
        return
    for suffix in ("phantomjs-cookie.txt", "phantomjs.log"):
        path = "%s-%s" % (self.prefixfiles, suffix)
        if os.path.exists(path):
            os.remove(path)
def handle_response(self, response):
lru = url_to_lru_clean(response.url)
if self.phantom:
self.phantom.get(response.url)
# Collect whole DOM of the webpage including embedded iframes
with open(os.path.join(PHANTOM["JS_PATH"], "get_iframes_content.js")) as js:
get_bod_w_iframes = js.read()
bod_w_iframes = self.phantom.execute_script(get_bod_w_iframes)
response._set_body(bod_w_iframes.encode('utf-8'))
# Try to scroll and unfold page
self.log("Start PhantomJS scrolling and unfolding", log.INFO)
with open(os.path.join(PHANTOM["JS_PATH"], "scrolldown_and_unfold.js")) as js:
try:
signal.signal(signal.SIGALRM, timeout_alarm)
signal.alarm(self.ph_timeout + 30)
timedout = self.phantom.execute_async_script(
js.read(), self.ph_timeout,
self.ph_idle_timeout, self.ph_ajax_timeout)
#.........这里部分代码省略.........
示例3: get_applications_in_page
# 需要导入模块: from selenium.webdriver import PhantomJS [as 别名]
# 或者: from selenium.webdriver.PhantomJS import execute_script [as 别名]
def get_applications_in_page(self, scroll_script):
    """Scrape the application cards of ``self.url`` with PhantomJS.

    A driver is started with a random user agent and proxy. In proxy-test
    mode only the outgoing IP is printed. Otherwise ``scroll_script`` is
    injected and the page is polled every 5 seconds until the page's
    ``scraperLoadCompleted`` flag has stayed truthy for
    ``self.acknowledgements`` consecutive polls plus a final confirmation.
    Every element with class ``card`` is then passed to
    ``self.extract_application_data``.

    On any exception the call is retried after 10 seconds, up to
    ``self.retries`` attempts counted in ``self.attempt``.

    Args:
        scroll_script: JavaScript source executed in the page to trigger
            loading.

    Returns:
        List of extracted applications; whatever was collected so far
        (possibly empty) once the retries are exhausted.
    """
    applications = []
    driver = None
    try:
        try:
            desired_capabilities = dict(DesiredCapabilities.PHANTOMJS)
            desired_capabilities["phantomjs.page.settings.userAgent"] = useragent.get_random_agent(google_prop.user_agent_list_url)
            service_args = ['--load-images=no', '--proxy=%s' % (proxy.get_random_proxy(google_prop.proxy_list_url))]
            driver = PhantomJS(desired_capabilities=desired_capabilities, service_args=service_args)
            if self.proxy_test:
                driver.get('http://curlmyip.com/')
                ip = driver.find_element_by_xpath('//body//pre').text
                print('ip : [ ' + ip + ' ]')
            else:
                driver.get(self.url)
                driver.execute_script(scroll_script)
                acknowledge = 0
                done = False
                while not done:
                    scroll_finished = driver.execute_script("return scraperLoadCompleted")
                    if not scroll_finished:
                        # Loading resumed: start counting confirmations again.
                        acknowledge = 0
                    elif acknowledge == self.acknowledgements:
                        done = driver.execute_script("return scraperLoadCompleted")
                    else:
                        acknowledge += 1
                    time.sleep(5)  # Wait before retry
                for application in driver.find_elements_by_class_name("card"):
                    applications.append(self.extract_application_data(application))
        finally:
            # Single cleanup point: the driver is quit exactly once on both
            # the success and the failure path, and a failing quit() is
            # handled by the retry logic below instead of escaping it.
            if driver is not None:
                driver.quit()
    except Exception as e:
        if self.attempt < self.retries:
            self.attempt += 1
            time.sleep(10)
            print('retry : url [ ' + self.url + ' ] + | attempt [ ' + str(self.attempt) + ' ] | error [ ' + str(e) + ' ]')
            applications = self.get_applications_in_page(scroll_script)
        else:
            print('fail : url [ ' + self.url + ' ] | error [ ' + str(e) + ' ]')
    return applications
示例4: LegacySensCritique
# 需要导入模块: from selenium.webdriver import PhantomJS [as 别名]
# 或者: from selenium.webdriver.PhantomJS import execute_script [as 别名]
#.........这里部分代码省略.........
def retrieveListByTitle(self, title):
    """Return the first list of the current user titled ``title``, or None.

    Navigates to the user's list collection page and converts the first
    matching list module with ``createSCListFromListModule``.
    """
    self.to(ListCollectionPage(self._currentUsername))
    match = next(
        (module for module in self.page.lists() if module.title() == title),
        None,
    )
    if match is None:
        return None
    return self.createSCListFromListModule(match)
def retrieveMoviesFromList(self, l: SCList):
    """Lazily yield every movie on the page of list ``l``.

    Navigation to the list page only happens once iteration starts.
    """
    self.to(ListPage(l))
    yield from self.page.movies()
def createList(self, l: SCList):
    """Create list ``l`` through the list collection page and return it.

    Fills in the creation form with the list's title, sets the description
    on the resulting list page, and stores the last path segment of the
    resulting URL in ``l._id``.
    """
    self.to(ListCollectionPage(self._currentUsername))
    collection = self.page
    collection.create_list_button().click()
    collection.new_list_title().send_keys(l.title())
    for pick_option in (
        collection.film_type_radio,
        collection.classic_list_radio,
        collection.public_list_radio,
        collection.confirm_create_list_button,
    ):
        pick_option().click()

    # Change the current page as we are now on the list page
    list_page = ListPage(l)
    self.page = list_page
    list_page._driver = self.driver  # TODO: fixme, we don't want to use self.to(page) as it would reload the page
    list_page.set_description(l.description())

    # Everything after the last "/" of the current URL is the list id.
    l._id = self.driver.current_url.rsplit("/", 1)[-1]
    return l
def deleteList(self, l: SCList):
    """Delete list ``l`` from the current user's list collection page.

    Every list module whose URL contains ``l.id()`` is deleted by hovering
    its title, hovering its delete button and clicking it.

    NOTE(review): the match is a substring test, so an id that is a prefix
    of another list's id would presumably match both; confirm the URL format.
    """
    self.to(ListCollectionPage(self._currentUsername))
    for module in self.page.lists():
        if l.id() in module.url():
            # Alert box will be auto-accepted. Needed as Phantomjs cannot handle them
            self.driver.execute_script("window.confirm = function(msg) { return true; };")
            delete_button = module.delete_button()
            delete_action = ActionChains(self.driver)
            delete_action.move_to_element(module.title_node())
            delete_action.move_to_element(delete_button)
            delete_action.click(delete_button)
            delete_action.perform()
def addMovie(self, movie: Movie, l: SCList):
    """Add ``movie`` to list ``l`` using the search box of the list page.

    Returns:
        False when no add button is offered for the first result (the movie
        is already in the list), True once the add button has been clicked.
    """
    self.to(ListPage(l))
    list_page = self.page
    list_page.query_input().send_keys(movie.title())

    button = list_page.add_movie_button(0)
    if button is None:
        return False  # Movie already in list

    if movie.description():
        description_field = list_page.movie_description_field(0)
        description_field.send_keys(movie.description())
    button.click()
    return True
def deleteMovies(self, movies_to_delete, l: SCList):
    """Delete from list ``l`` every movie whose title is in ``movies_to_delete``.

    Args:
        movies_to_delete: mutable list of titles; it is modified in place,
            each title being removed as soon as a matching movie is found on
            the page.
        l: the list to delete from.

    Returns:
        The same ``movies_to_delete`` object, now holding only the titles
        that were not found on the page.
    """
    self.to(ListPage(l))
    for movie in self.page.movies():
        title = movie.title()
        # Movies that are not slated for deletion are simply skipped rather
        # than triggering (and logging) a ValueError from list.remove().
        if title not in movies_to_delete:
            continue
        movies_to_delete.remove(title)
        try:
            movie.delete_button().click()
            movie.confirm_delete_button().click()
            self.page.wait_loading_finished()
        except Exception as e:
            # Best effort: a failed deletion is reported and the loop goes on.
            logging.error("Fail to delete movie " + title + ". " + format(e))
    return movies_to_delete
def to(self, page):
    """Navigate ``page`` with this object's driver and make it the current page."""
    driver = self.driver
    page.to(driver)
    # Only recorded once navigation did not raise.
    self.page = page
def createSCListFromListModule(self, module: ListModule):
    """Build an ``sclist.SCList`` carrying the id, title and description of
    ``module``; the type is left as None."""
    sc_list = sclist.SCList(module.id())
    sc_list.setTitle(module.title())
    sc_list.setDescription(module.description())
    sc_list.setType(None)  # TODO: parse the type
    return sc_list
示例5: get_url_files
# 需要导入模块: from selenium.webdriver import PhantomJS [as 别名]
# 或者: from selenium.webdriver.PhantomJS import execute_script [as 别名]
def get_url_files(retail, invoice_doc_type, invoice_id, invoice_date, invoice_amount):
    """Look an invoice up on the retailer's site and return its file URLs.

    Drives the retailer's invoice form with PhantomJS: selects the document
    type, solves the reCAPTCHA image through ``VisionApi``, fills in the
    invoice fields, submits the form and reads the XML and PDF links from
    the result table.

    Args:
        retail: key into ``RETAIL_INVOICE_URL`` and
            ``RETAIL_INVOICE_DOC_TYPES``.
        invoice_doc_type: key into ``RETAIL_INVOICE_DOC_TYPES[retail]``.
        invoice_id: value written to the ``txtFolio`` field.
        invoice_date: value written to the ``txtFechaEmision`` field.
        invoice_amount: value written to the ``txtMontoTotal`` field.

    Returns:
        ``(xml_url, pdf_url)`` on success. When a handled step fails,
        ``('', '')`` is returned after printing an error and saving
        ``screen.png``.
    """
    retail_invoice_url = RETAIL_INVOICE_URL[retail]
    driver = PhantomJS()

    def report_failure(message):
        # Shared failure path: report, keep a screenshot, return empty URLs.
        print(message)
        driver.save_screenshot('screen.png')
        return '', ''

    def as_text(value):
        # Same text conversion the original str.format() templating applied.
        return u'{}'.format(value)

    try:
        driver.get(retail_invoice_url)

        # 1 Set doc_type 'select'
        try:
            select_doc_type = Select(driver.find_element_by_name('txtTipoDte'))
            value = RETAIL_INVOICE_DOC_TYPES[retail][invoice_doc_type]['value']
            select_doc_type.select_by_value(value)
        except Exception:
            return report_failure('ERROR: set doc_type select as Boleta')
        time.sleep(5)

        # 2 Get recaptcha img url
        try:
            recaptcha_img = driver.find_element_by_id('recaptcha_challenge_image')
            recaptcha_img_url = recaptcha_img.get_attribute('src')
        except Exception:
            return report_failure('ERROR: get recaptcha image url')

        # 3 Solve recaptcha
        recaptcha_value = VisionApi().detect_text_from_url(recaptcha_img_url)
        if recaptcha_value is None:
            return report_failure('ERROR: solving recaptcha image')

        # 4 Fill form
        # The values are handed over as script arguments instead of being
        # spliced into the JavaScript source, so a quote or backslash in any
        # of them (the OCR result in particular) cannot break the script.
        script = u"""
        document.getElementsByName('txtFolio')[0].value = arguments[0];
        document.getElementsByName('txtFechaEmision')[0].value = arguments[1];
        document.getElementsByName('txtMontoTotal')[0].value = arguments[2];
        document.getElementsByName('recaptcha_response_field')[0].value = arguments[3];
        """
        driver.execute_script(
            script,
            as_text(invoice_id),
            as_text(invoice_date),
            as_text(invoice_amount),
            as_text(recaptcha_value),
        )

        # 5 Submit form
        try:
            driver.find_element_by_name('frmDatos').submit()
        except Exception:
            return report_failure('ERROR: submitting form')

        # 6 Get url files
        try:
            xml_a_tag = driver.find_element_by_xpath('//*[@id="Tabla_01"]/tbody/tr[1]/td[2]/p/a[2]')
            pdf_a_tag = driver.find_element_by_xpath('//*[@id="Tabla_01"]/tbody/tr[1]/td[2]/p/a[1]')
            xml_url = xml_a_tag.get_attribute('href')
            pdf_url = pdf_a_tag.get_attribute('href')
        except Exception:
            return report_failure('ERROR: getting url files')

        return xml_url, pdf_url
    finally:
        # 7 Delete driver session: runs on every exit path, so early returns
        # and unexpected exceptions no longer leave a PhantomJS process behind.
        driver.quit()