当前位置: 首页>>代码示例>>Python>>正文


Python PhantomJS.execute_script方法代码示例

本文整理汇总了Python中selenium.webdriver.PhantomJS.execute_script方法的典型用法代码示例。如果您正苦于以下问题:Python PhantomJS.execute_script方法的具体用法?Python PhantomJS.execute_script怎么用?Python PhantomJS.execute_script使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在selenium.webdriver.PhantomJS的用法示例。


在下文中一共展示了PhantomJS.execute_script方法的5个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: generate_image

# 需要导入模块: from selenium.webdriver import PhantomJS [as 别名]
# 或者: from selenium.webdriver.PhantomJS import execute_script [as 别名]
def generate_image(structure):
    """Render ``structure`` through ``okc.html`` and return the PNG's path.

    The page next to this module is opened in PhantomJS, ``structure`` is
    handed to the page's ``setText`` function as JSON, the form is randomly
    hidden or unchecked (or left alone), and a screenshot sized by the
    page's ``getSize`` function is written to a fresh temporary directory.
    The screenshot is then post-processed (see inline comments) and
    optimised with ``optipng``.

    Args:
        structure: JSON-serialisable value passed to the page's ``setText``.

    Returns:
        Path of the generated ``okc.png`` file.

    Raises:
        subprocess.CalledProcessError: if ``optipng`` exits with an error.
    """
    image_path = os.path.join(mkdtemp(), 'okc.png')
    html_path = os.path.join(
        os.path.dirname(os.path.realpath(__file__)),
        'okc.html',
    )
    url = 'file://{}'.format(html_path)

    # mkstemp() returns an already-open descriptor together with the path;
    # only the path is needed, so close the descriptor instead of leaking it.
    log_fd, log_path = mkstemp()
    os.close(log_fd)

    driver = PhantomJS(service_log_path=log_path)
    try:
        driver.set_window_size(2000, 500)
        driver.get(url)
        driver.execute_script('setText({});'.format(json.dumps(structure)))

        if random() > 0.4:
            driver.execute_script('hideForm();')
        elif random() > 0.5:
            driver.execute_script('uncheckForm();')

        driver.set_window_size(*driver.execute_script('return getSize();'))
        driver.save_screenshot(image_path)
    finally:
        # Always shut the PhantomJS process down, even when rendering fails,
        # so repeated calls do not pile up orphaned browser processes.
        driver.quit()

    # twitter's gonna make our beautiful screenshot a jpeg unless we make it
    # think that we're using transparency for a reason, so,,
    img = Image.open(image_path)
    origin = img.getpixel((0, 0))
    new_origin = origin[:3] + (254,)
    img.putpixel((0, 0), new_origin)
    img.save(image_path)

    subprocess.check_call(['optipng', '-quiet', image_path])

    return image_path
开发者ID:tinruufu,项目名称:ablankblank,代码行数:33,代码来源:image.py

示例2: PagesCrawler

# 需要导入模块: from selenium.webdriver import PhantomJS [as 别名]
# 或者: from selenium.webdriver.PhantomJS import execute_script [as 别名]
class PagesCrawler(BaseSpider):

    name = 'pages'
    link_extractor = RegexpLinkExtractor(canonicalize=False, deny_extensions=[])
    ignored_exts = set(['.' + e for e in IGNORED_EXTENSIONS])

    def __init__(self, **kw):
        """Configure the spider from ``DEFAULT_INPUT`` overridden by ``kw``.

        Stores the merged arguments, the crawl limits and prefixes, the
        PhantomJS flag (with its timeouts when enabled), and connects the
        ``closed`` / ``crashed`` handlers to the spider signals.
        """
        args = DEFAULT_INPUT.copy()
        args.update(kw)
        self.args = args

        self.start_urls = to_list(args['start_urls'])
        self.maxdepth = int(args['maxdepth'])
        self.follow_prefixes = to_list(args['follow_prefixes'])
        self.nofollow_prefixes = to_list(args['nofollow_prefixes'])

        # Each discover prefix is registered twice: first as http, then https.
        discover_prefixes = []
        for prefix in to_list(args['discover_prefixes']):
            for https in ['', 's']:
                bare = prefix.replace('http://', '').replace('https://', '')
                discover_prefixes.append(
                    url_to_lru_clean("http%s://%s" % (https, bare)))
        self.discover_prefixes = discover_prefixes

        self.resolved_links = {}
        self.user_agent = args['user_agent']

        # Truthy only when a 'phantom' argument is present, non-empty and not
        # the string "false" (case-insensitive).
        self.phantom = (
            'phantom' in args
            and args['phantom']
            and args['phantom'].lower() != "false"
        )
        if self.phantom:
            timeout_settings = (
                ('ph_timeout', 'phantom_timeout', 'TIMEOUT'),
                ('ph_idle_timeout', 'phantom_idle_timeout', 'IDLE_TIMEOUT'),
                ('ph_ajax_timeout', 'phantom_ajax_timeout', 'AJAX_TIMEOUT'),
            )
            for attribute, arg_name, default_key in timeout_settings:
                setattr(self, attribute,
                        int(args.get(arg_name, PHANTOM[default_key])))

        self.errors = 0
        dispatcher.connect(self.closed, spider_closed)
        dispatcher.connect(self.crashed, spider_error)

    def start_requests(self):
        """Log the job, start PhantomJS if enabled, and yield the start requests.

        Yields one ``self._request(url)`` per entry of ``self.start_urls``.
        """
        job_id = self.crawler.settings['JOBID']
        self.log("Starting crawl task - jobid: %s" % job_id, log.INFO)

        arguments_dump = str(self.args)
        self.log("ARGUMENTS : " + arguments_dump, log.INFO)

        if self.phantom:
            self.init_phantom()

        for start_url in self.start_urls:
            yield self._request(start_url)

    def init_phantom(self):
        """Start the PhantomJS driver for this crawl job.

        Computes the per-job file prefix (used for the cookie and log
        files), builds the PhantomJS command-line arguments and
        capabilities, then replaces ``self.phantom`` with the running driver
        and sets its timeouts.
        """
        self.prefixfiles = os.path.join(
            scrapyd_config().get('logs_dir'),
            HYPHE_PROJECT,
            self.name,
            self.crawler.settings['JOBID']
        )
        self.log("Using path %s for PhantomJS crawl" % self.prefixfiles, log.INFO)

        # The proxy argument is only passed when PROXY is set and does not
        # start with ':'.
        if PROXY and not PROXY.startswith(':'):
            proxy_args = ['--proxy=%s' % PROXY]
        else:
            proxy_args = []
        service_args = proxy_args + [
            '--cookies-file=%s-phantomjs-cookie.txt' % self.prefixfiles,
            '--ignore-ssl-errors=true',
            '--load-images=false',
        ]

        self.capabilities = capabilities = dict(DesiredCapabilities.PHANTOMJS)
        capabilities['phantomjs.page.settings.userAgent'] = self.user_agent
        capabilities['takesScreenshot'] = False
        for setting in ('javascriptCanCloseWindows', 'javascriptCanOpenWindows'):
            capabilities['phantomjs.page.settings.%s' % setting] = False

        log_path = "%s-phantomjs.log" % self.prefixfiles
        self.phantom = PhantomJS(
            executable_path=PHANTOM['PATH'],
            service_args=service_args,
            desired_capabilities=capabilities,
            service_log_path=log_path
        )

        driver = self.phantom
        driver.implicitly_wait(10)
        driver.set_page_load_timeout(60)
        # The script timeout leaves 15 extra seconds over the phantom timeout.
        driver.set_script_timeout(self.ph_timeout + 15)

    def crashed(self, spider):
        """Record one more error and run ``closed`` with reason ``"CRASH"``.

        ``spider`` is accepted for the signal signature but not used here.
        """
        self.errors = self.errors + 1
        self.closed("CRASH")

    def closed(self, reason):
        """Report crawl errors, stop PhantomJS and clean up its files.

        When errors were counted they are logged. If a PhantomJS driver is
        running it is quit; the per-job cookie and log files are removed
        only when the crawl finished without errors, so they stay available
        for debugging otherwise.

        Args:
            reason: why the spider is closing; not used by this method.
        """
        if self.errors:
            self.log("%s error%s encountered during the crawl." %
                (self.errors, 's' if self.errors > 1 else ''), log.ERROR)

        # self.phantom starts out as a plain truthy flag and only becomes a
        # driver object once init_phantom() has run, so make sure there
        # really is a driver before trying to quit it.
        quit_driver = getattr(self.phantom, 'quit', None)
        if quit_driver is None:
            return
        quit_driver()

        if self.errors:
            return
        for f in ["phantomjs-cookie.txt", "phantomjs.log"]:
            fi = "%s-%s" % (self.prefixfiles, f)
            if os.path.exists(fi):
                os.remove(fi)

    def handle_response(self, response):
        lru = url_to_lru_clean(response.url)

        if self.phantom:
            self.phantom.get(response.url)

          # Collect whole DOM of the webpage including embedded iframes
            with open(os.path.join(PHANTOM["JS_PATH"], "get_iframes_content.js")) as js:
                get_bod_w_iframes = js.read()
            bod_w_iframes = self.phantom.execute_script(get_bod_w_iframes)
            response._set_body(bod_w_iframes.encode('utf-8'))

          # Try to scroll and unfold page
            self.log("Start PhantomJS scrolling and unfolding", log.INFO)
            with open(os.path.join(PHANTOM["JS_PATH"], "scrolldown_and_unfold.js")) as js:
                try:
                    signal.signal(signal.SIGALRM, timeout_alarm)
                    signal.alarm(self.ph_timeout + 30)
                    timedout = self.phantom.execute_async_script(
                        js.read(), self.ph_timeout,
                        self.ph_idle_timeout, self.ph_ajax_timeout)
#.........这里部分代码省略.........
开发者ID:SciencesPoDRIS,项目名称:hyphe,代码行数:103,代码来源:pages.py

示例3: get_applications_in_page

# 需要导入模块: from selenium.webdriver import PhantomJS [as 别名]
# 或者: from selenium.webdriver.PhantomJS import execute_script [as 别名]
    def get_applications_in_page(self, scroll_script):
        """Collect the application cards of ``self.url`` using PhantomJS.

        A PhantomJS session is started with a random user agent and proxy.
        In proxy-test mode (``self.proxy_test``) only the outgoing IP is
        printed. Otherwise ``scroll_script`` is injected and the page's
        ``scraperLoadCompleted`` flag is polled every 5 seconds until it has
        been seen set ``self.acknowledgements`` times in a row; then every
        element with class ``card`` is passed to
        ``self.extract_application_data``.

        On any exception the call is retried (after a 10 second pause) as
        long as ``self.attempt`` is below ``self.retries``.

        Args:
            scroll_script: JavaScript source executed after the page loads.

        Returns:
            List of extracted applications. It may be empty or partial when
            every attempt failed.
        """
        applications = []
        try:
            driver = None
            try:
                desired_capabilities = dict(DesiredCapabilities.PHANTOMJS)
                desired_capabilities["phantomjs.page.settings.userAgent"] = useragent.get_random_agent(google_prop.user_agent_list_url)
                service_args = ['--load-images=no', '--proxy=%s' % (proxy.get_random_proxy(google_prop.proxy_list_url))]
                driver = PhantomJS(desired_capabilities=desired_capabilities, service_args=service_args)

                if self.proxy_test:
                    driver.get('http://curlmyip.com/')
                    ip = driver.find_element_by_xpath('//body//pre').text
                    print('ip : [ ' + ip + ' ]')
                else:
                    driver.get(self.url)
                    driver.execute_script(scroll_script)

                    # Require the completion flag to hold over several
                    # consecutive polls before trusting it; any unset poll
                    # resets the count.
                    acknowledge = 0
                    done = False
                    while not done:
                        scroll_finished = driver.execute_script("return scraperLoadCompleted")
                        if scroll_finished:
                            if acknowledge == self.acknowledgements:
                                done = driver.execute_script("return scraperLoadCompleted")
                            else:
                                acknowledge += 1
                        else:
                            acknowledge = 0
                        time.sleep(5)  # Wait before retry

                    for application in driver.find_elements_by_class_name("card"):
                        applications.append(self.extract_application_data(application))
            finally:
                # Single cleanup point: the browser is shut down on success
                # and on failure, before any retry starts a new one.
                if driver is not None:
                    driver.quit()

        except Exception as e:
            if self.attempt < self.retries:
                self.attempt += 1
                time.sleep(10)
                print('retry : url [ ' + self.url + ' ] + | attempt [ ' + str(self.attempt) + ' ] | error [ ' + str(e) + ' ]')
                applications = self.get_applications_in_page(scroll_script)
            else:
                print('fail : url [ ' + self.url + ' ] | error [ ' + str(e) + ' ]')
        return applications
开发者ID:elharo,项目名称:google-play-service,代码行数:67,代码来源:indexer.py

示例4: LegacySensCritique

# 需要导入模块: from selenium.webdriver import PhantomJS [as 别名]
# 或者: from selenium.webdriver.PhantomJS import execute_script [as 别名]

#.........这里部分代码省略.........
    def retrieveListByTitle(self, title):
        """Return the list of the current user whose title equals ``title``.

        Navigates to the user's list collection page and converts the first
        matching list module with ``createSCListFromListModule``. Returns
        ``None`` when no module has that title.
        """
        self.to(ListCollectionPage(self._currentUsername))

        matching_module = next(
            (module for module in self.page.lists() if module.title() == title),
            None,
        )
        if matching_module is None:
            return None
        return self.createSCListFromListModule(matching_module)

    def retrieveMoviesFromList(self, l : SCList):
        """Navigate to the page of list ``l`` and yield each of its movies.

        This is a generator: navigation only happens once iteration starts.
        """
        self.to(ListPage(l))

        yield from self.page.movies()

    def createList(self, l : SCList):
        """Create list ``l`` through the web form and return it with its id set.

        Fills in the creation form on the list collection page (title, film
        type, classic list, public), confirms it, sets the description on
        the resulting list page, and stores the last path segment of the
        resulting URL in ``l._id``.
        """
        self.to(ListCollectionPage(self._currentUsername))

        form = self.page
        form.create_list_button().click()

        form.new_list_title().send_keys(l.title())
        form.film_type_radio().click()
        form.classic_list_radio().click()
        form.public_list_radio().click()

        form.confirm_create_list_button().click()

        # The browser is now showing the new list's page. Build the page
        # object by hand instead of calling self.to(), which would reload it.
        # TODO: fixme, find a cleaner way than assigning _driver directly.
        list_page = ListPage(l)
        self.page = list_page
        list_page._driver = self.driver

        list_page.set_description(l.description())

        # Everything after the last "/" of the current URL is the list id.
        current_url = self.driver.current_url
        l._id = current_url.rsplit("/", 1)[-1]

        return l

    def deleteList(self, l : SCList):
        """Delete list ``l`` from the current user's list collection page.

        Every list module whose URL contains ``l.id()`` gets its delete
        button clicked, after hovering over the module title and then the
        button itself.
        """
        self.to(ListCollectionPage(self._currentUsername))

        for module in self.page.lists():
            if l.id() in module.url():

                # Alert box will be auto-accepted. Needed as Phantomjs cannot handle them
                self.driver.execute_script("window.confirm = function(msg) { return true; };")

                delete_button = module.delete_button()

                delete_action = ActionChains(self.driver)
                delete_action.move_to_element(module.title_node())
                delete_action.move_to_element(delete_button)
                delete_action.click(delete_button)

                delete_action.perform()

    def addMovie(self, movie: Movie, l : SCList):
        """Add ``movie`` to list ``l`` through the list page's search box.

        Returns ``True`` once the add button of the first result has been
        clicked, or ``False`` when that button is missing (the movie is
        already in the list).
        """
        self.to(ListPage(l))

        list_page = self.page
        list_page.query_input().send_keys(movie.title())

        add_button = list_page.add_movie_button(0)
        if add_button is not None:
            if movie.description():
                description_field = list_page.movie_description_field(0)
                description_field.send_keys(movie.description())

            add_button.click()
            return True

        return False  # Movie already in list

    def deleteMovies(self, movies_to_delete, l : SCList):
        """Delete from list ``l`` every movie whose title is in ``movies_to_delete``.

        Args:
            movies_to_delete: mutable collection of movie titles. It is
                modified in place: a title is removed as soon as a movie
                with that title is found on the list page.
            l: the list to delete the movies from.

        Returns:
            ``movies_to_delete`` itself, now holding only the titles that
            were not found on the list page.
        """
        self.to(ListPage(l))

        for movie in self.page.movies():
            # Read the title once: it is needed for the membership test, the
            # removal and the error message, and re-reading it after a failed
            # deletion could itself fail.
            title = movie.title()

            # Movies that are not targeted are simply skipped; they are not
            # failures and must not be logged as such.
            if title not in movies_to_delete:
                continue
            movies_to_delete.remove(title)

            try:
                movie.delete_button().click()
                movie.confirm_delete_button().click()
                self.page.wait_loading_finished()
            except Exception as e:
                logging.error("Fail to delete movie %s. %s", title, e)

        return movies_to_delete

    def to(self, page):
        """Point ``page`` at this object's driver and make it the current page."""
        driver = self.driver
        page.to(driver)
        self.page = page

    def createSCListFromListModule(self, module : ListModule):
        """Build an ``SCList`` from a list module shown on the collection page.

        The id, title and description are copied from ``module``; the type
        is left as ``None``.
        """
        sc_list = sclist.SCList(module.id())

        sc_list.setTitle(module.title())
        sc_list.setDescription(module.description())
        sc_list.setType(None)  # TODO: parse the type

        return sc_list
开发者ID:AltarBeastiful,项目名称:rateItSeven,代码行数:104,代码来源:legacysenscritique.py

示例5: get_url_files

# 需要导入模块: from selenium.webdriver import PhantomJS [as 别名]
# 或者: from selenium.webdriver.PhantomJS import execute_script [as 别名]
def get_url_files(retail, invoice_doc_type, invoice_id, invoice_date, invoice_amount):
    """Look up an invoice on a retailer's site and return its file URLs.

    Opens the retailer's invoice page in PhantomJS, selects the document
    type, reads the captcha image URL and has it solved by ``VisionApi``,
    fills in and submits the form, then reads the XML and PDF links from
    the result table.

    Args:
        retail: key into ``RETAIL_INVOICE_URL`` / ``RETAIL_INVOICE_DOC_TYPES``.
        invoice_doc_type: key of the document type for that retailer.
        invoice_id: value for the ``txtFolio`` field.
        invoice_date: value for the ``txtFechaEmision`` field.
        invoice_amount: value for the ``txtMontoTotal`` field.

    Returns:
        ``(xml_url, pdf_url)`` on success. When a step fails, an error line
        is printed, a screenshot is saved to ``screen.png`` and
        ``('', '')`` is returned.
    """
    retail_invoice_url = RETAIL_INVOICE_URL[retail]

    driver = PhantomJS()

    def fail(step):
        # Report the failed step, keep a screenshot for debugging and hand
        # back the empty result pair.
        print('ERROR: ' + step)
        driver.save_screenshot('screen.png')
        return '', ''

    try:
        driver.get(retail_invoice_url)

        # 1 Set doc_type 'select'
        try:
            select_doc_type = Select(driver.find_element_by_name('txtTipoDte'))
            value = RETAIL_INVOICE_DOC_TYPES[retail][invoice_doc_type]['value']
            select_doc_type.select_by_value(value)
        except Exception:
            return fail('set doc_type select as Boleta')

        time.sleep(5)

        # 2 Get recaptcha img url
        try:
            recaptcha_img = driver.find_element_by_id('recaptcha_challenge_image')
            recaptcha_img_url = recaptcha_img.get_attribute('src')
        except Exception:
            return fail('get recaptcha image url')

        # 3 Solve recaptcha
        v = VisionApi()
        recaptcha_value = v.detect_text_from_url(recaptcha_img_url)

        if recaptcha_value is None:
            return fail('solving recaptcha image')

        # 4 Fill form
        # The values are passed as script arguments rather than spliced into
        # the JavaScript source, so a quote or backslash in any of them
        # (the captcha text comes from OCR) cannot break or alter the script.
        script = u"""
            document.getElementsByName('txtFolio')[0].value = arguments[0];
            document.getElementsByName('txtFechaEmision')[0].value = arguments[1];
            document.getElementsByName('txtMontoTotal')[0].value = arguments[2];
            document.getElementsByName('recaptcha_response_field')[0].value = arguments[3];
        """
        driver.execute_script(
            script,
            u'{}'.format(invoice_id),
            u'{}'.format(invoice_date),
            u'{}'.format(invoice_amount),
            u'{}'.format(recaptcha_value),
        )

        # 5 Submit form
        try:
            driver.find_element_by_name('frmDatos').submit()
        except Exception:
            return fail('submitting form')

        # 6 Get url files
        try:
            xml_a_tag = driver.find_element_by_xpath('//*[@id="Tabla_01"]/tbody/tr[1]/td[2]/p/a[2]')
            pdf_a_tag = driver.find_element_by_xpath('//*[@id="Tabla_01"]/tbody/tr[1]/td[2]/p/a[1]')

            xml_url = xml_a_tag.get_attribute('href')
            pdf_url = pdf_a_tag.get_attribute('href')
        except Exception:
            return fail('getting url files')

        return xml_url, pdf_url
    finally:
        # 7 Delete driver session, on success and on every failure path
        driver.quit()
开发者ID:msolorzano,项目名称:ret-bot,代码行数:79,代码来源:scraper.py


注:本文中的selenium.webdriver.PhantomJS.execute_script方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。