当前位置: 首页>>代码示例>>Python>>正文


Python Ghost.load_cookies方法代码示例

本文整理汇总了Python中ghost.Ghost.load_cookies方法的典型用法代码示例。如果您正苦于以下问题:Python Ghost.load_cookies方法的具体用法?Python Ghost.load_cookies怎么用?Python Ghost.load_cookies使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在ghost.Ghost的用法示例。


在下文中一共展示了Ghost.load_cookies方法的2个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: WxGhost

# 需要导入模块: from ghost import Ghost [as 别名]
# 或者: from ghost.Ghost import load_cookies [as 别名]
class WxGhost(object):
	def __init__(self):
		self.ghost = Ghost(log_level=logging.CRITICAL).start()
		self.ghost.download_images = False
		try:
		    self.ghost.load_cookies("cookie.txt")
		    print 'load cookie'
		except IOError:
			print 'load cookie error'
		self.ghost.show()

	def handle_frequency(self):
		if u"您的访问过于频繁" in self.ghost.content:
			print 'frequency'
			self.ghost.show()
			self.ghost.capture_to("seccode.png", selector="#seccodeImage")
			self.ghost.wait_for_text(u'以下内容来自微信公众号', timeout=1800)  # 输入验证码
			self.ghost.save_cookies("cookie.txt")

	def open(self, url):
		try:
			self.ghost.open(url)
			self.handle_frequency()
		except TimeoutError:
			print 'timeout when open'
			return False
		return True

	def evaluate(self, js, expect_loading=True):
		try:
			self.ghost.evaluate(js, expect_loading=expect_loading)
			self.handle_frequency()
		except TimeoutError:
			return False
		return True

	def sleep(self, value):
		self.ghost.sleep(value)

	def get_lxml(self):
		return lxml.html.fromstring(self.ghost.content)
开发者ID:surpassly,项目名称:wxghost,代码行数:43,代码来源:wxghost.py

示例2: __init__

# 需要导入模块: from ghost import Ghost [as 别名]
# 或者: from ghost.Ghost import load_cookies [as 别名]
class Crawler:
    def __init__(self,
                 location,
                 cookie_file=None,
                 mainwindow=None):
        """Set up a ghost.py browser session for crawling.

        location    -- start URL; any query string is stripped.
        cookie_file -- optional path to a saved cookie jar. Falsy values
                       (None or '') skip cookie loading entirely.
        mainwindow  -- optional GUI window used by display().
        """
        self.mainwindow = mainwindow
        self.ghost = Ghost().start()
        self.ghost._confirm_expected = True  # presumably pre-arms JS confirm() handling — TODO confirm against ghost.py
        self.ghost.wait_timeout = page_timeout
        self.ghost.download_images = False  # images are dead weight when crawling
        # BUG FIX: cookie_file defaults to None, but the old guard only
        # checked `!= ''`, so the default path attempted load_cookies(None).
        # A truthiness check skips both None and ''.
        if cookie_file:
            try:
                self.ghost.load_cookies(cookie_file)
            except IOError:
                self.display("cookie: IOError", '<font color=red>$</font>', 'url')
        self.max_depth = 0
        self.url_queue = []
        self.location = location.split('?')[0]
        # dvwa_security(self.__ghost, 'low')

    def go(self):
        """Entry point: open the start page (retrying on timeouts), record the
        post-redirect URL and host, crawl links via crawler_page, then run a
        Test against every queued URL and exit."""
        self.display("...crawling", "<b>$<b>", 'url')
        times = 0  # consecutive timeout counter
        while True:
            try:
                self.ghost.open(self.location)
                # Re-read the address bar so redirects are reflected in self.location.
                current_url, resources = self.ghost.evaluate('window.location.href')  # redirect
                self.location = str(current_url)
                r = urlparse.urlparse(self.location)
                self.host = r.netloc  # slash(r.scheme + "://" + r.netloc)
                self.display(self.location,  "<a href='$'>$<a>", 'url')
                self.url_queue.append(self.location)
                break
            except TimeoutError:
                times = times + 1
            # Give up after five consecutive timeouts.
            # NOTE(review): if self.exit() returns instead of terminating,
            # this loop keeps retrying forever — verify exit() semantics.
            if times == 5:
                self.display("TimeoutError", '<font color=red>$</font>', 'url')
                self.exit()
        self.crawler_page(self.location, 0)  # url, depth
        # Test
        for url in self.url_queue:
            t = Test(self.ghost, url, self.mainwindow)
            t.test()
        self.exit()

    def crawler_page(self, location, depth):
        """Open *location*, queue every same-host link not yet seen, and
        recurse into each newly discovered link at depth + 1.  Stops once
        *depth* reaches self.max_depth or the page load times out."""
        # Depth guard: stop descending at the configured limit.
        if depth >= self.max_depth:
            return
        try:
            self.ghost.open(location)
            # Re-read the address bar so redirects are reflected.
            current_url, resources = self.ghost.evaluate('window.location.href')  # redirect
            location = str(current_url)
        except TimeoutError:
            return
        soup = BeautifulSoup(str(self.ghost.content), from_encoding='utf-8')
        discovered = []
        for anchor in soup.find_all('a'):
            candidate = self.convert_a(location, anchor)
            if not candidate:
                continue
            parsed = urlparse.urlparse(candidate)
            # Only follow links on the same host that we haven't queued yet.
            if parsed.netloc == self.host and candidate not in self.url_queue:
                self.display(candidate,  "<a href='$'>$<a>", 'url')
                self.url_queue.append(candidate)
                discovered.append(candidate)
        for child in discovered:
            self.crawler_page(child, depth + 1)

    def display(self, content, format=None, widget=None):
        print content
        if self.mainwindow:
            self.mainwindow.display(content, format, widget)

    def convert_a(self, location, a):
        if str(type(a)) == "<class 'bs4.element.Tag'>":
            try:
                href = a['href']
            except KeyError:
                return None
        elif str(type(a)) == "<type 'str'>":
            href = a
        else:
            return None  # <type 'unicode'>
        href = href.strip()
        # useless
        if href.lower() in ['javascript:;', "javacript:void(0);", "javascript:void(0)", "javascript:void(0);",
                           'return false;', '/', "http://www", ""]:
            return None
        for s in ['mailto:', '#', 'javascript:']:
            if href.lower().startswith(s):
                return None
        # normal
        if href.startswith('http://') or href.startswith('https://'):
            return href
        # path
        if href.startswith("//"):
            href = "http:" + href  # //www.baidu.com/s
        elif href.startswith("/"):
            href = self.host + href[1:]
#.........这里部分代码省略.........
开发者ID:surpassly,项目名称:front,代码行数:103,代码来源:crawler.py


注:本文中的ghost.Ghost.load_cookies方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。