

Python Queue.empty Method Code Examples

This article collects typical usage examples of Python's asyncio.Queue.empty method. If you are wondering what exactly Queue.empty does, how to call it, or what real code using it looks like, the curated examples below should help. You can also explore further usage examples of asyncio.Queue, the class this method belongs to.


Two code examples of the Queue.empty method are shown below, ordered by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code examples.
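
Before the two project examples, here is a minimal, self-contained sketch of the pattern both of them rely on: put work items on an asyncio.Queue and then drain it with a "while not queue.empty()" loop. The drain coroutine and the sample URLs below are illustrative only and are not taken from either project.

import asyncio

async def drain(queue):
    # Queue.empty() returns True when the queue currently holds no items;
    # it never blocks, so it is a cheap loop condition inside a coroutine.
    while not queue.empty():
        item = await queue.get()
        print('processing', item)

async def main():
    queue = asyncio.Queue()
    for url in ('http://example.com/', 'http://example.com/about'):
        await queue.put(url)
    await drain(queue)

asyncio.run(main())

Note that empty() only reflects the state of the queue at the moment of the call: if other tasks may still be putting items, those producers must finish before an empty queue really means the work is done.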

Example 1: Cloner

# Required import: from asyncio import Queue [as alias]
# Or: from asyncio.Queue import empty [as alias]
import asyncio
import os
import re
import sys

import aiohttp
import yarl
from asyncio import Queue
from bs4 import BeautifulSoup


class Cloner(object):
    def __init__(self, root):
        self.visited_urls = []
        self.root = self.add_scheme(root)
        if len(self.root.host) < 4:
            sys.exit('invalid target {}'.format(self.root.host))
        self.target_path = '/opt/snare/pages/{}'.format(self.root.host)

        if not os.path.exists(self.target_path):
            os.mkdir(self.target_path)

        self.new_urls = Queue()

    @staticmethod
    def add_scheme(url):
        if yarl.URL(url).scheme:
            new_url = yarl.URL(url)
        else:
            new_url = yarl.URL('http://' + url)
        return new_url

    @asyncio.coroutine
    def process_link(self, url, check_host=False):
        url = yarl.URL(url)
        if check_host:
            if (url.host != self.root.host or url.fragment
                            or url in self.visited_urls):
                return None
        if not url.is_absolute():
            url = self.root.join(url)

        yield from self.new_urls.put(url)
        return url.relative().human_repr()

    @asyncio.coroutine
    def replace_links(self, data):
        soup = BeautifulSoup(data, 'html.parser')

        # find all relative links
        for link in soup.findAll(href=True):
            res = yield from self.process_link(link['href'], check_host=True)
            if res is not None:
                link['href'] = res

        # find all images and scripts
        for elem in soup.findAll(src=True):
            res = yield from self.process_link(elem['src'])
            if res is not None:
                elem['src'] = res

        # find all action elements
        for act_link in soup.findAll(action=True):
            res = yield from self.process_link(act_link['action'])
            if res is not None:
                act_link['action'] = res

        # prevent redirects
        for redir in soup.findAll(True, attrs={'name': re.compile('redirect.*')}):
            redir['value'] = yarl.URL(redir['value']).relative().human_repr()

        return soup

    @asyncio.coroutine
    def get_body(self):
        while not self.new_urls.empty():
            current_url = yield from self.new_urls.get()
            if current_url in self.visited_urls:
                continue
            self.visited_urls.append(current_url)
            if current_url.name:
                file_name = current_url.name
            elif current_url.raw_path != '/':
                file_name = current_url.path.rsplit('/')[1]
            else:
                file_name = 'index.html'
            file_path = os.path.dirname(current_url.path)
            if file_path == '/':
                file_path = self.target_path
            else:
                file_path = os.path.join(self.target_path, file_path[1:])

            print('path: ', file_path, 'name: ', file_name)

            if file_path and not os.path.exists(file_path):
                os.makedirs(file_path)

            data = None
            try:
                with aiohttp.Timeout(10.0):
                    with aiohttp.ClientSession() as session:
                        response = yield from session.get(current_url)
                        data = yield from response.read()
            except aiohttp.ClientError as client_error:
                print(client_error)
            else:
                response.release()
                session.close()
            if data is not None:
                if re.match(re.compile(r'.*\.(html|php)'), file_name):
                    soup = yield from self.replace_links(data)
#......... part of the code omitted here .........
Developer ID: mushorg, Project: snare, Lines of code: 103, Source file: clone.py

Example 2: Cloner

# Required import: from asyncio import Queue [as alias]
# Or: from asyncio.Queue import empty [as alias]

#......... part of the code omitted here .........
        for elem in soup.findAll(src=True):
            res = await self.process_link(elem['src'], level)
            if res is not None:
                elem['src'] = res

        # find all action elements
        for act_link in soup.findAll(action=True):
            res = await self.process_link(act_link['action'], level)
            if res is not None:
                act_link['action'] = res

        # prevent redirects
        for redir in soup.findAll(True, attrs={'name': re.compile('redirect.*')}):
            if redir['value'] != "":
                redir['value'] = yarl.URL(redir['value']).relative().human_repr()

        return soup

    def _make_filename(self, url):
        host = url.host
        if url.is_absolute():
            file_name = url.relative().human_repr()
        else:
            file_name = url.human_repr()
        if not file_name.startswith('/'):
            file_name = "/" + file_name

        if file_name == '/' or file_name == "":
            if host == self.root.host or (self.moved_root is not None and self.moved_root.host == host):
                file_name = '/index.html'
            else:
                file_name = host
        m = hashlib.md5()
        m.update(file_name.encode('utf-8'))
        hash_name = m.hexdigest()
        return file_name, hash_name

    async def get_body(self, session):
        while not self.new_urls.empty():
            current_url, level = await self.new_urls.get()
            if current_url.human_repr() in self.visited_urls:
                continue
            self.visited_urls.append(current_url.human_repr())
            file_name, hash_name = self._make_filename(current_url)
            print('name: ', file_name)
            self.meta[file_name] = {}

            data = None
            content_type = None
            try:
                response = await session.get(current_url, headers={'Accept': 'text/html'}, timeout=10.0)
                content_type = response.content_type
                data = await response.read()

            except (aiohttp.ClientError, asyncio.TimeoutError) as client_error:
                self.logger.error(client_error)
            else:
                await response.release()
            if data is not None:
                self.meta[file_name]['hash'] = hash_name
                self.meta[file_name]['content_type'] = content_type
                if content_type == 'text/html':
                    soup = await self.replace_links(data, level)
                    data = str(soup).encode()
                with open(os.path.join(self.target_path, hash_name), 'wb') as index_fh:
                    index_fh.write(data)
                if content_type == 'text/css':
                    css = cssutils.parseString(data, validate=self.css_validate)
                    for carved_url in cssutils.getUrls(css):
                        if carved_url.startswith('data'):
                            continue
                        carved_url = yarl.URL(carved_url)
                        if not carved_url.is_absolute():
                            carved_url = self.root.join(carved_url)
                        if carved_url.human_repr() not in self.visited_urls:
                            await self.new_urls.put((carved_url, level + 1))

    async def get_root_host(self):
        try:
            async with aiohttp.ClientSession() as session:
                resp = await session.get(self.root)
                if resp.host != self.root.host:
                    self.moved_root = resp.url
                resp.close()
        except aiohttp.ClientError as err:
            self.logger.error("Can\'t connect to target host: %s", err)
            exit(-1)

    async def run(self):
        session = aiohttp.ClientSession()
        try:
            await self.new_urls.put((self.root, 0))
            await self.new_urls.put((self.error_page, 0))
            await self.get_body(session)
        except KeyboardInterrupt:
            raise
        finally:
            with open(os.path.join(self.target_path, 'meta.json'), 'w') as mj:
                json.dump(self.meta, mj)
            await session.close()
Developer ID: afeena, Project: snare, Lines of code: 104, Source file: cloner.py


Note: The asyncio.Queue.empty method examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets were selected from open-source projects contributed by various developers; copyright of the source code remains with the original authors, and distribution or use of the code should follow the corresponding project's license. Please do not reproduce this article without permission.