当前位置: 首页>>代码示例>>Python>>正文


Python logging.__函数代码示例

本文整理汇总了Python中wpull.backport.logging.__函数的典型用法代码示例。如果您正苦于以下问题:Python __函数的具体用法?Python __怎么用?Python __使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。


在下文中一共展示了__函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: snapshot

    def snapshot(self, remote, html_path=None, render_path=None):
        '''Save the current page as HTML and/or as a rendered file.

        Generator-based coroutine: remote calls are yielded and the page
        content is handed back through ``tornado.gen.Return``.

        Args:
            remote: Object whose ``eval``/``call`` results are yielded.
            html_path: If truthy, file to which the page content is written
                encoded as UTF-8. Missing parent directories are created.
            render_path: If truthy, path passed to the remote ``page.render``.
        '''
        page_content = yield remote.eval('page.content')
        page_url = yield remote.eval('page.url')

        if html_path:
            _logger.debug(__('Saving snapshot to {0}.', html_path))
            parent_dir = os.path.abspath(os.path.dirname(html_path))

            if not os.path.exists(parent_dir):
                os.makedirs(parent_dir)

            with open(html_path, 'wb') as html_file:
                html_bytes = page_content.encode('utf-8')
                html_file.write(html_bytes)

            if self._warc_recorder:
                self._add_warc_snapshot(html_path, 'text/html', page_url)

        if render_path:
            _logger.debug(__('Saving snapshot to {0}.', render_path))
            yield remote.call('page.render', render_path)

            if self._warc_recorder:
                self._add_warc_snapshot(
                    render_path, 'application/pdf', page_url
                )

        raise tornado.gen.Return(page_content)
开发者ID:mback2k,项目名称:wpull,代码行数:26,代码来源:processor.py

示例2: process

    def process(self, session: AppSession):
        '''Discover plugins, attach them to the session and activate them.

        A ``PluginManager`` is stored on ``session.plugin_manager``. Plugins
        located under the package's own plugin directory are reported at
        debug level, all others at info level. Each plugin whose
        ``should_activate()`` is truthy is activated and has its hooks
        connected.
        '''
        self._debug_log_registered_hooks(session)

        builtin_dir = get_package_filename(
            os.path.join('application', 'plugins')
        )
        search_dirs = [builtin_dir]
        script_paths = []

        if session.args.plugin_script:
            script_paths.append(session.args.plugin_script)

        plugin_locator = PluginLocator(search_dirs, script_paths)
        session.plugin_manager = PluginManager(plugin_locator=plugin_locator)
        session.plugin_manager.collectPlugins()

        for plugin_info in session.plugin_manager.getAllPlugins():
            # Bundled plugins are only interesting when debugging.
            if plugin_info.path.startswith(builtin_dir):
                report = _logger.debug
            else:
                report = _logger.info

            report(__(
                _('Found plugin {name} from {filename}.'),
                filename=plugin_info.path,
                name=plugin_info.name
            ))

            plugin = plugin_info.plugin_object
            plugin.app_session = session

            if plugin_info.plugin_object.should_activate():
                session.plugin_manager.activatePluginByName(plugin_info.name)
                self._connect_plugin_hooks(session, plugin_info.plugin_object)
开发者ID:Super-Rad,项目名称:wpull,代码行数:34,代码来源:plugin.py

示例3: _polling_sleep

    def _polling_sleep(cls, resource_monitor, log=False):
        '''Sleep until the resource monitor stops reporting a shortage.

        Coroutine. ``resource_monitor.check()`` is polled with a 60 second
        sleep between polls for as long as it returns a truthy value.

        Args:
            resource_monitor: Object providing ``check()``.
            log (bool): If true, warn on the first poll and on every 15th
                poll after it, and report once when the shortage clears
                after at least one wait.
        '''
        poll_number = 0

        while True:
            shortage = resource_monitor.check()

            if not shortage:
                if log and poll_number:
                    _logger.info(_('Situation cleared.'))

                return

            if log and poll_number % 15 == 0:
                if shortage.path:
                    _logger.warning(__(
                        _('Low disk space on {path} ({size} free).'),
                        path=shortage.path,
                        size=wpull.string.format_size(shortage.free)
                    ))
                else:
                    _logger.warning(__(
                        _('Low memory ({size} free).'),
                        size=wpull.string.format_size(shortage.free)
                    ))

                _logger.warning(_('Waiting for operator to clear situation.'))

            yield from asyncio.sleep(60)
            poll_number += 1
开发者ID:Super-Rad,项目名称:wpull,代码行数:26,代码来源:resmon.py

示例4: _scrape_document

    def _scrape_document(self, request, response, url_item):
        '''Ask the ``get_urls`` callback for extra URLs and add them.

        The request URL info, the response body info and the body's file
        name are converted with ``to_script_native_type`` before being
        handed to the callback. Each returned entry is passed to
        ``_add_hooked_url``.
        '''
        convert = self.to_script_native_type
        url_info_dict = convert(request.url_info.to_dict())
        document_info_dict = convert(response.body.to_dict())
        filename = convert(response.body.content_file.name)

        hooked_urls = self.callbacks.get_urls(
            filename, url_info_dict, document_info_dict
        )

        _logger.debug(__('Hooked scrape returned {0}', hooked_urls))

        if not hooked_urls:
            return

        if convert(1) not in hooked_urls:
            # Ordinary iterable of entries.
            for entry in hooked_urls:
                self._add_hooked_url(url_item, entry)

            return

        # Lua doesn't have sequences: walk the 1-based keys until a None
        # entry marks the end.
        index = 1

        while True:
            entry = hooked_urls[convert(index)]

            _logger.debug(__('Got lua new url info {0}', entry))

            if entry is None:
                break

            self._add_hooked_url(url_item, entry)
            index += 1
开发者ID:mback2k,项目名称:wpull,代码行数:28,代码来源:hook.py

示例5: _check_resource_monitor

    def _check_resource_monitor(self):
        '''Block while the resource monitor reports a shortage.

        Generator-based coroutine. Does nothing when no resource monitor is
        set. Otherwise ``check()`` is polled with a 60 second sleep between
        polls; a warning is logged on the first poll and on every 15th poll
        after it, and a notice is logged once the shortage clears after at
        least one wait.
        '''
        if not self._resource_monitor:
            return

        poll_number = 0

        while True:
            shortage = self._resource_monitor.check()

            if not shortage:
                if poll_number:
                    _logger.info(_('Situation cleared.'))
                return

            if poll_number % 15 == 0:
                if shortage.path:
                    message = __(
                        _('Low disk space on {path} ({size} free).'),
                        path=shortage.path,
                        size=wpull.string.format_size(shortage.free)
                    )
                else:
                    message = __(
                        _('Low memory ({size} free).'),
                        size=wpull.string.format_size(shortage.free)
                    )

                _logger.warning(message)
                _logger.warning(_('Waiting for operator to clear situation.'))

            yield From(trollius.sleep(60))
            poll_number += 1
开发者ID:Willianvdv,项目名称:wpull,代码行数:28,代码来源:engine.py

示例6: _read_input_urls

    def _read_input_urls(cls, session: AppSession, default_scheme='http'):
        '''Yield a parsed URL info for every URL the user supplied.

        URLs come from ``session.args.urls`` followed, when an input file is
        given, by the entries read from that file (as HTML links if
        ``force_html`` is set, otherwise as lines). Each URL is joined with
        ``session.args.base`` when that is set, parsed with
        ``default_scheme``, and passed through the URL rewriter if one is
        available.
        '''
        url_strings = session.args.urls or ()
        # FIXME: url rewriter isn't created yet
        url_rewriter = session.factory.get('URLRewriter')

        if session.args.input_file:
            read_input_file = (
                cls._input_file_as_html_links if session.args.force_html
                else cls._input_file_as_lines
            )
            url_strings = itertools.chain(
                url_strings, read_input_file(session)
            )

        base_url = session.args.base

        for raw_url in url_strings:
            _logger.debug(__('Parsing URL {0}', raw_url))

            absolute_url = (
                wpull.url.urljoin(base_url, raw_url) if base_url else raw_url
            )
            url_info = wpull.url.URLInfo.parse(
                absolute_url, default_scheme=default_scheme
            )

            _logger.debug(__('Parsed URL {0}', url_info))

            if url_rewriter:
                # TODO: this logic should be a hook
                url_info = url_rewriter.rewrite(url_info)
                _logger.debug(__('Rewritten URL {0}', url_info))

            yield url_info
开发者ID:Super-Rad,项目名称:wpull,代码行数:34,代码来源:database.py

示例7: _make_socket

    def _make_socket(self):
        '''Create the socket and wrap it in an IOStream.

        Generator-based coroutine: the resolver's result is yielded for.
        The socket is bound first when a bind address is configured, and an
        ``SSLIOStream`` is used instead of a plain ``IOStream`` when SSL is
        enabled. The stream's close callback is set to
        ``_stream_closed_callback``.
        '''
        host, port = self._original_address

        family, self._resolved_address = yield self._resolver.resolve(
            host, port)

        self._socket = socket.socket(family, socket.SOCK_STREAM)

        _logger.debug(__('Socket to {0}/{1}.', family, self._resolved_address))

        bind_address = self._params.bind_address

        if bind_address:
            _logger.debug(__('Binding socket to {0}', bind_address))
            self._socket.bind(bind_address)

        # Options common to both stream flavours.
        stream_options = {
            'max_buffer_size': self._params.buffer_size,
            'rw_timeout': self._params.read_timeout,
        }

        if self._ssl:
            self._io_stream = SSLIOStream(
                self._socket,
                ssl_options=self._params.ssl_options or {},
                server_hostname=host,
                **stream_options
            )
        else:
            self._io_stream = IOStream(self._socket, **stream_options)

        self._io_stream.set_close_callback(self._stream_closed_callback)
开发者ID:nwpu063291,项目名称:wpull,代码行数:33,代码来源:connection.py

示例8: resolve_all

    def resolve_all(self, host, port=0):
        '''Resolve a hostname into every address found for it.

        Generator-based coroutine; the result is delivered through
        ``raise Return(...)``.

        Args:
            host (str): The hostname.
            port (int): The port number.

        Returns:
            list: A list of tuples where each tuple contains the family and
            the socket address. See :meth:`resolve` for the socket address
            format.

        Raises:
            DNSNotFound: If the lookup produced no results.
        '''
        _logger.debug(__('Lookup address {0} {1}.', host, port))

        host = self._lookup_hook(host, port)

        # Consult the cache first when one is enabled.
        if self._cache:
            addresses = self._get_cache(host, port, self._family)
        else:
            addresses = None

        if addresses is None:
            addresses = yield From(self._resolve_from_network(host, port))

        if self._cache:
            self._put_cache(host, port, addresses)

        if not addresses:
            message = "DNS resolution for {0} did not return any results." \
                .format(repr(host))
            raise DNSNotFound(message)

        _logger.debug(__('Resolved addresses: {0}.', addresses))

        raise Return(addresses)
开发者ID:Willianvdv,项目名称:wpull,代码行数:35,代码来源:dns.py

示例9: process

    def process(self, item_session: ItemSession, request, response, file_writer_session):
        '''Run PhantomJS for a fetched HTML response.

        Coroutine.

        Nothing is done unless the status code is 200 and ``HTMLReader``
        reports the request/response pair as supported. The driver is run up
        to ``WPULL_PHANTOMJS_TRIES`` times (environment variable, default
        5): a crash triggers another attempt, while a timeout or a clean run
        stops the attempts. A warning is logged when every attempt crashed.
        '''
        if response.status_code != 200 or not HTMLReader.is_supported(
                request=request, response=response):
            return

        _logger.debug('Starting PhantomJS processing.')

        self._file_writer_session = file_writer_session

        # FIXME: this is a quick hack for crashes. See #137.
        max_tries = int(os.environ.get('WPULL_PHANTOMJS_TRIES', 5))

        for dummy in range(max_tries):
            try:
                yield from self._run_driver(item_session, request, response)
            except asyncio.TimeoutError:
                _logger.warning(_('Waiting for page load timed out.'))
                return
            except PhantomJSCrashed as error:
                _logger.exception(__('PhantomJS crashed: {}', error))
                continue

            # The driver finished without crashing.
            return

        # Only reached when every attempt crashed.
        _logger.warning(__(
            _('PhantomJS failed to fetch ‘{url}’. I am sorry.'),
            url=request.url_info.url
        ))
开发者ID:Super-Rad,项目名称:wpull,代码行数:33,代码来源:phantomjs.py

示例10: control

    def control(self, remote):
        '''Scroll the page down step by step, then back to the top.

        Generator-based coroutine: every remote call and sleep is yielded.

        The page is scrolled ``self._num_scrolls`` times by one viewport
        height each. With smart scroll enabled, no scrolling happens if the
        remote reports the page as not dynamic, and scrolling stops as soon
        as a scroll leaves the resource counter values unchanged. Afterwards
        the method waits while resources are still pending, scrolls back to
        the origin, logs the number of scrolls and, if a WARC recorder is
        set, adds the action log for the page URL.

        Args:
            remote: Object providing ``call``, ``eval`` and
                ``resource_counter``.
        '''
        # NOTE(review): ``async`` is a reserved word from Python 3.7 on, so
        # the ``wpull.async`` references below do not parse there.
        num_scrolls = self._num_scrolls

        if self._smart_scroll:
            is_page_dynamic = yield remote.call('isPageDynamic')

            if not is_page_dynamic:
                # Nothing new would load, so skip the scroll loop entirely.
                num_scrolls = 0

        url = yield remote.eval('page.url')
        total_scroll_count = 0

        for scroll_count in range(num_scrolls):
            _logger.debug(__('Scrolling page. Count={0}.', scroll_count))

            # Snapshot taken before scrolling, compared again after the wait.
            pre_scroll_counter_values = remote.resource_counter.values()

            scroll_position = yield remote.eval('page.scrollPosition')
            # Move down by one viewport height.
            scroll_position['top'] += self._viewport_size[1]

            yield self.scroll_to(remote, 0, scroll_position['top'])

            total_scroll_count += 1

            self._log_action('wait', self._wait_time)
            yield wpull.async.sleep(self._wait_time)

            post_scroll_counter_values = remote.resource_counter.values()

            _logger.debug(__(
                'Counter values pre={0} post={1}',
                pre_scroll_counter_values,
                post_scroll_counter_values
            ))

            # Smart scroll: stop once a scroll changed no counter value.
            if post_scroll_counter_values == pre_scroll_counter_values \
               and self._smart_scroll:
                break

        # Wait for pending resources; the number of waits is capped by the
        # pending count observed when this loop starts.
        for dummy in range(remote.resource_counter.pending):
            if remote.resource_counter.pending:
                self._log_action('wait', self._wait_time)
                yield wpull.async.sleep(self._wait_time)
            else:
                break

        # Return to the top of the page.
        yield self.scroll_to(remote, 0, 0)

        _logger.info(__(
            gettext.ngettext(
                'Scrolled page {num} time.',
                'Scrolled page {num} times.',
                total_scroll_count,
            ), num=total_scroll_count
        ))

        if self._warc_recorder:
            self._add_warc_action_log(url)
开发者ID:mback2k,项目名称:wpull,代码行数:59,代码来源:processor.py

示例11: run

    def run(self):
        '''Fetch the item's URL with a PhantomJS driver process.

        Coroutine. Temporary paths are prepared for the scrape snapshot and
        the action/event logs, the driver is started and awaited (bounded by
        ``load_time`` when that parameter is set), and the action log and
        snapshots are added to the WARC when a recorder is set.

        Raises:
            PhantomJSCrashed: If the driver process exits with a non-zero
                return code.
        '''
        scrape_snapshot_path = self._get_temp_path('phantom', suffix='.html')
        action_log_path = self._get_temp_path('phantom-action', suffix='.txt')
        event_log_path = self._get_temp_path('phantom-event', suffix='.txt')
        snapshot_paths = [scrape_snapshot_path] + list(
            self._get_snapshot_paths()
        )
        url = self._item_session.url_record.url

        params = self._params
        driver = self._phantomjs_driver_factory(
            params=PhantomJSDriverParams(
                url=url,
                snapshot_paths=snapshot_paths,
                wait_time=params.wait_time,
                num_scrolls=params.num_scrolls,
                smart_scroll=params.smart_scroll,
                snapshot=params.snapshot,
                viewport_size=params.viewport_size,
                paper_size=params.paper_size,
                event_log_filename=event_log_path,
                action_log_filename=action_log_path,
                custom_headers=params.custom_headers,
                page_settings=params.page_settings,
            )
        )

        _logger.info(__(_('PhantomJS fetching ‘{url}’.'), url=url))

        with contextlib.closing(driver):
            yield from driver.start()

            # FIXME: we don't account that things might be scrolling and
            # downloading so it might not be a good idea to timeout like
            # this
            load_time = self._params.load_time
            process_exit = driver.process.wait()

            if load_time:
                process_exit = asyncio.wait_for(process_exit, load_time)

            yield from process_exit

            exit_code = driver.process.returncode

            if exit_code != 0:
                raise PhantomJSCrashed(
                    'PhantomJS exited with code {}'.format(exit_code)
                )

        if self._warc_recorder:
            self._add_warc_action_log(action_log_path, url)
            for snapshot_path in snapshot_paths:
                self._add_warc_snapshot(snapshot_path, url)

        _logger.info(__(_('PhantomJS fetched ‘{url}’.'), url=url))
开发者ID:Super-Rad,项目名称:wpull,代码行数:58,代码来源:phantomjs.py

示例12: write_record

    def write_record(self, record):
        '''Append the record to the WARC file.

        While the write is in progress a journal file named
        ``<warc filename>-wpullinc`` holds the length the WARC file had
        beforehand. If the write fails, the WARC file is cut back to that
        length and the error is re-raised. The journal is removed in either
        case. When a CDX file is configured, the record's offset and size
        are written to it.

        Args:
            record: The record to write. Its ``WARC-Warcinfo-ID`` field is
                overwritten, and iterating it provides the chunks written
                to the (binary) WARC file.

        Raises:
            OSError, IOError: If appending to the WARC file fails.
        '''
        # FIXME: probably not a good idea to modify arguments passed to us
        # TODO: add extra gzip headers that wget uses
        record.fields['WARC-Warcinfo-ID'] = self._warcinfo_record.fields[
            WARCRecord.WARC_RECORD_ID]

        _logger.debug(__('Writing WARC record {0}.',
                         record.fields['WARC-Type']))

        if self._params.compress:
            open_func = gzip.GzipFile
        else:
            open_func = open

        # Use getsize to get actual file size. Avoid tell() because it may
        # not be the raw file position.
        if os.path.exists(self._warc_filename):
            before_offset = os.path.getsize(self._warc_filename)
        else:
            before_offset = 0

        journal_filename = self._warc_filename + '-wpullinc'

        with open(journal_filename, 'w') as file:
            file.write('wpull-journal-version:1\n')
            file.write('offset:{}\n'.format(before_offset))

        try:
            with open_func(self._warc_filename, mode='ab') as out_file:
                for data in record:
                    out_file.write(data)
        except (OSError, IOError):
            _logger.info(__(
                _('Rolling back file {filename} to length {length}.'),
                filename=self._warc_filename, length=before_offset
            ))

            # Open without truncating. Mode 'wb' would empty the file first
            # and truncate() would then pad it with NUL bytes up to
            # before_offset, wiping every record written earlier.
            # The existence check keeps a failed open above (file never
            # created) from masking the original error.
            if os.path.exists(self._warc_filename):
                with open(self._warc_filename, mode='r+b') as out_file:
                    out_file.truncate(before_offset)

            # Bare raise keeps the original traceback intact.
            raise
        finally:
            os.remove(journal_filename)

        after_offset = os.path.getsize(self._warc_filename)

        if self._cdx_filename:
            raw_file_offset = before_offset
            raw_file_record_size = after_offset - before_offset

            self._write_cdx_field(
                record, raw_file_record_size, raw_file_offset
            )
开发者ID:asergi,项目名称:wpull,代码行数:53,代码来源:warc.py

示例13: _load_ca_certs

    def _load_ca_certs(cls, session: AppSession, clean: bool=True):
        '''Load the Certificate Authority certificates.

        Certificates are gathered from the bundled CA file (when
        ``use_internal_ca_certs`` is set), from every file directly inside
        ``ca_directory`` and from ``ca_certificate``, then written to one
        temporary ``.pem`` file whose name is stored on
        ``session.ca_certs_filename``. If that attribute is already set,
        nothing is loaded again.

        Args:
            session: The application session providing ``args`` and
                receiving ``ca_certs_filename``.
            clean: If true, removal of the temporary file is registered to
                run at interpreter exit.

        Returns:
            str: The filename of the combined certificate file.
        '''
        args = session.args

        if session.ca_certs_filename:
            return session.ca_certs_filename

        certs = set()

        if args.use_internal_ca_certs:
            pem_filename = os.path.join(
                os.path.dirname(__file__), '..', '..', 'cert', 'ca-bundle.pem'
            )
            certs.update(cls._read_pem_file(pem_filename, from_package=True))

        if args.ca_directory:
            if os.path.isdir(args.ca_directory):
                for filename in os.listdir(args.ca_directory):
                    # os.listdir() yields bare names; without the join the
                    # check and the read would be relative to the current
                    # working directory instead of the CA directory.
                    cert_path = os.path.join(args.ca_directory, filename)

                    if os.path.isfile(cert_path):
                        certs.update(cls._read_pem_file(cert_path))
            else:
                _logger.warning(__(
                    _('Certificate directory {path} does not exist.'),
                    path=args.ca_directory
                ))

        if args.ca_certificate:
            if os.path.isfile(args.ca_certificate):
                certs.update(cls._read_pem_file(args.ca_certificate))
            else:
                _logger.warning(__(
                    _('Certificate file {path} does not exist.'),
                    path=args.ca_certificate
                ))

        # mkstemp() returns an already open descriptor along with the path;
        # keep it so it can be reused and closed instead of leaked.
        certs_fd, certs_filename = tempfile.mkstemp(
            suffix='.pem', prefix='tmp-wpull-')
        session.ca_certs_filename = certs_filename

        def clean_certs_file():
            os.remove(certs_filename)

        if clean:
            atexit.register(clean_certs_file)

        # Wrapping the descriptor closes it when the block exits.
        with os.fdopen(certs_fd, 'wb') as certs_file:
            for cert in certs:
                certs_file.write(cert)

        _logger.debug('CA certs loaded.')

        # Match the cached path above, which returns the filename.
        return certs_filename
开发者ID:Super-Rad,项目名称:wpull,代码行数:50,代码来源:sslcontext.py

示例14: _process_url_item

    def _process_url_item(self, url_item):
        '''Run the processor on one item, logging before and after.

        Generator-based coroutine: the processor's result is yielded for.

        Args:
            url_item (:class:`.item.URLItem`): The item to process.

        This function calls :meth:`.processor.BaseProcessor.process`.
        '''
        begin_message = __(
            'Begin session for {0} {1}.',
            url_item.url_record, url_item.url_info
        )
        _logger.debug(begin_message)

        yield self._processor.process(url_item)

        # The attributes are read again here rather than reused from above.
        end_message = __(
            'End session for {0} {1}.',
            url_item.url_record, url_item.url_info
        )
        _logger.debug(end_message)
开发者ID:mback2k,项目名称:wpull,代码行数:15,代码来源:engine.py

示例15: _read_content

    def _read_content(self, response, original_url_info):
        '''Read response and parse the contents into the pool.

        Up to 4096 bytes of the response body are read and loaded into the
        robots.txt pool. If the pool rejects the data with ``ValueError``,
        a warning is logged and the URL is accepted as blank.

        Args:
            response: Response whose ``body`` is read.
            original_url_info: URL info under which the rules are stored.
        '''
        data = response.body.read(4096)
        url_info = original_url_info

        try:
            self._robots_txt_pool.load_robots_txt(url_info, data)
        except ValueError:
            # The message uses the named field {url}, so the value has to
            # be passed by keyword; passed positionally, formatting the
            # message raises KeyError.
            _logger.warning(__(
                _('Failed to parse {url} for robots exclusion rules. '
                  'Ignoring.'), url=url_info.url))
            self._accept_as_blank(url_info)
        else:
            _logger.debug(__('Got a good robots.txt for {0}.',
                             url_info.url))
开发者ID:Willianvdv,项目名称:wpull,代码行数:15,代码来源:robots.py


注:本文中的wpull.backport.logging.__函数示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。