Python proxylist.ProxyList类代码示例

本文整理汇总了Python中grab.proxylist.ProxyList类的典型用法代码示例。如果您正苦于以下问题：Python ProxyList类的具体用法？Python ProxyList怎么用？Python ProxyList使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。

在下文中一共展示了ProxyList类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: main

def main(**kwargs):
    """
    Launch one ``click`` worker process per URL through a random proxy.

    A proxy is picked at random from the list loaded from
    ``/web/proxy-us.txt`` and its address is handed to every worker.
    The call blocks until every started worker has exited.

    :param **kwargs: accepted for call compatibility; not used here.
    """
    logging.basicConfig(level=logging.DEBUG)

    pl = ProxyList()
    pl.load_file('/web/proxy-us.txt')
    proxy = pl.get_random_proxy()

    urls = [
        'http://jabbim.com',
    ]

    pool = []
    for url in urls:
        pro = Process(target=click, kwargs={'url': url,
                                            'proxy': proxy.get_address()})
        # Remember the launch time so the optional timeout below can be
        # measured per process.
        pro._start_time = time.time()
        pro.daemon = True
        pro.start()
        pool.append(pro)

    # Seconds a worker may run before it is terminated; None disables it.
    TIMEOUT = None
    # Watch every started worker rather than only the last one created.
    # An empty URL list leaves the pool empty, so the loop exits at once
    # instead of failing on an undefined process variable.
    while True:
        running = [pro for pro in pool if pro.is_alive()]
        if not running:
            break
        time.sleep(0.5)
        now = time.time()
        for pro in running:
            if TIMEOUT and now - pro._start_time > TIMEOUT:
                pro.terminate()

开发者ID:pombredanne，项目名称:wkit，代码行数:28，代码来源:test.py

示例2: __init__

    def __init__(self, document_body=None,
                 transport=None, **kwargs):
        """
        Build a new Grab object.

        :param document_body: when not None, passed to `setup_document`
            after everything else is initialised.
        :param transport: stored as-is in ``self.transport_param``; no
            transport object is created here.
        :param **kwargs: when non-empty, forwarded to `setup`.
        """

        # Containers and configuration
        self.meta = {}
        self._doc = None
        self.config = default_config()
        headers = self.common_headers()
        self.config['common_headers'] = headers
        self.cookies = CookieManager()
        self.proxylist = ProxyList()
        self.exception = None

        # Request-related attributes are declared here (keeps pylint quiet)
        self.request_counter = self.request_head = None
        self.request_body = self.request_method = None
        self.transport_param = transport
        self.transport = None

        self.reset()

        # Optional caller-supplied settings and initial document
        if kwargs:
            self.setup(**kwargs)
        if document_body is not None:
            self.setup_document(document_body)

开发者ID:lorien，项目名称:grab，代码行数:27，代码来源:base.py

示例3: load_proxylist

    def load_proxylist(self, source, source_type=None, proxy_type='http',
                       auto_init=True, auto_change=True,
                       **kwargs):
        """
        Replace the current proxy list with one built from `source`.

        :param source: a ``BaseProxySource`` instance, or a string that is
            interpreted according to `source_type`.
        :param source_type: required when `source` is a string:
            'text_file' loads it with ``ProxyList.load_file``,
            'url' loads it with ``ProxyList.load_url``.
        :param proxy_type: forwarded to the proxy list loader.
        :param auto_init: when `auto_change` is false and this is true,
            one random proxy is picked right away and kept in
            ``self.proxy``.
        :param auto_change: stored in ``self.proxy_auto_change``.
        :param **kwargs: accepted but ignored.
        :raises SpiderMisuseError: if `source` is neither a
            ``BaseProxySource`` nor a string, or if `source_type` is not
            one of the supported values.
        """
        self.proxylist = ProxyList()
        if isinstance(source, BaseProxySource):
            self.proxylist.set_source(source)
        elif not isinstance(source, six.string_types):
            # The offending value is wrapped in a 1-tuple: a bare tuple on
            # the right of `%` would be unpacked as format arguments and
            # raise TypeError instead of the intended error.
            raise SpiderMisuseError('Method `load_proxylist` received '
                                    'invalid `source` argument: %s'
                                    % (source,))
        elif source_type == 'text_file':
            self.proxylist.load_file(source, proxy_type=proxy_type)
        elif source_type == 'url':
            self.proxylist.load_url(source, proxy_type=proxy_type)
        else:
            raise SpiderMisuseError('Method `load_proxylist` received '
                                    'invalid `source_type` argument: %s'
                                    % (source_type,))

        self.proxylist_enabled = True
        self.proxy = None
        if not auto_change and auto_init:
            self.proxy = self.proxylist.get_random_proxy()
        self.proxy_auto_change = auto_change

开发者ID:gwynnbleidd1984，项目名称:grab，代码行数:25，代码来源:base.py

示例4: test_get_next_proxy

 def test_get_next_proxy(self):
     # Hosts must come back in file order and wrap around after the last
     # entry; loading the file again must restart from the first entry.
     pl = ProxyList()
     path = self.generate_plist_file('foo:1\nbar:1')
     pl.load_file(path)
     for expected_host in ('foo', 'bar', 'foo'):
         self.assertEqual(pl.get_next_proxy().host, expected_host)
     pl.load_file(path)
     first_after_reload = pl.get_next_proxy()
     self.assertEqual(first_after_reload.host, 'foo')

开发者ID:Michael-F-Bryan，项目名称:grab，代码行数:9，代码来源:proxy.py

示例5: test_get_next_proxy

 def test_get_next_proxy(self):
     # Hosts must come back in file order and wrap around after the last
     # entry; loading the file again must restart from the first entry.
     with temp_file() as path:
         plist = ProxyList()
         self.generate_plist_file(path, 'foo:1\nbar:1')
         plist.load_file(path)
         for expected_host in ('foo', 'bar', 'foo'):
             self.assertEqual(plist.get_next_proxy().host, expected_host)
         plist.load_file(path)
         first_after_reload = plist.get_next_proxy()
         self.assertEqual(first_after_reload.host, 'foo')

开发者ID:lorien，项目名称:grab，代码行数:10，代码来源:proxy.py

示例6: load_proxylist

    def load_proxylist(self, source, source_type, proxy_type='http',
                       auto_init=True, auto_change=True,
                       **kwargs):
        """
        Build a new ``ProxyList`` and enable it for this object.

        :param source: passed positionally to ``ProxyList``.
        :param source_type: passed positionally to ``ProxyList``.
        :param proxy_type: passed to ``ProxyList`` as a keyword.
        :param auto_init: when `auto_change` is false and this is true,
            a proxy from ``get_random()`` is stored in ``self.proxy``.
        :param auto_change: stored in ``self.proxy_auto_change``.
        :param **kwargs: extra keyword arguments for ``ProxyList``.
        """
        self.proxylist = ProxyList(source, source_type,
                                   proxy_type=proxy_type, **kwargs)
        self.proxylist_enabled = True

        # No proxy is selected unless rotation is off and eager
        # initialisation was requested.
        self.proxy = None
        if auto_change:
            pass
        elif auto_init:
            self.proxy = self.proxylist.get_random()
        self.proxy_auto_change = auto_change

开发者ID:Moonshard，项目名称:grab，代码行数:10，代码来源:base.py

示例7: __init__

    def __init__(self, document_body=None,
                 transport='pycurl', **kwargs):
        """
        Build a new Grab object.

        :param document_body: when not None, passed to `setup_document`
            once the instance is initialised.
        :param transport: passed to `setup_transport`.
        :param **kwargs: when non-empty, forwarded to `setup`.
        """

        # Containers and configuration
        self.meta = {}
        self._doc = None
        self.config = default_config()
        headers = self.common_headers()
        self.config['common_headers'] = headers
        self.cookies = CookieManager()
        self.proxylist = ProxyList()

        # The transport is set up before the first reset
        self.setup_transport(transport)
        self.reset()

        # Optional caller-supplied settings and initial document
        if kwargs:
            self.setup(**kwargs)
        if document_body is not None:
            self.setup_document(document_body)

开发者ID:ixtel，项目名称:grab，代码行数:18，代码来源:base.py

示例8: load_proxylist

    def load_proxylist(self, source, source_type=None, proxy_type='http',
                       auto_init=True, auto_change=True):
        """
        Load a proxy list and enable it for this spider.

        :param source: Proxy source.
            Accepts a string (file path or URL) or a ``BaseProxySource``
            instance.
        :param source_type: The type of a string `source`.
            Should be one of the following: 'text_file' or 'url'.
            Not consulted when `source` is a ``BaseProxySource``.
        :param proxy_type:
            Should be one of the following: 'socks4', 'socks5' or 'http'.
        :param auto_init:
            If `auto_change` is false and this is true, one random proxy
            is selected right away and stored in ``self.proxy``.
        :param auto_change:
            If set to `True` then automatic random proxy rotation
            will be used.
        :raises SpiderMisuseError: if `source` is neither a
            ``BaseProxySource`` nor a string, or if `source_type` is not
            one of the supported values.

        Each line of the proxy source should have one of these formats:
            - ip:port
            - ip:port:login:password

        """
        self.proxylist = ProxyList()
        if isinstance(source, BaseProxySource):
            self.proxylist.set_source(source)
        elif not isinstance(source, six.string_types):
            # The offending value is wrapped in a 1-tuple: a bare tuple on
            # the right of `%` would be unpacked as format arguments and
            # raise TypeError instead of the intended error.
            raise SpiderMisuseError('Method `load_proxylist` received '
                                    'invalid `source` argument: %s'
                                    % (source,))
        elif source_type == 'text_file':
            self.proxylist.load_file(source, proxy_type=proxy_type)
        elif source_type == 'url':
            self.proxylist.load_url(source, proxy_type=proxy_type)
        else:
            raise SpiderMisuseError('Method `load_proxylist` received '
                                    'invalid `source_type` argument: %s'
                                    % (source_type,))

        self.proxylist_enabled = True
        self.proxy = None
        if not auto_change and auto_init:
            self.proxy = self.proxylist.get_random_proxy()
        self.proxy_auto_change = auto_change

开发者ID:lorien，项目名称:grab，代码行数:43，代码来源:base.py

示例9: test_basic

 def test_basic(self):
     # A freshly constructed list reports a size of zero.
     empty_list = ProxyList()
     reported = empty_list.size()
     self.assertEqual(0, reported)

开发者ID:lorien，项目名称:grab，代码行数:3，代码来源:proxy.py

示例10: Spider


#.........这里部分代码省略.........
            queue.put(
                task, priority=task.priority, schedule_time=task.schedule_time
            )
            return True

    def stop(self):
        """
        Ask the spider to stop.

        Clears the internal ``work_allowed`` flag, which signals the
        spider to stop processing new tasks and shut down.
        """
        self.work_allowed = False

    def load_proxylist(self, source, source_type=None, proxy_type='http',
                       auto_init=True, auto_change=True):
        """
        Load a proxy list and enable it for this spider.

        :param source: Proxy source.
            Accepts a string (file path or URL) or a ``BaseProxySource``
            instance.
        :param source_type: The type of a string `source`.
            Should be one of the following: 'text_file' or 'url'.
            Not consulted when `source` is a ``BaseProxySource``.
        :param proxy_type:
            Should be one of the following: 'socks4', 'socks5' or 'http'.
        :param auto_init:
            If `auto_change` is false and this is true, one random proxy
            is selected right away and stored in ``self.proxy``.
        :param auto_change:
            If set to `True` then automatic random proxy rotation
            will be used.
        :raises SpiderMisuseError: if `source` is neither a
            ``BaseProxySource`` nor a string, or if `source_type` is not
            one of the supported values.

        Each line of the proxy source should have one of these formats:
            - ip:port
            - ip:port:login:password

        """
        self.proxylist = ProxyList()
        if isinstance(source, BaseProxySource):
            self.proxylist.set_source(source)
        elif not isinstance(source, six.string_types):
            # The offending value is wrapped in a 1-tuple: a bare tuple on
            # the right of `%` would be unpacked as format arguments and
            # raise TypeError instead of the intended error.
            raise SpiderMisuseError('Method `load_proxylist` received '
                                    'invalid `source` argument: %s'
                                    % (source,))
        elif source_type == 'text_file':
            self.proxylist.load_file(source, proxy_type=proxy_type)
        elif source_type == 'url':
            self.proxylist.load_url(source, proxy_type=proxy_type)
        else:
            raise SpiderMisuseError('Method `load_proxylist` received '
                                    'invalid `source_type` argument: %s'
                                    % (source_type,))

        self.proxylist_enabled = True
        self.proxy = None
        if not auto_change and auto_init:
            self.proxy = self.proxylist.get_random_proxy()
        self.proxy_auto_change = auto_change

    def process_next_page(self, grab, task, xpath,
                          resolve_base=False, **kwargs):
        """
        Generate task for next page.

        :param grab: Grab instance
        :param task: Task object which should be assigned to next page url
        :param xpath: xpath expression which calculates list of URLS
        :param **kwargs: extra settings for new task object

开发者ID:lorien，项目名称:grab，代码行数:66，代码来源:base.py

示例11: test_file_proxy_source

 def test_file_proxy_source(self):
     # Loading the generated proxy file must yield exactly two entries.
     with temp_file() as path:
         plist = ProxyList()
         self.generate_plist_file(path)
         plist.load_file(path)
         loaded_count = plist.size()
         self.assertEqual(2, loaded_count)

开发者ID:lorien，项目名称:grab，代码行数:6，代码来源:proxy.py

示例12: Grab

class Grab(DeprecatedThings):

    # Instance attribute names declared for this class.
    __slots__ = (
        'request_head', 'request_body',
        #'request_log',
        'proxylist', 'config',
        'transport',
        'transport_param', 'request_method', 'request_counter',
        # '__weakref__' is listed so that instances can be weakly referenced
        '__weakref__', 'cookies',
        'meta', 'exception',

        # Dirty hack to make it possible to inherit Grab from
        # multiple base classes with __slots__
        '_doc',
    )

    # Names of the attributes that are processed when a clone
    # of a Grab instance is created
    clonable_attributes = ('request_head', 'request_body',
                           #'request_log',
                           'proxylist')

    # Config items whose values are mutable objects; the class keeps
    # its own copy of the module-level MUTABLE_CONFIG_KEYS
    mutable_config_keys = copy(MUTABLE_CONFIG_KEYS)

    #
    # Public methods
    #

    def __init__(self, document_body=None,
                 transport=None, **kwargs):
        """
        Build a new Grab object.

        :param document_body: when not None, passed to `setup_document`
            after everything else is initialised.
        :param transport: stored as-is in ``self.transport_param``; no
            transport object is created here.
        :param **kwargs: when non-empty, forwarded to `setup`.
        """

        # Containers and configuration
        self.meta = {}
        self._doc = None
        self.config = default_config()
        headers = self.common_headers()
        self.config['common_headers'] = headers
        self.cookies = CookieManager()
        self.proxylist = ProxyList()
        self.exception = None

        # Request-related attributes are declared here (keeps pylint quiet)
        self.request_counter = self.request_head = None
        self.request_body = self.request_method = None
        self.transport_param = transport
        self.transport = None

        self.reset()

        # Optional caller-supplied settings and initial document
        if kwargs:
            self.setup(**kwargs)
        if document_body is not None:
            self.setup_document(document_body)

    def _get_doc(self):
        """Return the stored document, creating a ``Document`` on first use."""
        current = self._doc
        if current is not None:
            return current
        self._doc = Document(self)
        return self._doc

    def _set_doc(self, obj):
        """Store `obj` in ``self._doc``."""
        self._doc = obj

    doc = property(_get_doc, _set_doc)

    def setup_transport(self, transport_param, reset=False):
        """
        Instantiate the network transport and store it in ``self.transport``.

        :param transport_param: ``None`` (``DEFAULT_TRANSPORT`` is used),
            a string (either a key of ``TRANSPORT_ALIAS`` or a dotted
            ``module.path.ClassName``), or a callable that returns a
            transport object.
        :param reset: pass True to replace a transport that is already
            set up.
        :raises error.GrabMisuseError: if a transport already exists and
            `reset` is false, if a string names no dotted path, or if
            `transport_param` is neither a string nor a callable.
        """
        if self.transport is not None and not reset:
            raise error.GrabMisuseError(
                'Transport is already set up. Use'
                ' setup_transport(..., reset=True) to explicitly setup'
                ' new transport')
        if transport_param is None:
            transport_param = DEFAULT_TRANSPORT
        if isinstance(transport_param, six.string_types):
            if transport_param in TRANSPORT_ALIAS:
                transport_param = TRANSPORT_ALIAS[transport_param]
            if '.' not in transport_param:
                raise error.GrabMisuseError('Unknown transport: %s'
                                            % transport_param)
            mod_path, cls_name = transport_param.rsplit('.', 1)
            cache_key = (mod_path, cls_name)
            try:
                cls = TRANSPORT_CACHE[cache_key]
            except KeyError:
                # A non-empty fromlist makes __import__ return the leaf
                # module rather than the top-level package.
                mod = __import__(mod_path, globals(), locals(), ['foo'])
                cls = getattr(mod, cls_name)
                TRANSPORT_CACHE[cache_key] = cls
            self.transport = cls()
        # The builtin callable() replaces collections.Callable, an alias
        # that was removed from the `collections` module in Python 3.10.
        elif callable(transport_param):
            self.transport = transport_param()
        else:
            raise error.GrabMisuseError('Option `transport` should be string '
                                        'or class or callable. Got %s'
                                        % type(transport_param))

    def reset(self):
        """
        Reset all attributes which could be modified during previous request
#.........这里部分代码省略.........

开发者ID:lorien，项目名称:grab，代码行数:101，代码来源:base.py

示例13: Grab

class Grab(DeprecatedThings):

    # Instance attribute names declared for this class.
    __slots__ = ('request_head', 'request_log', 'request_body',
                 'proxylist', 'config',
                 'transport',
                 'transport_param', 'request_method', 'request_counter',
                 # '__weakref__' is listed so that instances can be
                 # weakly referenced
                 '__weakref__', 'cookies',
                 'meta',

                 # Dirty hack to make it possible to inherit Grab from
                 # multiple base classes with __slots__
                 '_doc',
                 )

    # Names of the attributes that are processed when a clone
    # of a Grab instance is created
    clonable_attributes = ('request_head', 'request_log', 'request_body',
                           'proxylist')

    # Config items whose values are mutable objects; the class keeps
    # its own copy of the module-level MUTABLE_CONFIG_KEYS
    mutable_config_keys = copy(MUTABLE_CONFIG_KEYS)

    """
    Public methods
    """

    def __init__(self, document_body=None,
                 transport='grab.transport.curl.CurlTransport', **kwargs):
        """
        Build a new Grab object.

        :param document_body: when not None, passed to `setup_document`
            once the instance is initialised.
        :param transport: passed to `setup_transport`.
        :param **kwargs: when non-empty, forwarded to `setup`.
        """

        # Containers and configuration
        self.meta = {}
        self._doc = None
        self.config = default_config()
        headers = self.common_headers()
        self.config['common_headers'] = headers
        self.cookies = CookieManager()
        self.proxylist = ProxyList()

        # The transport is set up before the first reset
        self.setup_transport(transport)
        self.reset()

        # Optional caller-supplied settings and initial document
        if kwargs:
            self.setup(**kwargs)
        if document_body is not None:
            self.setup_document(document_body)

    def _get_doc(self):
        """Return the stored document, creating a ``Document`` on first use."""
        current = self._doc
        if current is not None:
            return current
        self._doc = Document(self)
        return self._doc

    def _set_doc(self, obj):
        """Store `obj` in ``self._doc``."""
        self._doc = obj

    doc = property(_get_doc, _set_doc)

    def setup_transport(self, transport_param):
        """
        Instantiate the network transport and store it in ``self.transport``.

        The raw `transport_param` is also kept in ``self.transport_param``.

        :param transport_param: a dotted ``module.path.ClassName`` string,
            or a callable that returns a transport object.
        :raises error.GrabMisuseError: if `transport_param` is neither a
            string nor a callable.
        """
        self.transport_param = transport_param
        if isinstance(transport_param, six.string_types):
            mod_path, cls_name = transport_param.rsplit('.', 1)
            cache_key = (mod_path, cls_name)
            try:
                cls = TRANSPORT_CACHE[cache_key]
            except KeyError:
                # A non-empty fromlist makes __import__ return the leaf
                # module rather than the top-level package.
                mod = __import__(mod_path, globals(), locals(), ['foo'])
                cls = getattr(mod, cls_name)
                TRANSPORT_CACHE[cache_key] = cls
            self.transport = cls()
        # The builtin callable() replaces collections.Callable, an alias
        # that was removed from the `collections` module in Python 3.10.
        elif callable(transport_param):
            self.transport = transport_param()
        else:
            raise error.GrabMisuseError('Option `transport` should be string '
                                        'or callable. Got %s'
                                        % type(transport_param))

    def reset(self):
        """
        Clear the per-request state of this instance.

        Sets the request head, log, body and method back to None and
        resets the transport. This prepares a new Grab instance and is
        also called automatically before each network request.
        """

        self.request_head = self.request_log = None
        self.request_body = self.request_method = None
        self.transport.reset()

    def clone(self, **kwargs):
        """
        Create clone of Grab instance.

        Cloned instance will have the same state: cookies, referrer, response
        document data

        :param **kwargs: overrides settings of cloned grab instance
        """

        g = Grab(transport=self.transport_param)
        g.config = self.dump_config()

#.........这里部分代码省略.........

开发者ID:noscripter，项目名称:grab，代码行数:101，代码来源:base.py

示例14: test_web_proxy_source

 def test_web_proxy_source(self):
     # Proxies served by the test server must load as exactly two entries.
     plist = ProxyList()
     self.server.response['data'] = DEFAULT_PLIST_DATA
     source_url = self.server.get_url()
     plist.load_url(source_url)
     loaded_count = plist.size()
     self.assertEqual(2, loaded_count)

开发者ID:lorien，项目名称:grab，代码行数:5，代码来源:proxy.py

示例15: Spider


#.........这里部分代码省略.........
                          % (task.name, task.url)
                    raise SpiderError(msg)
                else:
                    warn('Class attribute `Spider::base_url` is deprecated. '
                         'Use Task objects with absolute URLs')
                    task.url = urljoin(self.base_url, task.url)
                    # If task has grab_config object then update it too
                    if task.grab_config:
                        task.grab_config['url'] = task.url
        except Exception as ex:
            self.stat.collect('task-with-invalid-url', task.url)
            if raise_error:
                raise
            else:
                logger.error('', exc_info=ex)
                return False

        # TODO: keep original task priority if it was set explicitly
        self.task_queue.put(task, task.priority, schedule_time=task.schedule_time)
        return True

    def stop(self):
        """
        Ask the spider to stop.

        Logs the call at debug level and clears the internal
        ``work_allowed`` flag, which signals the spider to stop
        processing new tasks and shut down.
        """

        logger_verbose.debug('Method `stop` was called')
        self.work_allowed = False

    def load_proxylist(self, source, source_type=None, proxy_type='http',
                       auto_init=True, auto_change=True,
                       **kwargs):
        """
        Replace the current proxy list with one built from `source`.

        :param source: a ``BaseProxySource`` instance, or a string that is
            interpreted according to `source_type`.
        :param source_type: required when `source` is a string:
            'text_file' loads it with ``ProxyList.load_file``,
            'url' loads it with ``ProxyList.load_url``.
        :param proxy_type: forwarded to the proxy list loader.
        :param auto_init: when `auto_change` is false and this is true,
            one random proxy is picked right away and kept in
            ``self.proxy``.
        :param auto_change: stored in ``self.proxy_auto_change``.
        :param **kwargs: accepted but ignored.
        :raises SpiderMisuseError: if `source` is neither a
            ``BaseProxySource`` nor a string, or if `source_type` is not
            one of the supported values.
        """
        self.proxylist = ProxyList()
        if isinstance(source, BaseProxySource):
            self.proxylist.set_source(source)
        elif not isinstance(source, six.string_types):
            # The offending value is wrapped in a 1-tuple: a bare tuple on
            # the right of `%` would be unpacked as format arguments and
            # raise TypeError instead of the intended error.
            raise SpiderMisuseError('Method `load_proxylist` received '
                                    'invalid `source` argument: %s'
                                    % (source,))
        elif source_type == 'text_file':
            self.proxylist.load_file(source, proxy_type=proxy_type)
        elif source_type == 'url':
            self.proxylist.load_url(source, proxy_type=proxy_type)
        else:
            raise SpiderMisuseError('Method `load_proxylist` received '
                                    'invalid `source_type` argument: %s'
                                    % (source_type,))

        self.proxylist_enabled = True
        self.proxy = None
        if not auto_change and auto_init:
            self.proxy = self.proxylist.get_random_proxy()
        self.proxy_auto_change = auto_change

    def process_next_page(self, grab, task, xpath,
                          resolve_base=False, **kwargs):
        """
        Generate task for next page.

        :param grab: Grab instance
        :param task: Task object which should be assigned to next page url
        :param xpath: xpath expression which calculates list of URLS
        :param **kwargs: extra settings for new task object

开发者ID:Michael-F-Bryan，项目名称:grab，代码行数:66，代码来源:base.py

注：本文中的grab.proxylist.ProxyList类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。