当前位置: 首页>>代码示例>>Python>>正文


Python Fetcher.sync_fetch方法代码示例

本文整理汇总了Python中pyspider.fetcher.tornado_fetcher.Fetcher.sync_fetch方法的典型用法代码示例。如果您正苦于以下问题:Python Fetcher.sync_fetch方法的具体用法?Python Fetcher.sync_fetch怎么用?Python Fetcher.sync_fetch使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在pyspider.fetcher.tornado_fetcher.Fetcher的用法示例。


在下文中一共展示了Fetcher.sync_fetch方法的7个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: TestFetcher

# 需要导入模块: from pyspider.fetcher.tornado_fetcher import Fetcher [as 别名]
# 或者: from pyspider.fetcher.tornado_fetcher.Fetcher import sync_fetch [as 别名]
class TestFetcher(unittest.TestCase):
    """End-to-end tests for ``Fetcher.sync_fetch`` against a local httpbin
    server, with a ``pyproxy`` proxy subprocess and an optional phantomjs
    subprocess started once for the whole class."""

    # Template task dict; it is shared class-level state, so every test
    # deep-copies it before mutating.
    sample_task_http = {
        'taskid': 'taskid',
        'project': 'project',
        'url': '',
        'fetch': {
            'method': 'GET',
            'headers': {
                'Cookie': 'a=b',
                'a': 'b'
            },
            'cookies': {
                'c': 'd',
            },
            'timeout': 60,
            'save': 'abc',
        },
        'process': {
            'callback': 'callback',
            'save': [1, 2, 3],
        },
    }

    @classmethod
    def setUpClass(self):
        # NOTE(review): conventionally the first parameter of a classmethod
        # is named `cls`; `self` here is bound to the class object.
        import tests.data_test_webpage
        import httpbin

        # Local httpbin instance the HTTP tests fetch from (port 14887).
        self.httpbin_thread = utils.run_in_subprocess(httpbin.app.run, port=14887, passthrough_errors=False)
        self.httpbin = 'http://127.0.0.1:14887'

        self.inqueue = Queue(10)
        self.outqueue = Queue(10)
        self.fetcher = Fetcher(self.inqueue, self.outqueue)
        self.fetcher.phantomjs_proxy = '127.0.0.1:25555'
        # XML-RPC client for the endpoint the fetcher serves on port 24444.
        self.rpc = xmlrpc_client.ServerProxy('http://localhost:%d' % 24444)
        self.xmlrpc_thread = utils.run_in_thread(self.fetcher.xmlrpc_run, port=24444)
        self.thread = utils.run_in_thread(self.fetcher.run)
        # Authenticated HTTP proxy subprocess used by proxy-related tests.
        self.proxy_thread = subprocess.Popen(['pyproxy', '--username=binux',
                                              '--password=123456', '--port=14830',
                                              '--debug'], close_fds=True)
        self.proxy = '127.0.0.1:14830'
        try:
            self.phantomjs = subprocess.Popen(['phantomjs',
                os.path.join(os.path.dirname(__file__),
                    '../pyspider/fetcher/phantomjs_fetcher.js'),
                '25555'])
        except OSError:
            # phantomjs binary not installed; phantomjs-based tests degrade.
            self.phantomjs = None
        time.sleep(0.5)  # give the subprocesses a moment to bind their ports

    @classmethod
    def tearDownClass(self):
        # Shut everything down in roughly reverse start order.
        self.proxy_thread.terminate()
        self.proxy_thread.wait()
        self.httpbin_thread.terminate()
        self.httpbin_thread.join()

        if self.phantomjs:
            self.phantomjs.kill()
            self.phantomjs.wait()
        self.rpc._quit()
        self.thread.join()

        # Every service port must be released after shutdown.
        assert not utils.check_port_open(5000)
        assert not utils.check_port_open(23333)
        assert not utils.check_port_open(24444)
        assert not utils.check_port_open(25555)
        assert not utils.check_port_open(14887)

        time.sleep(1)

    def test_10_http_get(self):
        """GET: both the cookies dict and the Cookie header are forwarded."""
        request = copy.deepcopy(self.sample_task_http)
        request['url'] = self.httpbin+'/get'
        result = self.fetcher.sync_fetch(request)
        response = rebuild_response(result)

        self.assertEqual(response.status_code, 200, result)
        self.assertEqual(response.orig_url, request['url'])
        self.assertEqual(response.save, request['fetch']['save'])
        self.assertIsNotNone(response.json, response.content)
        self.assertEqual(response.json['headers'].get('A'), 'b', response.json)
        # Cookies from both 'fetch.cookies' and the Cookie header are merged.
        self.assertIn('c=d', response.json['headers'].get('Cookie'), response.json)
        self.assertIn('a=b', response.json['headers'].get('Cookie'), response.json)

    def test_15_http_post(self):
        """POST with a raw string body and an explicit cookies dict."""
        request = copy.deepcopy(self.sample_task_http)
        request['url'] = self.httpbin+'/post'
        request['fetch']['method'] = 'POST'
        request['fetch']['data'] = 'binux'
        request['fetch']['cookies'] = {'c': 'd'}
        result = self.fetcher.sync_fetch(request)
        response = rebuild_response(result)

        self.assertEqual(response.status_code, 200)
        self.assertEqual(response.orig_url, request['url'])
        self.assertEqual(response.save, request['fetch']['save'])
        self.assertIsNotNone(response.json, response.content)

#.........这里部分代码省略.........
开发者ID:eromoe,项目名称:pyspider,代码行数:103,代码来源:test_fetcher.py

示例2: TestSplashFetcher

# 需要导入模块: from pyspider.fetcher.tornado_fetcher import Fetcher [as 别名]
# 或者: from pyspider.fetcher.tornado_fetcher.Fetcher import sync_fetch [as 别名]
class TestSplashFetcher(unittest.TestCase):
    """Tests for ``Fetcher.sync_fetch`` with ``fetch_type='splash'`` against
    a Splash rendering service expected at 127.0.0.1:8050."""

    @property
    def sample_task_http(self):
        # Built fresh on every access, so tests may freely mutate the
        # returned dict without affecting each other.
        return {
            'taskid': 'taskid',
            'project': 'project',
            'url': '',
            'fetch': {
                'method': 'GET',
                'headers': {
                    'Cookie': 'a=b',
                    'a': 'b'
                },
                'cookies': {
                    'c': 'd',
                },
                'timeout': 60,
                'save': 'abc',
            },
            'process': {
                'callback': 'callback',
                'save': [1, 2, 3],
            },
        }

    @classmethod
    def setUpClass(self):
        # NOTE(review): `self` here is bound to the class object (cls).
        import tests.data_test_webpage
        import httpbin

        # Bind httpbin to 0.0.0.0 and address it via the host's LAN IP so a
        # Splash service running in a container can reach it.
        self.httpbin_thread = utils.run_in_subprocess(httpbin.app.run, host='0.0.0.0', port=14887, passthrough_errors=False)
        self.httpbin = 'http://' + socket.gethostbyname(socket.gethostname()) + ':14887'

        self.inqueue = Queue(10)
        self.outqueue = Queue(10)
        self.fetcher = Fetcher(self.inqueue, self.outqueue)
        self.fetcher.splash_endpoint = 'http://127.0.0.1:8050/execute'
        # XML-RPC client for the endpoint the fetcher serves on port 24444.
        self.rpc = xmlrpc_client.ServerProxy('http://localhost:%d' % 24444)
        self.xmlrpc_thread = utils.run_in_thread(self.fetcher.xmlrpc_run, port=24444)
        self.thread = utils.run_in_thread(self.fetcher.run)
        # Authenticated HTTP proxy subprocess used by proxy-related tests.
        self.proxy_thread = subprocess.Popen(['pyproxy', '--username=binux',
                                              '--password=123456', '--port=14830',
                                              '--debug'], close_fds=True)
        self.proxy = '127.0.0.1:14830'
        
    @classmethod
    def tearDownClass(self):
        # Shut everything down in roughly reverse start order.
        self.proxy_thread.terminate()
        self.proxy_thread.wait()
        self.httpbin_thread.terminate()
        self.httpbin_thread.join()

        self.rpc._quit()
        self.thread.join()

        # Every service port must be released after shutdown.
        assert not utils.check_port_open(5000)
        assert not utils.check_port_open(23333)
        assert not utils.check_port_open(24444)
        assert not utils.check_port_open(25555)
        assert not utils.check_port_open(14887)

        time.sleep(1)

    def test_69_no_splash(self):
        """Splash fetch without a configured endpoint must return 501."""
        splash_endpoint = self.fetcher.splash_endpoint
        self.fetcher.splash_endpoint = None

        request = self.sample_task_http
        request['url'] = self.httpbin + '/get'
        request['fetch']['fetch_type'] = 'splash'
        result = self.fetcher.sync_fetch(request)
        response = rebuild_response(result)

        self.assertEqual(response.status_code, 501, result)

        # Restore the endpoint for subsequent tests.
        self.fetcher.splash_endpoint = splash_endpoint

    def test_70_splash_url(self):
        """Splash fetch forwards headers/cookies like a plain HTTP fetch."""
        request = self.sample_task_http
        request['url'] = self.httpbin + '/get'
        request['fetch']['fetch_type'] = 'splash'
        result = self.fetcher.sync_fetch(request)
        response = rebuild_response(result)

        self.assertEqual(response.status_code, 200, result)
        self.assertEqual(response.orig_url, request['url'])
        self.assertEqual(response.save, request['fetch']['save'])
        # Splash returns rendered HTML; the JSON payload sits in a <pre> tag.
        data = json.loads(response.doc('pre').text())
        self.assertIsNotNone(data, response.content)
        self.assertEqual(data['headers'].get('A'), 'b', response.json)
        self.assertEqual(data['headers'].get('Cookie'), 'c=d', response.json)

    def test_75_splash_robots(self):
        """Splash fetch honours robots.txt when 'robots_txt' is set."""
        request = self.sample_task_http
        request['url'] = self.httpbin + '/deny'
        request['fetch']['fetch_type'] = 'splash'
        request['fetch']['robots_txt'] = True
        result = self.fetcher.sync_fetch(request)
        response = rebuild_response(result)

#.........这里部分代码省略.........
开发者ID:eromoe,项目名称:pyspider,代码行数:103,代码来源:test_fetcher.py

示例3: TestFetcher

# 需要导入模块: from pyspider.fetcher.tornado_fetcher import Fetcher [as 别名]
# 或者: from pyspider.fetcher.tornado_fetcher.Fetcher import sync_fetch [as 别名]
class TestFetcher(unittest.TestCase):
    """Tests for ``Fetcher.sync_fetch`` against the public echo.opera.com
    echo service, with an optional phantomjs subprocess on port 25555."""

    # Template task dict; it is shared class-level state, so every test
    # deep-copies it before mutating.
    sample_task_http = {
        'taskid': 'taskid',
        'project': 'project',
        'url': 'http://echo.opera.com/',
        'fetch': {
            'method': 'GET',
            'headers': {
                'Cookie': 'a=b',
                'a': 'b'
            },
            'cookies': {
                'c': 'd',
            },
            'timeout': 60,
            'save': 'abc',
        },
        'process': {
            'callback': 'callback',
            'save': [1, 2, 3],
        },
    }

    @classmethod
    def setUpClass(self):
        # NOTE(review): `self` here is bound to the class object (cls).
        self.inqueue = Queue(10)
        self.outqueue = Queue(10)
        self.fetcher = Fetcher(self.inqueue, self.outqueue)
        self.fetcher.phantomjs_proxy = '127.0.0.1:25555'
        # XML-RPC client for the endpoint the fetcher serves on port 24444.
        self.rpc = xmlrpc_client.ServerProxy('http://localhost:%d' % 24444)
        self.xmlrpc_thread = utils.run_in_thread(self.fetcher.xmlrpc_run, port=24444)
        self.thread = utils.run_in_thread(self.fetcher.run)
        try:
            self.phantomjs = subprocess.Popen(['phantomjs',
                os.path.join(os.path.dirname(__file__),
                    '../pyspider/fetcher/phantomjs_fetcher.js'),
                '25555'])
        except OSError:
            # phantomjs binary not installed; phantomjs-based tests degrade.
            self.phantomjs = None

    @classmethod
    def tearDownClass(self):
        if self.phantomjs:
            self.phantomjs.kill()
            self.phantomjs.wait()
        self.rpc._quit()
        self.thread.join()
        time.sleep(1)

    def test_10_http_get(self):
        """GET: cookies and custom headers appear in the echoed page."""
        result = self.fetcher.sync_fetch(self.sample_task_http)
        self.assertEqual(result['status_code'], 200)
        self.assertEqual(result['orig_url'], self.sample_task_http['url'])
        self.assertEqual(result['save'], self.sample_task_http['fetch']['save'])
        self.assertIn('content', result)

        # Content is bytes; the echo page renders request headers as HTML.
        content = result['content']
        self.assertIn(b'<b>A:', content)
        self.assertIn(b'<b>Cookie:</b>', content)
        self.assertIn(b'c=d</td>', content)

    def test_10_http_post(self):
        """POST with a raw string body and an explicit cookies dict."""
        request = copy.deepcopy(self.sample_task_http)
        request['fetch']['method'] = 'POST'
        request['fetch']['data'] = 'binux'
        request['fetch']['cookies'] = {'c': 'd'}
        result = self.fetcher.sync_fetch(request)
        self.assertEqual(result['status_code'], 200)
        self.assertEqual(result['orig_url'], self.sample_task_http['url'])
        self.assertEqual(result['save'], self.sample_task_http['fetch']['save'])
        self.assertIn('content', result)

        content = result['content']
        self.assertIn(b'<h2>POST', content)
        self.assertIn(b'A:', content)
        self.assertIn(b'Cookie:', content)
        # FIXME: cookies in headers not supported
        self.assertNotIn(b'a=b', content)
        self.assertIn(b'c=d', content)
        self.assertIn(b'binux', content)

    def test_20_dataurl_get(self):
        """A data: URL is decoded locally without any network round trip."""
        data = copy.deepcopy(self.sample_task_http)
        data['url'] = 'data:,hello'
        result = self.fetcher.sync_fetch(data)
        self.assertEqual(result['status_code'], 200)
        self.assertIn('content', result)
        self.assertEqual(result['content'], 'hello')

    def test_30_with_queue(self):
        """Tasks placed on the inqueue come back as (task, result) pairs."""
        data = copy.deepcopy(self.sample_task_http)
        data['url'] = 'data:,hello'
        self.inqueue.put(data)
        task, result = self.outqueue.get()
        self.assertEqual(result['status_code'], 200)
        self.assertIn('content', result)
        self.assertEqual(result['content'], 'hello')

    def test_40_with_rpc(self):
        data = copy.deepcopy(self.sample_task_http)
#.........这里部分代码省略.........
开发者ID:0xa-cc,项目名称:pyspider,代码行数:103,代码来源:test_fetcher.py

示例4: TestFetcher

# 需要导入模块: from pyspider.fetcher.tornado_fetcher import Fetcher [as 别名]
# 或者: from pyspider.fetcher.tornado_fetcher.Fetcher import sync_fetch [as 别名]
class TestFetcher(unittest.TestCase):
    """Tests for ``Fetcher.sync_fetch`` against the echo.opera.com echo
    service and ``data:`` URLs, via direct calls, the task queue, and the
    pickle-based XML-RPC endpoint."""

    # Template task dict; shared class-level state. Tests that only change
    # top-level keys take a shallow copy; tests that change nested dicts
    # must copy those nested dicts too (see test_10_http_post).
    sample_task_http = {
            'taskid': 'taskid',
            'project': 'project',
            'url': 'http://echo.opera.com/',
            'fetch': {
                'method': 'GET',
                'headers': {
                    'Cookie': 'a=b', 
                    'a': 'b'
                    },
                'timeout': 60,
                'save': 'abc',
                },
            'process': {
                'callback': 'callback',
                'save': [1, 2, 3],
                },
            }
    @classmethod
    def setUpClass(self):
        # NOTE(review): `self` here is bound to the class object (cls).
        self.inqueue = Queue(10)
        self.outqueue = Queue(10)
        self.fetcher = Fetcher(self.inqueue, self.outqueue)
        # XML-RPC client for the endpoint the fetcher serves on port 24444.
        self.rpc = xmlrpclib.ServerProxy('http://localhost:%d' % 24444)
        self.xmlrpc_thread = utils.run_in_thread(self.fetcher.xmlrpc_run, port=24444)
        self.thread = utils.run_in_thread(self.fetcher.run)

    @classmethod
    def tearDownClass(self):
        self.rpc._quit()
        self.thread.join()

    def test_10_http_get(self):
        """GET: cookies and custom headers appear in the echoed page."""
        result = self.fetcher.sync_fetch(self.sample_task_http)
        self.assertEqual(result['status_code'], 200)
        self.assertEqual(result['orig_url'], self.sample_task_http['url'])
        self.assertEqual(result['save'], self.sample_task_http['fetch']['save'])
        self.assertIn('content', result)

        content = result['content']
        self.assertIn('..A:', content)
        self.assertIn('..Cookie:', content)
        self.assertIn('a=b', content)

    def test_10_http_post(self):
        """POST with a raw string body and an explicit cookies dict."""
        # BUG FIX: dict() is a shallow copy, so mutating request['fetch']
        # below used to leak into the class-level sample_task_http and make
        # later tests order-dependent.  Copy the nested 'fetch' dict too.
        request = dict(self.sample_task_http)
        request['fetch'] = dict(request['fetch'])
        request['fetch']['method'] = 'POST'
        request['fetch']['data'] = 'binux'
        request['fetch']['cookies'] = {'c': 'd'}
        result = self.fetcher.sync_fetch(request)
        self.assertEqual(result['status_code'], 200)
        self.assertEqual(result['orig_url'], self.sample_task_http['url'])
        self.assertEqual(result['save'], self.sample_task_http['fetch']['save'])
        self.assertIn('content', result)

        content = result['content']
        self.assertIn('<h2>POST', content)
        self.assertIn('..A:', content)
        self.assertIn('..Cookie:', content)
        # FIXME: cookies in headers not supported
        self.assertNotIn('a=b', content)
        self.assertIn('c=d', content)
        self.assertIn('binux', content)

    def test_20_dataurl_get(self):
        """A data: URL is decoded locally without any network round trip."""
        data = dict(self.sample_task_http)
        data['url'] = 'data:,hello'
        result = self.fetcher.sync_fetch(data)
        self.assertEqual(result['status_code'], 200)
        self.assertIn('content', result)
        self.assertEqual(result['content'], 'hello')

    def test_30_with_queue(self):
        """Tasks placed on the inqueue come back as (task, result) pairs."""
        data = dict(self.sample_task_http)
        data['url'] = 'data:,hello'
        self.inqueue.put(data)
        task, result = self.outqueue.get()
        self.assertEqual(result['status_code'], 200)
        self.assertIn('content', result)
        self.assertEqual(result['content'], 'hello')

    def test_40_with_rpc(self):
        """Fetching through XML-RPC returns a pickled result dict."""
        data = dict(self.sample_task_http)
        data['url'] = 'data:,hello'
        result = pickle.loads(self.rpc.fetch(data).data)
        self.assertEqual(result['status_code'], 200)
        self.assertIn('content', result)
        self.assertEqual(result['content'], 'hello')
开发者ID:BCriswell,项目名称:pyspider,代码行数:91,代码来源:test_fetcher.py

示例5: TestFetcher

# 需要导入模块: from pyspider.fetcher.tornado_fetcher import Fetcher [as 别名]
# 或者: from pyspider.fetcher.tornado_fetcher.Fetcher import sync_fetch [as 别名]
class TestFetcher(unittest.TestCase):
    """Tests for ``Fetcher.sync_fetch`` against the echo.opera.com echo
    service and ``data:`` URLs, via direct calls, the task queue, and the
    umsgpack-based XML-RPC endpoint."""

    # Template task dict; shared class-level state. Tests that only change
    # top-level keys take a shallow copy; tests that change nested dicts
    # must copy those nested dicts too (see test_10_http_post).
    sample_task_http = {
        "taskid": "taskid",
        "project": "project",
        "url": "http://echo.opera.com/",
        "fetch": {"method": "GET", "headers": {"Cookie": "a=b", "a": "b"}, "timeout": 60, "save": "abc"},
        "process": {"callback": "callback", "save": [1, 2, 3]},
    }

    @classmethod
    def setUpClass(self):
        # NOTE(review): `self` here is bound to the class object (cls).
        self.inqueue = Queue(10)
        self.outqueue = Queue(10)
        self.fetcher = Fetcher(self.inqueue, self.outqueue)
        # XML-RPC client for the endpoint the fetcher serves on port 24444.
        self.rpc = xmlrpclib.ServerProxy("http://localhost:%d" % 24444)
        self.xmlrpc_thread = utils.run_in_thread(self.fetcher.xmlrpc_run, port=24444)
        self.thread = utils.run_in_thread(self.fetcher.run)

    @classmethod
    def tearDownClass(self):
        self.rpc._quit()
        self.thread.join()
        time.sleep(1)

    def test_10_http_get(self):
        """GET: cookies and custom headers appear in the echoed page."""
        result = self.fetcher.sync_fetch(self.sample_task_http)
        self.assertEqual(result["status_code"], 200)
        self.assertEqual(result["orig_url"], self.sample_task_http["url"])
        self.assertEqual(result["save"], self.sample_task_http["fetch"]["save"])
        self.assertIn("content", result)

        content = result["content"]
        self.assertIn("..A:", content)
        self.assertIn("..Cookie:", content)
        self.assertIn("a=b", content)

    def test_10_http_post(self):
        """POST with a raw string body and an explicit cookies dict."""
        # BUG FIX: dict() is a shallow copy, so mutating request["fetch"]
        # below used to leak into the class-level sample_task_http and make
        # later tests order-dependent.  Copy the nested "fetch" dict too.
        request = dict(self.sample_task_http)
        request["fetch"] = dict(request["fetch"])
        request["fetch"]["method"] = "POST"
        request["fetch"]["data"] = "binux"
        request["fetch"]["cookies"] = {"c": "d"}
        result = self.fetcher.sync_fetch(request)
        self.assertEqual(result["status_code"], 200)
        self.assertEqual(result["orig_url"], self.sample_task_http["url"])
        self.assertEqual(result["save"], self.sample_task_http["fetch"]["save"])
        self.assertIn("content", result)

        content = result["content"]
        self.assertIn("<h2>POST", content)
        self.assertIn("..A:", content)
        self.assertIn("..Cookie:", content)
        # FIXME: cookies in headers not supported
        self.assertNotIn("a=b", content)
        self.assertIn("c=d", content)
        self.assertIn("binux", content)

    def test_20_dataurl_get(self):
        """A data: URL is decoded locally without any network round trip."""
        data = dict(self.sample_task_http)
        data["url"] = "data:,hello"
        result = self.fetcher.sync_fetch(data)
        self.assertEqual(result["status_code"], 200)
        self.assertIn("content", result)
        self.assertEqual(result["content"], "hello")

    def test_30_with_queue(self):
        """Tasks placed on the inqueue come back as (task, result) pairs."""
        data = dict(self.sample_task_http)
        data["url"] = "data:,hello"
        self.inqueue.put(data)
        task, result = self.outqueue.get()
        self.assertEqual(result["status_code"], 200)
        self.assertIn("content", result)
        self.assertEqual(result["content"], "hello")

    def test_40_with_rpc(self):
        """Fetching through XML-RPC returns a umsgpack-packed result dict."""
        data = dict(self.sample_task_http)
        data["url"] = "data:,hello"
        result = umsgpack.unpackb(self.rpc.fetch(data).data)
        self.assertEqual(result["status_code"], 200)
        self.assertIn("content", result)
        self.assertEqual(result["content"], "hello")

    def test_50_base64_data(self):
        """A [BASE64-DATA] POST body is decoded and hex-dumped in the echo."""
        # BUG FIX: copy the nested "fetch" dict before mutating it (see
        # test_10_http_post) so the shared template is left untouched.
        request = dict(self.sample_task_http)
        request["fetch"] = dict(request["fetch"])
        request["fetch"]["method"] = "POST"
        request["fetch"]["data"] = "[BASE64-DATA]1tDOxA==[/BASE64-DATA]"
        self.inqueue.put(request)
        task, result = self.outqueue.get()
        self.assertEqual(result["status_code"], 200)
        # Hex bytes of the decoded base64 payload, as rendered by the echo.
        self.assertIn(" d6 ", result["content"])
        self.assertIn(" d0 ", result["content"])
        self.assertIn(" ce ", result["content"])
        self.assertIn(" c4 ", result["content"])

示例6: TestFetcher

# 需要导入模块: from pyspider.fetcher.tornado_fetcher import Fetcher [as 别名]
# 或者: from pyspider.fetcher.tornado_fetcher.Fetcher import sync_fetch [as 别名]
class TestFetcher(unittest.TestCase):
    """Tests for ``Fetcher.sync_fetch`` against the echo.opera.com echo
    service, with a mandatory phantomjs subprocess on port 25555."""

    # Template task dict; it is shared class-level state, so every test
    # deep-copies it before mutating.
    sample_task_http = {
        'taskid': 'taskid',
        'project': 'project',
        'url': 'http://echo.opera.com/',
        'fetch': {
            'method': 'GET',
            'headers': {
                'Cookie': 'a=b',
                'a': 'b'
            },
            'cookies': {
                'c': 'd',
            },
            'timeout': 60,
            'save': 'abc',
        },
        'process': {
            'callback': 'callback',
            'save': [1, 2, 3],
        },
    }

    @classmethod
    def setUpClass(self):
        # NOTE(review): `self` here is bound to the class object (cls).
        self.inqueue = Queue(10)
        self.outqueue = Queue(10)
        self.fetcher = Fetcher(self.inqueue, self.outqueue)
        self.fetcher.phantomjs_proxy = 'localhost:25555'
        # XML-RPC client for the endpoint the fetcher serves on port 24444.
        self.rpc = xmlrpclib.ServerProxy('http://localhost:%d' % 24444)
        self.xmlrpc_thread = utils.run_in_thread(self.fetcher.xmlrpc_run, port=24444)
        self.thread = utils.run_in_thread(self.fetcher.run)
        # Unlike sibling variants, this setup assumes phantomjs is installed
        # and raises if the binary is missing.
        self.phantomjs = subprocess.Popen(['phantomjs',
            os.path.join(os.path.dirname(__file__),
                '../pyspider/fetcher/phantomjs_fetcher.js'),
            '25555'])

    @classmethod
    def tearDownClass(self):
        self.phantomjs.kill()
        self.phantomjs.wait()
        self.rpc._quit()
        self.thread.join()
        time.sleep(1)

    def test_10_http_get(self):
        """GET: cookies and custom headers appear in the echoed page."""
        result = self.fetcher.sync_fetch(self.sample_task_http)
        self.assertEqual(result['status_code'], 200)
        self.assertEqual(result['orig_url'], self.sample_task_http['url'])
        self.assertEqual(result['save'], self.sample_task_http['fetch']['save'])
        self.assertIn('content', result)

        content = result['content']
        self.assertIn('<b>A:', content)
        self.assertIn('<b>Cookie:</b>', content)
        self.assertIn('c=d</td>', content)

    def test_10_http_post(self):
        """POST with a raw string body and an explicit cookies dict."""
        request = copy.deepcopy(self.sample_task_http)
        request['fetch']['method'] = 'POST'
        request['fetch']['data'] = 'binux'
        request['fetch']['cookies'] = {'c': 'd'}
        result = self.fetcher.sync_fetch(request)
        self.assertEqual(result['status_code'], 200)
        self.assertEqual(result['orig_url'], self.sample_task_http['url'])
        self.assertEqual(result['save'], self.sample_task_http['fetch']['save'])
        self.assertIn('content', result)

        content = result['content']
        self.assertIn('<h2>POST', content)
        self.assertIn('..A:', content)
        self.assertIn('..Cookie:', content)
        # FIXME: cookies in headers not supported
        self.assertNotIn('a=b', content)
        self.assertIn('c=d', content)
        self.assertIn('binux', content)

    def test_20_dataurl_get(self):
        """A data: URL is decoded locally without any network round trip."""
        data = copy.deepcopy(self.sample_task_http)
        data['url'] = 'data:,hello'
        result = self.fetcher.sync_fetch(data)
        self.assertEqual(result['status_code'], 200)
        self.assertIn('content', result)
        self.assertEqual(result['content'], 'hello')

    def test_30_with_queue(self):
        """Tasks placed on the inqueue come back as (task, result) pairs."""
        data = copy.deepcopy(self.sample_task_http)
        data['url'] = 'data:,hello'
        self.inqueue.put(data)
        task, result = self.outqueue.get()
        self.assertEqual(result['status_code'], 200)
        self.assertIn('content', result)
        self.assertEqual(result['content'], 'hello')

    def test_40_with_rpc(self):
        data = copy.deepcopy(self.sample_task_http)
        data['url'] = 'data:,hello'
        result = umsgpack.unpackb(self.rpc.fetch(data).data)
        self.assertEqual(result['status_code'], 200)
        self.assertIn('content', result)
#.........这里部分代码省略.........
开发者ID:aleemb,项目名称:pyspider,代码行数:103,代码来源:test_fetcher.py

示例7: TestFetcher

# 需要导入模块: from pyspider.fetcher.tornado_fetcher import Fetcher [as 别名]
# 或者: from pyspider.fetcher.tornado_fetcher.Fetcher import sync_fetch [as 别名]
class TestFetcher(unittest.TestCase):
    """End-to-end tests for ``Fetcher.sync_fetch`` using a local httpbin
    server, a ``pyproxy`` proxy process and an optional phantomjs process."""

    # Template task dict; it is shared class-level state, so every test
    # deep-copies it before mutating.
    sample_task_http = {
        "taskid": "taskid",
        "project": "project",
        "url": "",
        "fetch": {
            "method": "GET",
            "headers": {"Cookie": "a=b", "a": "b"},
            "cookies": {"c": "d"},
            "timeout": 60,
            "save": "abc",
        },
        "process": {"callback": "callback", "save": [1, 2, 3]},
    }

    @classmethod
    def setUpClass(self):
        # NOTE(review): `self` here is bound to the class object (cls).
        import tests.data_test_webpage
        import httpbin

        # Local httpbin instance the HTTP tests fetch from (port 14887).
        self.httpbin_thread = utils.run_in_subprocess(httpbin.app.run, port=14887)
        self.httpbin = "http://127.0.0.1:14887"

        self.inqueue = Queue(10)
        self.outqueue = Queue(10)
        self.fetcher = Fetcher(self.inqueue, self.outqueue)
        self.fetcher.phantomjs_proxy = "127.0.0.1:25555"
        # XML-RPC client for the endpoint the fetcher serves on port 24444.
        self.rpc = xmlrpc_client.ServerProxy("http://localhost:%d" % 24444)
        self.xmlrpc_thread = utils.run_in_thread(self.fetcher.xmlrpc_run, port=24444)
        self.thread = utils.run_in_thread(self.fetcher.run)
        # Authenticated HTTP proxy subprocess used by proxy-related tests.
        self.proxy_thread = subprocess.Popen(
            ["pyproxy", "--username=binux", "--password=123456", "--port=14830", "--debug"], close_fds=True
        )
        self.proxy = "127.0.0.1:14830"
        try:
            self.phantomjs = subprocess.Popen(
                [
                    "phantomjs",
                    os.path.join(os.path.dirname(__file__), "../pyspider/fetcher/phantomjs_fetcher.js"),
                    "25555",
                ]
            )
        except OSError:
            # phantomjs binary not installed; phantomjs-based tests degrade.
            self.phantomjs = None
        time.sleep(0.5)  # give the subprocesses a moment to bind their ports

    @classmethod
    def tearDownClass(self):
        # Shut everything down in roughly reverse start order.
        self.proxy_thread.terminate()
        self.proxy_thread.wait()
        self.httpbin_thread.terminate()
        self.httpbin_thread.join()

        if self.phantomjs:
            self.phantomjs.kill()
            self.phantomjs.wait()
        self.rpc._quit()
        self.thread.join()

        # Every service port must be released after shutdown.
        assert not utils.check_port_open(5000)
        assert not utils.check_port_open(23333)
        assert not utils.check_port_open(24444)
        assert not utils.check_port_open(25555)
        assert not utils.check_port_open(14887)

        time.sleep(1)

    def test_10_http_get(self):
        """GET: both the cookies dict and the Cookie header are forwarded."""
        request = copy.deepcopy(self.sample_task_http)
        request["url"] = self.httpbin + "/get"
        result = self.fetcher.sync_fetch(request)
        response = rebuild_response(result)

        self.assertEqual(response.status_code, 200, result)
        self.assertEqual(response.orig_url, request["url"])
        self.assertEqual(response.save, request["fetch"]["save"])
        self.assertIsNotNone(response.json, response.content)
        self.assertEqual(response.json["headers"].get("A"), "b", response.json)
        # Cookies from both 'fetch.cookies' and the Cookie header are merged.
        self.assertIn("c=d", response.json["headers"].get("Cookie"), response.json)
        self.assertIn("a=b", response.json["headers"].get("Cookie"), response.json)

    def test_15_http_post(self):
        """POST with a raw string body and an explicit cookies dict."""
        request = copy.deepcopy(self.sample_task_http)
        request["url"] = self.httpbin + "/post"
        request["fetch"]["method"] = "POST"
        request["fetch"]["data"] = "binux"
        request["fetch"]["cookies"] = {"c": "d"}
        result = self.fetcher.sync_fetch(request)
        response = rebuild_response(result)

        self.assertEqual(response.status_code, 200)
        self.assertEqual(response.orig_url, request["url"])
        self.assertEqual(response.save, request["fetch"]["save"])
        self.assertIsNotNone(response.json, response.content)

        # A raw (non form-encoded) body is reported by httpbin as a form
        # field with an empty value.
        self.assertEqual(response.json["form"].get("binux"), "")
        self.assertEqual(response.json["headers"].get("A"), "b", response.json)
        self.assertIn("c=d", response.json["headers"].get("Cookie"), response.json)
        self.assertIn("a=b", response.json["headers"].get("Cookie"), response.json)

#.........这里部分代码省略.........
开发者ID:appleboy1977,项目名称:pyspider,代码行数:103,代码来源:test_fetcher.py


注:本文中的pyspider.fetcher.tornado_fetcher.Fetcher.sync_fetch方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。