
Python Request.new Method Code Examples

This article collects typical usage examples of the Request.new method from wpull.http.request in Python. If you are wondering what Request.new does or how to call it, the hand-picked examples below should help. You can also explore other usage examples for the containing class, wpull.http.request.Request.


Fifteen code examples of Request.new are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the site surface better Python examples.
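Every example below follows the same basic pattern: build a request with Request.new, then hand it to a client, scraper, or reader. Here is a minimal sketch of that pattern (a sketch only, assuming a wpull version whose Request.new accepts a plain URL string, as the tests below do):

    from wpull.http.request import Request

    # Build a GET request from a URL string. As Example 13 shows,
    # Request.new parses the URL and fills in the Host header.
    request = Request.new('http://example.com/robots.txt')

    print(request.url_info.url)  # the parsed URL
    print(request.header())      # the serialized request line and headers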

Example 1: test_html_detect

# Required import: from wpull.http.request import Request [as alias]
# Or: from wpull.http.request.Request import new [as alias]
    def test_html_detect(self):
        self.assertTrue(HTMLReader.is_file(
            io.BytesIO('<html><body>hi</body></html>'.encode('utf-16le'))
        ))
        self.assertFalse(HTMLReader.is_file(
            io.BytesIO('hello world!'.encode('utf-16le'))
        ))
        self.assertTrue(HTMLReader.is_file(
            io.BytesIO(b'<title>hello</title>hi')
        ))
        self.assertTrue(HTMLReader.is_file(
            io.BytesIO(b'<html><body>hello')
        ))
        self.assertTrue(HTMLReader.is_file(
            io.BytesIO(
                b'The document has moved <a href="somewhere.html">here</a>'
            )
        ))
        self.assertTrue(
            HTMLReader.is_url(URLInfo.parse('example.com/index.htm'))
        )
        self.assertTrue(
            HTMLReader.is_url(URLInfo.parse('example.com/index.html'))
        )
        self.assertTrue(
            HTMLReader.is_url(URLInfo.parse('example.com/index.dhtm'))
        )
        self.assertTrue(
            HTMLReader.is_url(URLInfo.parse('example.com/index.xhtml'))
        )
        self.assertTrue(
            HTMLReader.is_url(URLInfo.parse('example.com/index.xht'))
        )
        self.assertFalse(
            HTMLReader.is_url(URLInfo.parse('example.com/image.jpg'))
        )
        self.assertTrue(
            HTMLReader.is_request(Request.new('example.com/index.html'))
        )
        self.assertFalse(
            HTMLReader.is_request(Request.new('example.com/image.jpg'))
        )

        response = Response('HTTP/1.0', '200', 'OK')
        response.fields['Content-Type'] = 'text/html'
        self.assertTrue(HTMLReader.is_response(response))

        response = Response('HTTP/1.0', '200', 'OK')
        response.fields['Content-Type'] = 'image/png'
        self.assertFalse(HTMLReader.is_response(response))
Developer: lowks, Project: wpull, Lines: 52, Source: document_test.py

Example 2: test_fetch_disallow

# Required import: from wpull.http.request import Request [as alias]
# Or: from wpull.http.request.Request import new [as alias]
    def test_fetch_disallow(self):
        http_client = MockHTTPClient()
        pool = RobotsTxtPool()
        client = RichClient(http_client, pool)
        session = MockRobotsTxtRichClientSession(
            client, Request.new('http://example.com')
        )

        self.assertEqual(RobotsState.unknown, session._robots_state)

        # The session issues a request for robots.txt before the original URL.
        request = session.next_request
        self.assertTrue(request.url_info.url.endswith('robots.txt'))

        response = Response('HTTP/1.0', 200, 'OK')
        response.body.content_file = io.StringIO('User-agent:*\nDisallow: /\n')

        http_client.response = response
        yield session.fetch()

        self.assertEqual(RobotsState.denied, session._robots_state)

        request = session.next_request
        self.assertIsNone(request)

        try:
            yield session.fetch()
        except RobotsDenied:
            pass
        else:
            self.fail()

        self.assertTrue(session.done)
Developer: DanielOaks, Project: wpull, Lines: 34, Source: robotstxt_test.py
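Taken together with Examples 4 and 11 below, this shows the session's robots.txt state machine: it starts in RobotsState.unknown, fetches robots.txt before the original URL, and ends in RobotsState.denied (after which fetch() raises RobotsDenied) or RobotsState.ok, depending on the response.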

Example 3: test_sitemap_scraper_xml

# Required import: from wpull.http.request import Request [as alias]
# Or: from wpull.http.request.Request import new [as alias]
    def test_sitemap_scraper_xml(self):
        scraper = SitemapScraper()
        request = Request.new('http://example.com/sitemap.xml')
        response = Response('HTTP/1.0', 200, 'OK')

        with wpull.util.reset_file_offset(response.body.content_file):
            response.body.content_file.write(
                b'''<?xml version="1.0" encoding="UTF-8"?>
                <urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
                   <url>
                      <loc>http://www.example.com/</loc>
                      <lastmod>2005-01-01</lastmod>
                      <changefreq>monthly</changefreq>
                      <priority>0.8</priority>
                   </url>
                </urlset>
            '''
            )

        scrape_info = scraper.scrape(request, response)
        inline_urls = scrape_info['inline_urls']
        linked_urls = scrape_info['linked_urls']

        self.assertEqual({
            'http://www.example.com/',
            },
            linked_urls
        )
        self.assertFalse(inline_urls)
Developer: lowks, Project: wpull, Lines: 31, Source: scraper_test.py

Example 4: test_server_error

# Required import: from wpull.http.request import Request [as alias]
# Or: from wpull.http.request.Request import new [as alias]
    def test_server_error(self):
        http_client = MockHTTPClient()
        pool = RobotsTxtPool()
        client = RichClient(http_client, pool)
        session = MockRobotsTxtRichClientSession(
            client, Request.new('http://example.com')
        )

        self.assertEqual(RobotsState.unknown, session._robots_state)

        # Answer every robots.txt attempt with a server error; after
        # repeated failures the session gives up.
        for dummy in range(21):
            request = session.next_request
            self.assertTrue(request.url_info.url.endswith('robots.txt'))

            response = Response('HTTP/1.0', 500, 'Opps')

            http_client.response = response
            yield session.fetch()

        request = session.next_request
        self.assertIsNone(request)

        try:
            yield session.fetch()
        except RobotsDenied:
            pass
        else:
            self.fail()

        self.assertTrue(session.done)
Developer: imshashank, Project: data-mining, Lines: 32, Source: robotstxt_test.py

Example 5: test_rss_as_html

# Required import: from wpull.http.request import Request [as alias]
# Or: from wpull.http.request.Request import new [as alias]
    def test_rss_as_html(self):
        scraper = HTMLScraper()
        request = Request.new('http://example.com/')
        response = Response('HTTP/1.0', 200, '')
        response.fields['content-type'] = 'application/rss+xml'

        with wpull.util.reset_file_offset(response.body.content_file):
            html_file_path = os.path.join(os.path.dirname(__file__),
                                          'testing', 'samples', 'rss.xml')
            with open(html_file_path, 'rb') as in_file:
                shutil.copyfileobj(in_file, response.body.content_file)

        scrape_info = scraper.scrape(request, response)

        self.assertTrue(scrape_info)
        inline_urls = scrape_info['inline_urls']
        linked_urls = scrape_info['linked_urls']
        self.assertFalse(
            inline_urls
        )
        self.assertEqual(
            {
                'http://www.someexamplerssdomain.com/main.html',
                'http://www.wikipedia.org/'
            },
            linked_urls
        )
Developer: lowks, Project: wpull, Lines: 29, Source: scraper_test.py

Example 6: test_html_scraper_links_base_href

# Required import: from wpull.http.request import Request [as alias]
# Or: from wpull.http.request.Request import new [as alias]
    def test_html_scraper_links_base_href(self):
        scraper = HTMLScraper()
        request = Request.new('http://example.com/')
        response = Response('HTTP/1.0', 200, 'OK')

        with wpull.util.reset_file_offset(response.body.content_file):
            html_file_path = os.path.join(os.path.dirname(__file__),
                                          'testing', 'samples',
                                          'basehref.html')
            with open(html_file_path, 'rb') as in_file:
                shutil.copyfileobj(in_file, response.body.content_file)

        scrape_info = scraper.scrape(request, response)
        inline_urls = scrape_info['inline_urls']
        linked_urls = scrape_info['linked_urls']

        self.assertEqual('utf-8', scrape_info['encoding'])

        self.assertEqual({
            'http://cdn.example.com/stylesheet1.css',
            'http://www.example.com/stylesheet2.css',
            'http://example.com/a/stylesheet3.css',
            'http://example.com/a/dir/image1.png',
            'http://example.com/dir/image2.png',
            'http://example.net/image3.png',
            'http://example.com/dir/image4.png',
            },
            inline_urls
        )
        self.assertEqual({
            'http://example.com/a/'
            },
            linked_urls
        )
Developer: lowks, Project: wpull, Lines: 36, Source: scraper_test.py

Example 7: test_xhtml_invalid

# Required import: from wpull.http.request import Request [as alias]
# Or: from wpull.http.request.Request import new [as alias]
    def test_xhtml_invalid(self):
        scraper = HTMLScraper()
        request = Request.new('http://example.com/')
        response = Response('HTTP/1.0', 200, '')

        with wpull.util.reset_file_offset(response.body.content_file):
            html_file_path = os.path.join(os.path.dirname(__file__),
                                          'testing', 'samples',
                                          'xhtml_invalid.html')
            with open(html_file_path, 'rb') as in_file:
                shutil.copyfileobj(in_file, response.body.content_file)

        scrape_info = scraper.scrape(request, response)
        inline_urls = scrape_info['inline_urls']
        linked_urls = scrape_info['linked_urls']

        self.assertEqual(
            {
                'http://example.com/image.png',
                'http://example.com/script.js',
            },
            inline_urls
        )
        self.assertEqual(
            {
                'http://example.com/link'
            },
            linked_urls
        )
Developer: lowks, Project: wpull, Lines: 31, Source: scraper_test.py

Example 8: test_html_soup

# Required import: from wpull.http.request import Request [as alias]
# Or: from wpull.http.request.Request import new [as alias]
    def test_html_soup(self):
        scraper = HTMLScraper()
        request = Request.new('http://example.com/')
        response = Response('HTTP/1.0', 200, '')
        response.fields['Refresh'] = 'yes'

        with wpull.util.reset_file_offset(response.body.content_file):
            html_file_path = os.path.join(os.path.dirname(__file__),
                                          'testing', 'samples', 'soup.html')
            with open(html_file_path, 'rb') as in_file:
                shutil.copyfileobj(in_file, response.body.content_file)

        scrape_info = scraper.scrape(request, response)
        inline_urls = scrape_info['inline_urls']
        linked_urls = scrape_info['linked_urls']

        self.assertEqual(
            {'http://example.com/ABOUTM~1.JPG'},
            inline_urls
        )
        self.assertEqual(
            {
                'http://example.com/BLOG',
                'http://example.com/web ring/Join.htm',
            },
            linked_urls
        )
Developer: lowks, Project: wpull, Lines: 29, Source: scraper_test.py

Example 9: test_html_krokozyabry

# Required import: from wpull.http.request import Request [as alias]
# Or: from wpull.http.request.Request import new [as alias]
    def test_html_krokozyabry(self):
        scraper = HTMLScraper()
        request = Request.new('http://example.com/')
        response = Response('HTTP/1.0', 200, '')
        response.fields['content-type'] = 'text/html; charset=KOI8-R'

        with wpull.util.reset_file_offset(response.body.content_file):
            html_file_path = os.path.join(os.path.dirname(__file__),
                                          'testing', 'samples',
                                          'krokozyabry.html')
            with open(html_file_path, 'rb') as in_file:
                shutil.copyfileobj(in_file, response.body.content_file)

        scrape_info = scraper.scrape(request, response)
        inline_urls = scrape_info['inline_urls']
        linked_urls = scrape_info['linked_urls']

        self.assertEqual('koi8-r', scrape_info['encoding'])

        self.assertEqual(
            set(),
            inline_urls
        )
        self.assertEqual(
            {'http://example.com/Кракозябры'},
            linked_urls
        )
Developer: lowks, Project: wpull, Lines: 29, Source: scraper_test.py

Example 10: test_javascript_heavy_inline_monstrosity

# Required import: from wpull.http.request import Request [as alias]
# Or: from wpull.http.request.Request import new [as alias]
    def test_javascript_heavy_inline_monstrosity(self):
        scraper = HTMLScraper()
        request = Request.new('http://example.com/')
        response = Response('HTTP/1.0', 200, 'OK')

        with wpull.util.reset_file_offset(response.body.content_file):
            html_file_path = os.path.join(os.path.dirname(__file__),
                                          'testing', 'samples',
                                          'twitchplayspokemonfirered.html')
            with open(html_file_path, 'rb') as in_file:
                shutil.copyfileobj(in_file, response.body.content_file)

        scrape_info = scraper.scrape(request, response)
        inline_urls = scrape_info['inline_urls']
        linked_urls = scrape_info['linked_urls']

        self.assertIn(
            'http://cdn.bulbagarden.net/upload/archive/a/a4/'
            '20090718115357%21195Quagsire.png',
            inline_urls
        )
        self.assertIn(
            'http://www.google.com/url?q=http%3A%2F%2Fwww.reddit.com%2F'
            'user%2FGoldenSandslash15&sa=D&sntz=1&'
            'usg=AFQjCNElFBxZYdNm5mWoRSncf5tbdIJQ-A',
            linked_urls
        )
Developer: lowks, Project: wpull, Lines: 29, Source: scraper_test.py

Example 11: test_redirect_loop

# Required import: from wpull.http.request import Request [as alias]
# Or: from wpull.http.request.Request import new [as alias]
    def test_redirect_loop(self):
        http_client = MockHTTPClient()
        pool = RobotsTxtPool()
        client = RichClient(http_client, pool)
        session = MockRobotsTxtRichClientSession(
            client, Request.new('http://example.com')
        )

        self.assertEqual(RobotsState.unknown, session._robots_state)

        # Answer every robots.txt attempt with a redirect back to itself;
        # unlike the server-error case, the session keeps trying.
        for dummy in range(21):
            request = session.next_request
            self.assertTrue(request.url_info.url.endswith('robots.txt'))

            response = Response('HTTP/1.0', 302, 'See else')
            response.url_info = request.url_info
            response.fields['location'] = '/robots.txt'

            http_client.response = response
            yield session.fetch()

        request = session.next_request
        self.assertTrue(request)

        response = Response('HTTP/1.0', 200, 'OK')

        http_client.response = response
        yield session.fetch()

        self.assertEqual(RobotsState.ok, session._robots_state)

        print(session.next_request)
        self.assertTrue(session.done)
Developer: imshashank, Project: data-mining, Lines: 35, Source: robotstxt_test.py

Example 12: test_sitemap_scraper_xml_index

# Required import: from wpull.http.request import Request [as alias]
# Or: from wpull.http.request.Request import new [as alias]
    def test_sitemap_scraper_xml_index(self):
        scraper = SitemapScraper()
        request = Request.new('http://example.com/sitemap.xml')
        response = Response('HTTP/1.0', 200, 'OK')

        with wpull.util.reset_file_offset(response.body.content_file):
            response.body.content_file.write(
                b'''<?xml version="1.0" encoding="UTF-8"?>
                <sitemapindex
                xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
                   <sitemap>
                      <loc>http://www.example.com/sitemap1.xml.gz</loc>
                      <lastmod>2004-10-01T18:23:17+00:00</lastmod>
                   </sitemap>
                </sitemapindex>
            '''
            )

        scrape_info = scraper.scrape(request, response)
        inline_urls = scrape_info['inline_urls']
        linked_urls = scrape_info['linked_urls']

        self.assertEqual({
            'http://www.example.com/sitemap1.xml.gz',
            },
            linked_urls
        )
        self.assertFalse(inline_urls)
Developer: lowks, Project: wpull, Lines: 30, Source: scraper_test.py

Example 13: test_request

# Required import: from wpull.http.request import Request [as alias]
# Or: from wpull.http.request.Request import new [as alias]
    def test_request(self):
        request = Request.new('http://example.com/robots.txt')
        self.assertEqual(
            (b'GET /robots.txt HTTP/1.1\r\n'
             b'Host: example.com\r\n'
             b'\r\n'),
            request.header()
        )
Developer: DanielOaks, Project: wpull, Lines: 10, Source: http_test.py
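This is the clearest view of what Request.new builds: from a bare URL string it derives the request line and the Host header, so callers do not assemble headers by hand. Below is a short sketch of adjusting the request before sending it, as Example 14 does with the version attribute; note that the fields mapping on Request is an assumption here, mirrored from how Response.fields is used in the examples above:

    from wpull.http.request import Request

    request = Request.new('http://example.com/')
    request.version = 'HTTP/1.0'  # as in Example 14
    # Assumption: Request exposes a header mapping named `fields`,
    # mirroring Response.fields in the scraper examples.
    request.fields['User-Agent'] = 'ExampleBot/1.0'
    print(request.header())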

Example 14: test_connection_reuse

# Required import: from wpull.http.request import Request [as alias]
# Or: from wpull.http.request.Request import new [as alias]
    def test_connection_reuse(self):
        connection = Connection('localhost', self._port)
        request = Request.new(self.get_url('/'))
        request.version = 'HTTP/1.0'
        # Both fetches on the same Connection object should succeed.
        response = yield connection.fetch(request)
        self.assertEqual(200, response.status_code)
        response = yield connection.fetch(request)
        self.assertEqual(200, response.status_code)
Developer: DanielOaks, Project: wpull, Lines: 10, Source: http_test.py

Example 15: test_ssl_fail

# Required import: from wpull.http.request import Request [as alias]
# Or: from wpull.http.request.Request import new [as alias]
    def test_ssl_fail(self):
        connection = Connection('localhost', self.get_http_port())
        try:
            yield connection.fetch(Request.new(self.get_url('/')))
        except SSLVerficationError:
            pass
        else:
            self.fail()
Developer: DanielOaks, Project: wpull, Lines: 10, Source: http_test.py


Note: The wpull.http.request.Request.new examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by many developers; copyright remains with the original authors. Consult each project's license before distributing or using the code, and do not repost without permission.