本文整理汇总了 Python 中 wpull.http.request.Request.address 方法的典型用法代码示例。如果您正苦于以下问题：Python Request.address 方法的具体用法？Python Request.address 怎么用？Python Request.address 使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 wpull.http.request.Request 的用法示例。
在下文中一共展示了 Request.address 方法的 7 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: test_warc_recorder_rollback
# 需要导入模块: from wpull.http.request import Request [as 别名]
# 或者: from wpull.http.request.Request import address [as 别名]
def test_warc_recorder_rollback(self):
    """Check that a failing record write leaves the WARC file size unchanged.

    ``asdf.warc`` is seeded with ten bytes, the session's pending request
    record is replaced by one whose iteration raises ``OSError`` after
    1000 chunks, and the test asserts that the file size after the failed
    ``session.request`` call equals the size before it and that no
    ``asdf.warc-wpullinc`` file is left behind.
    """
    warc_path = 'asdf.warc'
    prefix = 'asdf'

    class BadRecord(WARCRecord):
        """Copy of a record that yields 1000 chunks and then fails."""

        def __init__(self, original_record):
            super().__init__()
            self.block_file = original_record.block_file
            self.fields = original_record.fields

        def __iter__(self):
            for _ in range(1000):
                yield b"where's my elephant?"

            raise OSError('Oops')

    # Seed the file so there is a known size to compare against later.
    with open(warc_path, 'wb') as seed_file:
        seed_file.write(b'a' * 10)

    recorder = WARCRecorder(
        prefix,
        params=WARCRecorderParams(compress=False),
    )

    request = HTTPRequest('http://example.com/')
    request.address = ('0.0.0.0', 80)
    response = HTTPResponse(200, 'OK')
    response.body = Body()

    with wpull.util.reset_file_offset(response.body):
        response.body.write(b'KITTEH DOGE')

    with recorder.session() as session:
        session.pre_request(request)
        session.request_data(request.to_bytes())

        # Swap the pending request record for the failing one.
        session._child_session._request_record = BadRecord(
            session._child_session._request_record
        )

        size_before = os.path.getsize(warc_path)

        with self.assertRaises((OSError, IOError)):
            session.request(request)

        size_after = os.path.getsize(warc_path)
        self.assertEqual(size_after, size_before)
        self.assertFalse(os.path.exists(warc_path + '-wpullinc'))

        _logger.debug('original offset {0}'.format(size_before))
示例2: test_warc_max_size_and_append
# 需要导入模块: from wpull.http.request import Request [as 别名]
# 或者: from wpull.http.request.Request import address [as 别名]
def test_warc_max_size_and_append(self):
    """Check numbered WARC output when earlier numbered files already exist.

    Two empty files, ``asdf-00000.warc`` and ``asdf-00001.warc``, are
    created first.  After recording one exchange with ``max_size=1`` and
    ``appending=True`` the test asserts that those two files are still
    empty and that ``asdf-00002.warc``, ``asdf-00003.warc`` and
    ``asdf-meta.warc`` exist and are non-empty.
    """
    prefix = 'asdf'
    preexisting = ('asdf-00000.warc', 'asdf-00001.warc')
    newly_written = ('asdf-00002.warc', 'asdf-00003.warc', 'asdf-meta.warc')

    # Create the empty files the recorder has to cope with.
    for path in preexisting:
        with open(path, 'w'):
            pass

    recorder = WARCRecorder(
        prefix,
        params=WARCRecorderParams(
            compress=False,
            max_size=1,
            appending=True,
        ),
    )

    request = HTTPRequest('http://example.com/1')
    request.address = ('0.0.0.0', 80)
    response = HTTPResponse(200, 'OK')
    response.body = Body()

    with wpull.util.reset_file_offset(response.body):
        response.body.write(b'BLAH')

    with recorder.session() as session:
        session.pre_request(request)
        session.request_data(request.to_bytes())
        session.request(request)
        session.pre_response(response)
        session.response_data(response.to_bytes())
        session.response_data(response.body.content())
        session.response(response)

    recorder.close()

    for path in preexisting + newly_written:
        self.assertTrue(os.path.exists(path))

    for path in preexisting:
        self.assertEqual(0, os.path.getsize(path))

    for path in newly_written:
        self.assertNotEqual(0, os.path.getsize(path))
示例3: test_warc_recorder_journal
# 需要导入模块: from wpull.http.request import Request [as 别名]
# 或者: from wpull.http.request.Request import address [as 别名]
def test_warc_recorder_journal(self):
    """Check that ``asdf.warc-wpullinc`` exists only while a record is written.

    The pending request record is replaced by a mock whose iteration
    asserts that the ``-wpullinc`` file exists at that moment; after
    ``session.request`` returns, the test asserts the file is gone.
    """
    warc_path = 'asdf.warc'
    prefix = 'asdf'
    test_instance = self

    class MockRecord(WARCRecord):
        """Copy of a record that checks for the ``-wpullinc`` file when iterated."""

        def __init__(self, original_record):
            super().__init__()
            self.block_file = original_record.block_file
            self.fields = original_record.fields

        def __iter__(self):
            print(list(os.walk('.')))
            test_instance.assertTrue(
                os.path.exists(warc_path + '-wpullinc')
            )

            for _ in range(1000):
                yield b"where's my elephant?"

    recorder = WARCRecorder(
        prefix,
        params=WARCRecorderParams(compress=False),
    )

    request = HTTPRequest('http://example.com/')
    request.address = ('0.0.0.0', 80)
    response = HTTPResponse(200, 'OK')
    response.body = Body()

    with wpull.util.reset_file_offset(response.body):
        response.body.write(b'KITTEH DOGE')

    with recorder.session() as session:
        session.pre_request(request)
        session.request_data(request.to_bytes())

        # Swap the pending request record for the self-checking mock.
        session._child_session._request_record = MockRecord(
            session._child_session._request_record
        )

        session.request(request)

        self.assertFalse(os.path.exists(warc_path + '-wpullinc'))
示例4: test_warc_move_max_size
# 需要导入模块: from wpull.http.request import Request [as 别名]
# 或者: from wpull.http.request.Request import address [as 别名]
def test_warc_move_max_size(self):
    """Check that output files appear under the ``move_to`` directory.

    One exchange is recorded with ``cdx=True``, ``max_size=1`` and
    ``move_to='./blah/'``; after closing the recorder the test asserts
    that two numbered WARC files, the meta WARC file and the CDX file
    all exist inside ``./blah/``.
    """
    prefix = 'asdf'
    cdx_name = 'asdf.cdx'

    os.mkdir('./blah/')

    recorder_params = WARCRecorderParams(
        compress=False,
        cdx=True,
        move_to='./blah/',
        max_size=1,
    )
    recorder = WARCRecorder(prefix, params=recorder_params)

    request = HTTPRequest('http://example.com/1')
    request.address = ('0.0.0.0', 80)
    response = HTTPResponse(200, 'OK')
    response.body = Body()

    with wpull.util.reset_file_offset(response.body):
        response.body.write(b'BLAH')

    with recorder.session() as session:
        session.pre_request(request)
        session.request_data(request.to_bytes())
        session.request(request)
        session.pre_response(response)
        session.response_data(response.to_bytes())
        session.response_data(response.body.content())
        session.response(response)

    recorder.close()

    moved_paths = (
        './blah/asdf-00000.warc',
        './blah/asdf-00001.warc',
        './blah/asdf-meta.warc',
        './blah/' + cdx_name,
    )
    for path in moved_paths:
        self.assertTrue(os.path.exists(path))
示例5: test_cdx_dedup
# 需要导入模块: from wpull.http.request import Request [as 别名]
# 或者: from wpull.http.request.Request import address [as 别名]
def test_cdx_dedup(self):
    """Check that a previously visited payload is written as a revisit record.

    The URL table is seeded with one visit for ``/fennec`` (record ID and
    a digest string).  Two exchanges with the same body ``b'kitbit'`` are
    then recorded, one for ``/fennec`` and one for ``/horse``.  The test
    asserts that the WARC file contains a ``revisit`` record referring to
    the seeded record ID, that ``b'kitbit'`` occurs exactly once in the
    WARC file, and that the CDX file mentions the ``/horse`` URL.
    """
    url_table = URLTable()
    recorder = WARCRecorder(
        'asdf',
        params=WARCRecorderParams(
            compress=False, cdx=True, url_table=url_table
        )
    )

    def record_exchange(req, resp):
        # Push one request/response pair through a fresh recorder session.
        with recorder.session() as session:
            session.pre_request(req)
            session.request_data(req.to_bytes())
            session.request(req)
            session.pre_response(resp)
            session.response_data(resp.to_bytes())
            session.response_data(resp.body.content())
            session.response(resp)

    url_table.add_visits([
        (
            'http://example.com/fennec',
            '<urn:uuid:8a534d31-bd06-4056-8a0f-bdc5fd611036>',
            'B62D734VFEKIDLFAB7TTSCSZF64BKAYJ'
        )
    ])

    request = HTTPRequest('http://example.com/fennec')
    request.address = ('0.0.0.0', 80)
    response = HTTPResponse(200, 'OK')
    response.body = Body()
    # Taken before any body bytes are written to the response.
    revisit_header_size = len(response.to_bytes())

    with wpull.util.reset_file_offset(response.body):
        response.body.write(b'kitbit')

    record_exchange(request, response)

    request = HTTPRequest('http://example.com/horse')
    request.address = ('0.0.0.0', 80)
    response = HTTPResponse(200, 'OKaaaaaaaaaaaaaaaaaaaaaaaaaa')
    response.body = Body()

    with wpull.util.reset_file_offset(response.body):
        response.body.write(b'kitbit')

    record_exchange(request, response)

    _logger.info('FINISHED')

    recorder.close()

    with open('asdf.warc', 'rb') as warc_in:
        warc_bytes = warc_in.read()

    with open('asdf.cdx', 'rb') as cdx_in:
        cdx_bytes = cdx_in.read()

    self.assertTrue(warc_bytes.startswith(b'WARC/1.0'))

    # Fragments that must all be present in the WARC file, checked in order.
    expected_fragments = (
        b'WARC-Type: revisit\r\n',
        b'WARC-Refers-To: '
        b'<urn:uuid:8a534d31-bd06-4056-8a0f-bdc5fd611036>\r\n',
        b'WARC-Truncated: length\r\n',
        b'WARC-Profile: http://netpreserve.org/warc/1.0/revisit/'
        b'identical-payload-digest\r\n',
        b'Content-Length: ' +
        str(revisit_header_size).encode('ascii') + b'\r\n',
        b'WARC-Target-URI: http://example.com/fennec\r\n',
        b'WARC-Target-URI: http://example.com/horse\r\n',
    )
    for fragment in expected_fragments:
        self.assertIn(fragment, warc_bytes)

    self.assertEqual(1, warc_bytes.count(b'kitbit'))

    self.assertIn(b'http://example.com/horse ', cdx_bytes)
示例6: test_warc_recorder
# 需要导入模块: from wpull.http.request import Request [as 别名]
# 或者: from wpull.http.request.Request import address [as 别名]
def test_warc_recorder(self):
# Records one request/response exchange through a WARCRecorder configured
# with CDX output and one extra field, then checks the contents of the
# resulting WARC and CDX files.
# NOTE(review): indentation was lost in this listing and the end of the
# method is cut off (see the elision marker on the last line), so this
# block is documented only, not rewritten.
file_prefix = 'asdf'
warc_filename = 'asdf.warc'
cdx_filename = 'asdf.cdx'
warc_recorder = WARCRecorder(
file_prefix,
params=WARCRecorderParams(
compress=False,
extra_fields=[('Extra-field', 'my_extra_field')],
cdx=True,
),
)
request = HTTPRequest('http://example.com/')
# NOTE(review): prepare_for_send() is called both before and after the
# address is assigned; confirm whether the first call is intentional.
request.prepare_for_send()
request.address = ('0.0.0.0', 80)
request.prepare_for_send()
response = HTTPResponse(200, 'OK')
response.body = Body()
with wpull.util.reset_file_offset(response.body):
response.body.write(b'KITTEH DOGE')
# Feed the request and the response through one recorder session.
with warc_recorder.session() as session:
session.pre_request(request)
session.request_data(request.to_bytes())
session.request(request)
session.pre_response(response)
session.response_data(response.to_bytes())
session.response_data(response.body.content())
session.response(response)
# The 'FINISHED' log message is asserted to appear in the WARC file below.
_logger.info('FINISHED')
warc_recorder.close()
with open(warc_filename, 'rb') as in_file:
warc_file_content = in_file.read()
with open(cdx_filename, 'rb') as in_file:
cdx_file_content = in_file.read()
# Header fields and payload fragments expected in the WARC file.
self.assertTrue(warc_file_content.startswith(b'WARC/1.0'))
self.assertIn(b'WARC-Type: warcinfo\r\n', warc_file_content)
self.assertIn(b'Content-Type: application/warc-fields',
warc_file_content)
self.assertIn(b'WARC-Date: ', warc_file_content)
self.assertIn(b'WARC-Record-ID: <urn:uuid:', warc_file_content)
self.assertIn(b'WARC-Block-Digest: sha1:', warc_file_content)
self.assertIn(b'WARC-Payload-Digest: sha1:', warc_file_content)
self.assertIn(b'WARC-Type: request\r\n', warc_file_content)
self.assertIn(b'WARC-Target-URI: http://', warc_file_content)
self.assertIn(b'Content-Type: application/http;msgtype=request',
warc_file_content)
self.assertIn(b'WARC-Type: response', warc_file_content)
self.assertIn(b'WARC-Concurrent-To: <urn:uuid:', warc_file_content)
self.assertIn(b'Content-Type: application/http;msgtype=response',
warc_file_content)
self.assertIn(
'Wpull/{0}'.format(wpull.version.__version__).encode('utf-8'),
warc_file_content
)
self.assertIn(
'Python/{0}'.format(
wpull.util.python_version()).encode('utf-8'),
warc_file_content
)
# The extra field was configured as 'Extra-field' but is expected in the
# file as 'Extra-Field'.
self.assertIn(b'Extra-Field: my_extra_field', warc_file_content)
self.assertIn(b'GET / HTTP', warc_file_content)
self.assertIn(b'KITTEH DOGE', warc_file_content)
self.assertIn(b'FINISHED', warc_file_content)
self.assertIn(b'WARC-Target-URI: urn:X-wpull:log', warc_file_content)
self.assertIn(b'Content-Length:', warc_file_content)
self.assertNotIn(b'Content-Length: 0', warc_file_content)
# CDX checks: the first line is treated as the label header and the second
# as the single record line.
cdx_lines = cdx_file_content.split(b'\n')
cdx_labels = cdx_lines[0].strip().split(b' ')
cdx_fields = cdx_lines[1].split(b' ')
print(cdx_lines)
# Presumably the third element is the empty string after a trailing
# newline -- TODO confirm.
self.assertEqual(3, len(cdx_lines))
self.assertEqual(10, len(cdx_labels))
self.assertEqual(9, len(cdx_fields))
self.assertTrue(cdx_lines[0].startswith(b' CDX'))
self.assertEqual(b'http://example.com/', cdx_fields[0])
self.assertEqual(b'-', cdx_fields[2])
self.assertEqual(b'200', cdx_fields[3])
self.assertNotEqual(b'-', cdx_fields[4])
self.assertNotEqual(b'0', cdx_fields[5])
self.assertNotEqual(b'0', cdx_fields[6])
self.assertEqual(
os.path.basename(warc_filename), cdx_fields[7].decode('ascii'))
# Fields 5 and 6 are parsed as integers named length and offset.
length = int(cdx_fields[5])
offset = int(cdx_fields[6])
with open(warc_filename, 'rb') as in_file:
#......... part of the code is omitted here .........
示例7: test_warc_recorder_max_size
# 需要导入模块: from wpull.http.request import Request [as 别名]
# 或者: from wpull.http.request.Request import address [as 别名]
def test_warc_recorder_max_size(self):
    """Check that ``max_size=1`` spreads two exchanges over numbered files.

    Two exchanges are recorded with CDX output enabled.  The test asserts
    that ``asdf-00000.warc`` holds the first body and ``asdf-00001.warc``
    the second, that each starts with ``WARC/1.0`` and contains a
    ``warcinfo`` record, that the CDX file mentions both URLs, that the
    meta WARC file contains the ``FINISHED`` log message, and finally
    runs ``self.validate_warc`` on all three WARC files.
    """
    prefix = 'asdf'
    cdx_name = 'asdf.cdx'

    recorder = WARCRecorder(
        prefix,
        params=WARCRecorderParams(
            compress=False,
            extra_fields=[('Extra-field', 'my_extra_field')],
            cdx=True, max_size=1,
        )
    )

    def record_exchange(req, resp):
        # Push one request/response pair through a fresh recorder session.
        with recorder.session() as session:
            session.pre_request(req)
            session.request_data(req.to_bytes())
            session.request(req)
            session.pre_response(resp)
            session.response_data(resp.to_bytes())
            session.response_data(resp.body.content())
            session.response(resp)

    request = HTTPRequest('http://example.com/1')
    request.address = ('0.0.0.0', 80)
    response = HTTPResponse(200, 'OK')
    response.body = Body()

    with wpull.util.reset_file_offset(response.body):
        response.body.write(b'KITTEH DOGE')

    record_exchange(request, response)

    request = HTTPRequest('http://example.com/2')
    request.address = ('0.0.0.0', 80)
    response = HTTPResponse(200, 'OK')
    response.body = Body()

    with wpull.util.reset_file_offset(response.body):
        response.body.write(b'DOGE KITTEH')

    record_exchange(request, response)

    _logger.info('FINISHED')

    recorder.close()

    # Each numbered file is expected to carry exactly one of the bodies.
    expected_bodies = (
        ('asdf-00000.warc', b'KITTEH DOGE'),
        ('asdf-00001.warc', b'DOGE KITTEH'),
    )
    for warc_name, body_bytes in expected_bodies:
        with open(warc_name, 'rb') as warc_in:
            warc_bytes = warc_in.read()

        self.assertTrue(warc_bytes.startswith(b'WARC/1.0'))
        self.assertIn(b'WARC-Type: warcinfo', warc_bytes)
        self.assertIn(body_bytes, warc_bytes)

    with open(cdx_name, 'rb') as cdx_in:
        cdx_bytes = cdx_in.read()

    cdx_lines = cdx_bytes.split(b'\n')
    cdx_labels = cdx_lines[0].strip().split(b' ')

    print(cdx_lines)

    self.assertEqual(4, len(cdx_lines))
    self.assertEqual(10, len(cdx_labels))

    for url in (b'http://example.com/1', b'http://example.com/2'):
        self.assertIn(url, cdx_bytes)

    with open('asdf-meta.warc', 'rb') as meta_in:
        meta_bytes = meta_in.read()

    self.assertIn(b'FINISHED', meta_bytes)

    for warc_name in ('asdf-00000.warc', 'asdf-00001.warc', 'asdf-meta.warc'):
        self.validate_warc(warc_name)