本文整理汇总了Python中wpull.app.Builder类的典型用法代码示例。如果您正苦于以下问题：Python Builder类的具体用法？Python Builder怎么用？Python Builder使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了Builder类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_timestamping_hit_orig
def test_timestamping_hit_orig(self):
    """Run ``--timestamping`` on a download that has an ``.orig`` backup.

    The target file and its ``.orig`` sibling are both seeded with
    ``b'HI'`` and only the backup is back-dated.  The run must exit with
    0 and leave the contents of both files untouched.
    """
    arg_parser = AppArgumentParser()
    args = arg_parser.parse_args([
        self.get_url('/lastmod'),
        '--timestamping',
    ])

    with cd_tempdir() as temp_dir:
        filename = os.path.join(temp_dir, 'lastmod')
        # The backup needs its own path: reusing 'lastmod' made both
        # names point at one file, so no backup copy was ever created.
        filename_orig = os.path.join(temp_dir, 'lastmod.orig')

        for path in (filename, filename_orig):
            with open(path, 'wb') as out_file:
                out_file.write(b'HI')

        # 631152000 is 1990-01-01 00:00:00 UTC.
        # NOTE(review): presumably the backup's mtime is what drives the
        # timestamp comparison -- confirm against the file writer.
        os.utime(filename_orig, (631152000, 631152000))

        builder = Builder(args)
        engine = builder.build()
        exit_code = yield engine()

        self.assertEqual(0, exit_code)

        # Neither file may have been overwritten by the download.
        for path in (filename, filename_orig):
            with open(path, 'rb') as in_file:
                self.assertEqual(b'HI', in_file.read())
示例2: test_big_payload
def test_big_payload(self):
    """Download ``/big_payload`` and compare it with a locally built copy.

    The expected body is a chain of 10001 SHA-1 digests: each digest is
    emitted and then fed back into the hash before the next one is taken.
    """
    hasher = hashlib.sha1(b'foxfoxfox')
    chunks = []

    for _ in range(10000):
        chunks.append(hasher.digest())
        hasher.update(chunks[-1])

    # One final digest after the last update closes the chain.
    chunks.append(hasher.digest())
    expected_payload = b''.join(chunks)

    parser = AppArgumentParser()
    builder = Builder(parser.parse_args([self.get_url('/big_payload')]))

    with cd_tempdir():
        engine = builder.build()
        exit_code = yield engine()

        self.assertTrue(os.path.exists('big_payload'))

        with open('big_payload', 'rb') as downloaded:
            actual_payload = downloaded.read()

        self.assertEqual(expected_payload, actual_payload)

    self.assertEqual(0, exit_code)
    self.assertEqual(1, builder.factory['Statistics'].files)
示例3: test_local_encoding
def test_local_encoding(self):
    """Pass UTF-32-LE encoded arguments and input file via ``--local-encoding``.

    One URL is given on the command line and one through ``--input-file``;
    the run must exit with 0 and count two files in the statistics.
    """
    codec = 'utf-32-le'
    parser = AppArgumentParser()

    with tempfile.NamedTemporaryFile() as url_file:
        # The input file holds a single URL line in the same encoding.
        url_file.write(self.get_url('/?qwerty').encode(codec))
        url_file.write('\n'.encode(codec))
        url_file.flush()

        plain_opts = [
            self.get_url('/?asdf'),
            '--local-encoding', 'utf-32-le',
            '--input-file', url_file.name,
        ]
        # Every argument is handed to the parser as encoded bytes.
        encoded_opts = [opt.encode(codec) for opt in plain_opts]

        builder = Builder(parser.parse_args(encoded_opts))

        with cd_tempdir():
            engine = builder.build()
            exit_code = yield engine()

    self.assertEqual(0, exit_code)
    self.assertEqual(2, builder.factory['Statistics'].files)
示例4: test_app_phantomjs
def test_app_phantomjs(self):
    """Fetch a JavaScript page with ``--phantomjs`` and check its outputs.

    Expects a compressed WARC plus HTML and PDF snapshots on disk, the
    text ``Hello world!`` in the HTML snapshot, exit code 0 and at least
    one file counted in the statistics.
    """
    cli = [
        self.get_url('/static/simple_javascript.html'),
        '--warc-file', 'test',
        '-4',
        '--no-robots',
        '--phantomjs',
        '--phantomjs-wait', '0.1',
        '--phantomjs-scroll', '2',
    ]
    builder = Builder(AppArgumentParser().parse_args(cli))

    with cd_tempdir():
        engine = builder.build()
        exit_code = yield engine()

        expected_files = (
            'test.warc.gz',
            'simple_javascript.html.snapshot.html',
            'simple_javascript.html.snapshot.pdf',
        )
        for path in expected_files:
            self.assertTrue(os.path.exists(path))

        with open('simple_javascript.html.snapshot.html', 'rb') as snapshot:
            html_data = snapshot.read()

        self.assertIn(b'Hello world!', html_data)

    self.assertEqual(0, exit_code)
    self.assertGreaterEqual(builder.factory['Statistics'].files, 1)
示例5: test_app_args_warc_dedup
def test_app_args_warc_dedup(self):
    """Record a WARC with ``--warc-dedup`` pointing at a prepared CDX file.

    The CDX file lists the fetched URL with a digest and a record id; the
    resulting uncompressed WARC must contain that digest, a revisit
    record type and the record id.
    """
    parser = AppArgumentParser()

    with cd_tempdir():
        # Header line followed by one entry: URL, digest, record id.
        cdx_parts = [
            b' CDX a k u\n',
            self.get_url('/static/my_file.txt').encode('ascii'),
            b' KQ4IUKATKL63FT5GMAE2YDRV3WERNL34',
            b' <under-the-deer>\n',
        ]
        with open('dedup.cdx', 'wb') as cdx_file:
            for part in cdx_parts:
                cdx_file.write(part)

        cli = [
            self.get_url('/static/my_file.txt'),
            '--no-parent',
            '--warc-file', 'test',
            '--no-warc-compression',
            '-4',
            '--no-robots',
            '--warc-dedup', 'dedup.cdx',
        ]
        builder = Builder(parser.parse_args(cli))
        engine = builder.build()
        exit_code = yield engine()

        with open('test.warc', 'rb') as warc_file:
            warc_data = warc_file.read()

        for marker in (b'KQ4IUKATKL63FT5GMAE2YDRV3WERNL34',
                       b'Type: revisit',
                       b'<under-the-deer>'):
            self.assertIn(marker, warc_data)

    self.assertEqual(0, exit_code)
    self.assertGreaterEqual(builder.factory['Statistics'].files, 1)
示例6: test_cookie
def test_cookie(self):
    """Load a cookie file, fetch ``/cookie`` and check the saved cookies.

    The loaded jar holds ``test=no``; after the run the jar must hold
    exactly one cookie, ``test=yes``, and the saved cookie file must
    contain it as well.
    """
    parser = AppArgumentParser()

    with tempfile.NamedTemporaryFile() as cookie_file:
        # A comment line followed by one tab-separated cookie entry.
        cookie_file.write(b'# Kittens\n')
        cookie_file.write(b'localhost.local')
        cookie_file.write(b'\tFALSE\t/\tFALSE\t\ttest\tno\n')
        cookie_file.flush()

        cli = [
            self.get_url('/cookie'),
            '--load-cookies', cookie_file.name,
            '--tries', '1',
            '--save-cookies', 'wpull_test_cookies.txt',
            '--keep-session-cookies',
        ]
        builder = Builder(parser.parse_args(cli))

        with cd_tempdir():
            engine = builder.build()
            exit_code = yield engine()

            self.assertEqual(0, exit_code)
            self.assertEqual(1, builder.factory['Statistics'].files)

            jar_contents = list(builder.factory['CookieJar'])
            _logger.debug('{0}'.format(jar_contents))
            self.assertEqual(1, len(jar_contents))

            only_cookie = jar_contents[0]
            self.assertEqual('test', only_cookie.name)
            self.assertEqual('yes', only_cookie.value)

            with open('wpull_test_cookies.txt', 'rb') as saved_file:
                saved_bytes = saved_file.read()

            self.assertIn(b'test\tyes', saved_bytes)
示例7: test_redirect_diff_host_recursive
def test_redirect_diff_host_recursive(self):
    """Follow a redirect to a different host while in ``--recursive`` mode.

    The run must exit with 0 and count no files in the statistics.
    """
    cli = [self.get_url('/redirect?where=diff-host'), '--recursive']
    builder = Builder(AppArgumentParser().parse_args(cli))

    with cd_tempdir():
        run = builder.build()
        exit_code = yield run()

    self.assertEqual(0, exit_code)
    self.assertEqual(0, builder.factory['Statistics'].files)
示例8: test_app_args
def test_app_args(self):
    """Run the application with a wide mix of command line options.

    Arguments are deliberately supplied as a mixture of ``str`` and
    ``bytes``.  After the run, the converted ``index.html`` and its
    ``.orig`` backup must exist under the protocol/host directory, the
    exit code must be 0 and two files must be counted in the statistics.
    """
    cli = [
        '/',
        '--base', self.get_url('/').encode('utf-8'),
        '--no-parent',
        '--recursive',
        '--page-requisites',
        '--database', b'test.db',
        '--server-response',
        '--random-wait',
        b'--wait', b'0.1',
        '--protocol-directories',
        '--referer', 'http://test.test',
        '--accept-regex', r'.*',
        '--header', 'Hello: world!',
        '--exclude-domains', 'asdf.invalid',
        '--exclude-hostnames', 'qwerty.invalid,uiop.invalid',
        '--no-clobber',
        '--rotate-dns',
        '-4',
        '--concurrent', '2',
        '--no-check-certificate',
        '--ascii-print',
        '--progress', 'dot',
        '--secure-protocol', 'TLSv1',
        '--convert-links', '--backup-converted',
        '--accept', '*',
        '--no-strong-robots',
        '--restrict-file-names', 'windows,lower',
        '--quota', '10m',
        '--max-filename-length', '100',
        '--user-agent', 'ΑΒΓαβγ',
        '--remote-encoding', 'latin1',
        '--http-compression',
        '--bind-address', '127.0.0.1',
    ]
    args = AppArgumentParser().parse_args(cli)

    with cd_tempdir():
        builder = Builder(args)
        run = builder.build()
        exit_code = yield run()

        # Dump the resulting directory tree to aid debugging.
        print(list(os.walk('.')))

        path_templates = (
            'http/localhost+{0}/index.html',
            'http/localhost+{0}/index.html.orig',
        )
        for template in path_templates:
            self.assertTrue(os.path.exists(
                template.format(self.get_http_port())
            ))

    self.assertEqual(0, exit_code)
    self.assertEqual(builder.factory['Statistics'].files, 2)
示例9: test_app_phantomjs
def test_app_phantomjs(self):
    """Fetch a JavaScript page with ``--phantomjs`` and a Python script hook.

    Expects an uncompressed WARC plus HTML and PDF snapshots on disk,
    checks the HTML snapshot text and then looks for snapshot records,
    content types, the scroll action and request headers in the WARC.
    """
    script_filename = os.path.join(
        os.path.dirname(__file__), 'testing', 'boring_script.py'
    )
    cli = [
        self.get_url('/static/simple_javascript.html'),
        '--warc-file', 'test',
        '--no-warc-compression',
        '-4',
        '--no-robots',
        '--phantomjs',
        '--phantomjs-wait', '0.1',
        '--phantomjs-scroll', '2',
        '--header', 'accept-language: dragon',
        '--python-script', script_filename,
    ]
    builder = Builder(AppArgumentParser().parse_args(cli))

    with cd_tempdir():
        run = builder.build()
        exit_code = yield run()

        expected_files = (
            'test.warc',
            'simple_javascript.html.snapshot.html',
            'simple_javascript.html.snapshot.pdf',
        )
        for path in expected_files:
            self.assertTrue(os.path.exists(path))

        with open('simple_javascript.html.snapshot.html', 'rb') as snapshot:
            html_data = snapshot.read()

        self.assertIn(b'Hello world!', html_data)

        with open('test.warc', 'rb') as warc_file:
            warc_data = warc_file.read()

        warc_markers = (
            b'urn:X-wpull:snapshot?url=',
            b'text/html',
            b'application/pdf',
            b'application/json',
            b'"set_scroll_top"',
        )
        for marker in warc_markers:
            self.assertIn(marker, warc_data)

        try:
            self.assertIn(b'Accept-Encoding: identity', warc_data)
        except AssertionError:
            # webkit treats localhost differently
            self.assertNotIn(b'Accept-Encoding: gzip', warc_data)

        self.assertIn(b'Accept-Language: dragon', warc_data)

    self.assertEqual(0, exit_code)
    self.assertGreaterEqual(builder.factory['Statistics'].files, 1)
示例10: test_immediate_robots_forbidden
def test_immediate_robots_forbidden(self):
    """Recursively fetch ``/forbidden`` with robots checking left enabled.

    The run must exit with 0 and count no files in the statistics.
    """
    cli = [self.get_url('/forbidden'), '--recursive']
    builder = Builder(AppArgumentParser().parse_args(cli))

    with cd_tempdir():
        run = builder.build()
        exit_code = yield run()

    self.assertEqual(0, exit_code)
    self.assertEqual(0, builder.factory['Statistics'].files)
示例11: test_non_http_redirect
def test_non_http_redirect(self):
    """Recursively fetch ``/non_http_redirect`` with robots checking off.

    The run must exit with 0 and count no files in the statistics.
    """
    cli = [
        self.get_url('/non_http_redirect'),
        '--recursive',
        '--no-robots',
    ]
    builder = Builder(AppArgumentParser().parse_args(cli))

    with cd_tempdir():
        run = builder.build()
        exit_code = yield run()

    self.assertEqual(0, exit_code)
    self.assertEqual(0, builder.factory['Statistics'].files)
示例12: test_long_cookie
def test_long_cookie(self):
    """Fetch ``/long_cookie`` and check that no cookie ends up in the jar.

    The run must exit with 0, count one file in the statistics and leave
    the cookie jar empty.
    """
    cli = [self.get_url('/long_cookie')]
    builder = Builder(AppArgumentParser().parse_args(cli))

    with cd_tempdir():
        run = builder.build()
        exit_code = yield run()

    self.assertEqual(0, exit_code)
    self.assertEqual(1, builder.factory['Statistics'].files)

    jar_contents = list(builder.factory['CookieJar'])
    _logger.debug('{0}'.format(jar_contents))
    self.assertEqual(0, len(jar_contents))
示例13: test_redirect_diff_host
def test_redirect_diff_host(self):
    """Follow a redirect to a different host without recursion.

    Retries wait zero seconds (``--waitretry 0``).  The run is expected
    to exit with code 4 and count no files in the statistics.
    """
    cli = [
        self.get_url('/redirect?where=diff-host'),
        '--waitretry', '0',
    ]
    builder = Builder(AppArgumentParser().parse_args(cli))

    with cd_tempdir():
        run = builder.build()
        exit_code = yield run()

    # FIXME: for now, we'll assume the DNS failed to resolve because
    # it tried to span hosts
    self.assertEqual(4, exit_code)
    self.assertEqual(0, builder.factory['Statistics'].files)
示例14: test_ignore_length
def test_ignore_length(self):
    """Fetch ``/underrun`` with ``--ignore-length`` and robots checking off.

    The run must exit with 0 and count one file in the statistics.
    """
    cli = [
        self.get_url('/underrun'),
        '--ignore-length',
        '--no-robots',
    ]
    builder = Builder(AppArgumentParser().parse_args(cli))

    with cd_tempdir():
        run = builder.build()
        exit_code = yield run()

    self.assertEqual(0, exit_code)
    self.assertEqual(1, builder.factory['Statistics'].files)
示例15: test_quota
def test_quota(self):
    """Recursively fetch ``/blog/`` with ``--quota 1``.

    The run must exit with 0 and count exactly one file in the
    statistics.
    """
    cli = [
        self.get_url('/blog/'),
        '--recursive',
        '--quota', '1',
    ]
    args = AppArgumentParser().parse_args(cli)

    with cd_tempdir():
        # The builder is created inside the temporary directory here.
        builder = Builder(args)
        run = builder.build()
        exit_code = yield run()

    self.assertEqual(0, exit_code)
    self.assertEqual(1, builder.factory['Statistics'].files)