本文整理匯總了 Python 中 wpull.application.builder.Builder 類的典型用法代碼示例。如果您正苦於以下問題：Python Builder 類的具體用法？Python Builder 怎麼用？Python Builder 使用的例子？那麼，這裡精選的類代碼示例或許可以為您提供幫助。
在下文中一共展示了 Builder 類的 15 個代碼示例，這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚，您的評價將有助於系統推薦出更棒的 Python 代碼示例。
示例1: test_save_cookie
def test_save_cookie(self):
    """Check which loaded cookies end up in the ``--save-cookies`` file.

    A cookie file with two entries is loaded: ``isloggedin`` (expiry field
    ``9999999999``) and ``admin`` (empty expiry field).  After fetching a
    single page, the saved file must contain ``isloggedin`` and must not
    contain ``admin``.
    """
    parser = AppArgumentParser()
    cookie_chunks = (
        b"# Kittens\n",
        b"localhost.local",
        b"\tFALSE\t/\tFALSE\t9999999999\tisloggedin\t1\n",
        b"\tFALSE\t/\tFALSE\t\tadmin\t1\n",
    )

    # The crawl has to run inside the ``with`` block: by default
    # NamedTemporaryFile removes the file as soon as it is closed.
    with tempfile.NamedTemporaryFile() as cookie_file:
        for chunk in cookie_chunks:
            cookie_file.write(chunk)
        # Push the bytes to disk so the app can read them by file name.
        cookie_file.flush()

        argv = [
            self.get_url("/some_page/"),
            "--load-cookies",
            cookie_file.name,
            "--tries",
            "1",
            "--save-cookies",
            "wpull_test_cookies.txt",
        ]
        builder = Builder(parser.parse_args(argv), unit_test=True)
        app = builder.build()
        exit_code = yield from app.run()

    self.assertEqual(0, exit_code)
    self.assertEqual(1, builder.factory["Statistics"].files)

    with open("wpull_test_cookies.txt", "rb") as saved_file:
        saved_bytes = saved_file.read()

    self.assertIn(b"isloggedin\t1", saved_bytes)
    self.assertNotIn(b"admin\t1", saved_bytes)
示例2: test_app_python_plugin_script
def test_app_python_plugin_script(self):
    """Run the app with the ``extensive.plugin.py`` sample plugin script.

    Verifies the resulting exit code (42), the concurrency of the
    ``PipelineSeries`` (2), the number of files counted (3) and that the
    recorded duration stays below 10 seconds.
    """
    plugin_path = os.path.join(
        os.path.dirname(__file__),
        'sample_user_scripts',
        'extensive.plugin.py',
    )
    argv = [
        self.get_url('/'),
        self.get_url('/some_page'),
        self.get_url('/mordor'),
        'localhost:1/wolf',
        '--plugin-script', plugin_path,
        '--page-requisites',
        '--reject-regex', '/post/',
        '--wait', '12',
        '--retry-connrefused', '--tries', '1',
    ]
    parser = AppArgumentParser()
    builder = Builder(parser.parse_args(argv), unit_test=True)
    app = builder.build()

    exit_code = yield from app.run()

    # Dump the working directory tree to stdout to help debug failures.
    print(list(os.walk('.')))

    # NOTE(review): 42 and the concurrency of 2 are presumably set by the
    # plugin script itself -- confirm against extensive.plugin.py.
    self.assertEqual(42, exit_code)
    self.assertEqual(2, builder.factory['PipelineSeries'].concurrency)

    statistics = builder.factory['Statistics']
    self.assertEqual(3, statistics.files)

    # duration should be virtually 0 but account for slowness on travis ci
    self.assertGreater(10.0, statistics.duration)
示例3: test_session_cookie
def test_session_cookie(self):
    """Check session-cookie handling with ``--keep-session-cookies``.

    Two session cookies are loaded from a cookie file, one with an empty
    expiry field and one with expiry ``0``.  After fetching ``/cookie`` the
    jar must hold two cookies (``sessionid=boxcat`` and ``test=yes``) and
    the saved cookie file must contain ``test\\tyes``.
    """
    parser = AppArgumentParser()

    # The crawl has to run inside the ``with`` block: by default
    # NamedTemporaryFile removes the file as soon as it is closed.
    with tempfile.NamedTemporaryFile() as cookie_file:
        cookie_file.write(b"# Kittens\n")
        cookie_file.write(b"localhost.local")
        # session cookie, Python style
        cookie_file.write(b"\tFALSE\t/\tFALSE\t\ttest\tno\n")
        # session cookie, Firefox/Wget/Curl style
        cookie_file.write(b"\tFALSE\t/\tFALSE\t0\tsessionid\tboxcat\n")
        cookie_file.flush()

        argv = [
            self.get_url("/cookie"),
            "--load-cookies",
            cookie_file.name,
            "--tries",
            "1",
            "--save-cookies",
            "wpull_test_cookies.txt",
            "--keep-session-cookies",
        ]
        builder = Builder(parser.parse_args(argv), unit_test=True)
        app = builder.build()

        callback_called = False

        def _on_pipeline_end(pipeline):
            """Check the jar size the first time a pipeline ends."""
            nonlocal callback_called

            if not callback_called:
                callback_called = True
                self.assertEqual(2, len(builder.factory["CookieJar"]))

        app.event_dispatcher.add_listener(
            Application.Event.pipeline_end, _on_pipeline_end
        )

        exit_code = yield from app.run()

    self.assertTrue(callback_called)
    self.assertEqual(0, exit_code)
    self.assertEqual(1, builder.factory["Statistics"].files)

    # Sort by name so the positional assertions below are deterministic.
    cookies = sorted(
        builder.factory["CookieJar"], key=lambda cookie: cookie.name
    )
    _logger.debug("{0}".format(cookies))

    self.assertEqual(2, len(cookies))

    session_cookie, test_cookie = cookies[0], cookies[1]
    self.assertEqual("sessionid", session_cookie.name)
    self.assertEqual("boxcat", session_cookie.value)
    self.assertEqual("test", test_cookie.name)
    self.assertEqual("yes", test_cookie.value)

    with open("wpull_test_cookies.txt", "rb") as saved_file:
        saved_bytes = saved_file.read()

    self.assertIn(b"test\tyes", saved_bytes)
示例4: test_app_args_post_data
def test_app_args_post_data(self):
    """Run the app against ``/post/`` with ``--post-data`` and expect exit code 0."""
    argv = [self.get_url("/post/"), "--post-data", "text=hi"]
    parsed_args = AppArgumentParser().parse_args(argv)

    app = Builder(parsed_args, unit_test=True).build()
    exit_code = yield from app.run()

    self.assertEqual(0, exit_code)
示例5: test_timestamping_hit_orig
def test_timestamping_hit_orig(self):
    """Run with ``--timestamping`` against a pre-existing local file.

    The local ``lastmod`` file is created with content ``HI`` and its
    access/modification times set to 631152000 (1990-01-01 00:00:00 UTC).
    After the run the content must still be ``HI``.
    """
    argv = [self.get_url("/lastmod"), "--timestamping"]
    parsed_args = AppArgumentParser().parse_args(argv)

    # NOTE(review): both names resolve to the very same path.  The test
    # name suggests the second one may have been meant to be a separate
    # "lastmod.orig" file -- confirm before relying on this test.
    filename = os.path.join(self.temp_dir.name, "lastmod")
    filename_orig = os.path.join(self.temp_dir.name, "lastmod")

    for path in (filename, filename_orig):
        with open(path, "wb") as out_file:
            out_file.write(b"HI")

    os.utime(filename_orig, (631152000, 631152000))

    app = Builder(parsed_args, unit_test=True).build()
    exit_code = yield from app.run()

    self.assertEqual(0, exit_code)

    for path in (filename, filename_orig):
        with open(path, "rb") as in_file:
            self.assertEqual(b"HI", in_file.read())
示例6: main
def main(exit=True, install_tornado_bridge=True, use_signals=True):
    """Build the application from command-line arguments and run it.

    Args:
        exit: When true, terminate the process through ``sys.exit`` with the
            application's exit code; otherwise return that code.
        install_tornado_bridge: When true, install Tornado's
            ``AsyncIOMainLoop`` before anything else is set up.
        use_signals: When true, call ``setup_signal_handlers()`` on the
            built application.

    Returns:
        The exit code produced by ``run_sync()`` (only reached when
        ``exit`` is false).
    """
    if install_tornado_bridge:
        tornado.platform.asyncio.AsyncIOMainLoop().install()

    args = AppArgumentParser().parse_args()
    builder = Builder(args)
    application = builder.build()

    if use_signals:
        application.setup_signal_handlers()

    if args.debug_manhole:
        # Imported lazily: only needed when the debugging option is on.
        import manhole
        import wpull

        # Expose the builder as a module attribute before installing manhole.
        wpull.wpull_builder = builder
        manhole.install()

    exit_code = application.run_sync()

    if not exit:
        return exit_code

    sys.exit(exit_code)
示例7: test_ssl_bad_certificate
def test_ssl_bad_certificate(self):
    """Check the exit code when a web session raises ``SSLVerificationError``.

    The factory's ``WebClient`` class is replaced with a subclass whose
    sessions always raise ``SSLVerificationError`` on ``start()``.  The run
    must end with exit code 7 and zero files counted.
    """
    argv = [
        self.get_url('/'),
        '--no-robots',
        '--no-check-certificate',
        '--tries', '1',
    ]
    builder = Builder(AppArgumentParser().parse_args(argv), unit_test=True)

    class MockWebSession(WebSession):
        """Session whose ``start`` always fails with a certificate error."""

        @asyncio.coroutine
        def start(self):
            raise SSLVerificationError('A very bad certificate!')

    original_web_client = builder.factory.class_map['WebClient']

    class MockWebClient(original_web_client):
        """Web client that hands out ``MockWebSession`` instances."""

        def session(self, request):
            return MockWebSession(
                request,
                self._http_client,
                self._redirect_tracker_factory(),
                Request,
            )

    # Swap the class in before build() so the app is built with the mock.
    builder.factory.class_map['WebClient'] = MockWebClient

    app = builder.build()
    exit_code = yield from app.run()

    self.assertEqual(7, exit_code)
    self.assertEqual(0, builder.factory['Statistics'].files)
示例8: test_app_args_warc_dedup
def test_app_args_warc_dedup(self):
    """Check that ``--warc-dedup`` produces a revisit record in the WARC.

    A ``dedup.cdx`` file is written with a header line and one record for
    ``/static/my_file.txt``.  After the run, ``test.warc`` must contain the
    record's ``KQ4I...`` string, ``Type: revisit`` and ``<under-the-deer>``.
    """
    target_url = self.get_url('/static/my_file.txt')

    # The CDX file is closed (and thus flushed) before the app reads it.
    with open('dedup.cdx', 'wb') as cdx_file:
        cdx_file.write(b' CDX a k u\n')
        cdx_file.write(target_url.encode('ascii'))
        cdx_file.write(b' KQ4IUKATKL63FT5GMAE2YDRV3WERNL34')
        cdx_file.write(b' <under-the-deer>\n')

    argv = [
        target_url,
        '--no-parent',
        '--warc-file', 'test',
        '--no-warc-compression',
        '-4',
        '--no-robots',
        '--warc-dedup', 'dedup.cdx',
    ]
    builder = Builder(AppArgumentParser().parse_args(argv), unit_test=True)
    app = builder.build()
    exit_code = yield from app.run()

    with open('test.warc', 'rb') as warc_file:
        warc_bytes = warc_file.read()

    for expected in (
        b'KQ4IUKATKL63FT5GMAE2YDRV3WERNL34',
        b'Type: revisit',
        b'<under-the-deer>',
    ):
        self.assertIn(expected, warc_bytes)

    self.assertEqual(0, exit_code)
    self.assertGreaterEqual(builder.factory['Statistics'].files, 1)
示例9: test_app_args_warc
def test_app_args_warc(self):
    """Run a recursive crawl with ``--warc-file`` and inspect the output.

    The run must create ``test.warc.gz`` whose decompressed content contains
    ``FINISHED``, exit with code 0 and count at least one file.
    """
    argv = [
        self.get_url('/'),
        '--no-parent',
        '--recursive',
        '--page-requisites',
        '--warc-file', 'test',
        '-4',
        '--no-robots',
        '--no-warc-digests',
    ]
    builder = Builder(AppArgumentParser().parse_args(argv), unit_test=True)
    app = builder.build()
    exit_code = yield from app.run()

    warc_path = 'test.warc.gz'
    self.assertTrue(os.path.exists(warc_path))

    with gzip.GzipFile(warc_path) as warc_file:
        warc_bytes = warc_file.read()

    self.assertIn(b'FINISHED', warc_bytes)
    self.assertEqual(0, exit_code)
    self.assertGreaterEqual(builder.factory['Statistics'].files, 1)
示例10: test_big_payload
def test_big_payload(self):
    """Download ``/big_payload`` and compare it with a locally built payload.

    The expected payload is a chain of 10001 SHA-1 digests: the hash is
    seeded with ``foxfoxfox`` and each digest is fed back into it before the
    next one is taken.
    """
    hasher = hashlib.sha1(b"foxfoxfox")
    chunks = []

    for _ in range(10000):
        digest = hasher.digest()
        hasher.update(digest)
        chunks.append(digest)

    # One final digest after the last update closes the chain.
    chunks.append(hasher.digest())
    expected_payload = b"".join(chunks)

    parsed_args = AppArgumentParser().parse_args([self.get_url("/big_payload")])
    builder = Builder(parsed_args, unit_test=True)
    app = builder.build()
    exit_code = yield from app.run()

    self.assertTrue(os.path.exists("big_payload"))

    with open("big_payload", "rb") as downloaded:
        self.assertEqual(expected_payload, downloaded.read())

    self.assertEqual(0, exit_code)
    self.assertEqual(1, builder.factory["Statistics"].files)
示例11: test_immediate_robots_forbidden
def test_immediate_robots_forbidden(self):
    """Recursively fetch ``/forbidden``; expect exit code 0 and no files counted."""
    argv = [self.get_url("/forbidden"), "--recursive"]
    builder = Builder(AppArgumentParser().parse_args(argv), unit_test=True)

    app = builder.build()
    exit_code = yield from app.run()

    self.assertEqual(0, exit_code)
    self.assertEqual(0, builder.factory["Statistics"].files)
示例12: test_session_timeout
def test_session_timeout(self):
    """Fetch ``/sleep_long`` with a 0.1 session timeout and a single try.

    The run must end with exit code 4 and no files counted.
    """
    argv = [
        self.get_url("/sleep_long"),
        "--tries=1",
        "--session-timeout=0.1",
    ]
    builder = Builder(AppArgumentParser().parse_args(argv), unit_test=True)

    app = builder.build()
    exit_code = yield from app.run()

    self.assertEqual(4, exit_code)
    self.assertEqual(0, builder.factory["Statistics"].files)
示例13: test_misc_urls
def test_misc_urls(self):
    """Crawl a bracketed IPv6-literal URL and expect exit code 4.

    The run uses a single try, a 0.5 timeout and recursion (``-r``).
    """
    argv = [
        "http://[0:0:0:0:0:ffff:a00:0]/",
        "--tries",
        "1",
        "--timeout",
        "0.5",
        "-r",
    ]
    parsed_args = AppArgumentParser().parse_args(argv)

    app = Builder(parsed_args, unit_test=True).build()
    exit_code = yield from app.run()

    self.assertEqual(4, exit_code)
示例14: test_no_content
def test_no_content(self):
    """Fetch ``/no_content`` with one try; expect exit code 0 and one file counted."""
    argv = [self.get_url("/no_content"), "--tries=1"]
    builder = Builder(AppArgumentParser().parse_args(argv), unit_test=True)

    app = builder.build()
    exit_code = yield from app.run()

    self.assertEqual(0, exit_code)
    self.assertEqual(1, builder.factory["Statistics"].files)
示例15: test_non_http_redirect
def test_non_http_redirect(self):
    """Recursively fetch ``/non_http_redirect`` with robots checks disabled.

    The run must end with exit code 0 and no files counted.
    """
    argv = [
        self.get_url("/non_http_redirect"),
        "--recursive",
        "--no-robots",
    ]
    builder = Builder(AppArgumentParser().parse_args(argv), unit_test=True)

    app = builder.build()
    exit_code = yield from app.run()

    self.assertEqual(0, exit_code)
    self.assertEqual(0, builder.factory["Statistics"].files)