This article collects typical usage examples of the Python function mapreduce.test_support.execute_until_empty. If you are wondering what execute_until_empty does, how to call it, or what real-world usage looks like, the curated examples below should help.
A total of 15 code examples of execute_until_empty are shown below, ordered by popularity by default.
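All of the examples share the same basic shape: seed some test data, start a map job or a pipeline, drain the task-queue stub with execute_until_empty so every enqueued task (and the tasks those tasks enqueue) actually runs, then assert on the results. The following minimal sketch shows that shape in isolation. It is an illustrative assumption, not code taken from the examples: the TestEntity model, the _noop_handler function, and the testbed setup are placeholders, and a real project may need additional stub configuration (for example a queue.yaml root path).

import unittest

from google.appengine.ext import db
from google.appengine.ext import testbed
from mapreduce import control
from mapreduce import test_support


class TestEntity(db.Model):
  """Hypothetical datastore kind used as map input."""
  data = db.StringProperty()


def _noop_handler(entity):
  """Hypothetical map handler; mapreduce resolves handlers by dotted path."""
  pass


class MinimalDrainTest(unittest.TestCase):

  def setUp(self):
    # Activate in-memory datastore and taskqueue stubs.
    self.testbed = testbed.Testbed()
    self.testbed.activate()
    self.testbed.init_datastore_v3_stub()
    self.testbed.init_taskqueue_stub()
    self.taskqueue = self.testbed.get_stub(testbed.TASKQUEUE_SERVICE_NAME)

  def tearDown(self):
    self.testbed.deactivate()

  def testJobRunsToCompletion(self):
    TestEntity(data="x").put()

    # start_map only enqueues the kickoff task; nothing has run yet.
    control.start_map(
        "minimal_job",
        __name__ + "._noop_handler",
        "mapreduce.input_readers.DatastoreInputReader",
        {"entity_kind": __name__ + ".TestEntity"},
        shard_count=1)

    # Execute queued tasks, and any tasks they enqueue in turn,
    # until the queue is empty, i.e. the whole job has finished.
    test_support.execute_until_empty(self.taskqueue)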
Example 1: testSortFile
def testSortFile(self):
  """Test sorting a file."""
  bucket_name = "testbucket"
  test_filename = "testfile"
  full_filename = "/%s/%s" % (bucket_name, test_filename)

  input_data = [(str(i), "_" + str(i)) for i in range(100)]

  with cloudstorage.open(full_filename, mode="w") as f:
    with records.RecordsWriter(f) as w:
      for (k, v) in input_data:
        proto = kv_pb.KeyValue()
        proto.set_key(k)
        proto.set_value(v)
        w.write(proto.Encode())

  p = shuffler._SortChunksPipeline("testjob", bucket_name, [[full_filename]])
  p.start()
  test_support.execute_until_empty(self.taskqueue)
  p = shuffler._SortChunksPipeline.from_id(p.pipeline_id)

  input_data.sort()
  output_files = p.outputs.default.value[0]
  output_data = []
  for output_file in output_files:
    with cloudstorage.open(output_file) as f:
      for binary_record in records.RecordsReader(f):
        proto = kv_pb.KeyValue()
        proto.ParseFromString(binary_record)
        output_data.append((proto.key(), proto.value()))

  self.assertEquals(input_data, output_data)
  self.assertEquals(1, len(self.emails))
Example 2: _run_test
def _run_test(self, num_shards, num_files):
  bucket_name = "testing"
  object_prefix = "file-"
  job_name = "test_map"
  input_class = (input_readers.__name__ + "." +
                 input_readers._GoogleCloudStorageInputReader.__name__)

  expected_content = self.create_test_content(bucket_name,
                                              object_prefix,
                                              num_files)

  control.start_map(
      job_name,
      __name__ + "." + "_input_reader_memory_mapper",
      input_class,
      {
          "input_reader": {
              "bucket_name": bucket_name,
              "objects": [object_prefix + "*"]
          },
      },
      shard_count=num_shards)

  test_support.execute_until_empty(self.taskqueue)
  # list.sort() sorts in place and returns None, so compare sorted copies.
  self.assertEqual(sorted(expected_content), sorted(_memory_mapper_data))
Example 3: testMergeFiles
def testMergeFiles(self):
  """Test merging multiple files."""
  input_data = [(str(i), "_" + str(i)) for i in range(100)]
  input_data.sort()

  bucket_name = "testbucket"
  test_filename = "testfile"
  full_filename = "/%s/%s" % (bucket_name, test_filename)

  with cloudstorage.open(full_filename, mode="w") as f:
    with records.RecordsWriter(f) as w:
      for (k, v) in input_data:
        proto = kv_pb.KeyValue()
        proto.set_key(k)
        proto.set_value(v)
        w.write(proto.Encode())

  p = TestMergePipeline(bucket_name,
                        [full_filename, full_filename, full_filename])
  p.start()
  test_support.execute_until_empty(self.taskqueue)

  p = TestMergePipeline.from_id(p.pipeline_id)
  output_file = p.outputs.default.value[0]
  output_data = []
  with cloudstorage.open(output_file) as f:
    for record in records.RecordsReader(f):
      output_data.append(record)

  expected_data = [str((k, [v, v, v], False)) for (k, v) in input_data]
  self.assertEquals(expected_data, output_data)
  self.assertEquals(1, len(self.emails))
Example 4: testShardRetry
def testShardRetry(self):
  entity_count = 200
  db.delete(TestOutputEntity.all())
  db.delete(RetryCount.all())

  for i in range(entity_count):
    TestEntity(data=str(i)).put()

  p = mapper_pipeline.MapperPipeline(
      "test",
      handler_spec=__name__ + ".test_shard_retry_map",
      input_reader_spec=input_readers.__name__ + ".DatastoreInputReader",
      params={
          "input_reader": {
              "entity_kind": __name__ + "." + TestEntity.__name__,
          },
      },
      shards=5)
  p.start()
  test_support.execute_until_empty(self.taskqueue)

  self.assertEquals(1, len(self.emails))
  self.assertTrue(self.emails[0][1].startswith(
      "Pipeline successful:"))

  p = mapper_pipeline.MapperPipeline.from_id(p.pipeline_id)
  outputs = []
  for output in TestOutputEntity.all():
    outputs.append(int(output.data))
  outputs.sort()

  expected_outputs = [i for i in range(entity_count)]
  expected_outputs.sort()
  self.assertEquals(expected_outputs, outputs)
Example 5: testShardRetryTooMany
def testShardRetryTooMany(self):
  entity_count = 200
  db.delete(TestOutputEntity.all())
  db.delete(RetryCount.all())

  for i in range(entity_count):
    TestEntity(data=str(i)).put()

  p = mapper_pipeline.MapperPipeline(
      "test",
      handler_spec=__name__ + ".test_shard_retry_too_many_map",
      input_reader_spec=input_readers.__name__ + ".DatastoreInputReader",
      params={
          "input_reader": {
              "entity_kind": __name__ + "." + TestEntity.__name__,
          },
      },
      shards=5)
  p.max_attempts = 1
  p.start()
  test_support.execute_until_empty(self.taskqueue)

  state = model.MapreduceState.all().get()
  self.assertEqual(model.MapreduceState.RESULT_FAILED, state.result_status)
  self.assertEquals(1, len(self.emails))
  self.assertTrue(self.emails[0][1].startswith(
      "Pipeline aborted:"))
Example 6: testFailedMap
def testFailedMap(self):
  for i in range(1):
    TestEntity(data=str(i)).put()

  pipeline.pipeline._DEFAULT_MAX_ATTEMPTS = 1

  p = mapper_pipeline.MapperPipeline(
      "test",
      handler_spec=__name__ + ".test_fail_map",
      input_reader_spec=input_readers.__name__ + ".DatastoreInputReader",
      params={
          "input_reader": {
              "entity_kind": __name__ + "." + TestEntity.__name__,
          },
      },
      shards=5)
  p.start()
  test_support.execute_until_empty(self.taskqueue)

  p = mapper_pipeline.MapperPipeline.from_id(p.pipeline_id)
  self.assertTrue(p.was_aborted)

  self.assertTrue(p.outputs.job_id.filled)
  state = model.MapreduceState.get_by_job_id(p.outputs.job_id.value)
  self.assertEqual(model.MapreduceState.RESULT_FAILED, state.result_status)
  self.assertFalse(p.outputs.result_status.filled)
  self.assertFalse(p.outputs.default.filled)

  self.assertEquals(1, len(self.emails))
  self.assertTrue(self.emails[0][1].startswith(
      "Pipeline aborted:"))
Example 7: testNoCombiner
def testNoCombiner(self):
  """Test running with low values count but without combiner."""
  # Even though this test doesn't have combiner specified, it's still
  # interesting to run. It forces MergePipeline to produce partial
  # key values and we verify that they are combined correctly in reader.

  # Prepare test data.
  entity_count = 200

  for i in range(entity_count):
    TestEntity(data=str(i)).put()
    TestEntity(data=str(i)).put()

  p = mapreduce_pipeline.MapreducePipeline(
      "test",
      __name__ + ".test_combiner_map",
      __name__ + ".test_combiner_reduce",
      input_reader_spec=input_readers.__name__ + ".DatastoreInputReader",
      output_writer_spec=output_writers.__name__ + ".BlobstoreOutputWriter",
      mapper_params={"entity_kind": __name__ + ".TestEntity"},
      shards=4)
  p.start()
  test_support.execute_until_empty(self.taskqueue)

  p = mapreduce_pipeline.MapreducePipeline.from_id(p.pipeline_id)
  self.assertEquals(1, len(p.outputs.default.value))
  output_file = p.outputs.default.value[0]

  file_content = []
  with files.open(output_file, "r") as f:
    file_content = sorted(f.read(10000000).strip().split("\n"))

  self.assertEquals(
      ["('0', 9800)", "('1', 9900)", "('2', 10000)", "('3', 10100)"],
      file_content)
Example 8: testEmptyMapper
def testEmptyMapper(self):
  """Test empty mapper over empty dataset."""
  p = mapper_pipeline.MapperPipeline(
      "empty_map",
      handler_spec=__name__ + ".test_empty_handler",
      input_reader_spec=input_readers.__name__ + ".DatastoreInputReader",
      params={
          "input_reader": {
              "entity_kind": __name__ + ".TestEntity",
              # Test datetime can be json serialized.
              "filters": [("dt", "=", datetime.datetime(2000, 1, 1))],
          },
      },
  )
  p.start()
  test_support.execute_until_empty(self.taskqueue)

  self.assertEquals(1, len(self.emails))
  self.assertTrue(self.emails[0][1].startswith(
      "Pipeline successful:"))

  p = mapper_pipeline.MapperPipeline.from_id(p.pipeline_id)
  self.assertTrue(p.outputs.job_id.value)

  counters = p.outputs.counters.value
  self.assertTrue(counters)
  self.assertTrue(context.COUNTER_MAPPER_WALLTIME_MS in counters)
Example 9: testFailedMapReduce
def testFailedMapReduce(self):
  # Add some random data.
  entity_count = 200

  for i in range(entity_count):
    TestEntity(data=str(i)).put()
    TestEntity(data=str(i)).put()

  p = mapreduce_pipeline.MapreducePipeline(
      "test",
      __name__ + ".test_failed_map",
      __name__ + ".test_mapreduce_reduce",
      input_reader_spec=input_readers.__name__ + ".DatastoreInputReader",
      output_writer_spec=(
          output_writers.__name__ + ".BlobstoreRecordsOutputWriter"),
      mapper_params={
          "entity_kind": __name__ + "." + TestEntity.__name__,
      },
      shards=16)
  p.start()
  test_support.execute_until_empty(self.taskqueue)

  p = mapreduce_pipeline.MapreducePipeline.from_id(p.pipeline_id)
  self.assertEqual(model.MapreduceState.RESULT_FAILED,
                   p.outputs.result_status.value)
  self.assertEqual(0, len(p.outputs.default.value))
Example 10: testHugeTaskUseDatastore
def testHugeTaskUseDatastore(self):
  """Test map job with huge parameter values."""
  input_file = files.blobstore.create()
  input_data = [str(i) for i in range(100)]

  with files.open(input_file, "a") as f:
    with records.RecordsWriter(f) as w:
      for record in input_data:
        w.write(record)
  files.finalize(input_file)
  input_file = files.blobstore.get_file_name(
      files.blobstore.get_blob_key(input_file))

  mapreduce_id = control.start_map(
      "test_map",
      __name__ + ".TestHandler",
      "mapreduce.input_readers.RecordsReader",
      {
          "file": input_file,
          # The parameter can't be compressed and wouldn't fit into
          # the taskqueue payload.
          "huge_parameter": random_string(900000)
      },
      shard_count=4,
      base_path="/mapreduce_base_path")

  test_support.execute_until_empty(self.taskqueue)
  self.assertEquals(100, len(TestHandler.processed_entites))
  self.assertEquals([], model._HugeTaskPayload.all().fetch(100))
Example 11: testFetchEndToEnd
def testFetchEndToEnd(self):
  """End-to-end test of the fetcher job."""
  createMockCrawlDbDatum("http://foo.com/bar.txt")

  static_robots = ("User-agent: test\nDisallow: /content_0\n"
                   "Disallow: /content_1\nDisallow: /content_3")
  self.setReturnValue(url="http://foo.com/robots.txt",
                      content=static_robots,
                      headers={"Content-Length": len(static_robots),
                               "content-type": "text/plain"})

  static_content = "test"
  static_content_length = len(static_content)
  self.setReturnValue(url="http://foo.com/bar.txt",
                      content=static_content,
                      headers={"Content-Length": static_content_length,
                               "Content-Type": "text/plain"})

  p = pipelines.FetcherPipeline(
      "FetcherPipeline",
      params={
          "entity_kind": "lakshmi.datum.CrawlDbDatum"
      },
      parser_params={
          "text/plain": __name__ + "._parserNotOutlinks"
      },
      shards=2)
  p.start()
  test_support.execute_until_empty(self.taskqueue)
Example 12: testMergeFiles
def testMergeFiles(self):
  """Test merging multiple files."""
  input_data = [(str(i), "_" + str(i)) for i in range(100)]
  input_data.sort()

  input_file = files.blobstore.create()
  with files.open(input_file, "a") as f:
    with records.RecordsWriter(f) as w:
      for (k, v) in input_data:
        proto = file_service_pb.KeyValue()
        proto.set_key(k)
        proto.set_value(v)
        w.write(proto.Encode())
  files.finalize(input_file)
  input_file = files.blobstore.get_file_name(
      files.blobstore.get_blob_key(input_file))

  p = TestMergePipeline([input_file, input_file, input_file])
  p.start()
  test_support.execute_until_empty(self.taskqueue)

  p = TestMergePipeline.from_id(p.pipeline_id)
  output_file = p.outputs.default.value[0]
  output_data = []
  with files.open(output_file, "r") as f:
    for record in records.RecordsReader(f):
      output_data.append(record)

  expected_data = [str((k, [v, v, v], False)) for (k, v) in input_data]
  self.assertEquals(expected_data, output_data)
Example 13: testShuffleFiles
def testShuffleFiles(self):
  """Test shuffling multiple files."""
  input_data = [(str(i), str(i)) for i in range(100)]
  input_data.sort()

  input_file = files.blobstore.create()
  with files.open(input_file, "a") as f:
    with records.RecordsWriter(f) as w:
      for (k, v) in input_data:
        proto = file_service_pb.KeyValue()
        proto.set_key(k)
        proto.set_value(v)
        w.write(proto.Encode())
  files.finalize(input_file)
  input_file = files.blobstore.get_file_name(
      files.blobstore.get_blob_key(input_file))

  p = shuffler.ShufflePipeline(
      "testjob", [input_file, input_file, input_file])
  p.start()
  test_support.execute_until_empty(self.taskqueue)

  p = shuffler.ShufflePipeline.from_id(p.pipeline_id)
  output_files = p.outputs.default.value
  output_data = []
  for output_file in output_files:
    with files.open(output_file, "r") as f:
      for record in records.RecordsReader(f):
        proto = file_service_pb.KeyValues()
        proto.ParseFromString(record)
        output_data.append((proto.key(), proto.value_list()))
  output_data.sort()

  expected_data = sorted(
      [(str(k), [str(v), str(v), str(v)]) for (k, v) in input_data])
  self.assertEquals(expected_data, output_data)
Example 14: testSingleShard
def testSingleShard(self):
  entity_count = 1000

  for _ in range(entity_count):
    TestEntity().put()

  mapreduce_id = control.start_map(
      "test_map",
      __name__ + ".test_handler_yield_key_str",
      "mapreduce.input_readers.DatastoreInputReader",
      {
          "entity_kind": __name__ + "." + TestEntity.__name__,
      },
      shard_count=4,
      base_path="/mapreduce_base_path",
      output_writer_spec=BLOBSTORE_WRITER_NAME)

  test_support.execute_until_empty(self.taskqueue)

  mapreduce_state = model.MapreduceState.get_by_job_id(mapreduce_id)
  filenames = output_writers.BlobstoreOutputWriter.get_filenames(
      mapreduce_state)
  self.assertEqual(1, len(filenames))
  blob_name = filenames[0]
  self.assertTrue(blob_name.startswith("/blobstore/"))
  self.assertFalse(blob_name.startswith("/blobstore/writable:"))

  with files.open(blob_name, "r") as f:
    data = f.read(10000000)
    self.assertEquals(1000, len(data.strip().split("\n")))
Example 15: testDedicatedParams
def testDedicatedParams(self):
  entity_count = 1000

  for _ in range(entity_count):
    TestEntity().put()

  mapreduce_id = control.start_map(
      "test_map",
      __name__ + ".test_handler_yield_key_str",
      "mapreduce.input_readers.DatastoreInputReader",
      {
          "input_reader": {
              "entity_kind": __name__ + "." + TestEntity.__name__,
          },
          "output_writer": {
              "filesystem": "gs",
              "gs_bucket_name": "bucket",
          },
      },
      shard_count=4,
      base_path="/mapreduce_base_path",
      output_writer_spec=FILE_WRITER_NAME)

  test_support.execute_until_empty(self.taskqueue)

  mapreduce_state = model.MapreduceState.get_by_job_id(mapreduce_id)
  filenames = output_writers.FileOutputWriter.get_filenames(mapreduce_state)
  self.assertEqual(1, len(filenames))
  self.assertTrue(filenames[0].startswith("/gs/bucket/"))

  with files.open(filenames[0], "r") as f:
    data = f.read(10000000)
    self.assertEquals(1000, len(data.strip().split("\n")))