本文整理汇总了Python中mrjob.job.MRJob.set_status方法的典型用法代码示例。如果您正苦于以下问题:Python MRJob.set_status方法的具体用法?Python MRJob.set_status怎么用?Python MRJob.set_status使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类mrjob.job.MRJob的用法示例。
在下文中一共展示了MRJob.set_status方法的10个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: reducer
# 需要导入模块: from mrjob.job import MRJob [as 别名]
# 或者: from mrjob.job.MRJob import set_status [as 别名]
def reducer(self, n, vars):
    """Combine the samples emitted by every mapper into one weighted sample.

    Each value in ``vars`` is a JSON string that decodes to a sequence whose
    first element is a count and whose second element is a list of samples.
    Every sample list contributes a share of ``self.options.sample_size``
    proportional to its count relative to the sum of all counts.

    Args:
        n: Key the values arrive under; it is not used here.
        vars: Iterable of JSON strings, one ``[count, samples]`` pair each.

    Yields:
        ``(1, sample)`` for every selected sample, popped from the end of
        the corresponding sample list.
    """
    MRJob.set_status(self, "=============> reducer called")
    samples_from_mappers = []
    counts_from_mappers = []
    # First read all the counts from the different mappers so we know the
    # total number of items and can give each mapper's sample set its
    # appropriate weight.
    for x in vars:
        payload = json.loads(x)
        counts_from_mappers.append(payload[0])
        samples_from_mappers.append(payload[1])
    total_counts_from_mappers = sum(counts_from_mappers)
    if not total_counts_from_mappers:
        # No items were counted at all: there is nothing to weight, and
        # dividing by the total below would raise ZeroDivisionError.
        return
    # Now, based on the number of items each mapper saw, select the
    # appropriate number of samples from each mapper's sample set.
    for count, sample_set in zip(counts_from_mappers, samples_from_mappers):
        weight = count * 1.0 / total_counts_from_mappers
        number_of_needed_samples = int(round(weight * self.options.sample_size))
        # Rounding can ask for more samples than this mapper supplied;
        # cap the quota so pop() never hits an empty list.
        for _ in range(min(number_of_needed_samples, len(sample_set))):
            yield 1, sample_set.pop()
示例2: test_counters_and_status
# 需要导入模块: from mrjob.job import MRJob [as 别名]
# 或者: from mrjob.job.MRJob import set_status [as 别名]
def test_counters_and_status(self):
    # Drive a sandboxed job through a few counter increments and status
    # updates, then check what the stderr parser recovers from its output.
    job = MRJob().sandbox()

    job.increment_counter('Foo', 'Bar')
    job.set_status('Initializing qux gradients...')
    job.increment_counter('Foo', 'Bar')
    job.increment_counter('Foo', 'Baz', 20)
    job.set_status('Sorting metasyntactic variables...')

    parsed_stderr = parse_mr_job_stderr(job.stderr.getvalue())

    expected = {
        'counters': {'Foo': {'Bar': 2, 'Baz': 20}},
        'statuses': [
            'Initializing qux gradients...',
            'Sorting metasyntactic variables...',
        ],
        'other': [],
    }
    self.assertEqual(parsed_stderr, expected)

    # make sure parse_counters() works
    self.assertEqual(job.parse_counters(), parsed_stderr['counters'])
示例3: reducer
# 需要导入模块: from mrjob.job import MRJob [as 别名]
# 或者: from mrjob.job.MRJob import set_status [as 别名]
def reducer(self, n, vars):
    """Merge the mappers' samples by weight and write them to ``output.txt``.

    Each value in ``vars`` is a JSON string that decodes to a sequence whose
    first element is a count and whose second element is a list of samples.
    Every sample list contributes a share of ``self.options.sample_size``
    proportional to its count relative to the sum of all counts. The chosen
    samples are written one per line to ``output.txt`` under
    ``PROJECT_ROOT``; nothing is yielded to the framework.

    Args:
        n: Key the values arrive under; it is not used here.
        vars: Iterable of JSON strings, one ``[count, samples]`` pair each.
    """
    MRJob.set_status(self, "=============> reducer called")
    # Single-argument call form prints the same text on Python 2 and 3.
    print("reducer: %s" % (vars,))
    samples_from_mappers = []
    counts_from_mappers = []
    # First read all the counts from the different mappers so we know the
    # total number of items and can give each mapper's sample set its
    # appropriate weight.
    for x in vars:
        payload = json.loads(x)
        counts_from_mappers.append(payload[0])
        samples_from_mappers.append(payload[1])
    total_counts_from_mappers = sum(counts_from_mappers)
    # Now, based on the number of items each mapper saw, select the
    # appropriate number of samples from each mapper's sample set.
    # The with-block closes the file even if a write or pop fails.
    with open(os.path.join(PROJECT_ROOT, 'output.txt'), "w") as file_out:
        # With a zero total there is nothing to weight (and the division
        # below would fail), so the file is simply left empty.
        if total_counts_from_mappers:
            for count, sample_set in zip(counts_from_mappers,
                                         samples_from_mappers):
                weight = count * 1.0 / total_counts_from_mappers
                number_of_needed_samples = int(
                    round(weight * self.options.sample_size))
                # Rounding can ask for more samples than this mapper
                # supplied; cap the quota so pop() never hits an empty list.
                for _ in range(min(number_of_needed_samples, len(sample_set))):
                    file_out.write(str(sample_set.pop()) + '\n')
    # Unreachable yield: its presence alone makes this function a generator
    # that produces no key/value pairs.
    if False:
        yield 1, 2
示例4: mapper_final
# 需要导入模块: from mrjob.job import MRJob [as 别名]
# 或者: from mrjob.job.MRJob import set_status [as 别名]
def mapper_final(self):
    """Emit this mapper's count and samples as one JSON value under key 1."""
    MRJob.set_status(self, "=============> mapper final called")
    # Serialize the pair [count, samples] in a single step.
    yield 1, json.dumps([self.count, self.samples])
示例5: reducer_final
# 需要导入模块: from mrjob.job import MRJob [as 别名]
# 或者: from mrjob.job.MRJob import set_status [as 别名]
def reducer_final(self):
    """Yield, for every label in ``self.output``, its sample count and samples."""
    MRJob.set_status(self, "=============> reducer final called")
    for stratum_label in self.output:
        chosen = self.output[stratum_label]
        sample_total = len(chosen)
        yield stratum_label, (sample_total, chosen)
示例6: mapper_final
# 需要导入模块: from mrjob.job import MRJob [as 别名]
# 或者: from mrjob.job.MRJob import set_status [as 别名]
def mapper_final(self):
# Reports a status message, then walks every label stored in self.strata.
MRJob.set_status(self, "=============> mapper final called")
for label in self.strata:
stratum = self.strata[label]
# Per-stratum quota: the stratum's length scaled by the sampling_rate
# option, truncated toward zero by int().
number_of_samples = int( len(stratum) * self.options.sampling_rate )
if not stratum: # stratum should not be empty
pass
else:
# NOTE(review): the snippet is cut off here - the body of this else branch
# (the handling of a non-empty stratum) is not visible in this excerpt.
示例7: mapper
# 需要导入模块: from mrjob.job import MRJob [as 别名]
# 或者: from mrjob.job.MRJob import set_status [as 别名]
def mapper(self, key, line):
    """Offer one input line to this mapper's fixed-size random sample.

    ``self.count`` is incremented for every line. The first
    ``self.options.sample_size`` lines are stored in ``self.samples``
    directly; each later line replaces a uniformly chosen stored line with
    probability ``sample_size / count``. The reservoir therefore never holds
    more than ``sample_size`` lines.

    Args:
        key: Input key; it is not used here.
        line: The input line to consider for the sample.
    """
    MRJob.set_status(self, "=============> mapper called")
    self.count += 1
    sample_size = self.options.sample_size
    # Strict '<' keeps the reservoir at exactly sample_size entries; the
    # previous '<=' let it grow to sample_size + 1.
    if len(self.samples) < sample_size:
        self.samples.append(line)
        return
    expected_prob = (sample_size * 1.0) / self.count
    # Strict comparison so a sample_size of 0 can never select a slot.
    if random.random() < expected_prob:
        # randrange excludes its upper bound, so the index always falls
        # inside the reservoir (randint(0, sample_size) did not).
        self.samples[random.randrange(sample_size)] = line
示例8: test_counters_and_status
# 需要导入模块: from mrjob.job import MRJob [as 别名]
# 或者: from mrjob.job.MRJob import set_status [as 别名]
def test_counters_and_status(self):
    # Counters and status messages emitted by a sandboxed job must be
    # recovered intact when its captured stderr is parsed.
    sandboxed = MRJob().sandbox()

    sandboxed.increment_counter("Foo", "Bar")
    sandboxed.set_status("Initializing qux gradients...")
    sandboxed.increment_counter("Foo", "Bar")
    sandboxed.increment_counter("Foo", "Baz", 20)
    sandboxed.set_status("Sorting metasyntactic variables...")

    captured = sandboxed.stderr.getvalue()
    parsed_stderr = parse_mr_job_stderr(captured)

    expected_counters = {"Foo": {"Bar": 2, "Baz": 20}}
    expected_statuses = [
        "Initializing qux gradients...",
        "Sorting metasyntactic variables...",
    ]
    self.assertEqual(
        parsed_stderr,
        {
            "counters": expected_counters,
            "statuses": expected_statuses,
            "other": [],
        },
    )
示例9: test_unicode_set_status
# 需要导入模块: from mrjob.job import MRJob [as 别名]
# 或者: from mrjob.job.MRJob import set_status [as 别名]
def test_unicode_set_status(self):
    # Setting a non-ASCII status on a sandboxed job shouldn't raise an
    # exception; the call completing is the whole assertion.
    sandboxed_job = MRJob().sandbox()
    unicode_status = u'💩'
    sandboxed_job.set_status(unicode_status)
示例10: mapper_init
# 需要导入模块: from mrjob.job import MRJob [as 别名]
# 或者: from mrjob.job.MRJob import set_status [as 别名]
def mapper_init(self):
    """Report a status message saying that mapper init has been called."""
    status_message = "=============> mapper init called"
    MRJob.set_status(self, status_message)