本文整理汇总了Python中odps.df.DataFrame.map_reduce方法的典型用法代码示例。如果您正苦于以下问题:Python DataFrame.map_reduce方法的具体用法?Python DataFrame.map_reduce怎么用?Python DataFrame.map_reduce使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类odps.df.DataFrame的用法示例。
在下文中一共展示了DataFrame.map_reduce方法的2个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: Test
# 需要导入模块: from odps.df import DataFrame [as 别名]
# 或者: from odps.df.DataFrame import map_reduce [as 别名]
#.........这里部分代码省略.........
def testHeadAndTail(self):
    """Check head()/tail() row counts on ``self.odps_df`` and ``self.pd_df``.

    The same sequence is run against each frame: plain head, head on a
    frame filtered to ``name == 'name1'``, tail, and head/tail on the
    ``name`` column alone.
    """
    # ---- self.odps_df ----
    first_two = self.odps_df.head(2)
    self.assertEqual(len(first_two), 2)

    only_name1 = self.odps_df[self.odps_df['name'] == 'name1']
    first_match = only_name1.head(1)
    self.assertEqual(len(first_match), 1)
    # After head() the filtered expression must carry cached data.
    self.assertIsNotNone(only_name1._cache_data)

    last_two = self.odps_df.tail(2)
    self.assertEqual(len(last_two), 2)
    last_two_ids = last_two.values['id']
    self.assertTrue(all(row_id > 1 for row_id in last_two_ids))

    # The column expression is fetched anew for each call, so head()
    # and tail() never share one expression object.
    name_head = self.odps_df.name.head(2)
    self.assertEqual(len(name_head), 2)
    name_tail = self.odps_df.name.tail(2)
    self.assertEqual(len(name_tail), 2)

    # ---- self.pd_df ----
    first_one = self.pd_df.head(1)
    self.assertEqual(len(first_one), 1)

    only_name1 = self.pd_df[self.pd_df['name'] == 'name1']
    first_match = only_name1.head(1)
    self.assertEqual(len(first_match), 1)
    self.assertIsNotNone(only_name1._cache_data)

    last_one = self.pd_df.tail(1)
    self.assertEqual(len(last_one), 1)
    # Expected id of the final row; depends on fixture data set up elsewhere.
    self.assertEqual(last_one.values['id'][0], 6)

    name_head = self.pd_df.name.head(1)
    self.assertEqual(len(name_head), 1)
    name_tail = self.pd_df.name.tail(1)
    self.assertEqual(len(name_tail), 1)
def testMapReduceWithResource(self):
    """Run ``map_reduce`` with a DataFrame as a reducer resource.

    Four frame/resource pairings are checked, built from ``self.odps_df``,
    its ``to_pandas(wrap=True)`` copy, ``self.pd_df`` and a persisted copy
    of ``self.pd_df``. Each pairing must yield ``id`` values summing to 17
    (the figure depends on fixture data defined outside this method).
    The persisted table is dropped in ``finally``.
    """
    pd_df2 = self.odps_df.to_pandas(wrap=True)

    @output(['name', 'id'], ['string', 'int'])
    def reducer(resources):
        # Seed the per-name totals from the rows of the first resource.
        d = dict()
        for r in resources[0]:
            d[r.name] = d.get(r.name, 0) + r.id

        def inner(keys):
            def h(row, done):
                # Add each grouped row's id on top of the seeded total.
                d[row.name] = d.get(row.name, 0) + row.id
                if done:
                    # Last row of the group: emit (name, accumulated total).
                    yield row.name, d[row.name]
            return h
        return inner

    def check(df, resource):
        # Build, execute and verify one frame/resource pairing.
        expr = df.map_reduce(reducer=reducer, reducer_resources=[resource], group='name')
        result = expr.execute()
        self.assertEqual(result.values['id'].sum(), 17)

    check(pd_df2, self.pd_df)

    # Wrap the table name in tn(), as testBloomFilter in this file does for
    # its table (presumably to namespace names per test run -- confirm).
    odps_df2 = self.pd_df.persist(tn('pyodps_df_mixed2'), odps=self.odps)
    try:
        check(self.odps_df, odps_df2)
        check(self.odps_df, self.pd_df)
        check(pd_df2, odps_df2)
    finally:
        # Drop the table backing the persisted frame, pass or fail.
        next(odps_df2.data_source()).drop()
def testBloomFilter(self):
    """Check ``bloom_filter`` on ``self.odps_df`` against a one-row name filter.

    A temporary single-column table is filled with ``name1`` and ``name3``.
    Its first row's ``name`` is used as the filter source, and the result
    must contain no row whose ``name`` equals ``'name2'``. The temporary
    table is dropped in ``finally``.
    """
    import numpy as np

    data2 = [
        ['name1'],
        ['name3'],
    ]

    table_name = tn('pyodps_test_mixed_engine_bf_table2')
    # Clear any table left behind by an earlier run before recreating it.
    self.odps.delete_table(table_name, if_exists=True)
    table2 = self.odps.create_table(name=table_name,
                                    schema=Schema.from_lists(['name'], ['string']))
    try:
        # Setup that follows table creation is inside the try block so a
        # failed write still reaches table2.drop().
        expr2 = DataFrame(table2)
        self.odps.write_table(table2, 0, [table2.new_record(values=d) for d in data2])

        expr = self.odps_df.bloom_filter('name', expr2[:1].name, capacity=10)
        res = self.engine.execute(expr)

        self.assertTrue(np.all(res['name'] != 'name2'))
    finally:
        table2.drop()
示例2: Test
# 需要导入模块: from odps.df import DataFrame [as 别名]
# 或者: from odps.df.DataFrame import map_reduce [as 别名]
#.........这里部分代码省略.........
table2 = self.odps.create_table(table_name, table.schema)
try:
res = DataFrame(table2).head(10)
self.assertEqual(len(res), 0)
finally:
table2.drop()
def testMapReduceWithResource(self):
    """Verify ``map_reduce`` when the reducer receives a DataFrame resource.

    One pairing is checked before ``self.pd_df`` is persisted and three
    more afterwards. Every pairing must produce ``id`` values that sum
    to 17. The persisted table is dropped once the checks finish.
    """
    pd_df2 = self.odps_df.to_pandas(wrap=True)

    @output(["name", "id"], ["string", "int"])
    def reducer(resources):
        # Running total per name, pre-filled from the first resource's rows.
        totals = dict()
        for rec in resources[0]:
            key = rec.name
            totals[key] = (totals[key] + rec.id) if key in totals else rec.id

        def per_group(keys):
            def handle(row, done):
                key = row.name
                totals[key] = (totals[key] + row.id) if key in totals else row.id
                if done:
                    # Group finished: emit the name with its total.
                    yield row.name, totals[row.name]

            return handle

        return per_group

    def summed_ids(frame, resource):
        # Execute one map_reduce pairing and return the sum of its id column.
        expr = frame.map_reduce(reducer=reducer, reducer_resources=[resource], group="name")
        result = expr.execute()
        return result.values["id"].sum()

    self.assertEqual(summed_ids(pd_df2, self.pd_df), 17)

    odps_df2 = self.pd_df.persist(tn("pyodps_df_mixed2"), odps=self.odps)
    try:
        self.assertEqual(summed_ids(self.odps_df, odps_df2), 17)
        self.assertEqual(summed_ids(self.odps_df, self.pd_df), 17)
        self.assertEqual(summed_ids(pd_df2, odps_df2), 17)
    finally:
        # Remove the table created by persist(), pass or fail.
        next(odps_df2.data_source()).drop()
def testBloomFilter(self):
import numpy as np
data2 = [["name1"], ["name3"]]
table_name = tn("pyodps_test_mixed_engine_bf_table2")
self.odps.delete_table(table_name, if_exists=True)
table2 = self.odps.create_table(name=table_name, schema=Schema.from_lists(["name"], ["string"]))
expr2 = DataFrame(table2)
self.odps.write_table(table2, 0, data2)
try: