本文整理汇总了Python中odps.df.DataFrame.join方法的典型用法代码示例。如果您正苦于以下问题：Python DataFrame.join方法的具体用法？Python DataFrame.join怎么用？Python DataFrame.join使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类odps.df.DataFrame的用法示例。
在下文中一共展示了DataFrame.join方法的6个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: testJoin
# 需要导入模块: from odps.df import DataFrame [as 别名]
# 或者: from odps.df.DataFrame import join [as 别名]
def testJoin(self):
    """Check that a join on ``name`` gives the same values on both engines.

    The join of ``self.odps_df`` with ``self.pd_df`` is executed by
    ``self.engine``; the same join, built on a frame wrapping
    ``self.odps_df.to_pandas()``, is executed by ``self.pd_engine``.
    Both results are sorted by ``id_x`` before being compared.
    """
    # Expression under test, run through self.engine.
    mixed_expr = self.odps_df.join(self.pd_df, 'name').sort('id_x')
    actual = self.engine.execute(mixed_expr).values

    # Reference: identical expression over a pandas copy of the first frame.
    local_df = DataFrame(self.odps_df.to_pandas())
    reference_expr = local_df.join(self.pd_df, 'name').sort('id_x')
    wanted = self.pd_engine.execute(reference_expr).values

    self.assertTrue(actual.equals(wanted))
示例2: testCacheTable
# 需要导入模块: from odps.df import DataFrame [as 别名]
# 或者: from odps.df.DataFrame import join [as 别名]
def testCacheTable(self):
    """Exercise ``cache()`` on a joined frame and verify the cached data is reused.

    Statement order matters here: the assertions inspect engine and
    expression state (``_generated_table_names``, ``_cache_data``) that is
    only populated by the preceding executions.
    """
    joined = self.odps_df.join(self.pd_df, 'name').cache()
    ordered = joined.sort('id_x')

    # The compiled DAG for the sorted, cached join is expected to have 3 nodes.
    dag = self.engine._compile_dag(ordered)
    self.assertEqual(len(dag.nodes()), 3)

    actual = self.engine.execute(ordered).values

    # Reference result computed on a pandas copy of the first frame.
    local_df = DataFrame(self.odps_df.to_pandas())
    reference_expr = local_df.join(self.pd_df, 'name').sort('id_x')
    wanted = self.pd_engine.execute(reference_expr).values
    self.assertTrue(actual.equals(wanted))
    self.assertEqual(len(self.engine._generated_table_names), 2)

    # Executing the cached frame again must keep the very same cache object.
    cached = joined._cache_data
    self.assertEqual(len(joined.execute()), len(wanted))
    self.assertIs(joined._cache_data, cached)

    # A scalar count derived from the cached frame stores its value as cache data.
    small_id_count = joined[joined.id_x < 3].count()
    count_value = self.engine.execute(small_id_count)
    self.assertEqual(count_value, 2)
    self.assertEqual(small_id_count._cache_data, 2)
示例3: testMixed
# 需要导入模块: from odps.df import DataFrame [as 别名]
# 或者: from odps.df.DataFrame import join [as 别名]
def testMixed(self):
    """Combine join, projection, union, sort and ``isin`` and compare both engines.

    The expression is built once on ``self.odps_df`` (run by ``self.engine``)
    and once on a frame wrapping ``self.odps_df.to_pandas()`` (run by
    ``self.pd_engine``); the resulting values must be equal.
    """
    sort_keys = ["name", "id"]

    # Expression under test.
    joined = self.odps_df.join(self.pd_df, "name")
    projected = joined[lambda x: x.name, lambda x: x.id_x.rename("id")]
    expr = self.odps_df.union(projected).sort(sort_keys)
    expr = expr[expr["name"].isin(self.pd_df["name"])]
    actual = self.engine.execute(expr).values

    # Reference expression over a pandas copy of the first frame.
    local_df = DataFrame(self.odps_df.to_pandas())
    local_joined = local_df.join(self.pd_df, "name")
    local_projected = local_joined[lambda x: x.name, lambda x: x.id_x.rename("id")]
    reference_expr = local_df.union(local_projected).sort(["name", "id"])
    reference_expr = reference_expr[reference_expr["name"].isin(self.pd_df["name"])]
    wanted = self.pd_engine.execute(reference_expr).values

    self.assertTrue(actual.equals(wanted))
示例4: Test
# 需要导入模块: from odps.df import DataFrame [as 别名]
# 或者: from odps.df.DataFrame import join [as 别名]
class Test(TestBase):
    """Compilation tests for ``MixedEngine`` using a mock table and a pandas frame."""

    def setup(self):
        """Build the mock-table frame, the pandas frame, their join and the engine."""
        column_names = ['name', 'id', 'fid', 'isMale', 'scale', 'birth']
        type_names = ('string', 'bigint', 'double', 'boolean', 'decimal', 'datetime')
        column_types = [validate_data_type(type_name) for type_name in type_names]
        schema = Schema.from_lists(column_names, column_types)
        self.tb = DataFrame(MockTable(name='pyodps_test_expr_table', schema=schema))

        import pandas as pd

        rows = [['name1', 2, 3.14], ['name2', 100, 2.7]]
        self.pd = DataFrame(pd.DataFrame(rows, columns=['name', 'id', 'fid']))

        # Expression compiled by testMixedCompile: mock table joined with pandas data.
        self.expr = self.tb.join(self.pd, on='name')
        self.engine = MixedEngine(self.odps)

    def testMixedCompile(self):
        """Compiling the join yields a two-node DAG with an edge from root to expression."""
        dag, expr, callbacks = self.engine._compile(self.expr)
        self.assertEqual(len(dag._graph), 2)

        ordered_nodes = dag.topological_sort()
        # Each node unpacks into two items; only the first (the expression) is used.
        root_node = ordered_nodes[0]
        root, _ = root_node
        expr_node = ordered_nodes[1]
        expr, _ = expr_node

        self.assertTrue(root.is_ancestor(expr))
        # dag._graph is keyed by id() of the node objects.
        self.assertIn(id(expr_node), dag._graph[id(root_node)])
        self.assertEqual(len(available_engines(expr.data_source())), 1)

    def testCacheCompile(self):
        """Two ``cache()`` calls plus ``distinct()`` compile to a three-node chain."""
        projected_expr = self.tb['name', 'id'].cache()
        grouped_expr = projected_expr.groupby('name').agg(projected_expr.id.mean()).cache()
        distinct_expr = grouped_expr.distinct()

        dag, expr, callbacks = self.engine._compile(distinct_expr)
        self.assertEqual(len(dag._graph), 3)

        ordered_nodes = dag.topological_sort()
        project_node = ordered_nodes[0]
        projected, _ = project_node
        groupby_node = ordered_nodes[1]
        grouped, _ = groupby_node
        distinct_node = ordered_nodes[2]
        distincted, _ = distinct_node

        # Edges must chain projection -> groupby -> distinct.
        self.assertIn(id(groupby_node), dag._graph[id(project_node)])
        self.assertIn(id(distinct_node), dag._graph[id(groupby_node)])
        self.assertIsInstance(distincted, DistinctCollectionExpr)
示例5: Test
# 需要导入模块: from odps.df import DataFrame [as 别名]
# 或者: from odps.df.DataFrame import join [as 别名]
class Test(TestBase):
    """Tests that evaluate expressions mixing an ODPS table frame and a pandas frame.

    Most tests execute an expression over ``self.odps_df`` and ``self.pd_df``
    with ``MixedEngine`` and compare the values with the same expression
    built on a pandas copy of ``self.odps_df`` executed by ``PandasEngine``.
    """

    def setup(self):
        """Create the test table, write three rows into it and build frames and engines."""
        import pandas as pd

        odps_data = [
            ['name1', 1],
            ['name2', 2],
            ['name1', 3],
        ]
        pd_data = [
            ['name1', 5],
            ['name2', 6],
        ]
        names = ['name', 'id']
        types = ['string', 'bigint']

        # Recreate the table from scratch so earlier runs cannot leak rows.
        table = tn('pyodps_df_mixed')
        self.odps.delete_table(table, if_exists=True)
        self.t = self.odps.create_table(table, Schema.from_lists(names, types))
        with self.t.open_writer() as w:
            w.write([self.t.new_record(r) for r in odps_data])

        self.odps_df = DataFrame(self.t)
        self.pd_df = DataFrame(pd.DataFrame(pd_data, columns=names))
        self.engine = MixedEngine(self.odps)
        self.pd_engine = PandasEngine(self.odps)

    def teardown(self):
        """Drop the table created in ``setup``."""
        self.t.drop()

    def assertPandasEqual(self, df1, df2):
        """Assert that two pandas frames have matching ODPS column types and equal data.

        Column dtypes are mapped to ODPS types through
        ``odps.types._odps_primitive_to_builtin_types`` and compared first;
        the frame contents are then compared with ``check_dtype=False``.
        """
        from odps.compat import six
        from odps import types as o_types
        # ``pandas.util.testing`` was deprecated in pandas 1.0 and removed in
        # 2.0; ``pandas.testing`` is the public location. Fall back only for
        # old pandas releases that lack it.
        try:
            from pandas.testing import assert_frame_equal
        except ImportError:
            from pandas.util.testing import assert_frame_equal

        # compare column types
        def get_odps_type(p_type):
            """Return the first ODPS type whose builtin type matches, else ``None``."""
            for data_type, builtin_type in six.iteritems(o_types._odps_primitive_to_builtin_types):
                if issubclass(p_type.type, builtin_type):
                    return data_type
            return None

        types1 = [get_odps_type(dt) for dt in df1.dtypes]
        types2 = [get_odps_type(dt) for dt in df2.dtypes]
        self.assertSequenceEqual(types1, types2)
        assert_frame_equal(df1, df2, check_dtype=False)

    def testJoin(self):
        """Join on ``name`` gives the same values on the mixed and pandas engines."""
        expr = self.odps_df.join(self.pd_df, 'name').sort('id_x')
        result = self.engine.execute(expr).values

        df = DataFrame(self.odps_df.to_pandas())
        expected = self.pd_engine.execute(df.join(self.pd_df, 'name').sort('id_x')).values
        self.assertTrue(result.equals(expected))

    def testUnion(self):
        """Union sorted by ``id`` and ``name`` gives the same values on both engines."""
        expr = self.odps_df.union(self.pd_df).sort(['id', 'name'])
        result = self.engine.execute(expr).values

        df = DataFrame(self.odps_df.to_pandas())
        expected = self.pd_engine.execute(df.union(self.pd_df).sort(['id', 'name'])).values
        self.assertTrue(result.equals(expected))

    def testIsIn(self):
        """``isin`` against the pandas frame's ``name`` column matches on both engines."""
        expr = self.odps_df['name'].isin(self.pd_df['name']).rename('isin')
        result = self.engine.execute(expr).values

        df = DataFrame(self.odps_df.to_pandas())
        expected = self.pd_engine.execute(df['name'].isin(self.pd_df['name']).rename('isin')).values
        self.assertTrue(result.equals(expected))

    def testMixed(self):
        """Join, projection, union, sort and ``isin`` combined match on both engines."""
        expr = self.odps_df.union(
            self.odps_df.join(self.pd_df, 'name')[
                lambda x: x.name,
                lambda x: x.id_x.rename('id')
            ]).sort(['name', 'id'])
        expr = expr[expr['name'].isin(self.pd_df['name'])]
        result = self.engine.execute(expr).values

        df = DataFrame(self.odps_df.to_pandas())
        test_expr = df.union(
            df.join(self.pd_df, 'name')[
                lambda x: x.name,
                lambda x: x.id_x.rename('id')
            ]).sort(['name', 'id'])
        test_expr = test_expr[test_expr['name'].isin(self.pd_df['name'])]
        expected = self.pd_engine.execute(test_expr).values
        self.assertTrue(result.equals(expected))

    def testPandasPersist(self):
        # NOTE(review): this method is truncated in the source listing; only
        # its visible opening statements are reproduced here.
        import pandas as pd, numpy as np
        self.odps.to_global()
        tmp_table_name = tn('pyodps_test_mixed_persist')
        self.odps.delete_table(tmp_table_name, if_exists=True)
        # ......... part of the code is omitted here .........
示例6: Test
# 需要导入模块: from odps.df import DataFrame [as 别名]
# 或者: from odps.df.DataFrame import join [as 别名]
class Test(TestBase):
    """Tests that evaluate expressions mixing an ODPS table frame and a pandas frame.

    Most tests execute an expression over ``self.odps_df`` and ``self.pd_df``
    with ``MixedEngine`` and compare the values with the same expression
    built on a pandas copy of ``self.odps_df`` executed by ``PandasEngine``.
    """

    def setup(self):
        """Create the test table, write three rows into it and build frames and engines."""
        import pandas as pd

        odps_data = [["name1", 1], ["name2", 2], ["name1", 3]]
        pd_data = [["name1", 5], ["name2", 6]]
        names = ["name", "id"]
        types = ["string", "bigint"]

        # Recreate the table from scratch so earlier runs cannot leak rows.
        table = tn("pyodps_df_mixed")
        self.odps.delete_table(table, if_exists=True)
        self.t = self.odps.create_table(table, Schema.from_lists(names, types))
        with self.t.open_writer() as w:
            w.write([self.t.new_record(r) for r in odps_data])

        self.odps_df = DataFrame(self.t)
        self.pd_df = DataFrame(pd.DataFrame(pd_data, columns=names))
        self.engine = MixedEngine(self.odps)
        self.pd_engine = PandasEngine(self.odps)

    def teardown(self):
        """Drop the table created in ``setup``."""
        self.t.drop()

    def testGroupReduction(self):
        """Sum a mapped column (``id + 1``) per ``name`` and check the sums."""
        expr = self.odps_df.select(self.odps_df, id2=self.odps_df.id.map(lambda x: x + 1))
        expr = expr.groupby("name").id2.sum()

        expected = [["name1", 6], ["name2", 3]]
        res = self.engine.execute(expr)
        result = self._get_result(res)
        # Only the sums (second item of each expected row) are compared.
        self.assertEqual(sorted([[r[1]] for r in expected]), sorted(result))

    def assertPandasEqual(self, df1, df2):
        """Assert that two pandas frames have matching ODPS column types and equal data.

        Column dtypes are mapped to ODPS types through
        ``odps.types._odps_primitive_to_builtin_types`` and compared first;
        the frame contents are then compared with ``check_dtype=False``.
        """
        from odps.compat import six
        from odps import types as o_types
        # ``pandas.util.testing`` was deprecated in pandas 1.0 and removed in
        # 2.0; ``pandas.testing`` is the public location. Fall back only for
        # old pandas releases that lack it.
        try:
            from pandas.testing import assert_frame_equal
        except ImportError:
            from pandas.util.testing import assert_frame_equal

        # compare column types
        def get_odps_type(p_type):
            """Return the first ODPS type whose builtin type matches, else ``None``."""
            for data_type, builtin_type in six.iteritems(o_types._odps_primitive_to_builtin_types):
                if issubclass(p_type.type, builtin_type):
                    return data_type
            return None

        types1 = [get_odps_type(dt) for dt in df1.dtypes]
        types2 = [get_odps_type(dt) for dt in df2.dtypes]
        self.assertSequenceEqual(types1, types2)
        assert_frame_equal(df1, df2, check_dtype=False)

    def testJoin(self):
        """Join on ``name`` gives the same values on the mixed and pandas engines."""
        expr = self.odps_df.join(self.pd_df, "name").sort("id_x")
        result = self.engine.execute(expr).values

        df = DataFrame(self.odps_df.to_pandas())
        expected = self.pd_engine.execute(df.join(self.pd_df, "name").sort("id_x")).values
        self.assertTrue(result.equals(expected))

    def testUnion(self):
        """Union sorted by ``id`` and ``name`` gives the same values on both engines."""
        expr = self.odps_df.union(self.pd_df).sort(["id", "name"])
        result = self.engine.execute(expr).values

        df = DataFrame(self.odps_df.to_pandas())
        expected = self.pd_engine.execute(df.union(self.pd_df).sort(["id", "name"])).values
        self.assertTrue(result.equals(expected))

    def testIsIn(self):
        """``isin`` against the pandas frame's ``name`` column matches on both engines."""
        expr = self.odps_df["name"].isin(self.pd_df["name"]).rename("isin")
        result = self.engine.execute(expr).values

        df = DataFrame(self.odps_df.to_pandas())
        expected = self.pd_engine.execute(df["name"].isin(self.pd_df["name"]).rename("isin")).values
        self.assertTrue(result.equals(expected))

    def testMixed(self):
        """Join, projection, union, sort and ``isin`` combined match on both engines."""
        expr = self.odps_df.union(
            self.odps_df.join(self.pd_df, "name")[lambda x: x.name, lambda x: x.id_x.rename("id")]
        ).sort(["name", "id"])
        expr = expr[expr["name"].isin(self.pd_df["name"])]
        result = self.engine.execute(expr).values

        df = DataFrame(self.odps_df.to_pandas())
        test_expr = df.union(df.join(self.pd_df, "name")[lambda x: x.name, lambda x: x.id_x.rename("id")]).sort(
            ["name", "id"]
        )
        test_expr = test_expr[test_expr["name"].isin(self.pd_df["name"])]
        expected = self.pd_engine.execute(test_expr).values
        self.assertTrue(result.equals(expected))

    def testPandasPersist(self):
        # NOTE(review): this method is truncated in the source listing; only
        # its visible opening statements are reproduced here.
        import pandas as pd, numpy as np
        self.odps.to_global()
        tmp_table_name = tn("pyodps_test_mixed_persist")
        self.odps.delete_table(tmp_table_name, if_exists=True)
        pd_df = pd.DataFrame(np.arange(9).reshape(3, 3), columns=list("abc"))
        # ......... part of the code is omitted here .........