本文整理汇总了 Python 中 odps.df.DataFrame.sample 方法的典型用法代码示例。如果您正苦于以下问题:Python DataFrame.sample 方法的具体用法?Python DataFrame.sample 怎么用?Python DataFrame.sample 使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 odps.df.DataFrame 的用法示例。
在下文中一共展示了 DataFrame.sample 方法的 1 个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: Test
# 需要导入模块: from odps.df import DataFrame [as 别名]
# 或者: from odps.df.DataFrame import sample [as 别名]
#.........这里部分代码省略.........
"""
odps.Schema {
sepal_length float64
sepal_width float64
petal_length float64
petal_width float64
category string
}
"""
)
).strip()
self.assertEqual(rstrip_lines(repr(self.df.dtypes)).strip(), old_dtypes_repr)
new_df = self.df.roles(label="category").key_value("sepal_length")
new_dtypes_repr = rstrip_lines(
textwrap.dedent(
"""
odps.Schema {
sepal_length KV(':', ',') FEATURE
sepal_width float64 FEATURE
petal_length float64 FEATURE
petal_width float64 FEATURE
category string LABEL
}
"""
)
).strip()
self.assertEqual(rstrip_lines(repr(new_df.dtypes)).strip(), new_dtypes_repr)
def test_merge(self):
    """Check the field roles produced by ``merge_data`` and ``merge_with``.

    Two temporary two-column string tables are recreated, wrapped in
    DataFrames and merged in several ways; each result is inspected
    through ``_df_roles``.
    """
    # Drop and recreate both tables so the test starts from known schemas.
    table_specs = (
        (TEMP_TABLE_1_NAME, "create table {0} (col11 string, col12 string) lifecycle 1"),
        (TEMP_TABLE_2_NAME, "create table {0} (col21 string, col22 string) lifecycle 1"),
    )
    for table_name, ddl_template in table_specs:
        self.odps.delete_table(table_name, if_exists=True)
        self.odps.execute_sql(ddl_template.format(table_name))

    df1 = DataFrame(self.odps.get_table(TEMP_TABLE_1_NAME))
    df2 = DataFrame(self.odps.get_table(TEMP_TABLE_2_NAME))

    # Merging a single DataFrame is expected to be rejected.
    self.assertRaises(ValueError, merge_data, df1)

    # Roles expected when every column of both inputs is kept.
    all_columns_roles = {
        "col21": "FEATURE",
        "col11": "FEATURE",
        "col12": "FEATURE",
        "col22": "FEATURE",
    }

    whole_merge = merge_data(df1, df2)
    self.assertEqual(_df_roles(whole_merge), all_columns_roles)

    # NOTE(review): judging by the expected roles, the trailing True in the
    # second tuple appears to exclude the named column -- confirm against
    # merge_data's documentation.
    selected_merge = merge_data((df1, "col11"), (df2, "col21", True))
    self.assertEqual(
        _df_roles(selected_merge),
        {"col11": "FEATURE", "col22": "FEATURE"},
    )

    # With auto_rename=True the expected column names carry t0_/t1_ prefixes.
    renamed_merge = merge_data((df1, "col11"), (df2, "col21", True), auto_rename=True)
    self.assertEqual(
        _df_roles(renamed_merge),
        {"t0_col11": "FEATURE", "t1_col22": "FEATURE"},
    )

    # The DataFrame-level merge_with should yield the same roles as
    # merge_data(df1, df2).
    method_merge = df1.merge_with(df2)
    self.assertEqual(_df_roles(method_merge), all_columns_roles)
def test_sample(self):
    """Check which node ``DataFrame.sample`` binds for each sampling mode.

    Covers sampling by count, by fraction, by weighted fraction and by
    per-stratum fraction.
    """

    def check_sampled(sampled, expected_code_name):
        # Build the adapter first, then assert, mirroring the order in
        # which each case is verified.
        bound_adapter = adapter_from_df(sampled)
        self.assertIsInstance(sampled, DataFrame)
        self.assertEqual(bound_adapter._bind_node.code_name, expected_code_name)
        return bound_adapter

    # Fixed number of rows.
    check_sampled(self.df.sample(n=20), "RandomSample")

    # Fraction of rows.
    check_sampled(self.df.sample(frac=0.5), "RandomSample")

    # Weighted sampling: the weight column name must reach the node parameters.
    weighted_adapter = check_sampled(
        self.df.sample(frac=0.5, weights=self.df.sepal_length),
        "WeightedSample",
    )
    self.assertEqual(weighted_adapter._bind_node.parameters["probCol"], "sepal_length")

    # Stratified sampling with a per-category fraction mapping.
    check_sampled(
        self.df.sample(frac={"Iris-setosa": 0.5}, strata="category"),
        "StratifiedSample",
    )
def test_batch_persist(self):
    """Check the order of recorded actions under ``DataFrame.batch_persist``.

    Three two-stage chains are built with ``self.mock_action``; each stage
    appends a ``(label, stage)`` tuple to a shared list when its action is
    invoked. After the batch persist, every chain must have recorded "U"
    before "D", and each stage must have been recorded once per chain.
    """
    options.runner.dry_run = False

    recorded = []

    def make_recorder(marker):
        # Bind the marker through a function argument so that each lambda
        # keeps its own value rather than the loop's last one.
        return lambda _: recorded.append(marker)

    chained_dfs = []
    table_names = []
    for idx in range(3):
        label = "F%d" % idx
        upper_df = self.mock_action(self.df, action=make_recorder((label, "U")))
        lower_df = self.mock_action(upper_df, action=make_recorder((label, "D")))
        chained_dfs.append(lower_df)
        table_names.append("TN" + str(idx))

    DataFrame.batch_persist(chained_dfs, table_names)

    # Within each chain, the "U" stage is recorded before the "D" stage.
    for idx in range(3):
        label = "F%d" % idx
        stages_seen = [stage for name, stage in recorded if name == label]
        self.assertListEqual(stages_seen, list("UD"))

    # Each stage is recorded exactly once for every chain.
    for stage_code in "UD":
        labels_seen = sorted(name for name, stage in recorded if stage == stage_code)
        self.assertListEqual(labels_seen, ["F0", "F1", "F2"])