Python DataFrame.sample方法代码示例

本文整理汇总了Python中odps.df.DataFrame.sample方法的典型用法代码示例。如果您正苦于以下问题：Python DataFrame.sample方法的具体用法？Python DataFrame.sample怎么用？Python DataFrame.sample使用的例子？那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类odps.df.DataFrame的用法示例。

在下文中一共展示了DataFrame.sample方法的1个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: Test

# 需要导入模块: from odps.df import DataFrame [as 别名]
# 或者: from odps.df.DataFrame import sample [as 别名]

#.........这里部分代码省略.........
                """
        odps.Schema {
          sepal_length            float64
          sepal_width             float64
          petal_length            float64
          petal_width             float64
          category                string
        }
        """
            )
        ).strip()
        self.assertEqual(rstrip_lines(repr(self.df.dtypes)).strip(), old_dtypes_repr)
        new_df = self.df.roles(label="category").key_value("sepal_length")
        new_dtypes_repr = rstrip_lines(
            textwrap.dedent(
                """
        odps.Schema {
          sepal_length            KV(':', ',')   FEATURE
          sepal_width             float64        FEATURE
          petal_length            float64        FEATURE
          petal_width             float64        FEATURE
          category                string         LABEL
        }
        """
            )
        ).strip()
        self.assertEqual(rstrip_lines(repr(new_df.dtypes)).strip(), new_dtypes_repr)

    def test_merge(self):
        self.odps.delete_table(TEMP_TABLE_1_NAME, if_exists=True)
        self.odps.execute_sql("create table {0} (col11 string, col12 string) lifecycle 1".format(TEMP_TABLE_1_NAME))
        self.odps.delete_table(TEMP_TABLE_2_NAME, if_exists=True)
        self.odps.execute_sql("create table {0} (col21 string, col22 string) lifecycle 1".format(TEMP_TABLE_2_NAME))

        df1 = DataFrame(self.odps.get_table(TEMP_TABLE_1_NAME))
        df2 = DataFrame(self.odps.get_table(TEMP_TABLE_2_NAME))

        self.assertRaises(ValueError, lambda: merge_data(df1))

        merged1 = merge_data(df1, df2)
        self.assertEqual(_df_roles(merged1), dict(col21="FEATURE", col11="FEATURE", col12="FEATURE", col22="FEATURE"))

        merged2 = merge_data((df1, "col11"), (df2, "col21", True))
        self.assertEqual(_df_roles(merged2), dict(col11="FEATURE", col22="FEATURE"))

        merged3 = merge_data((df1, "col11"), (df2, "col21", True), auto_rename=True)
        self.assertEqual(_df_roles(merged3), dict(t0_col11="FEATURE", t1_col22="FEATURE"))

        merged4 = df1.merge_with(df2)
        self.assertEqual(_df_roles(merged4), dict(col21="FEATURE", col11="FEATURE", col12="FEATURE", col22="FEATURE"))

    def test_sample(self):
        num_sampled = self.df.sample(n=20)
        adapter = adapter_from_df(num_sampled)
        self.assertIsInstance(num_sampled, DataFrame)
        self.assertEqual(adapter._bind_node.code_name, "RandomSample")

        frac_sampled = self.df.sample(frac=0.5)
        adapter = adapter_from_df(frac_sampled)
        self.assertIsInstance(frac_sampled, DataFrame)
        self.assertEqual(adapter._bind_node.code_name, "RandomSample")

        weighted_sampled = self.df.sample(frac=0.5, weights=self.df.sepal_length)
        adapter = adapter_from_df(weighted_sampled)
        self.assertIsInstance(weighted_sampled, DataFrame)
        self.assertEqual(adapter._bind_node.code_name, "WeightedSample")
        self.assertEqual(adapter._bind_node.parameters["probCol"], "sepal_length")

        stratified_sampled = self.df.sample(frac={"Iris-setosa": 0.5}, strata="category")
        adapter = adapter_from_df(stratified_sampled)
        self.assertIsInstance(stratified_sampled, DataFrame)
        self.assertEqual(adapter._bind_node.code_name, "StratifiedSample")

    def test_batch_persist(self):
        options.runner.dry_run = False
        call_seq = []

        dfs = []
        tables = []
        for idx in range(3):
            write_str = "F%d" % idx

            def gen_fun(wobj):
                return lambda _: call_seq.append(wobj)

            f = gen_fun((write_str, "U"))
            df_upper = self.mock_action(self.df, action=f)
            f = gen_fun((write_str, "D"))
            df_lower = self.mock_action(df_upper, action=f)

            dfs.append(df_lower)
            tables.append("TN" + str(idx))

        DataFrame.batch_persist(dfs, tables)

        for idx in range(3):
            write_str = "F%d" % idx
            self.assertListEqual([p[1] for p in call_seq if p[0] == write_str], list("UD"))
        for dir in "UD":
            self.assertListEqual(sorted(p[0] for p in call_seq if p[1] == dir), ["F0", "F1", "F2"])

开发者ID:aliyun，项目名称:aliyun-odps-python-sdk，代码行数:104，代码来源:test_mixin.py

注：本文中的odps.df.DataFrame.sample方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。