当前位置: 首页>>代码示例>>Python>>正文


Python df.DataFrame类代码示例

本文整理汇总了Python中odps.df.DataFrame的典型用法代码示例。如果您正苦于以下问题:Python DataFrame类的具体用法?Python DataFrame怎么用?Python DataFrame使用的例子?那么恭喜您,这里精选的类代码示例或许可以为您提供帮助。


在下文中一共展示了DataFrame类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: test_df_store

 def test_df_store(self):
     """Persist a grouped, sorted aggregate of one partition of the ionosphere table.

     The source frame is restricted to ``part1=1,part2=2``; rows are grouped by
     ``class``, ``a01`` is counted (renamed to ``count``), the result is sorted
     by ``class`` in descending order and stored in
     ``IONOSPHERE_SORTED_TABLE_PART``.
     """
     self.delete_table(IONOSPHERE_SORTED_TABLE_PART)
     self.create_ionosphere_two_parts(IONOSPHERE_TABLE_TWO_PARTS)
     df = DataFrame(self.odps.get_table(IONOSPHERE_TABLE_TWO_PARTS)).filter_partition('part1=1,part2=2')
     # ``async`` became a reserved keyword in Python 3.7, so spelling the
     # argument as ``async=False`` no longer parses. Unpacking a dict passes
     # exactly the same keyword while staying valid on every Python version.
     # NOTE(review): if the installed drop_table only accepts ``async_``,
     # switch the key accordingly.
     drop_table(self.odps, IONOSPHERE_SORTED_TABLE_PART, **{'async': False})
     sorted_df = df.groupby(df['class']).agg(df.a01.count().rename('count')).sort('class', ascending=False)
     sorted_df.persist(IONOSPHERE_SORTED_TABLE_PART)
开发者ID:aliyun,项目名称:aliyun-odps-python-sdk,代码行数:7,代码来源:test_dataframe.py

示例2: persist

    def persist(self, line):
        """Persist a frame from the user namespace into a table.

        ``line`` is expected to look like ``"<frame_name> <table_name>"``; a
        trailing ``;`` and surrounding whitespace are ignored. The table name
        may be qualified as ``project.table``.

        The named object may be a PyODPS ``DataFrame`` or, when pandas is
        importable, a ``pandas.DataFrame`` (which is wrapped in a PyODPS
        ``DataFrame`` first).

        Raises:
            ValueError: if ``line`` does not contain both a frame name and a
                table name (the two-way unpacking of the split fails).
            TypeError: if the target table already exists, or if the named
                object is neither a PyODPS nor a pandas DataFrame.
        """
        try:
            import pandas as pd
            has_pandas = True
        except ImportError:
            # pandas is optional; without it only PyODPS frames are accepted.
            has_pandas = False

        self._set_odps()

        line = line.strip().strip(';')

        frame_name, table_name = line.split(None, 1)

        if '.' in table_name:
            project_name, table_name = table_name.split('.', 1)
        else:
            project_name = None

        # presumably user_ns is a plain mapping, so an unknown frame name
        # surfaces as KeyError -- confirm against the shell implementation.
        frame = self.shell.user_ns[frame_name]
        if self._odps.exist_table(table_name, project=project_name):
            raise TypeError('%s already exists' % table_name)

        if not isinstance(frame, DataFrame):
            if has_pandas and isinstance(frame, pd.DataFrame):
                frame = DataFrame(frame)
            else:
                # Previously this case fell through and still reported
                # success although nothing had been written.
                raise TypeError(
                    '%s is not a PyODPS or pandas DataFrame' % frame_name)

        frame.persist(name=table_name, project=project_name, notify=False)
        html_notify('Persist succeeded')
开发者ID:hitflame,项目名称:aliyun-odps-python-sdk,代码行数:28,代码来源:magics.py

示例3: test_batch_persist

    def test_batch_persist(self):
        """Check the order in which mocked actions fire during ``batch_persist``.

        Three two-step chains ``F0``..``F2`` are built; every step appends a
        ``(chain, stage)`` tuple to ``call_seq`` when its action runs. Within
        each chain ``U`` must be recorded before ``D``, and every stage must
        have been recorded once for each of the three chains.
        """
        options.runner.dry_run = False
        call_seq = []

        def make_recorder(marker):
            # A factory is needed so each lambda captures its own marker
            # rather than the loop's last value.
            return lambda _: call_seq.append(marker)

        labels = ["F%d" % idx for idx in range(3)]
        dfs = []
        tables = []
        for idx, label in enumerate(labels):
            first_step = self.mock_action(self.df, action=make_recorder((label, "U")))
            second_step = self.mock_action(first_step, action=make_recorder((label, "D")))

            dfs.append(second_step)
            tables.append("TN" + str(idx))

        DataFrame.batch_persist(dfs, tables)

        for label in labels:
            stages = [stage for chain, stage in call_seq if chain == label]
            self.assertListEqual(stages, list("UD"))
        for stage in "UD":
            chains = sorted(chain for chain, seen in call_seq if seen == stage)
            self.assertListEqual(chains, ["F0", "F1", "F2"])
开发者ID:aliyun,项目名称:aliyun-odps-python-sdk,代码行数:27,代码来源:test_mixin.py

示例4: test_normalize

    def test_normalize(self):
        """Normalize the non-``class`` fields of the one-part table and persist them.

        Both the output table and the source table are deleted first, then the
        source is recreated before the frame is built.
        """
        for stale_table in (IONOSPHERE_NORMALIZED_TABLE, IONOSPHERE_TABLE_ONE_PART):
            self.delete_table(stale_table)
        self.create_ionosphere_one_part(IONOSPHERE_TABLE_ONE_PART)

        source = self.odps.get_table(IONOSPHERE_TABLE_ONE_PART)
        partitioned = DataFrame(source).filter_partition('part=0, part=1')
        features = partitioned.exclude_fields('class')

        normalize(features).persist(IONOSPHERE_NORMALIZED_TABLE)
开发者ID:aliyun,项目名称:aliyun-odps-python-sdk,代码行数:7,代码来源:test_partitions.py

示例5: testCacheTable

    def testCacheTable(self):
        """Exercise ``cache()`` on a join of the ODPS frame with the pandas frame.

        Verifies the compiled DAG size, that the mixed-engine result equals the
        pandas-engine result, the number of generated table names, that
        re-executing keeps the same ``_cache_data`` object, and that a scalar
        count built on the cached frame stores its own cached value.
        """
        cached = self.odps_df.join(self.pd_df, 'name').cache()
        ordered = cached.sort('id_x')

        dag = self.engine._compile_dag(ordered)
        self.assertEqual(len(dag.nodes()), 3)

        actual = self.engine.execute(ordered).values

        # Reference result: same join and sort, run on the pandas engine only.
        local_df = DataFrame(self.odps_df.to_pandas())
        reference_expr = local_df.join(self.pd_df, 'name').sort('id_x')
        expected = self.pd_engine.execute(reference_expr).values
        self.assertTrue(actual.equals(expected))

        self.assertEqual(len(self.engine._generated_table_names), 2)

        # Executing the cached frame again must not replace its cache object.
        cache_before = cached._cache_data
        self.assertEqual(len(cached.execute()), len(expected))
        self.assertIs(cached._cache_data, cache_before)

        small_id_count = cached[cached.id_x < 3].count()
        self.assertEqual(self.engine.execute(small_id_count), 2)
        self.assertEqual(small_id_count._cache_data, 2)
开发者ID:hitflame,项目名称:aliyun-odps-python-sdk,代码行数:25,代码来源:test_mixed_engine.py

示例6: setup

    def setup(self):
        """Create the fixtures shared by the mixed-engine tests.

        Recreates the ``pyodps_df_mixed`` table with three ``(name, id)`` rows,
        wraps it and a two-row pandas frame with the same columns in
        ``DataFrame`` objects, and instantiates the mixed and pandas engines.
        """
        import pandas as pd

        names = ['name', 'id']
        types = ['string', 'bigint']
        odps_rows = [['name1', 1], ['name2', 2], ['name1', 3]]
        pd_rows = [['name1', 5], ['name2', 6]]

        table_name = tn('pyodps_df_mixed')
        self.odps.delete_table(table_name, if_exists=True)
        schema = Schema.from_lists(names, types)
        self.t = self.odps.create_table(table_name, schema)
        with self.t.open_writer() as writer:
            records = [self.t.new_record(row) for row in odps_rows]
            writer.write(records)

        self.odps_df = DataFrame(self.t)
        local_frame = pd.DataFrame(pd_rows, columns=names)
        self.pd_df = DataFrame(local_frame)

        self.engine = MixedEngine(self.odps)
        self.pd_engine = PandasEngine(self.odps)
开发者ID:hitflame,项目名称:aliyun-odps-python-sdk,代码行数:28,代码来源:test_mixed_engine.py

示例7: testJoin

    def testJoin(self):
        """Join the ODPS and pandas frames on ``name`` and compare both engines.

        The mixed-engine result must equal the result of the same join and
        sort executed by the pandas engine on a local copy of the ODPS data.
        """
        mixed_expr = self.odps_df.join(self.pd_df, 'name').sort('id_x')
        actual = self.engine.execute(mixed_expr).values

        local_df = DataFrame(self.odps_df.to_pandas())
        local_expr = local_df.join(self.pd_df, 'name').sort('id_x')
        expected = self.pd_engine.execute(local_expr).values

        self.assertTrue(actual.equals(expected))
开发者ID:hitflame,项目名称:aliyun-odps-python-sdk,代码行数:7,代码来源:test_mixed_engine.py

示例8: testUnion

    def testUnion(self):
        """Union the ODPS and pandas frames and compare both engines.

        Results are sorted by ``id`` then ``name`` so the two outputs can be
        compared directly.
        """
        sort_keys = ['id', 'name']

        mixed_expr = self.odps_df.union(self.pd_df).sort(sort_keys)
        actual = self.engine.execute(mixed_expr).values

        local_df = DataFrame(self.odps_df.to_pandas())
        local_expr = local_df.union(self.pd_df).sort(sort_keys)
        expected = self.pd_engine.execute(local_expr).values

        self.assertTrue(actual.equals(expected))
开发者ID:hitflame,项目名称:aliyun-odps-python-sdk,代码行数:7,代码来源:test_mixed_engine.py

示例9: testCachePersist

    def testCachePersist(self):
        """Persist a cached join into a partitioned output table.

        A second table with ``(name, fid)`` rows is created and joined with the
        ODPS frame (after a row-wise ``apply`` and a ``cache``). The cached
        join is persisted into partition ``ds=today`` of a fresh output table
        and the persisted frame is expected to contain two rows.
        """
        expr = self.odps_df

        table_name = tn("pyodps_test_mixed_engine_cp_table2")
        self.odps.delete_table(table_name, if_exists=True)
        table2_schema = Schema.from_lists(["name", "fid"], ["string", "double"])
        table2 = self.odps.create_table(name=table_name, schema=table2_schema)
        expr2 = DataFrame(table2)
        self.odps.write_table(table2, 0, [["name1", 3.2], ["name3", 2.4]])

        # Identity row function declared with the source frame's own schema.
        @output(expr.schema.names, expr.schema.types)
        def h(row):
            yield row

        left = expr.filter(expr.id > 0).apply(h, axis=1).cache()
        right = expr2.filter(expr2.fid > 0)
        joined = left.join(right, on=["name", right.fid < 4])["id", "fid"].cache()

        output_table = tn("pyodps_test_mixed_engine_cp_output_table")
        self.odps.delete_table(output_table, if_exists=True)
        output_schema = Schema.from_lists(
            ["id", "fid"], ["bigint", "double"], ["ds"], ["string"]
        )
        output_t = self.odps.create_table(output_table, output_schema, if_not_exists=True)

        persisted = joined.persist(output_table, partition="ds=today", create_partition=True)
        self.assertEqual(len(persisted.execute()), 2)

        output_t.drop()
开发者ID:aliyun,项目名称:aliyun-odps-python-sdk,代码行数:30,代码来源:test_mixed_engine.py

示例10: testPandasGroupbyFilter

    def testPandasGroupbyFilter(self):
        """Filter a grouped aggregate of a pandas-backed frame, with and without cache.

        The filter on ``id == 2003`` is checked once on a fresh aggregate and
        twice on an aggregate that was executed beforehand (so ``_cache_data``
        is set). A scalar sum over ``fid`` is also executed twice.
        """
        import pandas as pd

        rows = [[2001, 1], [2002, 2], [2003, 3]]
        df = DataFrame(pd.DataFrame(rows, columns=['id', 'fid']))
        expected = [[2003, 3]]

        def rows_of(expr):
            # Execute and flatten the result into plain nested lists.
            return expr.execute().values.values.tolist()

        grouped = df.groupby('id').agg(df.fid.sum())
        picked = grouped[grouped.id == 2003]
        self.assertEqual(rows_of(picked), expected)

        grouped = df.groupby('id').agg(df.fid.sum())
        grouped.execute()
        self.assertIsNotNone(grouped._cache_data)
        picked = grouped[grouped.id == 2003]

        # Repeated on purpose: the second run goes through the same expression
        # after it has already been executed once.
        for _ in range(2):
            self.assertEqual(rows_of(picked), expected)

        total = df.fid.sum()
        for _ in range(2):
            self.assertEqual(total.execute(), 6)
开发者ID:aliyun,项目名称:aliyun-odps-python-sdk,代码行数:30,代码来源:test_dataframe.py

示例11: test_direct_method

 def test_direct_method(self):
     """Train a logistic regression on a 60% split and fetch the predictions.

     The ionosphere frame gets ``class`` assigned as its label role before
     being split; predictions on the remaining part are pulled via
     ``to_pandas``.
     """
     self.create_ionosphere(IONOSPHERE_TABLE)
     source = self.odps.get_table(IONOSPHERE_TABLE)
     labelled = DataFrame(source).roles(label='class')
     train, test = labelled.split(0.6)
     classifier = LogisticRegression(epsilon=0.01)
     model = classifier.train(train)
     model.predict(test).to_pandas()
开发者ID:aliyun,项目名称:aliyun-odps-python-sdk,代码行数:8,代码来源:test_dataframe.py

示例12: testHeadAndTail

    def testHeadAndTail(self):
        """Check ``head`` and ``tail`` on the table frame and on a filtered frame."""
        df = DataFrame(self.table)

        for wanted in (1, 2):
            self.assertEqual(wanted, len(df.head(wanted)))
        last_row = df.tail(1)[0]
        self.assertEqual([3, 'name3'], list(last_row))

        matches = df[df.name == 'name2'].head(1)
        self.assertEqual(1, len(matches))
        first_match = matches[0]
        self.assertEqual([2, 'name2'], list(first_match))
开发者ID:aliyun,项目名称:aliyun-odps-python-sdk,代码行数:10,代码来源:test_dataframe.py

示例13: test_kmeans

 def test_kmeans(self):
     """Run 3-centre KMeans on the non-``class`` fields and print the outputs.

     The label table and offline model are removed first. The model is
     persisted with ``delay=True``; its PMML and the ``calinhara_score`` of
     the labelled data are printed.
     """
     self.delete_table(IONOSPHERE_CLUSTER_LABEL_TABLE)
     self.delete_offline_model(IONOSPHERE_CLUSTER_MODEL)
     df = DataFrame(self.odps.get_table(IONOSPHERE_TABLE))
     clusterer = KMeans(center_count=3)
     features = df.exclude_fields('class')
     labeled, model = clusterer.transform(features)
     model.persist(IONOSPHERE_CLUSTER_MODEL, delay=True)
     print(model.load_pmml())
     print(calinhara_score(labeled, model))
开发者ID:aliyun,项目名称:aliyun-odps-python-sdk,代码行数:10,代码来源:test_clustering.py

示例14: test_df_consecutive

 def test_df_consecutive(self):
     """Chain filter, role assignment, head, split, train and predict on one frame.

     Rows with ``a04 == 0`` are dropped and ``class`` becomes the label role.
     ``head(10)`` is called with its result discarded before the frame is
     split 60/40 for logistic-regression training and prediction.
     """
     self.create_ionosphere(IONOSPHERE_TABLE)
     source = DataFrame(self.odps.get_table(IONOSPHERE_TABLE))
     nonzero = source[source['a04'] != 0]
     labelled = nonzero.roles(label='class')
     labelled.head(10)
     train, test = labelled.split(0.6)
     classifier = LogisticRegression(epsilon=0.01)
     model = classifier.train(train)
     model.predict(test).to_pandas()
开发者ID:aliyun,项目名称:aliyun-odps-python-sdk,代码行数:11,代码来源:test_dataframe.py

示例15: test_mock_kmeans

    def test_mock_kmeans(self):
        """Dry-run KMeans and register the expected algorithm parameters.

        With ``dry_run`` enabled, a parameter-check case built from the
        expected parameter dict is attached to the labelled output before it
        is persisted to ``IONOSPHERE_CLUSTER_LABEL_TABLE``.
        """
        options.runner.dry_run = True
        self.maxDiff = None

        df = DataFrame(self.odps.get_table(IONOSPHERE_TABLE))
        clusterer = KMeans(center_count=3)
        labeled, model = clusterer.transform(df.exclude_fields('class'))

        append_cols = ','.join(['%d' % i for i in range(35)])
        selected_cols = ','.join(['a%02d' % i for i in range(1, 35)])
        expected_params = {
            'inputTableName': IONOSPHERE_TABLE,
            'centerCount': '3',
            'distanceType': 'euclidean',
            'idxTableName': IONOSPHERE_CLUSTER_LABEL_TABLE,
            'initCentersMethod': 'sample',
            'modelName': 'pm_k_means_0_2',
            'appendColsIndex': append_cols,
            'selectedColNames': selected_cols,
            'loop': '100',
            'accuracy': '0.0',
        }
        check_case = self.gen_check_params_case(expected_params)
        labeled._add_case(check_case)
        labeled.persist(IONOSPHERE_CLUSTER_LABEL_TABLE)
开发者ID:aliyun,项目名称:aliyun-odps-python-sdk,代码行数:12,代码来源:test_clustering.py


注:本文中的odps.df.DataFrame类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。