本文整理汇总了Python中odps.df.DataFrame类的典型用法代码示例。如果您正苦于以下问题:Python DataFrame类的具体用法?Python DataFrame怎么用?Python DataFrame使用的例子?那么恭喜您,这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了DataFrame类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_df_store
def test_df_store(self):
    """Persist a grouped, sorted aggregate of one partition into a table.

    Selects the ``part1=1,part2=2`` partition of
    ``IONOSPHERE_TABLE_TWO_PARTS``, counts ``a01`` per ``class`` (column
    renamed to ``count``), sorts by ``class`` descending and persists the
    result as ``IONOSPHERE_SORTED_TABLE_PART``.
    """
    self.delete_table(IONOSPHERE_SORTED_TABLE_PART)
    self.create_ionosphere_two_parts(IONOSPHERE_TABLE_TWO_PARTS)
    df = DataFrame(self.odps.get_table(IONOSPHERE_TABLE_TWO_PARTS)).filter_partition('part1=1,part2=2')
    # ``async`` is a reserved word since Python 3.7, so writing ``async=False``
    # literally is a SyntaxError there.  Dict unpacking forwards the very same
    # keyword to the helper while remaining parseable on every Python version.
    # NOTE(review): if ``drop_table`` now spells the parameter ``async_``,
    # switch to ``async_=False`` -- confirm against the helper's signature.
    drop_table(self.odps, IONOSPHERE_SORTED_TABLE_PART, **{'async': False})
    sorted_df = df.groupby(df['class']).agg(df.a01.count().rename('count')).sort('class', ascending=False)
    sorted_df.persist(IONOSPHERE_SORTED_TABLE_PART)
示例2: persist
def persist(self, line):
    """Persist a frame from the interactive namespace into a table.

    ``line`` has the form ``<frame_name> <[project.]table_name>``; a trailing
    ``;`` and surrounding whitespace are ignored.  The frame is looked up in
    ``self.shell.user_ns`` and may be either a ``DataFrame`` or, when pandas
    is importable, a ``pandas.DataFrame`` (which is wrapped in a ``DataFrame``
    first).

    Raises:
        ValueError: if ``line`` does not contain both a frame name and a
            table name.
        KeyError: if ``frame_name`` is not present in the user namespace.
        TypeError: if the target table already exists, or if the object
            found is not a supported frame type.
    """
    # pandas is optional; without it only ``DataFrame`` objects are accepted.
    try:
        import pandas as pd
    except ImportError:
        pd = None

    self._set_odps()

    parts = line.strip().strip(';').split(None, 1)
    if len(parts) != 2:
        # Same exception type the bare tuple-unpacking used to raise, but
        # with a message that tells the user what was expected.
        raise ValueError(
            'expected "<frame_name> <[project.]table_name>", got %r' % (line,))
    frame_name, table_name = parts

    if '.' in table_name:
        project_name, table_name = table_name.split('.', 1)
    else:
        project_name = None

    frame = self.shell.user_ns[frame_name]
    if self._odps.exist_table(table_name, project=project_name):
        raise TypeError('%s already exists' % table_name)

    if isinstance(frame, DataFrame):
        target = frame
    elif pd is not None and isinstance(frame, pd.DataFrame):
        target = DataFrame(frame)
    else:
        # Previously this case fell through and still announced success
        # although nothing had been written.
        raise TypeError(
            '%s is not a DataFrame (got %s)' % (frame_name, type(frame).__name__))

    target.persist(name=table_name, project=project_name, notify=False)
    html_notify('Persist succeeded')
示例3: test_batch_persist
def test_batch_persist(self):
    """Check the order in which mocked actions run under ``batch_persist``.

    Three two-step chains are built on ``self.df``; every step appends a
    ``(chain_label, stage)`` tuple to a shared log when its action is called.
    After ``DataFrame.batch_persist`` the log must show ``U`` before ``D``
    within each chain, and each stage must have run for all three chains.
    """
    options.runner.dry_run = False
    call_seq = []

    def recorder(marker):
        # A factory so that every lambda captures its own marker value.
        return lambda _: call_seq.append(marker)

    labels = ["F%d" % n for n in range(3)]
    frames = []
    table_names = []
    for n, label in enumerate(labels):
        upper = self.mock_action(self.df, action=recorder((label, "U")))
        lower = self.mock_action(upper, action=recorder((label, "D")))
        frames.append(lower)
        table_names.append("TN" + str(n))

    DataFrame.batch_persist(frames, table_names)

    for label in labels:
        stages = [stage for name, stage in call_seq if name == label]
        self.assertListEqual(stages, list("UD"))
    for stage_code in "UD":
        names = sorted(name for name, stage in call_seq if stage == stage_code)
        self.assertListEqual(names, ["F0", "F1", "F2"])
示例4: test_normalize
def test_normalize(self):
    """Normalize the non-label columns of a partitioned table and persist them.

    The ``class`` field is excluded before ``normalize`` is applied; the
    result is stored as ``IONOSPHERE_NORMALIZED_TABLE``.
    """
    for stale_table in (IONOSPHERE_NORMALIZED_TABLE, IONOSPHERE_TABLE_ONE_PART):
        self.delete_table(stale_table)
    self.create_ionosphere_one_part(IONOSPHERE_TABLE_ONE_PART)

    source_table = self.odps.get_table(IONOSPHERE_TABLE_ONE_PART)
    selected = DataFrame(source_table).filter_partition('part=0, part=1')
    features = selected.exclude_fields('class')
    normalize(features).persist(IONOSPHERE_NORMALIZED_TABLE)
示例5: testCacheTable
def testCacheTable(self):
    """Exercise ``cache()`` on a join of ``self.odps_df`` and ``self.pd_df``.

    Verifies the compiled DAG size, compares the result with the same join
    executed by ``self.pd_engine``, and checks that ``_cache_data`` is kept
    across a second execution and populated for a derived count.
    """
    cached_join = self.odps_df.join(self.pd_df, 'name').cache()
    ordered = cached_join.sort('id_x')

    dag = self.engine._compile_dag(ordered)
    self.assertEqual(len(dag.nodes()), 3)

    actual = self.engine.execute(ordered).values

    # Reference result: the same join and sort run by the pandas engine.
    local_copy = DataFrame(self.odps_df.to_pandas())
    reference_expr = local_copy.join(self.pd_df, 'name').sort('id_x')
    expected = self.pd_engine.execute(reference_expr).values
    self.assertTrue(actual.equals(expected))
    self.assertEqual(len(self.engine._generated_table_names), 2)

    # Executing the cached expression again must keep the same cache object.
    cache_before = cached_join._cache_data
    self.assertEqual(len(cached_join.execute()), len(expected))
    self.assertIs(cached_join._cache_data, cache_before)

    low_id_count = cached_join[cached_join.id_x < 3].count()
    count_value = self.engine.execute(low_id_count)
    self.assertEqual(count_value, 2)
    self.assertEqual(low_id_count._cache_data, 2)
示例6: setup
def setup(self):
    """Build the fixtures: a table-backed frame, a pandas-backed frame, engines.

    Creates table ``tn('pyodps_df_mixed')`` with columns ``name`` (string)
    and ``id`` (bigint), writes three rows to it, and stores on ``self``:
    ``t``, ``odps_df``, ``pd_df``, ``engine`` and ``pd_engine``.
    """
    import pandas as pd

    column_names = ['name', 'id']
    column_types = ['string', 'bigint']
    remote_rows = [['name1', 1], ['name2', 2], ['name1', 3]]
    local_rows = [['name1', 5], ['name2', 6]]

    table_name = tn('pyodps_df_mixed')
    self.odps.delete_table(table_name, if_exists=True)
    table_schema = Schema.from_lists(column_names, column_types)
    self.t = self.odps.create_table(table_name, table_schema)
    with self.t.open_writer() as writer:
        writer.write([self.t.new_record(row) for row in remote_rows])

    self.odps_df = DataFrame(self.t)
    self.pd_df = DataFrame(pd.DataFrame(local_rows, columns=column_names))

    self.engine = MixedEngine(self.odps)
    self.pd_engine = PandasEngine(self.odps)
示例7: testJoin
def testJoin(self):
    """Join ``self.odps_df`` with ``self.pd_df`` and compare both engines."""
    mixed_expr = self.odps_df.join(self.pd_df, 'name').sort('id_x')
    actual = self.engine.execute(mixed_expr).values

    # Reference: the same join with the first frame materialised via pandas.
    local_copy = DataFrame(self.odps_df.to_pandas())
    reference_expr = local_copy.join(self.pd_df, 'name').sort('id_x')
    expected = self.pd_engine.execute(reference_expr).values

    self.assertTrue(actual.equals(expected))
示例8: testUnion
def testUnion(self):
    """Union ``self.odps_df`` with ``self.pd_df`` and compare both engines."""
    sort_keys = ['id', 'name']
    mixed_expr = self.odps_df.union(self.pd_df).sort(sort_keys)
    actual = self.engine.execute(mixed_expr).values

    # Reference: the same union with the first frame materialised via pandas.
    local_copy = DataFrame(self.odps_df.to_pandas())
    reference_expr = local_copy.union(self.pd_df).sort(['id', 'name'])
    expected = self.pd_engine.execute(reference_expr).values

    self.assertTrue(actual.equals(expected))
示例9: testCachePersist
def testCachePersist(self):
    """Persist a cached join into a partition of a pre-created table.

    The left side is ``self.odps_df`` filtered on ``id > 0`` and passed
    through an identity row function; the right side is a freshly written
    two-row table filtered on ``fid > 0``.  The join result (columns ``id``
    and ``fid``) is persisted into partition ``ds=today`` of the output
    table and is expected to contain two rows.
    """
    expr = self.odps_df

    data2 = [["name1", 3.2], ["name3", 2.4]]
    table_name = tn("pyodps_test_mixed_engine_cp_table2")
    self.odps.delete_table(table_name, if_exists=True)
    table2 = self.odps.create_table(
        name=table_name, schema=Schema.from_lists(["name", "fid"], ["string", "double"])
    )
    expr2 = DataFrame(table2)
    self.odps.write_table(table2, 0, data2)

    @output(expr.schema.names, expr.schema.types)
    def h(row):
        # Identity mapper: emits every input row unchanged.
        yield row

    left = expr.filter(expr.id > 0).apply(h, axis=1).cache()
    right = expr2.filter(expr2.fid > 0)
    joined = left.join(right, on=["name", right.fid < 4])["id", "fid"].cache()

    output_table = tn("pyodps_test_mixed_engine_cp_output_table")
    self.odps.delete_table(output_table, if_exists=True)
    schema = Schema.from_lists(["id", "fid"], ["bigint", "double"], ["ds"], ["string"])
    output_t = self.odps.create_table(output_table, schema, if_not_exists=True)

    # Drop the output table even when persisting or the assertion fails,
    # so a failing run does not leave the table behind.
    try:
        t = joined.persist(output_table, partition="ds=today", create_partition=True)
        self.assertEqual(len(t.execute()), 2)
    finally:
        output_t.drop()
示例10: testPandasGroupbyFilter
def testPandasGroupbyFilter(self):
    """Filter a group-by aggregate on a pandas-backed frame, with and without cache.

    The filtered aggregate is checked once on a fresh expression, then again
    (twice) after the aggregate itself has been executed and has
    ``_cache_data`` set.  A plain column sum is also executed twice.
    """
    import pandas as pd

    rows = [[2001, 1], [2002, 2], [2003, 3]]
    frame = DataFrame(pd.DataFrame(rows, columns=['id', 'fid']))
    expected = [[2003, 3]]

    # First pass: nothing has been executed before the filter runs.
    grouped = frame.groupby('id').agg(frame.fid.sum())
    picked = grouped[grouped.id == 2003]
    self.assertEqual(picked.execute().values.values.tolist(), expected)

    # Second pass: execute the aggregate first so that it carries cached data.
    grouped = frame.groupby('id').agg(frame.fid.sum())
    grouped.execute()
    self.assertIsNotNone(grouped._cache_data)
    picked = grouped[grouped.id == 2003]
    for _ in range(2):
        self.assertEqual(picked.execute().values.values.tolist(), expected)

    total = frame.fid.sum()
    for _ in range(2):
        self.assertEqual(total.execute(), 6)
示例11: test_direct_method
def test_direct_method(self):
    """Train ``LogisticRegression`` on a 0.6 split and fetch the predictions.

    The frame is built from ``IONOSPHERE_TABLE`` with ``class`` assigned the
    ``label`` role; predictions on the remaining split are pulled with
    ``to_pandas()``.
    """
    self.create_ionosphere(IONOSPHERE_TABLE)
    source_table = self.odps.get_table(IONOSPHERE_TABLE)
    labelled = DataFrame(source_table).roles(label='class')
    train_part, test_part = labelled.split(0.6)

    classifier = LogisticRegression(epsilon=0.01)
    fitted = classifier.train(train_part)
    fitted.predict(test_part).to_pandas()
示例12: testHeadAndTail
def testHeadAndTail(self):
    """Check ``head``/``tail`` sizes and contents on a frame over ``self.table``."""
    frame = DataFrame(self.table)

    for size in (1, 2):
        self.assertEqual(size, len(frame.head(size)))

    last_row = frame.tail(1)[0]
    self.assertEqual([3, 'name3'], list(last_row))

    # ``head`` applied on top of a filter expression.
    matches = frame[frame.name == 'name2'].head(1)
    self.assertEqual(1, len(matches))
    self.assertEqual([2, 'name2'], list(matches[0]))
示例13: test_kmeans
def test_kmeans(self):
    """Run ``KMeans`` with three centers, persist the model and print results.

    The ``class`` field is excluded from the input.  The model's PMML and the
    value returned by ``calinhara_score`` are printed; nothing is asserted.
    """
    self.delete_table(IONOSPHERE_CLUSTER_LABEL_TABLE)
    self.delete_offline_model(IONOSPHERE_CLUSTER_MODEL)

    source = DataFrame(self.odps.get_table(IONOSPHERE_TABLE))
    clusterer = KMeans(center_count=3)
    labeled, model = clusterer.transform(source.exclude_fields('class'))
    model.persist(IONOSPHERE_CLUSTER_MODEL, delay=True)

    print(model.load_pmml())
    print(calinhara_score(labeled, model))
示例14: test_df_consecutive
def test_df_consecutive(self):
    """Chain a filter, role assignment and ``head`` before training a model.

    Rows with ``a04 != 0`` are kept and ``class`` gets the ``label`` role;
    ``head(10)`` is called before the 0.6 split, after which
    ``LogisticRegression`` is trained and its predictions fetched.
    """
    self.create_ionosphere(IONOSPHERE_TABLE)

    raw = DataFrame(self.odps.get_table(IONOSPHERE_TABLE))
    nonzero = raw[raw['a04'] != 0]
    labelled = nonzero.roles(label='class')
    labelled.head(10)

    train_part, test_part = labelled.split(0.6)
    classifier = LogisticRegression(epsilon=0.01)
    fitted = classifier.train(train_part)
    fitted.predict(test_part).to_pandas()
示例15: test_mock_kmeans
def test_mock_kmeans(self):
    """Dry-run ``KMeans`` and register the parameter set it should produce.

    With ``options.runner.dry_run`` enabled, a parameter-check case built by
    ``self.gen_check_params_case`` is attached to the labeled output before
    that output is persisted.
    """
    options.runner.dry_run = True
    self.maxDiff = None

    source = DataFrame(self.odps.get_table(IONOSPHERE_TABLE))
    clusterer = KMeans(center_count=3)
    labeled, _model = clusterer.transform(source.exclude_fields('class'))

    expected_params = {
        'inputTableName': IONOSPHERE_TABLE,
        'centerCount': '3',
        'distanceType': 'euclidean',
        'idxTableName': IONOSPHERE_CLUSTER_LABEL_TABLE,
        'initCentersMethod': 'sample',
        'modelName': 'pm_k_means_0_2',
        'appendColsIndex': ','.join('%d' % i for i in range(0, 35)),
        'selectedColNames': ','.join('a%02d' % i for i in range(1, 35)),
        'loop': '100',
        'accuracy': '0.0',
    }
    labeled._add_case(self.gen_check_params_case(expected_params))
    labeled.persist(IONOSPHERE_CLUSTER_LABEL_TABLE)