本文整理汇总了Python中odps.models.Schema类的典型用法代码示例。如果您正苦于以下问题：Python Schema类的具体用法？Python Schema怎么用？Python Schema使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了Schema类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: testCachePersist
def testCachePersist(self):
    """Persist a cached join of two collections into a partitioned table.

    Creates a second table holding ``data2``, joins it with a filtered and
    cached view of ``self.odps_df``, persists the ``id``/``fid`` columns of
    the join into partition ``ds=today`` of a freshly created output table
    and asserts that executing the persisted result yields two rows.

    The output table is dropped in a ``finally`` clause so that it is
    removed even when persisting or the assertion fails.
    """
    expr = self.odps_df

    data2 = [["name1", 3.2], ["name3", 2.4]]

    # Recreate the right-hand table from scratch and load the fixture rows.
    table_name = tn("pyodps_test_mixed_engine_cp_table2")
    self.odps.delete_table(table_name, if_exists=True)
    table2 = self.odps.create_table(
        name=table_name, schema=Schema.from_lists(["name", "fid"], ["string", "double"])
    )
    expr2 = DataFrame(table2)
    self.odps.write_table(table2, 0, data2)

    @output(expr.schema.names, expr.schema.types)
    def h(row):
        # Row mapper that re-emits every input row unchanged.
        yield row

    left = expr.filter(expr.id > 0).apply(h, axis=1).cache()
    right = expr2.filter(expr2.fid > 0)
    joined = left.join(right, on=["name", right.fid < 4])["id", "fid"].cache()

    # Recreate the partitioned output table the join is persisted into.
    output_table = tn("pyodps_test_mixed_engine_cp_output_table")
    self.odps.delete_table(output_table, if_exists=True)
    schema = Schema.from_lists(["id", "fid"], ["bigint", "double"], ["ds"], ["string"])
    output_t = self.odps.create_table(output_table, schema, if_not_exists=True)

    try:
        t = joined.persist(output_table, partition="ds=today", create_partition=True)
        self.assertEqual(len(t.execute()), 2)
    finally:
        # Clean up regardless of the outcome so a failure does not leak the table.
        output_t.drop()
示例2: setup
def setup(self):
    """Build two mock-table backed collection expressions.

    ``self.expr`` wraps a table with columns ``name``/``id`` and
    ``self.expr2`` wraps one with columns ``name2``/``id2``; in both cases
    the column types are ``string`` and ``int64``.
    """

    def to_types(*type_names):
        # Turn every type name into a validated data type object.
        return [validate_data_type(type_name) for type_name in type_names]

    first_schema = Schema.from_lists(["name", "id"], to_types("string", "int64"))
    first_table = MockTable(name="pyodps_test_expr_table", schema=first_schema)
    self.expr = CollectionExpr(_source_data=first_table, _schema=first_schema)

    second_schema = Schema.from_lists(["name2", "id2"], to_types("string", "int64"))
    second_table = MockTable(name="pyodps_test_expr_table2", schema=second_schema)
    self.expr2 = CollectionExpr(_source_data=second_table, _schema=second_schema)
示例3: testChineseSchema
def testChineseSchema(self):
    """Check that non-ASCII column and partition names work in a schema.

    The names are passed both with and without the ``u`` prefix; lookups
    through ``in``, ``get_column``, ``get_partition`` and indexing must all
    succeed, and two schemas differing only in the prefix must be equal.
    """
    schema = Schema.from_lists([u'用户'], ['string'], ['分区'], ['bigint'])

    # Membership test and explicit accessors.
    self.assertIn('用户', schema)
    column_type = schema.get_column('用户').type
    self.assertEqual(column_type.name, 'string')
    partition_type = schema.get_partition(u'分区').type
    self.assertEqual(partition_type.name, 'bigint')

    # Item access resolves columns as well as partitions.
    self.assertEqual(schema['用户'].type.name, 'string')
    self.assertEqual(schema[u'分区'].type.name, 'bigint')

    # Same schema, with the prefixes swapped between column and partition.
    swapped_prefix_schema = Schema.from_lists(['用户'], ['string'], [u'分区'], ['bigint'])
    self.assertEqual(schema, swapped_prefix_schema)
示例4: setup
def setup(self):
    """Build a plain and a partitioned mock-table collection expression.

    Both tables have columns ``name``/``id``/``fid``; the second one also
    has partition columns ``part1``/``part2``. Each mock table gets the
    REST client taken from ``self.config.odps``.
    """
    plain_schema = Schema.from_lists(
        ['name', 'id', 'fid'],
        [types.string, types.int64, types.float64],
    )
    plain_table = MockTable(name='pyodps_test_expr_table', schema=plain_schema)
    plain_table._client = self.config.odps.rest
    self.expr = CollectionExpr(_source_data=plain_table, _schema=plain_schema)

    partitioned_schema = Schema.from_lists(
        ['name', 'id', 'fid'],
        [types.string, types.int64, types.float64],
        ['part1', 'part2'],
        [types.string, types.int64],
    )
    partitioned_table = MockTable(name='pyodps_test_expr_table2', schema=partitioned_schema)
    partitioned_table._client = self.config.odps.rest
    self.expr2 = CollectionExpr(_source_data=partitioned_table, _schema=partitioned_schema)
示例5: testTableResource
def testTableResource(self):
    """Create, re-fetch, update and delete a table resource.

    Phases, in order:

    1. create a table resource over an unpartitioned table and check it;
    2. remove it from the parent's cache and check the re-fetched object;
    3. recreate the table with partitions and point the resource at
       partition ``pt=test,sec=1``, then at ``pt=test,sec=2``;
    4. delete the resource and the table.
    """
    # Phase 1: unpartitioned source table and a fresh resource.
    table_name = tn('pyodps_t_tmp_resource_table')
    plain_schema = Schema.from_lists(['id', 'name'], ['string', 'string'])
    self.odps.delete_table(table_name, if_exists=True)
    self.odps.create_table(table_name, plain_schema)

    resource_name = tn('pyodps_t_tmp_table_resource')
    try:
        self.odps.delete_resource(resource_name)
    except errors.NoSuchObject:
        # No resource of that name exists, so there is nothing to remove.
        pass

    resource = self.odps.create_resource(resource_name, 'table', table_name=table_name)
    self.assertIsInstance(resource, TableResource)
    self.assertEqual(resource.get_source_table().name, table_name)
    self.assertIsNone(resource.get_source_table_partition())
    self.assertIs(resource, self.odps.get_resource(resource_name))

    # Phase 2: drop the cached entry; the next lookup must give a new object.
    del resource.parent[resource_name]
    self.assertIsNot(resource, self.odps.get_resource(resource_name))
    resource = self.odps.get_resource(resource_name)
    self.assertIsInstance(resource, TableResource)
    self.assertEqual(resource.get_source_table().name, table_name)
    self.assertIsNone(resource.get_source_table_partition())

    # Phase 3: recreate the table with partition columns and retarget the resource.
    table_name = tn('pyodps_t_tmp_resource_table')
    partition_spec = 'pt=test,sec=1'
    partitioned_schema = Schema.from_lists(
        ['id', 'name'], ['string', 'string'], ['pt', 'sec'], ['string', 'bigint']
    )
    self.odps.delete_table(table_name, if_exists=True)
    table = self.odps.create_table(table_name, partitioned_schema)
    table.create_partition(partition_spec)

    resource_name = tn('pyodps_t_tmp_table_resource')
    resource = resource.update(partition=partition_spec)
    self.assertIsInstance(resource, TableResource)
    self.assertEqual(resource.get_source_table().name, table_name)
    self.assertEqual(
        str(resource.get_source_table_partition()),
        str(types.PartitionSpec(partition_spec)),
    )
    self.assertIs(resource, self.odps.get_resource(resource_name))

    partition_spec = 'pt=test,sec=2'
    table.create_partition(partition_spec)
    resource = resource.update(partition=partition_spec)
    self.assertIsInstance(resource, TableResource)
    self.assertEqual(resource.get_source_table().name, table_name)
    self.assertEqual(
        str(resource.get_source_table_partition()),
        str(types.PartitionSpec(partition_spec)),
    )
    self.assertIs(resource, self.odps.get_resource(resource_name))

    # Phase 4: clean up.
    self.odps.delete_resource(resource_name)
    self.odps.delete_table(table_name)
示例6: testJoinGroupby
def testJoinGroupby(self):
    """Join with a pandas-backed collection, then group by ``id`` and sum ``fid``.

    The engine result is compared with the same group-by/sum computed by
    pandas directly on the fixture rows.
    """
    left_rows = [
        ['name1', 4, 5.3, None, None, None],
        ['name2', 2, 3.5, None, None, None],
        ['name1', 4, 4.2, None, None, None],
        ['name1', 3, 2.2, None, None, None],
        ['name1', 3, 4.1, None, None, None],
    ]

    right_schema = Schema.from_lists(
        ['name', 'id2', 'id3'],
        [types.string, types.int64, types.int64],
    )

    self._gen_data(data=left_rows)

    right_rows = [
        ['name1', 4, -1],
        ['name2', 1, -2],
    ]

    import pandas as pd

    right_frame = pd.DataFrame(right_rows, columns=right_schema.names)
    right_expr = CollectionExpr(_source_data=right_frame, _schema=right_schema)

    # Keep only the left-hand columns after the join, then aggregate.
    joined = self.expr.join(right_expr, on='name')[self.expr]
    grouped = joined.groupby('id').agg(joined.fid.sum())

    res = self.engine.execute(grouped)
    result = self._get_result(res)

    left_frame = pd.DataFrame(left_rows, columns=self.expr.schema.names)
    expected = left_frame.groupby('id').agg({'fid': 'sum'})
    self.assertEqual(expected.reset_index().values.tolist(), result)
示例7: testBloomFilter
def testBloomFilter(self):
    """Bloom-filter ``self.expr`` on ``name`` against a one-row name column.

    Asserts that no row of the result has ``'name2'`` in its first field.
    """
    source_rows = [
        ['name1', 4, 5.3, None, None, None],
        ['name2', 2, 3.5, None, None, None],
        ['name1', 4, 4.2, None, None, None],
        ['name1', 3, 2.2, None, None, None],
        ['name1', 3, 4.1, None, None, None],
    ]

    filter_rows = [
        ['name1'],
        ['name3'],
    ]

    self._gen_data(data=source_rows)

    filter_schema = Schema.from_lists(['name', ], [types.string])

    import pandas as pd

    filter_frame = pd.DataFrame(filter_rows, columns=filter_schema.names)
    filter_expr = CollectionExpr(_source_data=filter_frame, _schema=filter_schema)

    # Only the first row of the filter collection feeds the bloom filter.
    filtered = self.expr.bloom_filter('name', filter_expr[:1].name, capacity=10)

    res = self.engine.execute(filtered)
    result = self._get_result(res)

    self.assertTrue(all(row[0] != 'name2' for row in result))
示例8: setUp
def setUp(self):
    """Run the base set-up, start a profiler and build ``self.SCHEMA``.

    The schema has one column per listed type, each column being named
    after its own type.
    """
    TestBase.setUp(self)

    # Start profiling; the profiler object is kept on ``self.pr``.
    self.pr = cProfile.Profile()
    self.pr.enable()

    column_names = ['bigint', 'double', 'datetime', 'boolean', 'string', 'decimal']
    column_types = ['bigint', 'double', 'datetime', 'boolean', 'string', 'decimal']
    self.SCHEMA = Schema.from_lists(column_names, column_types)
示例9: testReadMapArraySQLInstance
def testReadMapArraySQLInstance(self):
    """Round-trip map and array columns through a SQL instance reader.

    Writes two rows containing a ``map<string,string>`` and an
    ``array<string>`` column, reads them back via ``select *`` using the
    table schema for the reader, and compares the rows ordered by ``idx``.

    The table is dropped in a ``finally`` clause so that a failing write,
    query or assertion does not leave it behind.
    """
    test_table = tn('pyodps_t_tmp_read_map_array_sql_instance')
    self.odps.delete_table(test_table, if_exists=True)

    table = self.odps.create_table(
        test_table,
        schema=Schema.from_lists(
            ['idx', 'map_col', 'array_col'],
            ['bigint', odps_types.Map(odps_types.string, odps_types.string), odps_types.Array(odps_types.string)],
        )
    )

    try:
        data = [
            [0, {'key1': 'value1', 'key2': 'value2'}, ['item1', 'item2', 'item3']],
            [1, {'key3': 'value3', 'key4': 'value4'}, ['item4', 'item5']],
        ]
        self.odps.write_table(test_table, data)

        with self.odps.execute_sql('select * from %s' % test_table).open_reader(table.schema) as reader:
            read_data = [list(r.values) for r in reader]

        # Sort both sides by ``idx`` before comparing them.
        read_data = sorted(read_data, key=lambda r: r[0])
        expected_data = sorted(data, key=lambda r: r[0])

        self.assertSequenceEqual(read_data, expected_data)
    finally:
        # Always remove the temporary table, whatever the test outcome.
        table.drop()
示例10: setup
def setup(self):
    """Prepare one table-backed and one pandas-backed DataFrame plus engines.

    Recreates table ``pyodps_df_mixed`` (name passed through ``tn``) with
    columns ``name``/``id``, writes three rows to it and exposes it as
    ``self.odps_df``. ``self.pd_df`` wraps a two-row pandas frame with the
    same column names. Finally the mixed and pandas engines are created.
    """
    import pandas as pd

    table_rows = [
        ['name1', 1],
        ['name2', 2],
        ['name1', 3],
    ]
    frame_rows = [
        ['name1', 5],
        ['name2', 6],
    ]

    column_names = ['name', 'id']
    column_types = ['string', 'bigint']

    table_name = tn('pyodps_df_mixed')
    self.odps.delete_table(table_name, if_exists=True)
    self.t = self.odps.create_table(table_name, Schema.from_lists(column_names, column_types))
    with self.t.open_writer() as writer:
        writer.write([self.t.new_record(row) for row in table_rows])

    self.odps_df = DataFrame(self.t)
    self.pd_df = DataFrame(pd.DataFrame(frame_rows, columns=column_names))

    self.engine = MixedEngine(self.odps)
    self.pd_engine = PandasEngine(self.odps)
示例11: testReadWriteTable
def testReadWriteTable(self):
    """Write records to a new table and read them back in three ways.

    The rows are read back in full with ``read_table``, with ``step=2``
    (compared against every second expected row) and through
    ``table.head``. The table is deleted afterwards and its absence is
    asserted.
    """
    table_name = tn('pyodps_t_tmp_read_write_table')
    schema = Schema.from_lists(['id', 'name', 'right'], ['bigint', 'string', 'boolean'])

    # Start from a state in which the table does not exist.
    self.odps.delete_table(table_name, if_exists=True)
    self.assertFalse(self.odps.exist_table(table_name))

    table = self.odps.create_table(table_name, schema)
    rows = [
        [111, 'aaa', True],
        [222, 'bbb', False],
        [333, 'ccc', True],
        [444, '中文', False],
    ]
    row_count = len(rows)
    records = [Record(schema=schema, values=row) for row in rows]

    # Expected values: the second field of each row is passed through ``to_str``.
    expected_rows = [[row[0], to_str(row[1]), row[2]] for row in rows]

    self.odps.write_table(table, 0, records)

    all_rows = [record.values for record in self.odps.read_table(table, row_count)]
    self.assertSequenceEqual(expected_rows, all_rows)

    stepped_rows = [record.values for record in self.odps.read_table(table, row_count, step=2)]
    self.assertSequenceEqual(expected_rows[::2], stepped_rows)

    head_rows = [record.values for record in table.head(row_count)]
    self.assertSequenceEqual(expected_rows, head_rows)

    self.odps.delete_table(table_name)
    self.assertFalse(self.odps.exist_table(table_name))
示例12: testCreateDeleteTable
def testCreateDeleteTable(self):
    """Create and delete a partitioned table through two different entry points.

    First via the project's ``tables`` collection with ``lifecycle=10``,
    then via ``self.odps.create_table`` with ``shard_num=10`` and
    ``hub_lifecycle=5``. After every deletion the table must not exist.
    """
    table_name = tn("pyodps_t_tmp_create_table")
    schema = Schema.from_lists(["id", "name"], ["bigint", "string"], ["ds"], ["string"])

    project_tables = self.odps._project.tables

    project_tables.delete(table_name, if_exists=True)
    self.assertFalse(self.odps.exist_table(table_name))

    # Round 1: create through the tables collection.
    created = project_tables.create(table_name, schema, lifecycle=10)

    # NOTE(review): presumably ``_getattr`` reads the stored value without
    # loading it, whereas plain attribute access loads it - confirm.
    self.assertIsNone(created._getattr("owner"))
    self.assertIsNotNone(created.owner)

    self.assertEqual(created.name, table_name)
    self.assertEqual(created.schema, schema)
    self.assertEqual(created.lifecycle, 10)

    project_tables.delete(table_name, if_exists=True)
    self.assertFalse(self.odps.exist_table(table_name))

    # Round 2: create through the entry object with shard settings.
    created = self.odps.create_table(table_name, schema, shard_num=10, hub_lifecycle=5)
    self.assertEqual(created.name, table_name)
    self.assertEqual(created.schema, schema)
    self.assertNotEqual(created.lifecycle, 10)
    self.assertEqual(created.shard.shard_num, 10)

    self.odps.delete_table(table_name, if_exists=True)
    self.assertFalse(self.odps.exist_table(table_name))
示例13: setup
def setup(self):
    """Build a mock-table collection expression and an execute context.

    The table has columns ``name``/``id``/``fid`` typed ``string``,
    ``int64`` and ``float64``.
    """

    def to_types(*type_names):
        # Turn every type name into a validated data type object.
        return [validate_data_type(type_name) for type_name in type_names]

    column_types = to_types("string", "int64", "float64")
    schema = Schema.from_lists(["name", "id", "fid"], column_types)
    mock_table = MockTable(name="pyodps_test_expr_table", schema=schema)

    self.expr = CollectionExpr(_source_data=mock_table, _schema=schema)
    self.ctx = ExecuteContext()
示例14: testNullableRecord
def testNullableRecord(self):
    """Check that a record accepts ``None`` for every one of eight column types.

    A record is built over columns ``col0`` .. ``col7`` from eight ``None``
    values; its ``values`` must then equal eight ``None`` values.
    """
    column_count = 8
    column_names = ['col%s' % index for index in range(column_count)]
    column_types = [
        'bigint', 'double', 'string', 'datetime', 'boolean', 'decimal',
        'array<string>', 'map<string,bigint>',
    ]
    schema = Schema.from_lists(column_names, column_types)

    record = Record(schema=schema, values=[None] * column_count)
    self.assertSequenceEqual(record.values, [None] * column_count)
示例15: testCreateDeleteTable
def testCreateDeleteTable(self):
    """Create and delete a partitioned table through two different entry points.

    First via the project's ``tables`` collection with ``lifecycle=10``,
    then via ``self.odps.create_table`` with ``shard_num=10`` and
    ``hub_lifecycle=5``. After every deletion the table must not exist.
    """
    table_name = tn('pyodps_t_tmp_create_table')
    schema = Schema.from_lists(['id', 'name'], ['bigint', 'string'], ['ds', ], ['string',])

    project_tables = self.odps._project.tables

    project_tables.delete(table_name, if_exists=True)
    self.assertFalse(self.odps.exist_table(table_name))

    # Round 1: create through the tables collection.
    created = project_tables.create(table_name, schema, lifecycle=10)
    self.assertEqual(created.name, table_name)
    self.assertEqual(created.schema, schema)
    self.assertEqual(created.lifecycle, 10)

    project_tables.delete(table_name, if_exists=True)
    self.assertFalse(self.odps.exist_table(table_name))

    # Round 2: create through the entry object with shard settings.
    created = self.odps.create_table(table_name, schema, shard_num=10, hub_lifecycle=5)
    self.assertEqual(created.name, table_name)
    self.assertEqual(created.schema, schema)
    self.assertNotEqual(created.lifecycle, 10)
    self.assertEqual(created.shard.shard_num, 10)

    self.odps.delete_table(table_name, if_exists=True)
    self.assertFalse(self.odps.exist_table(table_name))