本文整理汇总了Python中dazzle.core.table.Table类的典型用法代码示例。如果您正苦于以下问题：Python Table类的具体用法？Python Table怎么用？Python Table使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了Table类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test4
def test4():
    """Print the values of a float column that starts and ends with NaN.

    A dataset is (re)created under ``/temp/dazzle-test`` with a single table
    ``t`` whose only column ``a`` holds NaN, 1..10, NaN. The column values
    are then printed using the ``"%.4f"`` format.
    """
    from dazzle.core.dataset import DataSet
    from dazzle.core.table import Table

    values = np.array([np.nan, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, np.nan])
    dataset = DataSet(os.path.join("/temp", "dazzle-test"), force_create=True)
    table = Table("t", dataset, [("a", values)])
    column = table.get_column("a")
    print(column.str_values(format="%.4f"))
示例2: test2
def test2():
    """Create table ``t`` with one integer column ``a`` and print the column's attributes.

    The dataset is (re)created under ``/temp/dazzle-test``. The column is
    declared by type only (no data), and its ``__dict__`` is dumped to stdout
    for inspection.
    """
    from dazzle.core.dataset import DataSet
    from dazzle.core.table import Table

    test_dir = os.path.join("/temp", "dazzle-test")
    ds = DataSet(test_dir, force_create=True)
    # ``np.int`` was a plain alias of the builtin ``int``; it was deprecated in
    # NumPy 1.20 and removed in 1.24, so the builtin is used directly.
    t = Table("t", ds, [("a", int)])
    ca = t.get_column("a")
    # Disabled stress variant that appends 5 * 10**8 random int32 values:
    # t.append({'a': np.random.randint(10000000, size=5*(10**8)).astype(np.int32)})
    print(ca.__dict__)
示例3: test_rebuild01
def test_rebuild01(self):
    """Rebuild the Category table with int8 columns and check size, dtype and first row.

    After the rebuild the table is expected to have 69 rows, ``CategoryID``
    must be int8, and every rebuilt column of row 0 must hold -128 (the int8
    minimum).
    """
    tsv_path = os.path.join(AVITO_DATA_DIR, "Category.tsv")
    category = Table.from_csv("Category", self.ds, tsv_path, delimiter='\t',
                              usecols=['CategoryID', 'ParentCategoryID', 'Level'], verbose=False)

    int8_columns = ("CategoryID", "Level", "ParentCategoryID")
    category.rebuild({column: np.int8 for column in int8_columns})

    self.assertEqual(len(category[:]), 69)
    self.assertEqual(category['CategoryID'].dtype, np.int8)
    for column in int8_columns:
        self.assertEqual(category[0][column], -128)  # int8.min
示例4: open
def open(data_dir):
    """Open and return the existing DataSet stored in *data_dir*.

    The dataset's ``dataset.json`` file is read to restore the compression
    parameters and the list of tables.

    Side effect: every table listed in the JSON file is opened and registered
    with the dataset, and the dataset is saved before being returned.

    Raises:
        DazzleError: if *data_dir* contains no ``dataset.json`` file.
    """
    json_file = os.path.join(data_dir, "dataset.json")
    if not os.path.exists(json_file):
        raise DazzleError("No 'dataset.json' file found in %s" % data_dir)

    ds = DataSet(data_dir, mode='open')

    # NOTE(review): the call below must resolve to the builtin ``open``; that
    # holds only if this function lives in a class body (e.g. as a static
    # method) rather than at module level -- confirm against the full file.
    with open(json_file, 'rb') as f:
        raw = f.read()
    data = json.loads(raw.decode('ascii'))

    cparams = data["compression_params"]
    ds._compression_params = bcolz.cparams(
        clevel=cparams["_clevel"],
        shuffle=cparams["_shuffle"],
        cname=cparams["_cname"],
    )

    for entry in data["tables"]:
        # Re-attach the on-disk ctable, then rebuild the column objects from it.
        opened = Table(entry["name"], ds, [], mode='open')
        opened._ctable = bcolz.open(opened.data_dir)
        opened._build_columns_from_ctable()
        opened._dataset._add_table(opened)

    ds.save()
    return ds
示例5: setUp
def setUp(self):
    """Create a fresh dataset with an empty table ``t`` and a two-row table ``u``.

    Both tables have an integer column ``a`` and a float column ``b``.
    ``self.a`` is a plain list of sample values kept on the fixture.
    """
    self.a = [6, 4, 7, 4, 6, 9]
    self.test_dir = os.path.join("/temp", "dazzle-test")
    self.ds = DataSet(self.test_dir, force_create=True)
    # ``np.int`` / ``np.float`` were aliases of the builtins; they were
    # deprecated in NumPy 1.20 and removed in 1.24, so the builtins are used.
    self.t = Table("t", self.ds, [("a", np.array([], int)), ("b", np.array([], float))], force_create=True)
    self.u = Table("u", self.ds, [("a", np.array([1, 2], int)), ("b", np.array([1.1, 2.2], float))], force_create=True)
示例6: test_copy02
def test_copy02(self):
    """Call ``Table.copy`` with the source path ``/bim/bam``.

    NOTE(review): presumably this path does not exist and the call is expected
    to raise; no expectation is visible in this excerpt -- confirm.
    """
    source_dir = "/bim/bam"
    Table.copy("t", self.ds, source_dir)
示例7: load_dataset
def load_dataset():
    """Return the 'raw' dataset, loading every table that is still missing from its TSV source.

    The 'raw' dataset is the result of loading the CSV source data into dazzle
    tables, filtering out only the data we do not want to process further.
    The function is non-destructive: an existing dataset is reopened and
    tables that are already present are skipped, so it can be launched several
    times before the whole job is done.
    """
    import os
    from dazzle.core.dataset import DataSet

    ds = DataSet.open(raw_dir) if DataSet.exists(raw_dir) else DataSet(raw_dir, force_create=True)

    # Notes:
    # - many of the following attributes should be unsigned int instead of
    #   signed int, but numexpr works only on signed data.
    # - Similarly to pandas, we use the types required to contain the existing
    #   data, not the types we would like to use.
    # - NOTE(review): 'f1' is not a standard NumPy dtype string; presumably
    #   Table.from_csv gives it a meaning of its own -- confirm.
    #
    # Each entry is (table name, TSV file name under csv_dir, extra keyword
    # arguments for Table.from_csv). Tables are loaded in this order.
    table_specs = (
        ("Category", "Category.tsv", {
            'usecols': ['CategoryID', 'ParentCategoryID', 'Level'],
            'dtype': {'CategoryID': 'i4', 'ParentCategoryID': 'i1', 'Level': 'i1'},
        }),
        # The ``filter`` argument does not exist in pandas.read_csv(). It
        # skips rows based on a numexpr expression; IsClick == IsClick is true
        # iff IsClick is not NA.
        ("TrainSearchStream", "trainSearchStream.tsv", {
            'usecols': ['SearchID', 'AdID', 'Position', 'ObjectType', 'HistCTR', 'IsClick'],
            'dtype': {'SearchID': 'i4', 'AdID': 'i4', 'Position': 'i1', 'ObjectType': 'i1',
                      'HistCTR': 'f4', 'IsClick': 'f1'},
            'filter': '(ObjectType == 3) & (IsClick == IsClick)',
        }),
        # String fields are deliberately not loaded here; that problem is to
        # be revisited later (original note: "with Don").
        ("AdsInfo", "AdsInfo.tsv", {
            'usecols': ['AdID', 'LocationID', 'CategoryID', 'Price', 'IsContext'],
            'dtype': {'AdID': 'i4', 'LocationID': 'f4', 'CategoryID': 'f4', 'Price': 'f4',
                      'IsContext': 'f1'},
        }),
        # Same remark as above: string fields are skipped for now.
        ("SearchInfo", "SearchInfo.tsv", {
            'usecols': ['SearchID', 'IPID', 'UserID', 'IsUserLoggedOn', 'LocationID', 'CategoryID'],
            'dtype': {'SearchID': 'i4', 'IPID': 'i4', 'UserID': 'f4', 'IsUserLoggedOn': 'f1',
                      'LocationID': 'f4', 'CategoryID': 'f4'},
        }),
        ("userInfo", "userInfo.tsv", {
            'usecols': ['UserID', 'UserAgentID', 'UserAgentOSID', 'UserDeviceID', 'UserAgentFamilyID'],
            'dtype': {'UserID': 'i4', 'UserAgentID': 'i4', 'UserAgentOSID': 'i4',
                      'UserDeviceID': 'i4', 'UserAgentFamilyID': 'i4'},
        }),
        ("Location", "Location.tsv", {
            'usecols': ['LocationID', 'CityID', 'RegionID'],
            'dtype': {'LocationID': 'i4', 'CityID': 'f4', 'RegionID': 'f4'},
        }),
        ("PhoneRequestsStream", "PhoneRequestsStream.tsv", {
            'usecols': ['UserID', 'IPID', 'AdID', 'PhoneRequestDate'],
            'dtype': {'UserID': 'i4', 'IPID': 'i4', 'AdID': 'i4', 'PhoneRequestDate': 'object'},
        }),
        ("VisitsStream", "VisitsStream.tsv", {
            'usecols': ['UserID', 'IPID', 'AdID', 'ViewDate'],
            'dtype': {'UserID': 'i4', 'IPID': 'i4', 'AdID': 'i4', 'ViewDate': 'object'},
        }),
    )

    for table_name, tsv_name, csv_options in table_specs:
        if ds.get_table(table_name) is not None:
            continue  # already loaded by a previous run
        # The returned table is not kept: no reference to it outlives the call.
        Table.from_csv(table_name, ds, os.path.join(csv_dir, tsv_name),
                       delimiter='\t', chunksize=10**7, **csv_options)

    return ds
示例8: TestColumn
class TestColumn(unittest.TestCase):
    """Unit tests for a table column and for ``LiteralColumn`` construction.

    The fixture is a table ``t`` with an int8 column ``a`` = [1, 3] and a
    float column ``x`` = [2, 4]; ``self.ca`` is column ``a``.

    Tests decorated with ``@raises(DazzleError)`` are expected to raise that
    error (``raises`` is presumably ``nose.tools.raises`` -- confirm against
    the file's imports).
    """

    def setUp(self):
        self.a = [6, 4, 7, 4, 6, 9]
        self.test_dir = os.path.join("/temp", "dazzle-test")
        ds = DataSet(self.test_dir, force_create=True)
        # ``np.float`` / ``np.int`` were aliases of the builtins; they were
        # deprecated in NumPy 1.20 and removed in 1.24, so the builtins are
        # used throughout this class.
        self.t = Table("t", ds, [("a", np.array([1, 3], dtype=np.int8)), ("x", np.array([2, 4], dtype=float))], force_create=True)
        self.ca = self.t.get_column("a")

    # --- data_dir / carray -------------------------------------------------

    @raises(DazzleError)
    def test_data_dir01(self):
        """no table associated"""
        print(LiteralColumn("a", np.array([], int)).data_dir)

    def test_data_dir02(self):
        # The column directory is <dataset dir>/<table name>/<column name>.
        self.assertEqual(self.ca.data_dir, os.path.join(self.test_dir, "t", "a"))

    @raises(DazzleError)
    def test_carray01(self):
        """no table associated"""
        print(LiteralColumn("a", np.array([], int)).carray)

    def test_carray02(self):
        assert_array_equal(self.ca.carray[:], [1, 3])

    # --- construction ------------------------------------------------------

    @raises(DazzleError)
    def test_init01(self):
        # Empty column name.
        LiteralColumn("", [])

    @raises(DazzleError)
    def test_init02(self):
        # Name starting with a digit.
        LiteralColumn("1a", [])

    @raises(DazzleError)
    def test_init03(self):
        # Name starting with an underscore.
        LiteralColumn("_a", [])

    @raises(DazzleError)
    def test_init04(self):
        # A string passed as column data.
        LiteralColumn("a", "XX")

    @raises(DazzleError)
    def test_init05(self):
        # An arbitrary object (the test case itself) passed as column data.
        LiteralColumn("a", self)

    def test_init06(self):
        assert_array_equal(self.ca.carray[:], [1, 3])

    # --- length / position -------------------------------------------------

    def test_len01(self):
        self.assertEqual(len(self.ca), 2)

    def test_position01(self):
        self.t.append({'a': self.a, 'x': self.a})
        self.assertEqual(self.ca.position, 0)

    def test_position02(self):
        self.t.append({'a': self.a, 'x': self.a})
        self.assertEqual(self.t.get_column("x").position, 1)

    # --- item access -------------------------------------------------------

    def test_getitem01(self):
        self.assertEqual(self.ca[0], 1)

    def test_getitem02(self):
        self.assertEqual(self.ca[1], 3)

    @raises(IndexError)
    def test_getitem03(self):
        # Index 10 is past the 8 rows present after the append.
        self.t.append({'a': self.a, 'x': self.a})
        _ = self.ca[10]

    def test_getitem04(self):
        self.t.append({'a': self.a, 'x': self.a})
        assert_array_equal(self.ca[:], self.ca.carray[:])

    def test_getitem05(self):
        self.t.append({'a': self.a, 'x': self.a})
        assert_array_equal(self.ca[0:5], [1, 3, 6, 4, 7])

    def test_setitem01(self):
        self.ca[0] = 2
        self.assertEqual(self.ca[0], 2)
        assert_array_equal(self.ca.carray[:], [2, 3])

    # --- append / rename ---------------------------------------------------

    def test_append01(self):
        # self.a is appended twice: once through the table, once through the column.
        self.t.append({'a': self.a, 'x': self.a})
        self.ca.append(self.a)
        assert_array_equal(self.ca.carray[:], [1, 3, 6, 4, 7, 4, 6, 9, 6, 4, 7, 4, 6, 9])

    @raises(DazzleError)
    def test_rename01(self):
        # Renaming to an empty name.
        self.t.append({'a': self.a, 'x': self.a})
        self.ca.rename("")

    @raises(DazzleError)
    def test_rename02(self):
        # Renaming to "x", the name of the table's other column.
        self.t.append({'a': self.a, 'x': self.a})
        self.ca.rename("x")

    def test_rename03(self):
        self.ca.rename("b")
#.........这里部分代码省略.........
示例9: setUp
def setUp(self):
    """Create table ``t`` with an int8 column ``a`` = [1, 3] and a float column ``x`` = [2, 4].

    ``self.ca`` is bound to column ``a``; ``self.a`` is a plain list of sample
    values kept on the fixture.
    """
    self.a = [6, 4, 7, 4, 6, 9]
    self.test_dir = os.path.join("/temp", "dazzle-test")
    ds = DataSet(self.test_dir, force_create=True)
    # ``np.float`` was an alias of the builtin ``float``; it was deprecated in
    # NumPy 1.20 and removed in 1.24, so the builtin is used.
    self.t = Table("t", ds, [("a", np.array([1, 3], dtype=np.int8)), ("x", np.array([2, 4], dtype=float))], force_create=True)
    self.ca = self.t.get_column("a")
示例10: test_from_csv04
def test_from_csv04(self):
    """Load two columns of Category.tsv and check the resulting row and column counts."""
    tsv_path = os.path.join(AVITO_DATA_DIR, "Category.tsv")
    wanted_columns = ['CategoryID', 'ParentCategoryID']
    category = Table.from_csv("Category", self.ds, tsv_path, delimiter='\t',
                              usecols=wanted_columns, verbose=False)
    self.assertEqual(len(category.ctable), 68)
    self.assertEqual(len(category.columns), 2)
示例11: test_sum07
def test_sum07(self):
    """``sum(skipna=True)`` over a column holding only NaN values is expected to be 0.0."""
    ds = DataSet(self.test_dir, force_create=True)
    # ``np.float`` was an alias of the builtin ``float``; it was deprecated in
    # NumPy 1.20 and removed in 1.24, so the builtin is used.
    ca = Table("t", ds, [("a", np.array([np.nan, np.nan], float))], force_create=True).get_column("a")
    assert_close(ca.sum(skipna=True), 0.0)
示例12: test_from_csv02
def test_from_csv02(self):
    """Call ``Table.from_csv`` on the dataset's own ``dataset.json`` file.

    NOTE(review): presumably the call is expected to fail because the file is
    not a CSV with the requested columns; no expectation is visible in this
    excerpt -- confirm.
    """
    not_a_csv = "/temp/dazzle-test/dataset.json"
    wanted_columns = ['CategoryID', 'ParentCategoryID']
    Table.from_csv("Category", self.ds, not_a_csv, usecols=wanted_columns, verbose=False)
示例13: test_from_csv03
def test_from_csv03(self):
    """Load Category.tsv with default parsing options (no delimiter, no column selection)."""
    tsv_path = os.path.join(AVITO_DATA_DIR, "Category.tsv")
    category = Table.from_csv("Category", self.ds, tsv_path, verbose=False)
示例14: test_from_csv01
def test_from_csv01(self):
    """Call ``Table.from_csv`` with the source path ``/bim/bam/test.csv``.

    NOTE(review): presumably this file does not exist and the call is expected
    to raise; no expectation is visible in this excerpt -- confirm.
    """
    csv_path = "/bim/bam/test.csv"
    wanted_columns = ['CategoryID', 'ParentCategoryID']
    Table.from_csv("Category", self.ds, csv_path, usecols=wanted_columns, verbose=False)
示例15: test_copy04
def test_copy04(self):
    """Copy table ``t`` into a second dataset and check it equals the original table."""
    target_dir = os.path.join("/temp/dazzle-test2")
    target_ds = DataSet(target_dir, force_create=True)
    copied = Table.copy("t", target_ds, "/temp/dazzle-test/t")
    original = self.ds.get_table("t")
    assert_equal_table(copied, original)