本文整理汇总了Python中castra.Castra类的典型用法代码示例。如果您正苦于以下问题：Python Castra类的具体用法？Python Castra怎么用？Python Castra使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了Castra类的11个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_get_empty_result
def test_get_empty_result(base):
    """Slice the store over 100..200 and check the result keeps A's columns.

    ``base`` is the directory used as the Castra path (a fixture supplied by
    the test suite).  The slice is presumably outside A's index range, as the
    test name suggests -- confirm against the fixture definition.
    """
    store = Castra(path=base, template=A)
    store.extend(A)
    result = store[100:200]
    columns_match = result.columns == A.columns
    assert columns_match.all()
示例2: execute
def execute(file_name):
    """Write the records loaded from ``file_name`` into a Castra store.

    The loaded records are split into batches of 200000, each batch is turned
    into a DataFrame with ``to_df``, and the resulting frames are appended to
    a Castra located at ``./subreddit_dumps/<file_name>.castra``.

    Parameters
    ----------
    file_name : str
        Name passed to ``load`` and used to build the output path.
    """
    records = load(file_name)
    frame_stream = map(to_df, partition_all(200000, records))
    # ``peek`` hands back the first frame (used as the template) together
    # with the sequence that is then stored -- presumably toolz.peek.
    first_frame, all_frames = peek(frame_stream)
    store_path = './subreddit_dumps/' + file_name + '.castra'
    store = Castra(
        store_path,
        template=first_frame,
        categories=['distinguished', 'removal_reason'],
    )
    store.extend_sequence(all_frames, freq='3h')
示例3: test_pickle_Castra
def test_pickle_Castra():
    """Round-trip a Castra through pickle and check its contents survive.

    The store is created in a fresh temporary directory, filled with A and B,
    pickled and unpickled; the full slice of the restored object must equal
    the concatenation of A and B.  The temporary directory is removed
    afterwards, whether or not the assertions pass.
    """
    # Local import so this block does not rely on a module-level import.
    import shutil

    path = tempfile.mkdtemp(prefix='castra-')
    try:
        c = Castra(path=path, template=A)
        c.extend(A)
        c.extend(B)
        dumped = pickle.dumps(c)
        # Safe here: the payload was produced a line above by this test.
        undumped = pickle.loads(dumped)
        tm.assert_frame_equal(pd.concat([A, B]), undumped[:])
    finally:
        # mkdtemp leaves cleanup to the caller; without this the directory
        # would be left behind after every run.
        shutil.rmtree(path)
示例4: from_castra
def from_castra(x, columns=None):
    """Build a dask DataFrame from a Castra.

    Parameters
    ----------
    x : filename or Castra
        An existing Castra, or a value from which one is opened read-only.
    columns : list or string, optional
        The columns to load. Default is all columns.

    Returns
    -------
    The result of calling ``to_dask(columns)`` on the Castra.
    """
    from castra import Castra

    # Anything that is not already a Castra is opened in read-only mode.
    source = x if isinstance(x, Castra) else Castra(x, readonly=True)
    return source.to_dask(columns)
示例5: test_reload
def test_reload():
    """Open a second Castra on the same path and compare its metadata.

    After writing A through the first handle, a handle created from the path
    alone must report the same columns, partitions and minimum.  The
    temporary directory is always removed at the end.
    """
    tmp_dir = tempfile.mkdtemp(prefix='castra-')
    try:
        written = Castra(template=A, path=tmp_dir)
        written.extend(A)

        reopened = Castra(path=tmp_dir)

        assert written.columns == reopened.columns
        same_partitions = written.partitions == reopened.partitions
        assert same_partitions.all()
        assert written.minimum == reopened.minimum
    finally:
        shutil.rmtree(tmp_dir)
示例6: test_Castra
def test_Castra():
    """Store A then B and check column names and several index slices.

    Each slice of the store is compared with the matching piece of A and/or
    B, including a slice that spans both frames.
    """
    store = Castra(template=A)
    for frame in (A, B):
        store.extend(frame)

    assert store.columns == ['x', 'y']

    # (slice applied to the store, frame it is expected to equal)
    cases = [
        (slice(0, 100), pd.concat([A, B])),
        (slice(None, 5), A),
        (slice(5, None), B),
        (slice(2, 5), A[1:]),
        (slice(2, 15), pd.concat([A[1:], B[:1]])),
    ]
    for key, expected in cases:
        tm.assert_frame_equal(store[key], expected)
示例7: from_castra
def from_castra(x, columns=None):
    """
    Build a dask DataFrame from a Castra.

    The Castra project has been deprecated. We recommend using Parquet
    instead.

    Parameters
    ----------
    x : filename or Castra
        An existing Castra, or a value from which one is opened read-only.
    columns : list or string, optional
        The columns to load. Default is all columns.

    Returns
    -------
    The result of calling ``to_dask(columns)`` on the Castra.
    """
    from castra import Castra

    # An existing Castra is used as-is.
    if isinstance(x, Castra):
        return x.to_dask(columns)

    # Otherwise open one from ``x`` in read-only mode first.
    opened = Castra(x, readonly=True)
    return opened.to_dask(columns)
示例8: test_del_with_random_dir
def test_del_with_random_dir(self):
    """A Castra created without a path must remove its directory on __del__."""
    store = Castra(template=A)

    # The directory exists while the object is alive ...
    exists_before = os.path.exists(store.path)
    assert exists_before

    store.__del__()

    # ... and is gone once the finalizer has run.
    exists_after = os.path.exists(store.path)
    assert not exists_after
示例9: load
def load(file_name):
    """Open ``./subreddit_dumps/<file_name>.castra/`` and return it via ``to_dask()``.

    Parameters
    ----------
    file_name : str
        Base name of the Castra directory, without the ``.castra/`` suffix.
    """
    store_path = './subreddit_dumps/' + file_name + '.castra/'
    return Castra(path=store_path).to_dask()
示例10: test_get_slice
def test_get_slice(base):
    """Check that two-axis slicing agrees with one-axis slicing.

    ``base`` is the directory used as the Castra path (a fixture supplied by
    the test suite).
    """
    store = Castra(path=base, template=A)
    store.extend(A)

    # A full slice on both axes equals a full slice on the index alone.
    rows_only = store[:]
    rows_and_columns = store[:, :]
    tm.assert_frame_equal(rows_only, rows_and_columns)

    # Column slice 1: must equal selecting column 'y' from the full frame.
    from_second_column = store[:, 1:]
    y_only = store[:][['y']]
    tm.assert_frame_equal(from_second_column, y_only)
示例11: test_readonly
def test_readonly():
    """A Castra opened with ``readonly=True`` must reject every mutation.

    Each mutating call on the read-only handle has to raise ``IOError``;
    afterwards its columns, partitions and minimum must still match those of
    the writable handle.  The temporary directory is always removed.
    """
    tmp_dir = tempfile.mkdtemp(prefix='castra-')
    try:
        writable = Castra(path=tmp_dir, template=A)
        writable.extend(A)

        frozen = Castra(path=tmp_dir, readonly=True)

        # Each entry is one mutating call; they run in the listed order.
        forbidden_calls = (
            lambda: frozen.extend(B),
            lambda: frozen.extend_sequence([B]),
            lambda: frozen.flush(),
            lambda: frozen.drop(),
            lambda: frozen.save_partitions(),
            lambda: frozen.flush_meta(),
        )
        for attempt in forbidden_calls:
            with pytest.raises(IOError):
                attempt()

        assert writable.columns == frozen.columns
        same_partitions = writable.partitions == frozen.partitions
        assert same_partitions.all()
        assert writable.minimum == frozen.minimum
    finally:
        shutil.rmtree(tmp_dir)