本文整理匯總了 Python 中 fastparquet.ParquetFile.cats 方法的典型用法代碼示例。如果您正苦於以下問題:Python ParquetFile.cats 方法的具體用法?Python ParquetFile.cats 怎麼用?Python ParquetFile.cats 使用的例子?那麼,這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類 fastparquet.ParquetFile 的用法示例。
在下文中一共展示了 ParquetFile.cats 方法的 2 個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的 Python 代碼示例。
示例1: _read_pf_simple
# 需要導入模塊: from fastparquet import ParquetFile [as 別名]
# 或者: from fastparquet.ParquetFile import cats [as 別名]
def _read_pf_simple(fs, path, base, index_names, all_columns, is_series,
                    categories, cats, scheme, storage_name_mapping):
    """Read one parquet file with fastparquet using ParquetFile machinery.

    The file at ``path`` is opened as its own ``ParquetFile``; its
    ``file_scheme``, ``cats`` and ``fn`` attributes are then overwritten
    with the values supplied by the caller, and every column chunk's
    ``file_path`` is set to ``path`` expressed relative to ``base``, before
    the data is loaded with ``to_pandas``.

    Parameters
    ----------
    fs : object
        Provides ``open``, which is handed to ``ParquetFile`` as
        ``open_with``.
    path : str
        Location of the parquet file to read.
    base : str
        Leading part of ``path`` to strip when computing the relative
        path; also assigned to ``pf.fn``.
    index_names : list of str or None
        Stored names of the index columns; passed to ``to_pandas`` as
        ``index``.  May be empty/None.
    all_columns : list of str
        Stored names of all columns to load, index columns included.
    is_series : bool
        If true, return only the first column of the loaded frame.
    categories
        Forwarded unchanged to ``ParquetFile.to_pandas``.
    cats
        Assigned to ``pf.cats``.
    scheme
        Assigned to ``pf.file_scheme``.
    storage_name_mapping : dict
        Maps stored column names to the names used in the result; names
        missing from the mapping are kept as they are.

    Returns
    -------
    The object returned by ``to_pandas`` with index and columns renamed,
    or its first column when ``is_series`` is true.
    """
    from fastparquet import ParquetFile

    pf = ParquetFile(path, open_with=fs.open)

    # Remove ``base`` once only.  An unbounded ``str.replace`` would also
    # delete later occurrences of the same text (e.g. base 'a' turns
    # 'a/a/x.parquet' into 'x.prquet'), corrupting the relative path.
    # When ``base`` is a prefix of ``path`` the first occurrence is that
    # prefix.
    relpath = path.replace(base, '', 1).lstrip('/')
    for rg in pf.row_groups:
        for ch in rg.columns:
            ch.file_path = relpath

    pf.file_scheme = scheme
    pf.cats = cats
    pf.fn = base

    df = pf.to_pandas(all_columns, categories, index=index_names)

    # Translate stored index names back to the caller-facing names.
    if index_names:
        if df.index.nlevels == 1:
            first = index_names[0]
            df.index.name = storage_name_mapping.get(first, first)
        else:
            df.index.names = [storage_name_mapping.get(name, name)
                              for name in index_names]

    # Index columns are not part of ``df.columns``, so skip them here.
    df.columns = [storage_name_mapping.get(col, col)
                  for col in all_columns
                  if col not in (index_names or [])]

    if is_series:
        return df[df.columns[0]]
    return df
示例2: _read_fp_multifile
# 需要導入模塊: from fastparquet import ParquetFile [as 別名]
# 或者: from fastparquet.ParquetFile import cats [as 別名]
def _read_fp_multifile(fs, fs_token, paths, columns=None,
                       categories=None, index=None):
    """Read dataset with fastparquet by assuming metadata from first file.

    Only ``paths[0]`` is opened here.  Its ``ParquetFile`` is given the
    file scheme and categories derived from all of ``paths`` and is then
    passed to ``_pf_validation`` together with ``columns``, ``index`` and
    ``categories``.  The resulting graph holds one ``_read_pf_simple``
    task per entry in ``paths``.

    Parameters
    ----------
    fs : object
        Provides ``open``, used as ``open_with`` for ``ParquetFile`` and
        forwarded to every task.
    fs_token
        Included in the tokenized graph name.
    paths : list of str
        Parquet files to read; must contain at least one entry because
        the first one is opened immediately.
    columns, categories, index : optional
        Forwarded to ``_pf_validation``; ``categories`` is also forwarded
        to each task.

    Returns
    -------
    ``out_type(dsk, name, meta, divisions)`` where ``out_type`` and
    ``meta`` come from ``_pf_validation`` and ``divisions`` is a tuple of
    ``None`` with one more entry than there are paths.
    """
    from fastparquet import ParquetFile
    from fastparquet.util import analyse_paths, get_file_scheme

    base, fns = analyse_paths(paths)
    scheme = get_file_scheme(fns)

    pf = ParquetFile(paths[0], open_with=fs.open)
    pf.file_scheme = scheme
    pf.cats = _paths_to_cats(fns, scheme)

    validated = _pf_validation(pf, columns, index, categories, [])
    (meta, _, _, out_type, all_columns, index_names,
     storage_name_mapping) = validated

    token = tokenize(fs_token, paths, all_columns, categories)
    name = 'read-parquet-' + token

    # Each task tells _read_pf_simple whether to hand back a single column.
    is_series = out_type == Series

    dsk = {}
    for i, path in enumerate(paths):
        dsk[(name, i)] = (_read_pf_simple, fs, path, base,
                          index_names, all_columns, is_series,
                          categories, pf.cats,
                          pf.file_scheme, storage_name_mapping)

    divisions = (None,) * (len(paths) + 1)
    return out_type(dsk, name, meta, divisions)