本文整理汇总了 Python 中 fastparquet.ParquetFile.fn 属性的典型用法代码示例。如果您正苦于以下问题：Python ParquetFile.fn 的具体用法？Python ParquetFile.fn 怎么用？Python ParquetFile.fn 使用的例子？那么恭喜您，这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该属性所在类 fastparquet.ParquetFile 的用法示例。
下文共展示了 ParquetFile.fn 属性的 2 个代码示例，这些示例默认按受欢迎程度排序。您可以为喜欢或觉得有用的代码点赞，您的评价将有助于系统推荐出更好的 Python 代码示例。
示例1: _read_pf_simple
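# Required import: from fastparquet import ParquetFile
# Note: `fn` is an attribute set on a ParquetFile instance (e.g. `pf.fn = base`);
# it cannot be imported with `from fastparquet.ParquetFile import fn`.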
def _read_pf_simple(fs, path, base, index_names, all_columns, is_series,
                    categories, cats, scheme, storage_name_mapping):
    """Read one parquet file with fastparquet's ``ParquetFile`` machinery.

    The file at ``path`` is opened through ``fs.open``. Every column chunk of
    every row group is re-pointed at the file's path relative to ``base``,
    and ``pf.fn`` is set to ``base`` before the data is loaded with
    ``ParquetFile.to_pandas``. Index and column labels of the result are then
    translated through ``storage_name_mapping``.

    Parameters
    ----------
    fs : object
        File-system-like object; only ``fs.open`` is used (as ``open_with``).
    path : str
        Path of the parquet file to read.
    base : str
        Root path. Its first occurrence is removed from ``path`` to build the
        relative chunk path, and it is assigned to ``pf.fn``.
    index_names : sequence of str, or a falsy value
        Names of the index columns, as passed to ``to_pandas(index=...)``.
        When falsy, the index labels of the result are left untouched.
    all_columns : sequence of str
        Columns requested from ``to_pandas``; those not listed in
        ``index_names`` become the column labels of the result.
    is_series : bool
        When true, only the first column of the result is returned.
    categories
        Passed through unchanged to ``to_pandas``.
    cats
        Assigned to ``pf.cats``.
    scheme
        Assigned to ``pf.file_scheme``.
    storage_name_mapping : dict
        Maps a name as used for loading to the label applied on the result;
        names missing from the mapping are kept as they are.

    Returns
    -------
    The frame produced by ``to_pandas`` with relabelled index/columns, or its
    first column when ``is_series`` is true.
    """
    from fastparquet import ParquetFile

    pf = ParquetFile(path, open_with=fs.open)

    # Remove only the first occurrence of ``base``. An unbounded replace()
    # would also delete later repeats of the same text (for instance a nested
    # directory named like the base) and corrupt the relative path.
    relpath = path.replace(base, '', 1).lstrip('/')
    for rg in pf.row_groups:
        for ch in rg.columns:
            ch.file_path = relpath
    pf.file_scheme = scheme
    pf.cats = cats
    pf.fn = base

    df = pf.to_pandas(all_columns, categories, index=index_names)

    if index_names:
        if df.index.nlevels == 1:
            first = index_names[0]
            df.index.name = storage_name_mapping.get(first, first)
        else:
            df.index.names = [storage_name_mapping.get(name, name)
                              for name in index_names]

    # Index columns are not data columns of the result, so skip them when
    # relabelling.
    index_cols = index_names or []
    df.columns = [storage_name_mapping.get(col, col)
                  for col in all_columns
                  if col not in index_cols]

    if is_series:
        return df[df.columns[0]]
    return df
示例2: _read_parquet_file
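# Required import: from fastparquet import ParquetFile
# Note: `fn` is an attribute set on a ParquetFile instance (e.g. `pf.fn = base`);
# it cannot be imported with `from fastparquet.ParquetFile import fn`.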
def _read_parquet_file(fs, base, fn, index, columns, series, categories,
                       cs, dt, scheme, storage_name_mapping, *args):
    """Read a single file with fastparquet, to be used in a task.

    The file ``fn`` is opened through ``fs.open``. Every column chunk of every
    row group is re-pointed at the file's path relative to ``base``, and
    ``pf.fn`` is set to ``base`` before the data is loaded with
    ``ParquetFile.to_pandas``.

    ``columns`` and ``index`` are translated with the inverse of
    ``storage_name_mapping`` before loading. After loading, the index and
    column labels of the result are translated back with
    ``storage_name_mapping`` itself.

    Parameters
    ----------
    fs : object
        File-system-like object; only ``fs.open`` is used (as ``open_with``).
    base : str
        Root path. Its first occurrence is removed from ``fn`` to build the
        relative chunk path, and it is assigned to ``pf.fn``.
    fn : str
        Path of the parquet file to read.
    index : one-element sequence, or a falsy value
        When truthy it is unpacked to a single index name, which is appended
        to ``columns`` if missing. When falsy it is handed to ``to_pandas``
        unchanged.
    columns : list, tuple, or a single column name
        Columns to load. A single name is wrapped in a list and forces a
        series result.
    series : bool
        When true, only the first column of the result is returned.
    categories
        Passed through unchanged to ``to_pandas``.
    cs, dt, *args
        Accepted so existing callers keep working; not used by this function.
    scheme
        Assigned to ``pf.file_scheme``.
    storage_name_mapping : dict
        Mapping applied to the labels of the result; its inverse is applied
        to the requested names before loading. Names missing from the mapping
        are kept as they are.

    Returns
    -------
    The frame produced by ``to_pandas`` with relabelled index/columns, or its
    first column when ``series`` is true.

    Raises
    ------
    ValueError
        If ``index`` is truthy but does not contain exactly one element.
    """
    from fastparquet.api import ParquetFile

    name_storage_mapping = {v: k for k, v in storage_name_mapping.items()}

    if not isinstance(columns, (tuple, list)):
        columns = [columns]
        series = True

    if index:
        index, = index
        if index not in columns:
            # list(...) so that a tuple of columns can be extended as well;
            # ``tuple + list`` raises TypeError.
            columns = list(columns) + [index]
        # Only a real index name is translated. A falsy ``index`` may be
        # unhashable (e.g. an empty list) and would make dict.get() raise.
        index = name_storage_mapping.get(index, index)

    columns = [name_storage_mapping.get(col, col) for col in columns]

    pf = ParquetFile(fn, open_with=fs.open)
    pf.file_scheme = scheme

    # Computed once, outside the loops. Only the first occurrence of ``base``
    # is removed so a repeated path segment further down is left intact.
    relpath = fn.replace(base, "", 1).lstrip('/')
    for rg in pf.row_groups:
        for ch in rg.columns:
            ch.file_path = relpath
    pf.fn = base

    df = pf.to_pandas(columns=columns, index=index, categories=categories)

    if index:
        if df.index.nlevels == 1:
            df.index.name = storage_name_mapping.get(index, index)
        else:
            # NOTE(review): ``index`` was unpacked to a single name above, so
            # if it is a string this iterates over its characters. Kept as in
            # the original; confirm whether a multi-level index can occur here.
            df.index.names = [storage_name_mapping.get(name, name)
                              for name in index]

    df.columns = [storage_name_mapping.get(col, col)
                  for col in columns
                  if col != index]

    if series:
        return df[df.columns[0]]
    return df