當前位置: 首頁>>代碼示例>>Python>>正文


Python ParquetFile.file_scheme方法代碼示例

本文整理匯總了Python中fastparquet.ParquetFile.file_scheme方法的典型用法代碼示例。如果您正苦於以下問題:Python ParquetFile.file_scheme方法的具體用法?Python ParquetFile.file_scheme怎麽用?Python ParquetFile.file_scheme使用的例子?那麽, 這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在fastparquet.ParquetFile的用法示例。


在下文中一共展示了ParquetFile.file_scheme方法的3個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於係統推薦出更棒的Python代碼示例。

示例1: _read_pf_simple

# 需要導入模塊: from fastparquet import ParquetFile [as 別名]
# 或者: from fastparquet.ParquetFile import file_scheme [as 別名]
def _read_pf_simple(fs, path, base, index_names, all_columns, is_series,
                    categories, cats, scheme, storage_name_mapping):
    """Read dataset with fastparquet using ParquetFile machinery"""
    from fastparquet import ParquetFile
    pf = ParquetFile(path, open_with=fs.open)
    relpath = path.replace(base, '').lstrip('/')
    for rg in pf.row_groups:
        for ch in rg.columns:
            ch.file_path = relpath
    pf.file_scheme = scheme
    pf.cats = cats
    pf.fn = base
    df = pf.to_pandas(all_columns, categories, index=index_names)
    if df.index.nlevels == 1:
        if index_names:
            df.index.name = storage_name_mapping.get(index_names[0],
                                                     index_names[0])
    else:
        if index_names:
            df.index.names = [storage_name_mapping.get(name, name)
                              for name in index_names]
    df.columns = [storage_name_mapping.get(col, col)
                  for col in all_columns
                  if col not in (index_names or [])]

    if is_series:
        return df[df.columns[0]]
    else:
        return df
開發者ID:yliapis,項目名稱:dask,代碼行數:31,代碼來源:parquet.py

示例2: _read_parquet_file

# 需要導入模塊: from fastparquet import ParquetFile [as 別名]
# 或者: from fastparquet.ParquetFile import file_scheme [as 別名]
def _read_parquet_file(fs, base, fn, index, columns, series, categories,
                       cs, dt, scheme, storage_name_mapping, *args):
    """Read a single file with fastparquet, to be used in a task"""
    from fastparquet.api import ParquetFile
    from collections import OrderedDict

    name_storage_mapping = {v: k for k, v in storage_name_mapping.items()}
    if not isinstance(columns, (tuple, list)):
        columns = [columns,]
        series = True
    if index:
        index, = index
        if index not in columns:
            columns = columns + [index]
    columns = [name_storage_mapping.get(col, col) for col in columns]
    index = name_storage_mapping.get(index, index)
    cs = OrderedDict([(k, v) for k, v in cs.items() if k in columns])
    pf = ParquetFile(fn, open_with=fs.open)
    pf.file_scheme = scheme
    for rg in pf.row_groups:
        for ch in rg.columns:
            ch.file_path = fn.replace(base, "").lstrip('/')
    pf.fn = base
    df = pf.to_pandas(columns=columns, index=index, categories=categories)

    if df.index.nlevels == 1:
        if index:
            df.index.name = storage_name_mapping.get(index, index)
    else:
        if index:
            df.index.names = [storage_name_mapping.get(name, name)
                              for name in index]
    df.columns = [storage_name_mapping.get(col, col)
                  for col in columns
                  if col != index]

    if series:
        return df[df.columns[0]]
    else:
        return df
開發者ID:yliapis,項目名稱:dask,代碼行數:42,代碼來源:parquet.py

示例3: _read_fp_multifile

# 需要導入模塊: from fastparquet import ParquetFile [as 別名]
# 或者: from fastparquet.ParquetFile import file_scheme [as 別名]
def _read_fp_multifile(fs, fs_token, paths, columns=None,
                       categories=None, index=None):
    """Read dataset with fastparquet by assuming metadata from first file"""
    from fastparquet import ParquetFile
    from fastparquet.util import analyse_paths, get_file_scheme
    base, fns = analyse_paths(paths)
    scheme = get_file_scheme(fns)
    pf = ParquetFile(paths[0], open_with=fs.open)
    pf.file_scheme = scheme
    pf.cats = _paths_to_cats(fns, scheme)
    (meta, _, index_name, out_type, all_columns, index_names,
     storage_name_mapping) = _pf_validation(
        pf, columns, index, categories, [])
    name = 'read-parquet-' + tokenize(fs_token, paths, all_columns,
                                      categories)
    dsk = {(name, i): (_read_pf_simple, fs, path, base,
                       index_names, all_columns, out_type == Series,
                       categories, pf.cats,
                       pf.file_scheme, storage_name_mapping)
           for i, path in enumerate(paths)}
    divisions = (None, ) * (len(paths) + 1)
    return out_type(dsk, name, meta, divisions)
開發者ID:mrocklin,項目名稱:dask,代碼行數:24,代碼來源:parquet.py


注:本文中的fastparquet.ParquetFile.file_scheme方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。