Python pyarrow.parquet方法代码示例

本文整理汇总了Python中pyarrow.parquet模块的典型用法代码示例。如果您正苦于以下问题：Python pyarrow.parquet模块的具体用法？Python pyarrow.parquet怎么用？Python pyarrow.parquet使用的例子？那么，这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该模块所在包pyarrow的用法示例。

在下文中一共展示了pyarrow.parquet方法的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: validate_dataframe

# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import parquet [as 别名]
def validate_dataframe(df):
    """Check that ``df`` is acceptable for parquet output.

    Parameters
    ----------
    df : object
        The object to validate.

    Raises
    ------
    ValueError
        If ``df`` is not a ``DataFrame``, if the inferred type of its
        column labels is neither ``'string'`` nor ``'unicode'``, or if
        any named index level has a name that is not a string.
    """
    if not isinstance(df, DataFrame):
        raise ValueError("to_parquet only supports IO with DataFrames")

    # column labels have to be strings
    column_kind = df.columns.inferred_type
    if column_kind != 'string' and column_kind != 'unicode':
        raise ValueError("parquet must have string column names")

    # unnamed (None) index levels are skipped; named ones must be strings
    for level_name in df.index.names:
        if level_name is None:
            continue
        if not isinstance(level_name, string_types):
            raise ValueError("Index level names must be strings")

开发者ID:Frank-qlu，项目名称:recruit，代码行数:19，代码来源:parquet.py

示例2: __init__

# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import parquet [as 别名]
def __init__(self):
    """Import pyarrow on first use and expose it as ``self.api``.

    Raises
    ------
    ImportError
        If ``pyarrow`` / ``pyarrow.parquet`` cannot be imported, or if the
        installed pyarrow version compares lower than ``'0.9.0'``.
    """
    # since pandas is a dependency of pyarrow
    # we need to import on first use
    install_hint = (
        "you can install via conda\n"
        "conda install pyarrow -c conda-forge\n"
        "\nor via pip\n"
        "pip install -U pyarrow\n"
    )

    try:
        import pyarrow
        import pyarrow.parquet
    except ImportError:
        raise ImportError(
            "pyarrow is required for parquet support\n\n" + install_hint
        )

    installed_version = LooseVersion(pyarrow.__version__)
    if installed_version < '0.9.0':
        raise ImportError(
            "pyarrow >= 0.9.0 is required for parquet support\n\n"
            + install_hint
        )

    self.api = pyarrow

开发者ID:Frank-qlu，项目名称:recruit，代码行数:26，代码来源:parquet.py

示例3: write

# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import parquet [as 别名]
def write(self, df, path, compression='snappy',
          coerce_timestamps='ms', index=None, partition_cols=None,
          **kwargs):
    """Validate ``df`` and write it to ``path`` through pyarrow.

    Parameters
    ----------
    df : DataFrame
        Frame to write; checked with ``self.validate_dataframe`` first.
    path : object
        Target handed to ``get_filepath_or_buffer(path, mode='wb')``; the
        first element of the returned tuple is what pyarrow writes to.
    compression : default 'snappy'
        Passed through to the pyarrow writer.
    coerce_timestamps : default 'ms'
        Passed through to the pyarrow writer.
    index : optional
        When not ``None`` it is given to ``Table.from_pandas`` as
        ``preserve_index``; when ``None`` that argument is omitted.
    partition_cols : optional
        When not ``None`` the table is written with ``write_to_dataset``
        using these columns; otherwise ``write_table`` is used.
    kwargs
        Extra keyword arguments forwarded to the pyarrow writer.
    """
    self.validate_dataframe(df)
    path, _, _, _ = get_filepath_or_buffer(path, mode='wb')

    # only mention preserve_index when the caller chose a value
    from_pandas_kwargs = {} if index is None else {'preserve_index': index}
    table = self.api.Table.from_pandas(df, **from_pandas_kwargs)

    if partition_cols is None:
        self.api.parquet.write_table(
            table, path, compression=compression,
            coerce_timestamps=coerce_timestamps, **kwargs)
        return

    self.api.parquet.write_to_dataset(
        table, path, compression=compression,
        coerce_timestamps=coerce_timestamps,
        partition_cols=partition_cols, **kwargs)

开发者ID:Frank-qlu，项目名称:recruit，代码行数:22，代码来源:parquet.py

示例4: write

# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import parquet [as 别名]
def write(self, df, path, compression='snappy',
          coerce_timestamps='ms', **kwargs):
    """Validate ``df`` and write it to ``path`` with ``write_table``.

    Parameters
    ----------
    df : DataFrame
        Frame to write; checked with ``self.validate_dataframe`` and, when
        ``self._pyarrow_lt_070`` is set, ``self._validate_write_lt_070``.
    path : object
        Target handed to ``get_filepath_or_buffer(path, mode='wb')``.
    compression : default 'snappy'
        Passed through to ``write_table``.
    coerce_timestamps : default 'ms'
        Passed to ``write_table`` unless ``self._pyarrow_lt_060`` is set.
    kwargs
        Extra keyword arguments forwarded to ``write_table``.
    """
    self.validate_dataframe(df)
    if self._pyarrow_lt_070:
        self._validate_write_lt_070(df)
    path, _, _, _ = get_filepath_or_buffer(path, mode='wb')

    if self._pyarrow_lt_060:
        # legacy branch: timestamps are converted while building the table
        # and coerce_timestamps is not passed to the writer
        legacy_table = self.api.Table.from_pandas(df, timestamps_to_ms=True)
        self.api.parquet.write_table(
            legacy_table, path, compression=compression, **kwargs)
        return

    table = self.api.Table.from_pandas(df)
    self.api.parquet.write_table(
        table, path, compression=compression,
        coerce_timestamps=coerce_timestamps, **kwargs)

开发者ID:birforce，项目名称:vnpy_crypto，代码行数:19，代码来源:parquet.py

示例5: read

# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import parquet [as 别名]
def read(self, path, columns=None, **kwargs):
    """Read a parquet source through pyarrow and return a DataFrame.

    Parameters
    ----------
    path : object
        Source handed to ``get_filepath_or_buffer``; the first element of
        the returned tuple is what pyarrow reads from.
    columns : optional
        Passed through to the pyarrow reader as ``columns``.
    kwargs
        Extra keyword arguments forwarded to the pyarrow reader. Unless
        ``self._pyarrow_lt_070`` is set, ``use_pandas_metadata=True`` is
        added before the call.

    Returns
    -------
    The result of ``.to_pandas()`` on the table that was read.
    """
    path, _, _, should_close = get_filepath_or_buffer(path)
    try:
        if self._pyarrow_lt_070:
            result = self.api.parquet.read_pandas(path, columns=columns,
                                                  **kwargs).to_pandas()
        else:
            kwargs['use_pandas_metadata'] = True
            result = self.api.parquet.read_table(path, columns=columns,
                                                 **kwargs).to_pandas()
    finally:
        # close the buffer even when the read fails, so it is not leaked
        if should_close:
            try:
                path.close()
            except Exception:
                # closing is best-effort; KeyboardInterrupt and SystemExit
                # are deliberately not swallowed
                pass

    return result

开发者ID:birforce，项目名称:vnpy_crypto，代码行数:18，代码来源:parquet.py

示例6: to_parquet

# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import parquet [as 别名]
def to_parquet(df, path, engine='auto', compression='snappy', **kwargs):
    """
    Write a DataFrame to a file in the parquet format.

    Parameters
    ----------
    df : DataFrame
        The frame to write.
    path : string
        File path
    engine : {'auto', 'pyarrow', 'fastparquet'}, default 'auto'
        Which parquet library performs the write. With 'auto' the
        ``io.parquet.engine`` option decides; by default that option
        tries 'pyarrow' first and falls back to 'fastparquet' when
        'pyarrow' is unavailable.
    compression : {'snappy', 'gzip', 'brotli', None}, default 'snappy'
        Compression name. ``None`` disables compression.
    kwargs
        Any further keyword arguments are handed to the engine.

    Returns
    -------
    Whatever the selected engine's ``write`` returns.
    """
    writer = get_engine(engine)
    return writer.write(df, path, compression=compression, **kwargs)

开发者ID:birforce，项目名称:vnpy_crypto，代码行数:23，代码来源:parquet.py

示例7: write

# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import parquet [as 别名]
def write(self, df, path, compression='snappy',
          coerce_timestamps='ms', **kwargs):
    """Validate ``df`` and write it to ``path`` with ``write_table``.

    Parameters
    ----------
    df : DataFrame
        Frame to write; checked with ``self.validate_dataframe`` and, when
        ``self._pyarrow_lt_070`` is set, ``self._validate_write_lt_070``.
    path : object
        Target handed to ``get_filepath_or_buffer``; the first element of
        the returned 3-tuple is what pyarrow writes to.
    compression : default 'snappy'
        Passed through to ``write_table``.
    coerce_timestamps : default 'ms'
        Passed to ``write_table`` unless ``self._pyarrow_lt_060`` is set.
    kwargs
        Extra keyword arguments forwarded to ``write_table``.
    """
    self.validate_dataframe(df)
    if self._pyarrow_lt_070:
        self._validate_write_lt_070(df)
    path, _, _ = get_filepath_or_buffer(path)

    # collect the writer options once, then issue a single write_table call
    writer_options = dict(kwargs, compression=compression)
    build_table = self.api.Table.from_pandas

    if self._pyarrow_lt_060:
        # legacy branch: timestamps are converted while building the table
        table = build_table(df, timestamps_to_ms=True)
    else:
        table = build_table(df)
        writer_options['coerce_timestamps'] = coerce_timestamps

    self.api.parquet.write_table(table, path, **writer_options)

开发者ID:nccgroup，项目名称:Splunking-Crime，代码行数:19，代码来源:parquet.py

示例8: to_parquet

# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import parquet [as 别名]
def to_parquet(df, path, engine='auto', compression='snappy', **kwargs):
    """
    Write a DataFrame to a file in the parquet format.

    Parameters
    ----------
    df : DataFrame
        The frame to write.
    path : string
        File path
    engine : {'auto', 'pyarrow', 'fastparquet'}, default 'auto'
        Parquet library to use. With 'auto' the 'io.parquet.engine'
        option is consulted; if that is also 'auto', the first library
        found to be installed is used.
    compression : str, optional, default 'snappy'
        Compression method, e.g. one of {'gzip', 'snappy', 'brotli'}.
    kwargs
        Any further keyword arguments are handed to the engine.

    Returns
    -------
    Whatever the selected engine's ``write`` returns.
    """
    return get_engine(engine).write(
        df, path, compression=compression, **kwargs)

开发者ID:nccgroup，项目名称:Splunking-Crime，代码行数:22，代码来源:parquet.py

示例9: parquet_file

# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import parquet [as 别名]
def parquet_file(
    table: Union[Dict[str, List[Any]], pyarrow.Table],
    dir: Optional[pathlib.Path] = None,
) -> ContextManager[pathlib.Path]:
    """
    Write `table` to a Parquet file and yield that file's path.

    A `dict` argument is first converted with `pyarrow.table()`. The path
    comes from `tempfile_context(dir=dir)`. Columns whose Arrow type is a
    dictionary type are listed (as UTF-8 encoded names) in
    `use_dictionary`.

    NOTE(review): the return annotation suggests this generator is wrapped
    by a context-manager decorator outside this snippet -- confirm.
    """
    arrow_table = pyarrow.table(table) if isinstance(table, dict) else table

    with tempfile_context(dir=dir) as parquet_path:
        dictionary_columns = []
        for name, column in zip(arrow_table.column_names, arrow_table.columns):
            if pyarrow.types.is_dictionary(column.type):
                dictionary_columns.append(name.encode("utf-8"))

        pyarrow.parquet.write_table(
            arrow_table,
            parquet_path,
            version="2.0",
            compression="SNAPPY",
            use_dictionary=dictionary_columns,
        )
        yield parquet_path

开发者ID:CJWorkbench，项目名称:cjworkbench，代码行数:25，代码来源:util.py

示例10: get_engine

# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import parquet [as 别名]
def get_engine(engine):
    """Return the parquet implementation object for ``engine``.

    Parameters
    ----------
    engine : {'auto', 'pyarrow', 'fastparquet'}
        With 'auto' the ``io.parquet.engine`` option is consulted. If the
        option is also 'auto', ``PyArrowImpl`` is tried first and
        ``FastParquetImpl`` second.

    Returns
    -------
    A ``PyArrowImpl`` or ``FastParquetImpl`` instance.

    Raises
    ------
    ImportError
        If ``engine`` resolves to 'auto' and neither implementation can be
        constructed, or if the explicitly requested implementation raises
        it during construction.
    ValueError
        If ``engine`` is not one of the supported names.
    """

    if engine == 'auto':
        engine = get_option('io.parquet.engine')

    if engine == 'auto':
        # try engines in this order
        try:
            return PyArrowImpl()
        except ImportError:
            pass

        try:
            return FastParquetImpl()
        except ImportError:
            pass

        # Without this, control falls through to the ValueError below and
        # reports an invalid engine name instead of the missing libraries.
        raise ImportError("Unable to find a usable engine; "
                          "tried using: 'pyarrow', 'fastparquet'.\n"
                          "pyarrow or fastparquet is required for parquet "
                          "support")

    if engine not in ['pyarrow', 'fastparquet']:
        raise ValueError("engine must be one of 'pyarrow', 'fastparquet'")

    if engine == 'pyarrow':
        return PyArrowImpl()
    elif engine == 'fastparquet':
        return FastParquetImpl()

开发者ID:securityclippy，项目名称:elasticintel，代码行数:27，代码来源:parquet.py

示例11: __init__

# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import parquet [as 别名]
def __init__(self):
    """Import pyarrow on first use and record version flags.

    Sets ``self.api`` to the ``pyarrow`` module, and sets
    ``self._pyarrow_lt_050`` / ``self._pyarrow_lt_060`` to whether the
    installed version compares lower than ``'0.5.0'`` / ``'0.6.0'``.

    Raises
    ------
    ImportError
        If ``pyarrow`` / ``pyarrow.parquet`` cannot be imported, or if the
        installed pyarrow version compares lower than ``'0.4.1'``.
    """
    # since pandas is a dependency of pyarrow
    # we need to import on first use

    try:
        import pyarrow
        import pyarrow.parquet
    except ImportError:
        raise ImportError("pyarrow is required for parquet support\n\n"
                          "you can install via conda\n"
                          "conda install pyarrow -c conda-forge\n"
                          "\nor via pip\n"
                          "pip install -U pyarrow\n")

    installed_version = LooseVersion(pyarrow.__version__)

    if installed_version < '0.4.1':
        # adjacent literals are concatenated verbatim, so the trailing
        # space after "parquet" is needed to avoid "parquetsupport"
        raise ImportError("pyarrow >= 0.4.1 is required for parquet "
                          "support\n\n"
                          "you can install via conda\n"
                          "conda install pyarrow -c conda-forge\n"
                          "\nor via pip\n"
                          "pip install -U pyarrow\n")

    self._pyarrow_lt_050 = installed_version < '0.5.0'
    self._pyarrow_lt_060 = installed_version < '0.6.0'
    self.api = pyarrow

开发者ID:securityclippy，项目名称:elasticintel，代码行数:27，代码来源:parquet.py

示例12: read_parquet

# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import parquet [as 别名]
def read_parquet(path, engine='auto', **kwargs):
    """
    Load a parquet object from the file path, returning a DataFrame.

    .. versionadded:: 0.21.0

    Parameters
    ----------
    path : string
        File path
    engine : {'auto', 'pyarrow', 'fastparquet'}, default 'auto'
        Parquet reader library to use. If 'auto', then the option
        'io.parquet.engine' is used. If that option is also 'auto', then
        the first library found to be installed is used.
    kwargs
        Additional keyword arguments forwarded to the engine's ``read``.

    Returns
    -------
    DataFrame

    """

    impl = get_engine(engine)
    # Forward kwargs so caller-supplied reader options are not silently
    # discarded.
    # NOTE(review): assumes the engine's read() accepts these -- confirm.
    return impl.read(path, **kwargs)

开发者ID:securityclippy，项目名称:elasticintel，代码行数:26，代码来源:parquet.py

示例13: df_from_bytes_parquet_

# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import parquet [as 别名]
def df_from_bytes_parquet_(bytes_: bytes) -> pd.DataFrame:
    """
    Decode a DataFrame from bytes written with pyarrow parquet.

    Since pyabc 0.9.14, pandas DataFrames are stored via pyarrow parquet.
    When reading the bytes as parquet raises
    ``pyarrow.lib.ArrowIOError``, the previously used decoder
    `df_from_bytes_msgpack_` is tried instead, which helps with
    databases that still hold the old format. Should that fallback
    fail as well, a pandas version prior to 0.25.0 may be required.
    """
    try:
        return parquet.read_table(BytesIO(bytes_)).to_pandas()
    except pyarrow.lib.ArrowIOError:
        # not readable as parquet: fall back to the legacy msgpack decoder
        return df_from_bytes_msgpack_(bytes_)

开发者ID:ICB-DCM，项目名称:pyABC，代码行数:18，代码来源:dataframe_bytes_storage.py

示例14: get_engine

# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import parquet [as 别名]
def get_engine(engine):
    """Return the parquet implementation object for ``engine``.

    With 'auto' the ``io.parquet.engine`` option is consulted. If the
    option is also 'auto', ``PyArrowImpl`` and then ``FastParquetImpl``
    are tried in that order; ImportError is raised when neither can be
    constructed. Any name other than 'pyarrow' or 'fastparquet' raises
    ValueError.
    """
    if engine == 'auto':
        engine = get_option('io.parquet.engine')

    if engine == 'auto':
        # try engines in this order
        for implementation in (PyArrowImpl, FastParquetImpl):
            try:
                return implementation()
            except ImportError:
                continue

        raise ImportError("Unable to find a usable engine; "
                          "tried using: 'pyarrow', 'fastparquet'.\n"
                          "pyarrow or fastparquet is required for parquet "
                          "support")

    if engine == 'pyarrow':
        return PyArrowImpl()
    if engine == 'fastparquet':
        return FastParquetImpl()

    raise ValueError("engine must be one of 'pyarrow', 'fastparquet'")

开发者ID:Frank-qlu，项目名称:recruit，代码行数:32，代码来源:parquet.py

示例15: read

# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import parquet [as 别名]
def read(self, path, columns=None, **kwargs):
    """Read a parquet source through pyarrow and return a DataFrame.

    Parameters
    ----------
    path : object
        Source handed to ``get_filepath_or_buffer``; the first element of
        the returned tuple is what pyarrow reads from.
    columns : optional
        Passed through to ``read_table`` as ``columns``.
    kwargs
        Extra keyword arguments forwarded to ``read_table``;
        ``use_pandas_metadata=True`` is always added.

    Returns
    -------
    The result of ``.to_pandas()`` on the table that was read.
    """
    path, _, _, should_close = get_filepath_or_buffer(path)

    kwargs['use_pandas_metadata'] = True
    try:
        result = self.api.parquet.read_table(path, columns=columns,
                                             **kwargs).to_pandas()
    finally:
        # close the buffer even when the read fails, so it is not leaked
        if should_close:
            try:
                path.close()
            except Exception:
                # closing is best-effort; KeyboardInterrupt and SystemExit
                # are deliberately not swallowed
                pass

    return result

开发者ID:Frank-qlu，项目名称:recruit，代码行数:15，代码来源:parquet.py

注：本文中的pyarrow.parquet方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。