This article collects typical usage examples of pyarrow.parquet in Python. If you have been wondering what pyarrow.parquet is for, how to use it, or what it looks like in real code, the curated examples below should help. You can also explore further usage examples of pyarrow, the package this module belongs to.
The following 15 code examples of pyarrow.parquet are sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code samples.
Example 1: validate_dataframe
# Required import: import pyarrow [as alias]
# Or: from pyarrow import parquet [as alias]
def validate_dataframe(df):
    if not isinstance(df, DataFrame):
        raise ValueError("to_parquet only supports IO with DataFrames")

    # must have value column names (strings only)
    if df.columns.inferred_type not in {'string', 'unicode'}:
        raise ValueError("parquet must have string column names")

    # index level names must be strings
    valid_names = all(
        isinstance(name, string_types)
        for name in df.index.names
        if name is not None
    )
    if not valid_names:
        raise ValueError("Index level names must be strings")
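For context, a rough standalone sketch of what the check rejects and accepts (assumes only pandas; `DataFrame` and `string_types` in the excerpt come from pandas' compat layer):

import pandas as pd

df = pd.DataFrame({0: [1, 2], 1: [3, 4]})  # integer column names
df.columns.inferred_type                    # 'integer' -> would raise ValueError

df.columns = ['a', 'b']                     # string column names
df.columns.inferred_type                    # 'string'  -> passes the check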
Example 2: __init__
# Required import: import pyarrow [as alias]
# Or: from pyarrow import parquet [as alias]
def __init__(self):
    # since pandas is a dependency of pyarrow
    # we need to import on first use
    try:
        import pyarrow
        import pyarrow.parquet
    except ImportError:
        raise ImportError(
            "pyarrow is required for parquet support\n\n"
            "you can install via conda\n"
            "conda install pyarrow -c conda-forge\n"
            "\nor via pip\n"
            "pip install -U pyarrow\n"
        )
    if LooseVersion(pyarrow.__version__) < '0.9.0':
        raise ImportError(
            "pyarrow >= 0.9.0 is required for parquet support\n\n"
            "you can install via conda\n"
            "conda install pyarrow -c conda-forge\n"
            "\nor via pip\n"
            "pip install -U pyarrow\n"
        )
    self.api = pyarrow
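The version gate is plain distutils machinery; a minimal standalone equivalent of the comparison above (assumes pyarrow is installed):

from distutils.version import LooseVersion

import pyarrow

# Same comparison the constructor performs:
if LooseVersion(pyarrow.__version__) < '0.9.0':
    raise ImportError("pyarrow >= 0.9.0 is required for parquet support")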
Example 3: write
# Required import: import pyarrow [as alias]
# Or: from pyarrow import parquet [as alias]
def write(self, df, path, compression='snappy',
          coerce_timestamps='ms', index=None, partition_cols=None,
          **kwargs):
    self.validate_dataframe(df)
    path, _, _, _ = get_filepath_or_buffer(path, mode='wb')

    if index is None:
        from_pandas_kwargs = {}
    else:
        from_pandas_kwargs = {'preserve_index': index}

    table = self.api.Table.from_pandas(df, **from_pandas_kwargs)
    if partition_cols is not None:
        self.api.parquet.write_to_dataset(
            table, path, compression=compression,
            coerce_timestamps=coerce_timestamps,
            partition_cols=partition_cols, **kwargs)
    else:
        self.api.parquet.write_table(
            table, path, compression=compression,
            coerce_timestamps=coerce_timestamps, **kwargs)
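The two branches map directly onto pyarrow's own API; a small sketch using pyarrow directly (the file names and column values are made up for illustration):

import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq

df = pd.DataFrame({'year': [2019, 2019, 2020], 'value': [1.0, 2.0, 3.0]})
table = pa.Table.from_pandas(df)

# Without partitioning: a single file.
pq.write_table(table, 'values.parquet', compression='snappy')

# With partitioning: one subdirectory per distinct 'year' value.
pq.write_to_dataset(table, 'values_dataset', partition_cols=['year'])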
Example 4: write
# Required import: import pyarrow [as alias]
# Or: from pyarrow import parquet [as alias]
def write(self, df, path, compression='snappy',
          coerce_timestamps='ms', **kwargs):
    self.validate_dataframe(df)
    if self._pyarrow_lt_070:
        self._validate_write_lt_070(df)
    path, _, _, _ = get_filepath_or_buffer(path, mode='wb')

    if self._pyarrow_lt_060:
        table = self.api.Table.from_pandas(df, timestamps_to_ms=True)
        self.api.parquet.write_table(
            table, path, compression=compression, **kwargs)
    else:
        table = self.api.Table.from_pandas(df)
        self.api.parquet.write_table(
            table, path, compression=compression,
            coerce_timestamps=coerce_timestamps, **kwargs)
Example 5: read
# Required import: import pyarrow [as alias]
# Or: from pyarrow import parquet [as alias]
def read(self, path, columns=None, **kwargs):
    path, _, _, should_close = get_filepath_or_buffer(path)
    if self._pyarrow_lt_070:
        result = self.api.parquet.read_pandas(path, columns=columns,
                                              **kwargs).to_pandas()
    else:
        kwargs['use_pandas_metadata'] = True
        result = self.api.parquet.read_table(path, columns=columns,
                                             **kwargs).to_pandas()
    if should_close:
        try:
            path.close()
        except:  # noqa: flake8
            pass

    return result
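The two branches correspond to the following pyarrow calls; a minimal sketch (assumes 'values.parquet' was written earlier):

import pyarrow.parquet as pq

# pyarrow < 0.7.0: read_pandas() implies the pandas-specific metadata.
df = pq.read_pandas('values.parquet', columns=['value']).to_pandas()

# Newer pyarrow: request the pandas metadata explicitly.
df = pq.read_table('values.parquet', columns=['value'],
                   use_pandas_metadata=True).to_pandas()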
Example 6: to_parquet
# Required import: import pyarrow [as alias]
# Or: from pyarrow import parquet [as alias]
def to_parquet(df, path, engine='auto', compression='snappy', **kwargs):
    """
    Write a DataFrame to the parquet format.

    Parameters
    ----------
    df : DataFrame
    path : string
        File path
    engine : {'auto', 'pyarrow', 'fastparquet'}, default 'auto'
        Parquet library to use. If 'auto', then the option
        ``io.parquet.engine`` is used. The default ``io.parquet.engine``
        behavior is to try 'pyarrow', falling back to 'fastparquet' if
        'pyarrow' is unavailable.
    compression : {'snappy', 'gzip', 'brotli', None}, default 'snappy'
        Name of the compression to use. Use ``None`` for no compression.
    kwargs
        Additional keyword arguments passed to the engine
    """
    impl = get_engine(engine)
    return impl.write(df, path, compression=compression, **kwargs)
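A hypothetical call, assuming this module-level helper is in scope (recent pandas exposes the same functionality as DataFrame.to_parquet):

import pandas as pd

df = pd.DataFrame({'a': [1, 2], 'b': ['x', 'y']})
to_parquet(df, 'out.parquet', engine='pyarrow', compression='gzip')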
Example 7: write
# Required import: import pyarrow [as alias]
# Or: from pyarrow import parquet [as alias]
def write(self, df, path, compression='snappy',
          coerce_timestamps='ms', **kwargs):
    self.validate_dataframe(df)
    if self._pyarrow_lt_070:
        self._validate_write_lt_070(df)
    path, _, _ = get_filepath_or_buffer(path)

    if self._pyarrow_lt_060:
        table = self.api.Table.from_pandas(df, timestamps_to_ms=True)
        self.api.parquet.write_table(
            table, path, compression=compression, **kwargs)
    else:
        table = self.api.Table.from_pandas(df)
        self.api.parquet.write_table(
            table, path, compression=compression,
            coerce_timestamps=coerce_timestamps, **kwargs)
Example 8: to_parquet
# Required import: import pyarrow [as alias]
# Or: from pyarrow import parquet [as alias]
def to_parquet(df, path, engine='auto', compression='snappy', **kwargs):
    """
    Write a DataFrame to the parquet format.

    Parameters
    ----------
    df : DataFrame
    path : string
        File path
    engine : {'auto', 'pyarrow', 'fastparquet'}, default 'auto'
        Parquet library to use. If 'auto', then the option
        'io.parquet.engine' is used; if that is also 'auto', the first
        available library ('pyarrow', then 'fastparquet') is used.
    compression : str, optional, default 'snappy'
        compression method, one of {'gzip', 'snappy', 'brotli'}
    kwargs
        Additional keyword arguments passed to the engine
    """
    impl = get_engine(engine)
    return impl.write(df, path, compression=compression, **kwargs)
Example 9: parquet_file
# Required import: import pyarrow [as alias]
# Or: from pyarrow import parquet [as alias]
def parquet_file(
    table: Union[Dict[str, List[Any]], pyarrow.Table],
    dir: Optional[pathlib.Path] = None,
) -> ContextManager[pathlib.Path]:
    """
    Yield a filename with `table` written to a Parquet file.
    """
    if isinstance(table, dict):
        table = pyarrow.table(table)

    with tempfile_context(dir=dir) as parquet_path:
        pyarrow.parquet.write_table(
            table,
            parquet_path,
            version="2.0",
            compression="SNAPPY",
            use_dictionary=[
                name.encode("utf-8")
                for name, column in zip(table.column_names, table.columns)
                if pyarrow.types.is_dictionary(column.type)
            ],
        )
        yield parquet_path
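Hypothetical usage, assuming the decorator that turns this generator into a context manager (e.g. contextlib.contextmanager) is applied outside the excerpt:

import pyarrow.parquet as pq

with parquet_file({"x": [1, 2, 3]}) as path:
    print(pq.read_table(path).to_pandas())
# The temporary file is removed when the block exits.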
Example 10: get_engine
# Required import: import pyarrow [as alias]
# Or: from pyarrow import parquet [as alias]
def get_engine(engine):
    """ return our implementation """

    if engine == 'auto':
        engine = get_option('io.parquet.engine')

    if engine == 'auto':
        # try engines in this order
        try:
            return PyArrowImpl()
        except ImportError:
            pass

        try:
            return FastParquetImpl()
        except ImportError:
            pass

    if engine not in ['pyarrow', 'fastparquet']:
        raise ValueError("engine must be one of 'pyarrow', 'fastparquet'")

    if engine == 'pyarrow':
        return PyArrowImpl()
    elif engine == 'fastparquet':
        return FastParquetImpl()
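A usage sketch; the option name comes from the code above, and set_option is standard pandas:

import pandas as pd

pd.set_option('io.parquet.engine', 'pyarrow')  # pin the engine globally
impl = get_engine('auto')                      # now resolves to PyArrowImpl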
Example 11: __init__
# Required import: import pyarrow [as alias]
# Or: from pyarrow import parquet [as alias]
def __init__(self):
    # since pandas is a dependency of pyarrow
    # we need to import on first use
    try:
        import pyarrow
        import pyarrow.parquet
    except ImportError:
        raise ImportError("pyarrow is required for parquet support\n\n"
                          "you can install via conda\n"
                          "conda install pyarrow -c conda-forge\n"
                          "\nor via pip\n"
                          "pip install -U pyarrow\n")

    if LooseVersion(pyarrow.__version__) < '0.4.1':
        raise ImportError("pyarrow >= 0.4.1 is required for parquet "
                          "support\n\n"
                          "you can install via conda\n"
                          "conda install pyarrow -c conda-forge\n"
                          "\nor via pip\n"
                          "pip install -U pyarrow\n")

    self._pyarrow_lt_050 = LooseVersion(pyarrow.__version__) < '0.5.0'
    self._pyarrow_lt_060 = LooseVersion(pyarrow.__version__) < '0.6.0'
    self.api = pyarrow
Example 12: read_parquet
# Required import: import pyarrow [as alias]
# Or: from pyarrow import parquet [as alias]
def read_parquet(path, engine='auto', **kwargs):
    """
    Load a parquet object from the file path, returning a DataFrame.

    .. versionadded:: 0.21.0

    Parameters
    ----------
    path : string
        File path
    engine : {'auto', 'pyarrow', 'fastparquet'}, default 'auto'
        Parquet reader library to use. If 'auto', then the option
        'io.parquet.engine' is used; if that is also 'auto', the first
        available library ('pyarrow', then 'fastparquet') is used.
    kwargs
        Additional keyword arguments passed to the engine

    Returns
    -------
    DataFrame
    """
    impl = get_engine(engine)
    return impl.read(path, **kwargs)
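A hypothetical round trip with the two module-level helpers defined in this file (the column selection rides along in **kwargs):

import pandas as pd

df = pd.DataFrame({'a': [1, 2], 'b': [3.0, 4.0]})
to_parquet(df, 'out.parquet', engine='pyarrow')
df2 = read_parquet('out.parquet', engine='pyarrow', columns=['a'])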
Example 13: df_from_bytes_parquet_
# Required import: import pyarrow [as alias]
# Or: from pyarrow import parquet [as alias]
def df_from_bytes_parquet_(bytes_: bytes) -> pd.DataFrame:
    """
    Since pyabc 0.9.14, pandas DataFrames are converted using
    pyarrow parquet. If the conversion to DataFrame fails,
    then `df_from_bytes_msgpack_` is tried, which was the formerly
    used method. This is in particular useful for databases that
    still employ the old format. In case errors occur here, it may
    be necessary to use a pandas version prior to 0.25.0.
    """
    try:
        b = BytesIO(bytes_)
        table = parquet.read_table(b)
        df = table.to_pandas()
    except pyarrow.lib.ArrowIOError:
        df = df_from_bytes_msgpack_(bytes_)
    return df
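A standalone sketch of the round trip this helper reverses; the serializer side is not shown in the excerpt, so the bytes are produced directly with pyarrow here:

from io import BytesIO

import pandas as pd
import pyarrow
from pyarrow import parquet

df = pd.DataFrame({'a': [1, 2]})
buf = BytesIO()
parquet.write_table(pyarrow.Table.from_pandas(df), buf)

restored = df_from_bytes_parquet_(buf.getvalue())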
Example 14: get_engine
# Required import: import pyarrow [as alias]
# Or: from pyarrow import parquet [as alias]
def get_engine(engine):
    """ return our implementation """

    if engine == 'auto':
        engine = get_option('io.parquet.engine')

    if engine == 'auto':
        # try engines in this order
        try:
            return PyArrowImpl()
        except ImportError:
            pass

        try:
            return FastParquetImpl()
        except ImportError:
            pass

        raise ImportError("Unable to find a usable engine; "
                          "tried using: 'pyarrow', 'fastparquet'.\n"
                          "pyarrow or fastparquet is required for parquet "
                          "support")

    if engine not in ['pyarrow', 'fastparquet']:
        raise ValueError("engine must be one of 'pyarrow', 'fastparquet'")

    if engine == 'pyarrow':
        return PyArrowImpl()
    elif engine == 'fastparquet':
        return FastParquetImpl()
Example 15: read
# Required import: import pyarrow [as alias]
# Or: from pyarrow import parquet [as alias]
def read(self, path, columns=None, **kwargs):
    path, _, _, should_close = get_filepath_or_buffer(path)

    kwargs['use_pandas_metadata'] = True
    result = self.api.parquet.read_table(path, columns=columns,
                                         **kwargs).to_pandas()
    if should_close:
        try:
            path.close()
        except:  # noqa: flake8
            pass

    return result