

Python parquet.write_table Function Code Examples

This article collects typical usage examples of the Python function pyarrow.parquet.write_table, drawn from open-source projects. If you are unsure what exactly write_table does, how to call it, or what it looks like in practice, the hand-picked examples below should help.


The sections below present 15 code examples of the write_table function, sorted by popularity.
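Before the examples, here is a minimal round-trip sketch of the API (the file name and data are illustrative, assuming pandas and pyarrow are installed):

import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq

# Build a small DataFrame, convert it to an Arrow table, and write it out.
df = pd.DataFrame({'a': [1, 2, 3], 'b': ['x', 'y', 'z']})
table = pa.Table.from_pandas(df)
pq.write_table(table, 'example.parquet', compression='snappy')

# Read it back and confirm the round trip.
assert pq.read_table('example.parquet').to_pandas().equals(df)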

Example 1: test_pandas_parquet_configuration_options

def test_pandas_parquet_configuration_options(tmpdir):
    size = 10000
    np.random.seed(0)
    df = pd.DataFrame({
        'uint8': np.arange(size, dtype=np.uint8),
        'uint16': np.arange(size, dtype=np.uint16),
        'uint32': np.arange(size, dtype=np.uint32),
        'uint64': np.arange(size, dtype=np.uint64),
        'int8': np.arange(size, dtype=np.int8),
        'int16': np.arange(size, dtype=np.int16),
        'int32': np.arange(size, dtype=np.int32),
        'int64': np.arange(size, dtype=np.int64),
        'float32': np.arange(size, dtype=np.float32),
        'float64': np.arange(size, dtype=np.float64),
        'bool': np.random.randn(size) > 0
    })
    filename = tmpdir.join('pandas_roundtrip.parquet')
    arrow_table = pa.Table.from_pandas(df)

    for use_dictionary in [True, False]:
        pq.write_table(arrow_table, filename.strpath,
                       version="2.0",
                       use_dictionary=use_dictionary)
        table_read = pq.read_table(filename.strpath)
        df_read = table_read.to_pandas()
        pdt.assert_frame_equal(df, df_read)

    for compression in ['NONE', 'SNAPPY', 'GZIP']:
        pq.write_table(arrow_table, filename.strpath,
                       version="2.0",
                       compression=compression)
        table_read = pq.read_table(filename.strpath)
        df_read = table_read.to_pandas()
        pdt.assert_frame_equal(df, df_read)
Developer: kiril-me | Project: arrow | Lines: 34 | Source: test_parquet.py
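The codec actually used can be confirmed from the written file's own metadata, which each column chunk records. A short sketch (the path and indices are illustrative):

import pyarrow.parquet as pq

# Inspect which codec the first column chunk of the first row group uses.
meta = pq.ParquetFile('pandas_roundtrip.parquet').metadata
print(meta.row_group(0).column(0).compression)  # e.g. 'SNAPPY'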

Example 2: test_read_multiple_parquet_files

    def test_read_multiple_parquet_files(self):
        import pyarrow.parquet as pq

        nfiles = 10
        size = 5

        tmpdir = pjoin(self.tmp_path, 'multi-parquet-' + guid())

        self.hdfs.mkdir(tmpdir)

        test_data = []
        paths = []
        for i in range(nfiles):
            df = test_parquet._test_dataframe(size, seed=i)

            df['index'] = np.arange(i * size, (i + 1) * size)

            # Hack so that we don't have a dtype cast in v1 files
            df['uint32'] = df['uint32'].astype(np.int64)

            path = pjoin(tmpdir, '{0}.parquet'.format(i))

            table = pa.Table.from_pandas(df, preserve_index=False)
            with self.hdfs.open(path, 'wb') as f:
                pq.write_table(table, f)

            test_data.append(table)
            paths.append(path)

        result = self.hdfs.read_parquet(tmpdir)
        expected = pa.concat_tables(test_data)

        pdt.assert_frame_equal(result.to_pandas()
                               .sort_values(by='index').reset_index(drop=True),
                               expected.to_pandas())
Developer: NonVolatileComputing | Project: arrow | Lines: 35 | Source: test_hdfs.py
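On a local filesystem, the same directory-of-files read can be expressed with ParquetDataset; a minimal sketch (the directory name is illustrative):

import pyarrow.parquet as pq

# Read every Parquet file under the directory as one logical table.
dataset = pq.ParquetDataset('multi-parquet-dir')
df = dataset.read().to_pandas()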

Example 3: test_read_single_row_group

def test_read_single_row_group():
    # ARROW-471
    N, K = 10000, 4
    df = alltypes_sample(size=N)

    a_table = pa.Table.from_pandas(df, timestamps_to_ms=True)

    buf = io.BytesIO()
    pq.write_table(a_table, buf, row_group_size=N // K,
                   compression='snappy', version='2.0')

    buf.seek(0)

    pf = pq.ParquetFile(buf)

    assert pf.num_row_groups == K

    row_groups = [pf.read_row_group(i) for i in range(K)]
    result = pa.concat_tables(row_groups)
    pdt.assert_frame_equal(df, result.to_pandas())

    cols = df.columns[:2]
    row_groups = [pf.read_row_group(i, columns=cols)
                  for i in range(K)]
    result = pa.concat_tables(row_groups)
    pdt.assert_frame_equal(df[cols], result.to_pandas())
Developer: StevenMPhillips | Project: arrow | Lines: 26 | Source: test_parquet.py
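The row-group layout can also be inspected without reading any data, straight from the file metadata. A sketch reusing the buffer from the test above:

pf = pq.ParquetFile(buf)
for i in range(pf.num_row_groups):
    rg = pf.metadata.row_group(i)
    print(i, rg.num_rows, rg.total_byte_size)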

Example 4: test_pandas_parquet_1_0_roundtrip

def test_pandas_parquet_1_0_roundtrip(tmpdir):
    size = 10000
    np.random.seed(0)
    df = pd.DataFrame({
        'uint8': np.arange(size, dtype=np.uint8),
        'uint16': np.arange(size, dtype=np.uint16),
        'uint32': np.arange(size, dtype=np.uint32),
        'uint64': np.arange(size, dtype=np.uint64),
        'int8': np.arange(size, dtype=np.int8),
        'int16': np.arange(size, dtype=np.int16),
        'int32': np.arange(size, dtype=np.int32),
        'int64': np.arange(size, dtype=np.int64),
        'float32': np.arange(size, dtype=np.float32),
        'float64': np.arange(size, dtype=np.float64),
        'bool': np.random.randn(size) > 0,
        'str': [str(x) for x in range(size)],
        'str_with_nulls': [None] + [str(x) for x in range(size - 2)] + [None],
        'empty_str': [''] * size
    })
    filename = tmpdir.join('pandas_roundtrip.parquet')
    arrow_table = pa.Table.from_pandas(df)
    pq.write_table(arrow_table, filename.strpath, version="1.0")
    table_read = pq.read_table(filename.strpath)
    df_read = table_read.to_pandas()

    # We pass uint32_t as int64_t if we write Parquet version 1.0
    df['uint32'] = df['uint32'].values.astype(np.int64)

    pdt.assert_frame_equal(df, df_read)
Developer: kiril-me | Project: arrow | Lines: 29 | Source: test_parquet.py

Example 5: _write_table

def _write_table(table, path, **kwargs):
    import pyarrow.parquet as pq

    if isinstance(table, pd.DataFrame):
        table = pa.Table.from_pandas(table)

    pq.write_table(table, path, **kwargs)
    return table
Developer: NonVolatileComputing | Project: arrow | Lines: 8 | Source: test_parquet.py
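A hypothetical call of this helper, which accepts either a DataFrame or a Table and forwards keyword arguments to write_table (file name is illustrative):

df = pd.DataFrame({'a': [1, 2, 3]})
table = _write_table(df, 'helper_demo.parquet', version='2.0')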

Example 6: make_sample_file

def make_sample_file(df):
    a_table = pa.Table.from_pandas(df, timestamps_to_ms=True)

    buf = io.BytesIO()
    pq.write_table(a_table, buf, compression='SNAPPY', version='2.0')

    buf.seek(0)
    return pq.ParquetFile(buf)
Developer: kiril-me | Project: arrow | Lines: 8 | Source: test_parquet.py

Example 7: test_pandas_parquet_2_0_roundtrip

def test_pandas_parquet_2_0_roundtrip(tmpdir):
    df = alltypes_sample(size=10000)

    filename = tmpdir.join('pandas_roundtrip.parquet')
    arrow_table = pa.Table.from_pandas(df, timestamps_to_ms=True)
    pq.write_table(arrow_table, filename.strpath, version="2.0")
    table_read = pq.read_table(filename.strpath)
    df_read = table_read.to_pandas()
    pdt.assert_frame_equal(df, df_read)
Developer: kiril-me | Project: arrow | Lines: 9 | Source: test_parquet.py

Example 8: test_pandas_parquet_native_file_roundtrip

def test_pandas_parquet_native_file_roundtrip(tmpdir):
    df = _test_dataframe(10000)
    # A and paio refer to early pyarrow modules (pyarrow and pyarrow.io).
    arrow_table = A.from_pandas_dataframe(df)
    imos = paio.InMemoryOutputStream()
    pq.write_table(arrow_table, imos, version="2.0")
    buf = imos.get_result()
    reader = paio.BufferReader(buf)
    df_read = pq.read_table(reader).to_pandas()
    pdt.assert_frame_equal(df, df_read)
Developer: apache | Project: arrow | Lines: 9 | Source: test_parquet.py
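In current pyarrow releases, the same in-memory round trip can be written with BufferOutputStream; a sketch assuming a recent version:

import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq

df = pd.DataFrame({'a': [1, 2, 3]})
table = pa.Table.from_pandas(df)

# Write the table into an in-memory buffer instead of a file.
sink = pa.BufferOutputStream()
pq.write_table(table, sink)

# Read it back from the buffer and compare.
df_read = pq.read_table(pa.BufferReader(sink.getvalue())).to_pandas()
assert df.equals(df_read)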

Example 9: test_column_of_lists

def test_column_of_lists(tmpdir):
    df, schema = dataframe_with_arrays()

    filename = tmpdir.join('pandas_roundtrip.parquet')
    arrow_table = pa.Table.from_pandas(df, timestamps_to_ms=True, schema=schema)
    pq.write_table(arrow_table, filename.strpath, version="2.0")
    table_read = pq.read_table(filename.strpath)
    df_read = table_read.to_pandas()
    pdt.assert_frame_equal(df, df_read)
Developer: kiril-me | Project: arrow | Lines: 9 | Source: test_parquet.py

Example 10: _write_partition_pyarrow

def _write_partition_pyarrow(df, open_with, filename, write_index,
                             metadata_path=None, **kwargs):
    import pyarrow as pa
    from pyarrow import parquet
    t = pa.Table.from_pandas(df, preserve_index=write_index)

    with open_with(filename, 'wb') as fil:
        parquet.write_table(t, fil, **kwargs)

    if metadata_path is not None:
        with open_with(metadata_path, 'wb') as fil:
            kwargs.pop('compression', None)
            parquet.write_metadata(t.schema, fil, **kwargs)
Developer: postelrich | Project: dask | Lines: 13 | Source: parquet.py
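A hypothetical local invocation of this dask helper, using the builtin open as the open_with callable (dask normally supplies a filesystem-aware opener; file names are illustrative):

import pandas as pd

df = pd.DataFrame({'x': [1, 2, 3]})
_write_partition_pyarrow(df, open, 'part.0.parquet',
                         write_index=False,
                         metadata_path='_metadata',
                         compression='snappy')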

Example 11: read_parquet

def read_parquet(fn):
  """ Read a Parquet file with pyarrow, then write its first three
  columns back out to a new Parquet file. """
  print("Loading parquet file: %s..." % fn)
  tbl = pq.read_table(fn)
  df = tbl.to_pandas()
  d = df.iloc[:, 0:3]

  table = pa.Table.from_pandas(d)
  pq.write_table(table, 'example.parquet')
Developer: teckoo | Project: HTMLCSS_Javascript_cookbook | Lines: 14 | Source: file_io.py

Example 12: test_min_chunksize

def test_min_chunksize():
    data = pd.DataFrame([np.arange(4)], columns=['A', 'B', 'C', 'D'])
    table = pa.Table.from_pandas(data.reset_index())

    buf = io.BytesIO()
    pq.write_table(table, buf, chunk_size=-1)

    buf.seek(0)
    result = pq.read_table(buf)

    assert result.equals(table)

    with pytest.raises(ValueError):
        pq.write_table(table, buf, chunk_size=0)
Developer: StevenMPhillips | Project: arrow | Lines: 14 | Source: test_parquet.py

Example 13: test_client

def test_client(tmpdir, data):

    # construct with a path to a file
    d = tmpdir / 'pq'
    d.mkdir()

    for k, v in data.items():
        f = d / "{}.parquet".format(k)
        table = pa.Table.from_pandas(v)
        pq.write_table(table, str(f))

    c = ParquetClient(tmpdir)
    assert c.list_databases() == ['pq']
    assert c.database().pq.list_tables() == ['close', 'open']
Developer: cloudera | Project: ibis | Lines: 14 | Source: test_parquet.py

Example 14: test_pandas_column_selection

def test_pandas_column_selection(tmpdir):
    size = 10000
    np.random.seed(0)
    df = pd.DataFrame({
        'uint8': np.arange(size, dtype=np.uint8),
        'uint16': np.arange(size, dtype=np.uint16)
    })
    filename = tmpdir.join('pandas_roundtrip.parquet')
    arrow_table = pa.Table.from_pandas(df)
    pq.write_table(arrow_table, filename.strpath)
    table_read = pq.read_table(filename.strpath, columns=['uint8'])
    df_read = table_read.to_pandas()

    pdt.assert_frame_equal(df[['uint8']], df_read)
Developer: kiril-me | Project: arrow | Lines: 14 | Source: test_parquet.py

Example 15: test_fastparquet_read_with_hdfs

def test_fastparquet_read_with_hdfs():
    fs = hdfs_test_client()

    df = tm.makeDataFrame()
    table = pa.Table.from_pandas(df)

    path = '/tmp/testing.parquet'
    with fs.open(path, 'wb') as f:
        pq.write_table(table, f)

    parquet_file = fastparquet.ParquetFile(path, open_with=fs.open)

    result = parquet_file.to_pandas()
    tm.assert_frame_equal(result, df)
Developer: dremio | Project: arrow | Lines: 14 | Source: parquet_interop.py
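The reverse direction of the same interop check, writing with fastparquet and reading with pyarrow, can be sketched along similar lines (the path is illustrative):

# Write with fastparquet through the HDFS opener, then read back with pyarrow.
fastparquet.write('/tmp/testing2.parquet', df, open_with=fs.open)

with fs.open('/tmp/testing2.parquet', 'rb') as f:
    result = pq.read_table(f).to_pandas()
tm.assert_frame_equal(result, df)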


Note: The pyarrow.parquet.write_table examples in this article were compiled from open-source code and documentation platforms such as GitHub and MS Docs, with snippets selected from community-contributed open-source projects. Copyright in the source code remains with its original authors; consult each project's license before distributing or using it. Please do not republish without permission.