本文整理汇总了 Python 中 pandas.io.parsers.read_csv 方法的典型用法代码示例。如果您正苦于以下问题：Python parsers.read_csv 方法的具体用法？Python parsers.read_csv 怎么用？Python parsers.read_csv 使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块 pandas.io.parsers 的用法示例。
在下文中一共展示了 parsers.read_csv 方法的 15 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: test_read_write_reread_dta14
# 需要导入模块: from pandas.io import parsers [as 别名]
# 或者: from pandas.io.parsers import read_csv [as 别名]
def test_read_write_reread_dta14(self, file, parsed_114, version):
    """Compare a Stata file with ``parsed_114`` and round-trip it via to_stata.

    ``file`` names an attribute on ``self``; that attribute's value is what
    gets handed to ``self.read_dta``.  ``version`` is forwarded unchanged to
    ``DataFrame.to_stata``.
    """
    dta_source = getattr(self, file)
    from_disk = self.read_dta(dta_source)
    from_disk.index.name = 'index'

    # Build the CSV-based frame.
    # NOTE(review): ``expected`` is prepared here but never compared against
    # anything below -- confirm whether an assertion is missing.
    expected = self.read_csv(self.csv14)
    for name in ['byte_', 'int_', 'long_', 'float_', 'double_']:
        converted = expected[name]._convert(datetime=True, numeric=True)
        expected[name] = converted
    expected['float_'] = expected['float_'].astype(np.float32)
    expected['date_td'] = pd.to_datetime(expected['date_td'],
                                         errors='coerce')

    tm.assert_frame_equal(parsed_114, from_disk)

    # Write the fixture frame out, read it back and compare once more.
    with tm.ensure_clean() as path:
        parsed_114.to_stata(path, {'date_td': 'td'}, version=version)
        round_tripped = self.read_dta(path).set_index('index')
        tm.assert_frame_equal(round_tripped, parsed_114)
示例2: test_read_csv_compat
# 需要导入模块: from pandas.io import parsers [as 别名]
# 或者: from pandas.io.parsers import read_csv [as 别名]
def test_read_csv_compat():
    """Check that read_fwf with explicit colspecs matches read_csv.

    The same five rows are parsed once as comma-separated text (Python
    engine) and once as fixed-width text; both frames must be equal.
    """
    csv_data = """\
A,B,C,D,E
2011,58,360.242940,149.910199,11950.7
2011,59,444.953632,166.985655,11788.4
2011,60,364.136849,183.628767,11806.2
2011,61,413.836124,184.375703,11916.8
2011,62,502.953953,173.237159,12468.3
"""
    expected = read_csv(StringIO(csv_data), engine="python")

    # The column positions below are significant: every field has to sit
    # inside the half-open character range given for it in ``colspecs``
    # (A: 0-4, B: 4-8, C: 8-20, D: 21-33, E: 34-43).  Collapsing the
    # padding makes the fields spill into neighbouring columns.
    fwf_data = """\
   A   B     C            D            E
201158    360.242940   149.910199   11950.7
201159    444.953632   166.985655   11788.4
201160    364.136849   183.628767   11806.2
201161    413.836124   184.375703   11916.8
201162    502.953953   173.237159   12468.3
"""
    colspecs = [(0, 4), (4, 8), (8, 20), (21, 33), (34, 43)]
    result = read_fwf(StringIO(fwf_data), colspecs=colspecs)

    tm.assert_frame_equal(result, expected)
示例3: test_parse_public_s3_bucket
# 需要导入模块: from pandas.io import parsers [as 别名]
# 或者: from pandas.io.parsers import read_csv [as 别名]
def test_parse_public_s3_bucket(self, tips_df):
    """Read the tips CSV from S3, plain and compressed, and compare to ``tips_df``.

    The test is skipped when ``s3fs`` cannot be imported.
    """
    pytest.importorskip('s3fs')

    # more of an integration test due to the not-public contents portion
    # can probably mock this though.
    variants = [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]
    for suffix, codec in variants:
        url = 's3://pandas-test/tips.csv' + suffix
        frame = read_csv(url, compression=codec)
        assert isinstance(frame, DataFrame)
        assert not frame.empty
        tm.assert_frame_equal(frame, tips_df)

    # Read public file from bucket with not-public contents
    frame = read_csv('s3://cant_get_it/tips.csv')
    assert isinstance(frame, DataFrame)
    assert not frame.empty
    tm.assert_frame_equal(frame, tips_df)
示例4: test_parse_public_s3_bucket_chunked
# 需要导入模块: from pandas.io import parsers [as 别名]
# 或者: from pandas.io.parsers import read_csv [as 别名]
def test_parse_public_s3_bucket_chunked(self, tips_df):
    """Read the S3 tips CSV in chunks and compare the first three to ``tips_df``."""
    # Read with a chunksize
    chunksize = 5
    variants = [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]
    for suffix, codec in variants:
        reader = read_csv('s3://pandas-test/tips.csv' + suffix,
                          chunksize=chunksize, compression=codec)
        assert reader.chunksize == chunksize

        for i_chunk in range(3):
            # Read a couple of chunks and make sure we see them
            # properly.
            chunk = reader.get_chunk()
            assert isinstance(chunk, DataFrame)
            assert not chunk.empty

            # Rows of the reference frame that this chunk should hold.
            start = chunksize * i_chunk
            stop = chunksize * (i_chunk + 1)
            tm.assert_frame_equal(tips_df.iloc[start:stop], chunk)
示例5: test_parse_public_s3_bucket_chunked_python
# 需要导入模块: from pandas.io import parsers [as 别名]
# 或者: from pandas.io.parsers import read_csv [as 别名]
def test_parse_public_s3_bucket_chunked_python(self, tips_df):
    """Chunked read of the S3 tips CSV with ``engine='python'``, checked against ``tips_df``."""
    # Read with a chunksize using the Python parser
    chunksize = 5
    variants = [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]
    for suffix, codec in variants:
        reader = read_csv('s3://pandas-test/tips.csv' + suffix,
                          chunksize=chunksize, compression=codec,
                          engine='python')
        assert reader.chunksize == chunksize

        for i_chunk in range(3):
            # Read a couple of chunks and make sure we see them properly.
            chunk = reader.get_chunk()
            assert isinstance(chunk, DataFrame)
            assert not chunk.empty

            # Rows of the reference frame that this chunk should hold.
            start = chunksize * i_chunk
            stop = chunksize * (i_chunk + 1)
            tm.assert_frame_equal(tips_df.iloc[start:stop], chunk)
示例6: test_read_csv_chunked_download
# 需要导入模块: from pandas.io import parsers [as 别名]
# 或者: from pandas.io.parsers import read_csv [as 别名]
def test_read_csv_chunked_download(self, s3_resource, caplog):
    """Upload a large CSV and check the byte range logged when reading 5 rows.

    A 100000 x 4 random frame is written as CSV into the ``pandas-test``
    bucket of ``s3_resource``; reading it back with ``nrows=5`` must leave a
    DEBUG record on the ``s3fs.core`` logger whose last two args are
    ``(0, 5505024)``.
    """
    # 8 MB, S3FS uses 5MB chunks
    df = DataFrame(np.random.randn(100000, 4), columns=list('abcd'))

    # Serialise to text first, then wrap the encoded bytes for the upload.
    str_buf = StringIO()
    df.to_csv(str_buf)
    buf = BytesIO(str_buf.getvalue().encode('utf-8'))
    s3_resource.Bucket("pandas-test").put_object(
        Key="large-file.csv",
        Body=buf)

    with caplog.at_level(logging.DEBUG, logger='s3fs.core'):
        read_csv("s3://pandas-test/large-file.csv", nrows=5)
        # log of fetch_range (start, stop)
        assert ((0, 5505024) in {x.args[-2:] for x in caplog.records})
示例7: test_parse_public_s3_bucket
# 需要导入模块: from pandas.io import parsers [as 别名]
# 或者: from pandas.io.parsers import read_csv [as 别名]
def test_parse_public_s3_bucket(self, tips_df):
    """Fetch tips.csv from S3 (uncompressed, gzip, bz2) and check it equals ``tips_df``.

    Skipped when the ``s3fs`` package is not importable.
    """
    pytest.importorskip('s3fs')

    # more of an integration test due to the not-public contents portion
    # can probably mock this though.
    for suffix, codec in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]:
        loaded = read_csv('s3://pandas-test/tips.csv' + suffix,
                          compression=codec)
        assert isinstance(loaded, DataFrame)
        assert not loaded.empty
        tm.assert_frame_equal(loaded, tips_df)

    # Read public file from bucket with not-public contents
    loaded = read_csv('s3://cant_get_it/tips.csv')
    assert isinstance(loaded, DataFrame)
    assert not loaded.empty
    tm.assert_frame_equal(loaded, tips_df)
示例8: test_read_csv_chunked_download
# 需要导入模块: from pandas.io import parsers [as 别名]
# 或者: from pandas.io.parsers import read_csv [as 别名]
def test_read_csv_chunked_download(self, s3_resource, caplog):
    """Upload a large CSV and check the byte range logged when reading 5 rows.

    A 100000 x 4 random frame is written as CSV into the ``pandas-test``
    bucket of ``s3_resource``; reading it back with ``nrows=5`` must leave a
    DEBUG record on the ``s3fs.core`` logger whose last two args are
    ``(0, 5505024)``.
    """
    # 8 MB, S3FS uses 5MB chunks
    df = DataFrame(np.random.randn(100000, 4), columns=list('abcd'))

    # Serialise to text first, then wrap the encoded bytes for the upload.
    str_buf = StringIO()
    df.to_csv(str_buf)
    buf = BytesIO(str_buf.getvalue().encode('utf-8'))
    s3_resource.Bucket("pandas-test").put_object(
        Key="large-file.csv",
        Body=buf)

    with caplog.at_level(logging.DEBUG, logger='s3fs.core'):
        read_csv("s3://pandas-test/large-file.csv", nrows=5)
        # log of fetch_range (start, stop)
        assert ((0, 5505024) in {x.args[-2:] for x in caplog.records})
示例9: get_csv_refdf
# 需要导入模块: from pandas.io import parsers [as 别名]
# 或者: from pandas.io.parsers import read_csv [as 别名]
def get_csv_refdf(self, basename):
    """
    Load the reference frame for ``basename`` via read_csv's Python engine.

    Parameters
    ----------
    basename : str
        File name without its extension; ``.csv`` is appended and the
        result is looked up inside ``self.dirpath``.

    Returns
    -------
    DataFrame
        The parsed file, with the first column as index and
        ``parse_dates=True``.
    """
    csv_name = basename + '.csv'
    csv_path = os.path.join(self.dirpath, csv_name)
    return read_csv(csv_path, index_col=0, parse_dates=True,
                    engine='python')
示例10: test_parse_cols_int
# 需要导入模块: from pandas.io import parsers [as 别名]
# 或者: from pandas.io.parsers import read_csv [as 别名]
def test_parse_cols_int(self):
    """Parse both sheets of each test workbook with ``parse_cols=3`` and compare to the CSV.

    Runs for the ``xls``, ``xlsx`` and ``xlsm`` variants of ``test.*`` found
    in ``self.dirpath``.
    """
    _skip_if_no_openpyxl()
    _skip_if_no_xlrd()

    for ext in ['xls', 'xlsx', 'xlsm']:
        workbook = ExcelFile(os.path.join(self.dirpath, 'test.%s' % ext))

        sheet1 = workbook.parse('Sheet1', index_col=0, parse_dates=True,
                                parse_cols=3)
        # Reference data, restricted to the columns A, B and C.
        reference = self.read_csv(self.csv1, index_col=0, parse_dates=True)
        reference = reference.reindex(columns=['A', 'B', 'C'])
        sheet2 = workbook.parse('Sheet2', skiprows=[1], index_col=0,
                                parse_dates=True, parse_cols=3)

        # TODO add index to xls file)
        tm.assert_frame_equal(sheet1, reference, check_names=False)
        tm.assert_frame_equal(sheet2, reference, check_names=False)
示例11: test_parse_cols_list
# 需要导入模块: from pandas.io import parsers [as 别名]
# 或者: from pandas.io.parsers import read_csv [as 别名]
def test_parse_cols_list(self):
    """Parse both sheets of each test workbook with ``parse_cols=[0, 2, 3]`` and compare to the CSV.

    Runs for the ``xls``, ``xlsx`` and ``xlsm`` variants of ``test.*`` found
    in ``self.dirpath``.
    """
    _skip_if_no_openpyxl()
    _skip_if_no_xlrd()

    for ext in ['xls', 'xlsx', 'xlsm']:
        workbook = ExcelFile(os.path.join(self.dirpath, 'test.%s' % ext))

        sheet1 = workbook.parse('Sheet1', index_col=0, parse_dates=True,
                                parse_cols=[0, 2, 3])
        # Reference data, restricted to the columns B and C.
        reference = self.read_csv(self.csv1, index_col=0, parse_dates=True)
        reference = reference.reindex(columns=['B', 'C'])
        sheet2 = workbook.parse('Sheet2', skiprows=[1], index_col=0,
                                parse_dates=True,
                                parse_cols=[0, 2, 3])

        # TODO add index to xls file)
        tm.assert_frame_equal(sheet1, reference, check_names=False)
        tm.assert_frame_equal(sheet2, reference, check_names=False)
示例12: check_excel_table_sheet_by_index
# 需要导入模块: from pandas.io import parsers [as 别名]
# 或者: from pandas.io.parsers import read_csv [as 别名]
def check_excel_table_sheet_by_index(self, filename, csvfile):
    """Parse a workbook's sheets by position and compare them with a CSV.

    Parameters
    ----------
    filename : str
        Workbook file name, resolved relative to ``self.dirpath``.
    csvfile : object
        Passed straight to ``self.read_csv`` to obtain the reference frame.
    """
    import xlrd

    pth = os.path.join(self.dirpath, filename)
    xls = ExcelFile(pth)

    # Sheets 0 and 1 must both match the CSV reference.
    df = xls.parse(0, index_col=0, parse_dates=True)
    df2 = self.read_csv(csvfile, index_col=0, parse_dates=True)
    df3 = xls.parse(1, skiprows=[1], index_col=0, parse_dates=True)
    tm.assert_frame_equal(df, df2, check_names=False)
    tm.assert_frame_equal(df3, df2, check_names=False)

    # Both spellings of the footer option must drop the last row.
    df4 = xls.parse(0, index_col=0, parse_dates=True, skipfooter=1)
    df5 = xls.parse(0, index_col=0, parse_dates=True, skip_footer=1)
    # Positional slice: ``.iloc`` replaces the deprecated ``.ix`` indexer.
    tm.assert_frame_equal(df4, df.iloc[:-1])
    tm.assert_frame_equal(df4, df5)

    # An unknown sheet name must raise.
    self.assertRaises(xlrd.XLRDError, xls.parse, 'asdf')
示例13: test_xlsx_table
# 需要导入模块: from pandas.io import parsers [as 别名]
# 或者: from pandas.io.parsers import read_csv [as 别名]
def test_xlsx_table(self):
    """Parse ``test.xlsx`` by sheet name and compare it with the CSV reference.

    Also checks that ``skipfooter=1`` and ``skip_footer=1`` give the same
    frame, equal to the full sheet without its last row.
    """
    _skip_if_no_xlrd()
    _skip_if_no_openpyxl()

    pth = os.path.join(self.dirpath, 'test.xlsx')
    xlsx = ExcelFile(pth)

    df = xlsx.parse('Sheet1', index_col=0, parse_dates=True)
    df2 = self.read_csv(self.csv1, index_col=0, parse_dates=True)
    df3 = xlsx.parse('Sheet2', skiprows=[1], index_col=0, parse_dates=True)

    # TODO add index to xlsx file
    tm.assert_frame_equal(df, df2, check_names=False)
    tm.assert_frame_equal(df3, df2, check_names=False)

    df4 = xlsx.parse('Sheet1', index_col=0, parse_dates=True,
                     skipfooter=1)
    df5 = xlsx.parse('Sheet1', index_col=0, parse_dates=True,
                     skip_footer=1)
    # Positional slice: ``.iloc`` replaces the deprecated ``.ix`` indexer.
    tm.assert_frame_equal(df4, df.iloc[:-1])
    tm.assert_frame_equal(df4, df5)
示例14: read_csv
# 需要导入模块: from pandas.io import parsers [as 别名]
# 或者: from pandas.io.parsers import read_csv [as 别名]
def read_csv(self, file):
    """Return ``file`` parsed by ``read_csv`` with ``parse_dates=True``."""
    frame = read_csv(file, parse_dates=True)
    return frame
示例15: test_read_dta3
# 需要导入模块: from pandas.io import parsers [as 别名]
# 或者: from pandas.io.parsers import read_csv [as 别名]
def test_read_dta3(self, file):
    """Check that the Stata file named by ``file`` matches the ``csv3`` reference.

    ``file`` names an attribute on ``self``; that attribute's value is what
    gets handed to ``self.read_dta``.
    """
    dta_source = getattr(self, file)
    parsed = self.read_dta(dta_source)

    # match stata here
    expected = self.read_csv(self.csv3).astype(np.float32)
    for column, dtype in (('year', np.int16), ('quarter', np.int8)):
        expected[column] = expected[column].astype(dtype)

    tm.assert_frame_equal(parsed, expected)