本文整理汇总了 Python 中 pandas.read_fwf 方法的典型用法代码示例。如果您正苦于以下问题：pandas.read_fwf 方法的具体用法是什么？pandas.read_fwf 怎么用？有哪些 pandas.read_fwf 的使用例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块 pandas 的用法示例。
在下文中一共展示了pandas.read_fwf方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_fwf_colspecs_None
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_fwf [as 别名]
def test_fwf_colspecs_None(self):
# GH 7079
data = """\
123456
456789
"""
colspecs = [(0, 3), (3, None)]
result = read_fwf(StringIO(data), colspecs=colspecs, header=None)
expected = DataFrame([[123, 456], [456, 789]])
tm.assert_frame_equal(result, expected)
colspecs = [(None, 3), (3, 6)]
result = read_fwf(StringIO(data), colspecs=colspecs, header=None)
expected = DataFrame([[123, 456], [456, 789]])
tm.assert_frame_equal(result, expected)
colspecs = [(0, None), (3, None)]
result = read_fwf(StringIO(data), colspecs=colspecs, header=None)
expected = DataFrame([[123456, 456], [456789, 789]])
tm.assert_frame_equal(result, expected)
colspecs = [(None, None), (3, 6)]
result = read_fwf(StringIO(data), colspecs=colspecs, header=None)
expected = DataFrame([[123456, 456], [456789, 789]])
tm.assert_frame_equal(result, expected)
示例2: test_fwf_compression
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_fwf [as 别名]
def test_fwf_compression(self):
    """Round-trip fixed-width data through gzip and bz2 files.

    The plain text is parsed once to obtain the reference frame. The same
    payload is then written through each compressor and read back with the
    matching ``compression`` argument; both frames must be equal.
    """
    try:
        import gzip
        import bz2
    except ImportError:
        pytest.skip("Need gzip and bz2 to run this test")

    data = """1111111111
2222222222
3333333333""".strip()
    widths = [5, 5]
    names = ['one', 'two']
    expected = read_fwf(StringIO(data), widths=widths, names=names)

    if compat.PY3:
        # The compressed files are opened in binary mode, so write bytes.
        data = bytes(data, encoding='utf-8')

    comps = [('gzip', gzip.GzipFile), ('bz2', bz2.BZ2File)]
    for comp_name, compresser in comps:
        with tm.ensure_clean() as path:
            # The context manager closes the handle even if the write fails.
            with compresser(path, mode='wb') as tmp:
                tmp.write(data)
            result = read_fwf(path, widths=widths, names=names,
                              compression=comp_name)
            tm.assert_frame_equal(result, expected)
示例3: load_local_file
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_fwf [as 别名]
def load_local_file(self, interval):
    """Read the fixed-width file for *interval* into a time-indexed frame.

    The path is obtained from ``self.local_path(interval)``. The two-character
    year field is offset by 1900 and combined with day-of-year, hour, minute
    and second into a ``Time`` column, which is also used as the index. The
    individual date fields are dropped from the result.
    """
    date_fields = ['year', 'doy', 'hour', 'minute', 'second']
    headings = ['probe'] + date_fields + [
        'naverage', 'Bx', 'By', 'Bz', '|B|',
        'sigma_Bx', 'sigma_By', 'sigma_Bz',
    ]
    # Consecutive field boundaries; each adjacent pair is one (start, stop).
    edges = [1, 2, 4, 7, 9, 11, 13, 15, 22, 29, 36, 42, 48, 54, 60]
    colspecs = list(zip(edges[:-1], edges[1:]))

    frame = pd.read_fwf(self.local_path(interval), names=headings,
                        header=None, colspecs=colspecs)

    frame['year'] += 1900

    # Start from January 1st of the year, then add each component in turn.
    stamp = pd.to_datetime(frame['year'], format='%Y')
    stamp = stamp + pd.to_timedelta(frame['doy'] - 1, unit='d')
    stamp = stamp + pd.to_timedelta(frame['hour'], unit='h')
    stamp = stamp + pd.to_timedelta(frame['minute'], unit='m')
    stamp = stamp + pd.to_timedelta(frame['second'], unit='s')
    frame['Time'] = stamp

    frame = frame.drop(date_fields, axis=1)
    return frame.set_index('Time', drop=False)
示例4: get_ghcnd_stn_metadata
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_fwf [as 别名]
def get_ghcnd_stn_metadata(fname=None, download=False):
    """
    Get the ghcnd station metadata from ghcnd-stations.txt.
    China station start with "CHM000...", like "CHM00054511"

    Args:
        fname (string, optional): Path of a station metadata file to read.
            When omitted, the cached copy is used, and it is fetched from
            the NOAA website first if it does not exist yet.
        download (bool, optional): Force a fresh download of the cached
            copy. Only consulted when ``fname`` is omitted. Defaults to False.

    Returns:
        pandas.DataFrame: One row per station, with the columns
        ``station``, ``lat``, ``lon``, ``elev`` and ``name``.

    Examples:
    >>> stnmd = get_ghcnd_stn_metadata()
    """
    if fname is None:
        fname = get_cache_file("pub/data/ghcn/daily/", "ghcnd-stations.txt", name="GHCN")
        # NOTE(review): relies on get_cache_file returning an object with
        # is_file() (presumably a pathlib.Path) - confirm. The download step
        # is kept inside this branch so a caller-supplied string path is read
        # as-is and never reaches is_file().
        if not fname.is_file() or download:
            url = 'https://www1.ncdc.noaa.gov/pub/data/ghcn/daily/ghcnd-stations.txt'
            urllib.request.urlretrieve(url, fname)

    md = pd.read_fwf(fname, colspecs=[(0, 12), (12, 21), (21, 31), (31, 38), (38, 69)],
                     names=['station', 'lat', 'lon', 'elev', 'name'])
    return md
示例5: read_fwf
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_fwf [as 别名]
def read_fwf(
    cls, filepath_or_buffer, colspecs="infer", widths=None, infer_nrows=100, **kwds
):
    """Read a fixed-width file by delegating to ``pandas.read_fwf``.

    A pandas ``DataFrame`` result is converted with ``cls.from_pandas``.
    A ``TextFileReader`` result is returned with its ``read`` method wrapped
    so that everything it reads is converted the same way. Any other result
    is returned unchanged.
    """
    ErrorMessage.default_to_pandas("`read_fwf`")
    parsed = pandas.read_fwf(
        filepath_or_buffer,
        colspecs=colspecs,
        widths=widths,
        infer_nrows=infer_nrows,
        **kwds
    )
    if isinstance(parsed, pandas.DataFrame):
        return cls.from_pandas(parsed)
    if not isinstance(parsed, pandas.io.parsers.TextFileReader):
        return parsed

    # Overwriting the read method should return a Modin DataFrame for calls
    # to __next__ and get_chunk
    original_read = parsed.read

    def read_and_convert(*args, **kwargs):
        return cls.from_pandas(original_read(*args, **kwargs))

    parsed.read = read_and_convert
    return parsed
示例6: test_fwf_file_usecols
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_fwf [as 别名]
def test_fwf_file_usecols(usecols):
    """Check that ``usecols`` gives the same frame through ``pd`` as through pandas.

    The fixture file is written, read once with ``pandas.read_fwf`` and once
    with ``pd.read_fwf``, and the two frames are compared. The file is removed
    afterwards whether or not the comparison succeeds.
    """
    # NOTE(review): the column padding of this fixture looks collapsed to
    # single spaces - confirm against the intended layout.
    fwf_data = """a b c d
id8141 360.242940 149.910199 11950.7
id1594 444.953632 166.985655 11788.4
id1849 364.136849 183.628767 11806.2
id1230 413.836124 184.375703 11916.8
id1948 502.953953 173.237159 12468.3"""

    setup_fwf_file(overwrite=True, fwf_data=fwf_data)
    try:
        pandas_df = pandas.read_fwf(TEST_FWF_FILENAME, usecols=usecols)
        modin_df = pd.read_fwf(TEST_FWF_FILENAME, usecols=usecols)
        df_equals(modin_df, pandas_df)
    finally:
        # Clean up the fixture file even when the comparison fails.
        teardown_fwf_file()
示例7: test_fwf_file_parse_dates
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_fwf [as 别名]
def test_fwf_file_parse_dates():
    """Compare ``parse_dates`` handling between ``pd.read_fwf`` and pandas.

    Two forms are exercised: a nested list that merges ``col2`` and ``col4``,
    and a dict that names the merged column ``time``. The fixture file is
    removed afterwards whether or not a comparison fails.
    """
    dates = pandas.date_range("2000", freq="h", periods=10)

    # One header line followed by ten rows numbered 10..19.
    lines = ["col1 col2 col3 col4"]
    for number, stamp in enumerate(dates, start=10):
        lines.append(
            "{col1} {col2} {col3} {col4}".format(
                col1=str(number),
                col2=str(stamp.date()),
                col3=str(number),
                col4=str(stamp.time()),
            )
        )
    fwf_data = "\n".join(lines)

    setup_fwf_file(overwrite=True, fwf_data=fwf_data)
    try:
        pandas_df = pandas.read_fwf(TEST_FWF_FILENAME, parse_dates=[["col2", "col4"]])
        modin_df = pd.read_fwf(TEST_FWF_FILENAME, parse_dates=[["col2", "col4"]])
        df_equals(modin_df, pandas_df)

        pandas_df = pandas.read_fwf(
            TEST_FWF_FILENAME, parse_dates={"time": ["col2", "col4"]}
        )
        modin_df = pd.read_fwf(TEST_FWF_FILENAME, parse_dates={"time": ["col2", "col4"]})
        df_equals(modin_df, pandas_df)
    finally:
        # Clean up the fixture file even when a comparison fails.
        teardown_fwf_file()
示例8: ReadFemResp1995
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_fwf [as 别名]
def ReadFemResp1995():
    """Reads respondent data from NSFG Cycle 5.

    The gzip-compressed fixed-width file is read for five named fields,
    the codes 98 and 99 in ``timesmar`` are replaced with NaN, an
    ``evrmarry`` flag (``timesmar > 0``) is added, and the frame is passed
    through ``CleanData``.

    returns: DataFrame
    """
    dat_file = '1995FemRespData.dat.gz'
    names = ['cmintvw', 'timesmar', 'cmmarrhx', 'cmbirth', 'finalwgt']
    # Start positions are written 1-based and shifted by one to give the
    # 0-based, half-open intervals that read_fwf expects.
    colspecs = [(12360-1, 12363),
                (4637-1, 4638),
                (11759-1, 11762),
                (14-1, 16),
                (12350-1, 12359)]

    df = pd.read_fwf(dat_file,
                     compression='gzip',
                     colspecs=colspecs,
                     names=names)

    # Assign the result back instead of calling replace(inplace=True) on the
    # attribute-accessed column: that chained form works on an intermediate
    # object and is not guaranteed to update df (it does not under pandas
    # Copy-on-Write).
    df['timesmar'] = df['timesmar'].replace([98, 99], np.nan)
    df['evrmarry'] = (df.timesmar > 0)

    CleanData(df)
    return df
示例9: ps
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_fwf [as 别名]
def ps(self, args=None, options='', all=True, verbose=True,
       as_frame='auto', raise_on_error=True):
    """Run ``ps`` through ``self.wait`` and return its output.

    Args:
        args: Flag letters for ``ps``; a leading ``-`` is added if missing.
        options: Extra text appended after the flags.
        all: When true, append the ``A`` flag.
        verbose: When true, append the ``f`` flag.
        as_frame: ``'auto'`` resolves to the module-level ``has_pandas``;
            a true value parses the output into a DataFrame, a false value
            returns whatever ``self.wait`` returned.
        raise_on_error: Forwarded to ``self.wait``.

    Returns:
        The raw result of ``self.wait`` or a DataFrame parsed from it. In the
        DataFrame, every column to the right of ``CMD`` is concatenated into
        ``CMD`` (missing cells count as empty strings, no separator is added).

    Raises:
        ImportError: If a DataFrame is requested but pandas is unavailable.
    """
    if args is None:
        args = ''
    if all:
        args += 'A'
    if verbose:
        args += 'f'
    if len(args) > 0 and args[0] != '-':
        args = '-' + args

    results = self.wait(('ps %s %s' % (args, options)).strip(),
                        raise_on_error=raise_on_error)

    if as_frame == 'auto':
        as_frame = has_pandas
    if not as_frame:
        return results
    if not has_pandas:
        raise ImportError("Unable to import pandas")

    df = pd.read_fwf(StringIO(results))
    cmd_loc = df.columns.get_loc('CMD')
    # get_loc returns a position, so the column has to be fetched by position
    # before fillna can be applied; iloc replaces the removed DataFrame.icol.
    col = df.iloc[:, cmd_loc].fillna('')
    for i in range(cmd_loc + 1, len(df.columns)):
        col = col + df.iloc[:, i].fillna('')
    df['CMD'] = col
    return df
示例10: test_BytesIO_input
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_fwf [as 别名]
def test_BytesIO_input(self):
    """Parse UTF-8 encoded text supplied through a ``BytesIO`` buffer.

    Two identical four-character lines are split into two columns of width
    two; the first line supplies the column labels.
    """
    if not compat.PY3:
        pytest.skip(
            "Bytes-related test - only needs to work on Python 3")

    payload = "שלום\nשלום".encode('utf8')
    result = read_fwf(BytesIO(payload), widths=[2, 2], encoding='utf8')

    expected = DataFrame([["של", "ום"]], columns=["של", "ום"])
    tm.assert_frame_equal(result, expected)
示例11: test_fwf_colspecs_is_list_or_tuple_of_two_element_tuples
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_fwf [as 别名]
def test_fwf_colspecs_is_list_or_tuple_of_two_element_tuples(self):
    """A colspec entry with a non-numeric bound must raise ``TypeError``."""
    data = """index,A,B,C,D
foo,2,3,4,5
bar,7,8,9,10
baz,12,13,14,15
qux,12,13,14,15
foo2,12,13,14,15
bar2,12,13,14,15
"""
    bad_colspecs = [('a', 1)]
    message = 'Each column specification must be.+'

    with tm.assert_raises_regex(TypeError, message):
        read_fwf(StringIO(data), bad_colspecs)
示例12: test_fwf_regression
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_fwf [as 别名]
def test_fwf_regression(self):
# GH 3594
# turns out 'T060' is parsable as a datetime slice!
tzlist = [1, 10, 20, 30, 60, 80, 100]
ntz = len(tzlist)
tcolspecs = [16] + [8] * ntz
tcolnames = ['SST'] + ["T%03d" % z for z in tzlist[1:]]
data = """ 2009164202000 9.5403 9.4105 8.6571 7.8372 6.0612 5.8843 5.5192
2009164203000 9.5435 9.2010 8.6167 7.8176 6.0804 5.8728 5.4869
2009164204000 9.5873 9.1326 8.4694 7.5889 6.0422 5.8526 5.4657
2009164205000 9.5810 9.0896 8.4009 7.4652 6.0322 5.8189 5.4379
2009164210000 9.6034 9.0897 8.3822 7.4905 6.0908 5.7904 5.4039
"""
df = read_fwf(StringIO(data),
index_col=0,
header=None,
names=tcolnames,
widths=tcolspecs,
parse_dates=True,
date_parser=lambda s: datetime.strptime(s, '%Y%j%H%M%S'))
for c in df.columns:
res = df.loc[:, c]
assert len(res)
示例13: test_comment_fwf
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_fwf [as 别名]
def test_comment_fwf(self):
data = """
1 2. 4 #hello world
5 NaN 10.0
"""
expected = np.array([[1, 2., 4],
[5, np.nan, 10.]])
df = read_fwf(StringIO(data), colspecs=[(0, 3), (4, 9), (9, 25)],
comment='#')
tm.assert_almost_equal(df.values, expected)
示例14: test_1000_fwf
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_fwf [as 别名]
def test_1000_fwf(self):
data = """
1 2,334.0 5
10 13 10.
"""
expected = np.array([[1, 2334., 5],
[10, 13, 10]])
df = read_fwf(StringIO(data), colspecs=[(0, 3), (3, 11), (12, 16)],
thousands=',')
tm.assert_almost_equal(df.values, expected)
示例15: test_bool_header_arg
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_fwf [as 别名]
def test_bool_header_arg(self):
    """A boolean ``header`` argument must be rejected with ``TypeError``."""
    # see gh-6114
    data = """\
MyColumn
a
b
a
b"""
    for flag in (True, False):
        with pytest.raises(TypeError):
            read_fwf(StringIO(data), header=flag)