本文整理汇总了 Python 中 pandas.io.parsers.TextParser.read 方法的典型用法代码示例。如果您正苦于以下问题：Python TextParser.read 方法的具体用法？Python TextParser.read 怎么用？Python TextParser.read 使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 pandas.io.parsers.TextParser 的用法示例。
在下文中一共展示了 TextParser.read 方法的 11 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: _data_to_frame
# 需要导入模块: from pandas.io.parsers import TextParser [as 别名]
# 或者: from pandas.io.parsers.TextParser import read [as 别名]
def _data_to_frame(data, header, index_col, skiprows, infer_types,
                   parse_dates, tupleize_cols, thousands):
    """Turn the ``(head, body, foot)`` rows of a table into a DataFrame.

    ``data`` is unpacked into three parts.  A non-empty ``head`` is put in
    front of the body as a single row and, if ``header`` is None, row 0 is
    used as the header.  A non-empty ``foot`` is appended as one trailing
    row.  The rows are then handed to ``TextParser`` and the result of its
    ``read()`` is returned.

    ``infer_types`` is accepted but not used in this function.
    """
    thead, rows, tfoot = data

    if thead:
        rows = [thead] + rows
        # special case when a table has <th> elements
        header = 0 if header is None else header

    if tfoot:
        rows += [tfoot]

    # fill out elements of the rows that are "ragged"
    _expand_elements(rows)

    parser_options = dict(
        header=header,
        index_col=index_col,
        skiprows=_get_skiprows(skiprows),
        parse_dates=parse_dates,
        tupleize_cols=tupleize_cols,
        thousands=thousands,
    )
    return TextParser(rows, **parser_options).read()
示例2: test_read_text_list
# 需要导入模块: from pandas.io.parsers import TextParser [as 别名]
# 或者: from pandas.io.parsers.TextParser import read [as 别名]
def test_read_text_list(self):
    """A TextParser fed a list of rows must match read_csv on the same text."""
    csv_text = """A,B,C\nfoo,1,2,3\nbar,4,5,6"""
    rows = [
        ["A", "B", "C"],
        ["foo", "1", "2", "3"],
        ["bar", "4", "5", "6"],
    ]
    expected = self.read_csv(StringIO(csv_text), index_col=0)

    # chunksize is set on the parser, yet read(None) is expected to
    # produce the complete frame.
    result = TextParser(rows, index_col=0, chunksize=2).read(None)
    tm.assert_frame_equal(result, expected)
示例3: test_read_text_list
# 需要导入模块: from pandas.io.parsers import TextParser [as 别名]
# 或者: from pandas.io.parsers.TextParser import read [as 别名]
def test_read_text_list(self):
    """Parsing a list of string rows gives the same frame as read_csv."""
    header_row = ['A', 'B', 'C']
    first_row = ['foo', '1', '2', '3']
    second_row = ['bar', '4', '5', '6']
    data = """A,B,C\nfoo,1,2,3\nbar,4,5,6"""

    from_csv = self.read_csv(StringIO(data), index_col=0)

    list_parser = TextParser([header_row, first_row, second_row],
                             index_col=0, chunksize=2)
    # Passing None to read(); the result is compared to the full CSV frame.
    from_list = list_parser.read(None)

    tm.assert_frame_equal(from_list, from_csv)
示例4: _parse_excel
# 需要导入模块: from pandas.io.parsers import TextParser [as 别名]
# 或者: from pandas.io.parsers.TextParser import read [as 别名]
def _parse_excel(self, sheetname, header=0, skiprows=None, skip_footer=0,
                 index_col=None, has_index_names=None, parse_cols=None,
                 parse_dates=False, date_parser=None, na_values=None,
                 thousands=None, chunksize=None, **kwds):
    """Read one worksheet of ``self.book`` and parse it with TextParser.

    The sheet is looked up by name when ``sheetname`` is a string and by
    index otherwise.  Each cell of a kept column is converted by xlrd cell
    type: date cells become ``datetime.datetime`` (or ``datetime.time``
    when the decoded year is below ``datetime.MINYEAR``), error cells
    become ``np.nan`` and boolean cells become ``bool``.  When
    ``parse_cols`` is given, ``self._should_parse`` decides per column
    index whether the column is kept.

    If ``header`` is not None, that row is passed through
    ``_trim_excel_header``.  The remaining arguments are forwarded to
    ``TextParser``; the result of ``parser.read()`` is returned.
    """
    from xlrd import (xldate_as_tuple, XL_CELL_DATE,
                      XL_CELL_ERROR, XL_CELL_BOOLEAN)

    datemode = self.book.datemode

    def _convert(value, typ):
        # Map one raw xlrd cell value to the Python object stored in the row.
        if typ == XL_CELL_DATE:
            dt = xldate_as_tuple(value, datemode)
            # how to produce this first case?
            if dt[0] < datetime.MINYEAR:  # pragma: no cover
                return datetime.time(*dt[3:])
            return datetime.datetime(*dt)
        if typ == XL_CELL_ERROR:
            return np.nan
        if typ == XL_CELL_BOOLEAN:
            return bool(value)
        return value

    if isinstance(sheetname, compat.string_types):
        sheet = self.book.sheet_by_name(sheetname)
    else:
        # assume an integer if not a string
        sheet = self.book.sheet_by_index(sheetname)

    data = []
    keep_column = {}  # column index -> cached self._should_parse answer
    for rownum in range(sheet.nrows):
        cells = zip(sheet.row_values(rownum), sheet.row_types(rownum))
        row = []
        for colnum, (value, typ) in enumerate(cells):
            if parse_cols is not None:
                if colnum not in keep_column:
                    keep_column[colnum] = self._should_parse(colnum,
                                                             parse_cols)
                if not keep_column[colnum]:
                    continue
            row.append(_convert(value, typ))
        data.append(row)

    if header is not None:
        data[header] = _trim_excel_header(data[header])

    parser = TextParser(
        data,
        header=header,
        index_col=index_col,
        has_index_names=has_index_names,
        na_values=na_values,
        thousands=thousands,
        parse_dates=parse_dates,
        date_parser=date_parser,
        skiprows=skiprows,
        skip_footer=skip_footer,
        chunksize=chunksize,
        **kwds
    )
    return parser.read()
示例5: _data_to_frame
# 需要导入模块: from pandas.io.parsers import TextParser [as 别名]
# 或者: from pandas.io.parsers.TextParser import read [as 别名]
def _data_to_frame(**kwargs):
    """Build a DataFrame from the ``data`` and ``header`` keyword arguments.

    ``kwargs['data']`` is popped and unpacked as ``(head, body, foot)``;
    ``kwargs['header']`` is popped as well and ``kwargs['skiprows']`` is
    replaced by ``_get_skiprows`` of its value.  A non-empty ``head`` (a
    list of rows) is concatenated in front of the body; when ``header`` is
    None it becomes 0 for a single head row, otherwise the list of head row
    positions.  A non-empty ``foot`` is appended as one row.  Whatever is
    left in ``kwargs`` is forwarded to ``TextParser``.
    """
    thead, rows, tfoot = kwargs.pop('data')
    header = kwargs.pop('header')
    kwargs['skiprows'] = _get_skiprows(kwargs['skiprows'])

    if thead:
        head_positions = lrange(len(thead))
        rows = thead + rows
        # special case when a table has <th> elements
        if header is None:
            if head_positions == [0]:
                header = 0
            else:
                header = head_positions

    if tfoot:
        rows += [tfoot]

    # fill out elements of the rows that are "ragged"
    _expand_elements(rows)

    return TextParser(rows, header=header, **kwargs).read()
示例6: _data_to_frame
# 需要导入模块: from pandas.io.parsers import TextParser [as 别名]
# 或者: from pandas.io.parsers.TextParser import read [as 别名]
def _data_to_frame(data, header, index_col, skiprows, infer_types,
                   parse_dates, tupleize_cols, thousands):
    """Turn the head and body rows of a table into a DataFrame.

    ``data`` is unpacked as a 3-tuple whose last item (the footer) is
    ignored.  A non-empty ``head`` is put in front of the body as a single
    row and, if ``header`` is None, row 0 is used as the header.  The rows
    are parsed with ``TextParser`` and the result of ``read()`` is
    returned.

    ``infer_types`` is accepted but not used in this function.
    """
    # The third item is the footer, which is rarely used: ignore for now.
    thead, rows, _ = data

    if thead:
        rows = [thead] + rows
        # special case when a table has <th> elements
        header = 0 if header is None else header

    # fill out elements of the rows that are "ragged"
    _expand_elements(rows)

    parser_options = dict(
        header=header,
        index_col=index_col,
        skiprows=_get_skiprows(skiprows),
        parse_dates=parse_dates,
        tupleize_cols=tupleize_cols,
        thousands=thousands,
    )
    return TextParser(rows, **parser_options).read()
示例7: _data_to_frame
# 需要导入模块: from pandas.io.parsers import TextParser [as 别名]
# 或者: from pandas.io.parsers.TextParser import read [as 别名]
def _data_to_frame(**kwargs):
    """Build a DataFrame from the ``data`` and ``header`` keyword arguments.

    ``kwargs['data']`` is popped and unpacked as ``(head, body, foot)``;
    ``kwargs['header']`` is popped as well and ``kwargs['skiprows']`` is
    replaced by ``_get_skiprows`` of its value.  Non-empty ``head`` rows are
    concatenated in front of the body and non-empty ``foot`` rows behind
    it.  With a head present and ``header`` None, the header is inferred:
    0 for a single head row, otherwise the positions of the head rows that
    contain at least one truthy cell.  Whatever is left in ``kwargs`` is
    forwarded to ``TextParser``.
    """
    thead, rows, tfoot = kwargs.pop('data')
    header = kwargs.pop('header')
    kwargs['skiprows'] = _get_skiprows(kwargs['skiprows'])

    if thead:
        rows = thead + rows

        # Infer header when there is a <thead> or top <th>-only rows
        if header is None:
            if len(thead) == 1:
                header = 0
            else:
                # ignore all-empty-text rows
                header = [pos for pos, cells in enumerate(thead)
                          if any(cells)]

    if tfoot:
        rows += tfoot

    # fill out elements of the rows that are "ragged"
    _expand_elements(rows)

    return TextParser(rows, header=header, **kwargs).read()
示例8: _data_to_frame
# 需要导入模块: from pandas.io.parsers import TextParser [as 别名]
# 或者: from pandas.io.parsers.TextParser import read [as 别名]
def _data_to_frame(data, header, index_col, skiprows, infer_types,
                   parse_dates, tupleize_cols, thousands):
    """Turn the head and body rows of a table into a DataFrame.

    ``data`` is unpacked as a 3-tuple whose last item (the footer) is
    ignored.  A non-empty ``head`` is put in front of the body as a single
    row and, if ``header`` is None, row 0 is used as the header.  The rows
    are parsed with ``TextParser``.

    When ``infer_types`` is truthy the parsed frame goes through
    ``convert_objects(convert_dates='coerce')``; otherwise every cell is
    mapped with ``text_type``.
    """
    # The third item is the footer, which is rarely used: ignore for now.
    thead, rows, _ = data

    if thead:
        rows = [thead] + rows
        # special case when a table has <th> elements
        header = 0 if header is None else header

    # fill out elements of the rows that are "ragged"
    _expand_elements(rows)

    parser_options = dict(
        header=header,
        index_col=index_col,
        skiprows=_get_skiprows(skiprows),
        parse_dates=parse_dates,
        tupleize_cols=tupleize_cols,
        thousands=thousands,
    )
    frame = TextParser(rows, **parser_options).read()

    if not infer_types:
        return frame.applymap(text_type)

    # TODO: rm this code so infer_types has no effect in 0.14
    return frame.convert_objects(convert_dates='coerce')
示例9: _parse_excel
# 需要导入模块: from pandas.io.parsers import TextParser [as 别名]
# 或者: from pandas.io.parsers.TextParser import read [as 别名]
def _parse_excel(self, sheetname=0, header=0, skiprows=None, skip_footer=0,
                 index_col=None, has_index_names=None, parse_cols=None,
                 parse_dates=False, date_parser=None, na_values=None,
                 thousands=None, chunksize=None, convert_float=True,
                 **kwds):
    """Read one worksheet of ``self.book`` and parse it with TextParser.

    The sheet is looked up by name when ``sheetname`` is a string and by
    index otherwise.  Each cell of a kept column is converted by xlrd cell
    type:

    * date cells go through ``xldate_as_datetime`` when xlrd is at least
      0.9.3 (a value falling on the workbook's epoch date is reduced to a
      ``datetime.time``), and through ``xldate_as_tuple`` otherwise;
    * error cells become ``np.nan``;
    * boolean cells become ``bool``;
    * number cells become ``int`` when ``convert_float`` is truthy and the
      value equals its integer truncation.

    When ``parse_cols`` is given, ``self._should_parse`` decides per column
    index whether the column is kept.  If ``header`` is not None, that row
    is passed through ``_trim_excel_header``.  The remaining arguments are
    forwarded to ``TextParser``; the result of ``parser.read()`` is
    returned.
    """
    import xlrd
    from xlrd import (xldate, XL_CELL_DATE,
                      XL_CELL_ERROR, XL_CELL_BOOLEAN,
                      XL_CELL_NUMBER)

    epoch1904 = self.book.datemode

    # xlrd >= 0.9.3 can return datetime objects directly.
    xlrd_0_9_3 = LooseVersion(xlrd.__VERSION__) >= LooseVersion("0.9.3")

    def _convert(value, typ):
        # Map one raw xlrd cell value to the Python object stored in the row.
        if typ == XL_CELL_DATE:
            if not xlrd_0_9_3:
                # Use the xlrd <= 0.9.2 date handling.
                dt = xldate.xldate_as_tuple(value, epoch1904)
                if dt[0] < datetime.MINYEAR:
                    return datetime.time(*dt[3:])
                return datetime.datetime(*dt)

            # Use the newer xlrd datetime handling.
            value = xldate.xldate_as_datetime(value, epoch1904)

            # Excel doesn't distinguish between dates and time,
            # so we treat dates on the epoch as times only.
            # Also, Excel supports 1900 and 1904 epochs.
            epoch_day = (1904, 1, 1) if epoch1904 else (1899, 12, 31)
            if value.timetuple()[0:3] == epoch_day:
                return datetime.time(value.hour,
                                     value.minute,
                                     value.second,
                                     value.microsecond)
            return value

        if typ == XL_CELL_ERROR:
            return np.nan
        if typ == XL_CELL_BOOLEAN:
            return bool(value)
        if convert_float and typ == XL_CELL_NUMBER:
            # GH5394 - Excel 'numbers' are always floats
            # it's a minimal perf hit and less surprising
            as_int = int(value)
            if as_int == value:
                return as_int
        return value

    if isinstance(sheetname, compat.string_types):
        sheet = self.book.sheet_by_name(sheetname)
    else:
        # assume an integer if not a string
        sheet = self.book.sheet_by_index(sheetname)

    data = []
    keep_column = {}  # column index -> cached self._should_parse answer
    for rownum in range(sheet.nrows):
        cells = zip(sheet.row_values(rownum), sheet.row_types(rownum))
        row = []
        for colnum, (value, typ) in enumerate(cells):
            if parse_cols is not None:
                if colnum not in keep_column:
                    keep_column[colnum] = self._should_parse(colnum,
                                                             parse_cols)
                if not keep_column[colnum]:
                    continue
            row.append(_convert(value, typ))
        data.append(row)

    if header is not None:
        data[header] = _trim_excel_header(data[header])

    parser = TextParser(
        data,
        header=header,
        index_col=index_col,
        has_index_names=has_index_names,
        na_values=na_values,
        thousands=thousands,
        parse_dates=parse_dates,
        date_parser=date_parser,
        skiprows=skiprows,
        skip_footer=skip_footer,
        chunksize=chunksize,
        **kwds
    )
    return parser.read()
示例10: _parse_excel
# 需要导入模块: from pandas.io.parsers import TextParser [as 别名]
# 或者: from pandas.io.parsers.TextParser import read [as 别名]
#.........这里部分代码省略.........
cell_contents = xldate.xldate_as_datetime(cell_contents,
epoch1904)
# Excel doesn't distinguish between dates and time,
# so we treat dates on the epoch as times only.
# Also, Excel supports 1900 and 1904 epochs.
year = (cell_contents.timetuple())[0:3]
if ((not epoch1904 and year == (1899, 12, 31))
or (epoch1904 and year == (1904, 1, 1))):
cell_contents = datetime.time(cell_contents.hour,
cell_contents.minute,
cell_contents.second,
cell_contents.microsecond)
else:
# Use the xlrd <= 0.9.2 date handling.
dt = xldate.xldate_as_tuple(cell_contents, epoch1904)
if dt[0] < datetime.MINYEAR:
cell_contents = datetime.time(*dt[3:])
else:
cell_contents = datetime.datetime(*dt)
elif cell_typ == XL_CELL_ERROR:
cell_contents = np.nan
elif cell_typ == XL_CELL_BOOLEAN:
cell_contents = bool(cell_contents)
elif convert_float and cell_typ == XL_CELL_NUMBER:
# GH5394 - Excel 'numbers' are always floats
# it's a minimal perf hit and less suprising
val = int(cell_contents)
if val == cell_contents:
cell_contents = val
return cell_contents
# xlrd >= 0.9.3 can return datetime objects directly.
if LooseVersion(xlrd.__VERSION__) >= LooseVersion("0.9.3"):
xlrd_0_9_3 = True
else:
xlrd_0_9_3 = False
ret_dict = False
#Keep sheetname to maintain backwards compatibility.
if isinstance(sheetname, list):
sheets = sheetname
ret_dict = True
elif sheetname is None:
sheets = self.sheet_names
ret_dict = True
else:
sheets = [sheetname]
#handle same-type duplicates.
sheets = list(set(sheets))
output = {}
for asheetname in sheets:
if verbose:
print("Reading sheet %s" % asheetname)
if isinstance(asheetname, compat.string_types):
sheet = self.book.sheet_by_name(asheetname)
else: # assume an integer if not a string
sheet = self.book.sheet_by_index(asheetname)
data = []
should_parse = {}
for i in range(sheet.nrows):
row = []
for j, (value, typ) in enumerate(zip(sheet.row_values(i),
sheet.row_types(i))):
if parse_cols is not None and j not in should_parse:
should_parse[j] = self._should_parse(j, parse_cols)
if parse_cols is None or should_parse[j]:
row.append(_parse_cell(value,typ))
data.append(row)
if header is not None:
data[header] = _trim_excel_header(data[header])
parser = TextParser(data, header=header, index_col=index_col,
has_index_names=has_index_names,
na_values=na_values,
thousands=thousands,
parse_dates=parse_dates,
date_parser=date_parser,
skiprows=skiprows,
skip_footer=skip_footer,
chunksize=chunksize,
**kwds)
output[asheetname] = parser.read()
if ret_dict:
return output
else:
return output[asheetname]
示例11: parse
# 需要导入模块: from pandas.io.parsers import TextParser [as 别名]
# 或者: from pandas.io.parsers.TextParser import read [as 别名]
#.........这里部分代码省略.........
output = OrderedDict()
for asheetname in sheets:
if verbose:
print("Reading sheet {sheet}".format(sheet=asheetname))
if isinstance(asheetname, compat.string_types):
sheet = self.get_sheet_by_name(asheetname)
else: # assume an integer if not a string
sheet = self.get_sheet_by_index(asheetname)
data = self.get_sheet_data(sheet, convert_float)
usecols = _maybe_convert_usecols(usecols)
if sheet.nrows == 0:
output[asheetname] = DataFrame()
continue
if is_list_like(header) and len(header) == 1:
header = header[0]
# forward fill and pull out names for MultiIndex column
header_names = None
if header is not None and is_list_like(header):
header_names = []
control_row = [True] * len(data[0])
for row in header:
if is_integer(skiprows):
row += skiprows
data[row], control_row = _fill_mi_header(data[row],
control_row)
if index_col is not None:
header_name, _ = _pop_header_name(data[row], index_col)
header_names.append(header_name)
if is_list_like(index_col):
# Forward fill values for MultiIndex index.
if not is_list_like(header):
offset = 1 + header
else:
offset = 1 + max(header)
# Check if we have an empty dataset
# before trying to collect data.
if offset < len(data):
for col in index_col:
last = data[offset][col]
for row in range(offset + 1, len(data)):
if data[row][col] == '' or data[row][col] is None:
data[row][col] = last
else:
last = data[row][col]
has_index_names = is_list_like(header) and len(header) > 1
# GH 12292 : error when read one empty column from excel file
try:
parser = TextParser(data,
names=names,
header=header,
index_col=index_col,
has_index_names=has_index_names,
squeeze=squeeze,
dtype=dtype,
true_values=true_values,
false_values=false_values,
skiprows=skiprows,
nrows=nrows,
na_values=na_values,
parse_dates=parse_dates,
date_parser=date_parser,
thousands=thousands,
comment=comment,
skipfooter=skipfooter,
usecols=usecols,
mangle_dupe_cols=mangle_dupe_cols,
**kwds)
output[asheetname] = parser.read(nrows=nrows)
if not squeeze or isinstance(output[asheetname], DataFrame):
if header_names:
output[asheetname].columns = output[
asheetname].columns.set_names(header_names)
elif compat.PY2:
output[asheetname].columns = _maybe_convert_to_string(
output[asheetname].columns)
except EmptyDataError:
# No Data, return an empty DataFrame
output[asheetname] = DataFrame()
if ret_dict:
return output
else:
return output[asheetname]