本文整理汇总了Python中pandas.io.parsers.TextParser函数的典型用法代码示例。如果您正苦于以下问题:Python parsers.TextParser函数的具体用法?Python parsers.TextParser怎么用?Python parsers.TextParser使用的例子?那么恭喜您,这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该函数所在模块pandas.io.parsers的用法示例。
在下文中一共展示了parsers.TextParser函数的10个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_reader_list
# 需要导入模块: from pandas.io import parsers [as 别名]
# 或者: from pandas.io.parsers import TextParser [as 别名]
def test_reader_list(all_parsers):
    """Check that ``TextParser`` chunks pre-split rows like ``read_csv``.

    The CSV text is split into rows with ``csv.reader`` and handed to
    ``TextParser`` with ``chunksize=2``.  Each of the first three chunks
    must equal the matching two-row slice of the frame that
    ``all_parsers.read_csv`` builds from the same text.
    """
    data = """index,A,B,C,D
foo,2,3,4,5
bar,7,8,9,10
baz,12,13,14,15
qux,12,13,14,15
foo2,12,13,14,15
bar2,12,13,14,15
"""
    options = {"index_col": 0}
    rows = list(csv.reader(StringIO(data)))

    chunked_reader = TextParser(rows, chunksize=2, **options)
    expected = all_parsers.read_csv(StringIO(data), **options)
    chunks = list(chunked_reader)

    # Six data rows at chunksize=2: compare the three chunks in order.
    for position, start in enumerate((0, 2, 4)):
        tm.assert_frame_equal(chunks[position], expected[start:start + 2])
示例2: test_reader_list_skiprows
# 需要导入模块: from pandas.io import parsers [as 别名]
# 或者: from pandas.io.parsers import TextParser [as 别名]
def test_reader_list_skiprows(all_parsers):
    """Check ``TextParser`` chunking over a row list with ``skiprows=[1]``.

    Row 1 of the pre-split input (the first data row) is skipped, so the
    first two-row chunk must equal rows 1-2 of the frame that
    ``all_parsers.read_csv`` builds from the unskipped text.
    """
    data = """index,A,B,C,D
foo,2,3,4,5
bar,7,8,9,10
baz,12,13,14,15
qux,12,13,14,15
foo2,12,13,14,15
bar2,12,13,14,15
"""
    options = {"index_col": 0}
    rows = list(csv.reader(StringIO(data)))

    chunked_reader = TextParser(rows, chunksize=2, skiprows=[1], **options)
    expected = all_parsers.read_csv(StringIO(data), **options)

    chunks = list(chunked_reader)
    first_chunk = chunks[0]
    tm.assert_frame_equal(first_chunk, expected[1:3])
示例3: _data_to_frame
# 需要导入模块: from pandas.io import parsers [as 别名]
# 或者: from pandas.io.parsers import TextParser [as 别名]
def _data_to_frame(**kwargs):
    """Assemble parsed table rows into a DataFrame via ``TextParser``.

    ``kwargs`` must carry ``'data'`` (a ``(head, body, foot)`` triple),
    ``'header'`` and ``'skiprows'``.  ``'skiprows'`` is normalised through
    ``_get_skiprows``; whatever remains after ``'data'`` and ``'header'``
    are popped is forwarded to ``TextParser`` unchanged.

    Returns the frame produced by ``TextParser(...).read()``.
    """
    head, body, foot = kwargs.pop('data')
    header = kwargs.pop('header')
    kwargs['skiprows'] = _get_skiprows(kwargs['skiprows'])

    if head:
        body = head + body

        # Infer header when there is a <thead> or top <th>-only rows
        if header is None:
            if len(head) == 1:
                header = 0
            else:
                # keep only head rows holding at least one non-empty cell
                header = [position for position, cells in enumerate(head)
                          if any(cells)]

    if foot:
        body += foot

    # pad "ragged" rows so every row has the same number of elements
    _expand_elements(body)

    return TextParser(body, header=header, **kwargs).read()
示例4: _data_to_frame
# 需要导入模块: from pandas.io import parsers [as 别名]
# 或者: from pandas.io.parsers import TextParser [as 别名]
def _data_to_frame(**kwargs):
    """Assemble parsed table rows into a DataFrame via ``TextParser``.

    ``kwargs`` must carry ``'data'`` (a ``(head, body, foot)`` triple),
    ``'header'`` and ``'skiprows'``.  ``'skiprows'`` is normalised through
    ``_get_skiprows``; the remaining keyword arguments are forwarded to
    ``TextParser`` unchanged.

    Returns the frame produced by ``TextParser(...).read()``.
    """
    head, body, foot = kwargs.pop('data')
    header = kwargs.pop('header')
    kwargs['skiprows'] = _get_skiprows(kwargs['skiprows'])

    if head:
        head_positions = lrange(len(head))
        body = head + body

        # special case when a table has <th> elements: with no explicit
        # header, use row 0 for a single head row, else every head row
        if header is None:
            if head_positions == [0]:
                header = 0
            else:
                header = head_positions

    if foot:
        # the footer is added to the body as one extra row
        body += [foot]

    # pad "ragged" rows so every row has the same number of elements
    _expand_elements(body)

    return TextParser(body, header=header, **kwargs).read()
示例5: _data_to_frame
# 需要导入模块: from pandas.io import parsers [as 别名]
# 或者: from pandas.io.parsers import TextParser [as 别名]
def _data_to_frame(data, header, index_col, skiprows, infer_types,
                   parse_dates, tupleize_cols, thousands):
    """Assemble parsed table rows into a DataFrame via ``TextParser``.

    ``data`` is a ``(head, body, footer)`` triple; the footer is ignored.
    A non-empty ``head`` is prepended to ``body`` as a single row and,
    when ``header`` is None, row 0 becomes the header.  ``skiprows`` is
    normalised through ``_get_skiprows``; the other options are passed to
    ``TextParser`` as given.

    When ``infer_types`` is truthy the result goes through
    ``convert_objects(convert_dates='coerce')``; otherwise every cell is
    mapped through ``text_type``.
    """
    # the third element is the footer, which is rarely used: ignore for now
    head, body, _ = data

    if head:
        body = [head] + body

        # special case when a table has <th> elements
        if header is None:
            header = 0

    # pad "ragged" rows so every row has the same number of elements
    _expand_elements(body)

    parser = TextParser(
        body,
        header=header,
        index_col=index_col,
        skiprows=_get_skiprows(skiprows),
        parse_dates=parse_dates,
        tupleize_cols=tupleize_cols,
        thousands=thousands,
    )
    frame = parser.read()

    # TODO: rm this code so infer_types has no effect in 0.14
    if infer_types:
        return frame.convert_objects(convert_dates='coerce')
    return frame.applymap(text_type)
示例6: get_as_dataframe
# 需要导入模块: from pandas.io import parsers [as 别名]
# 或者: from pandas.io.parsers import TextParser [as 别名]
def get_as_dataframe(worksheet,
                     evaluate_formulas=False,
                     **options):
    r"""
    Return the contents of a worksheet as a DataFrame.

    :param worksheet: the worksheet to read.
    :param evaluate_formulas: if True, use each cell's value after
            formula evaluation; otherwise use the formula itself if one
            is present. Defaults to False.
    :param \*\*options: any options accepted by
            pandas.io.parsers.TextParser in the installed version of
            pandas. (Note: TextParser supports only the default 'python'
            parser engine, not the C engine.) An ``nrows`` option, if
            given, is also passed to the parser's ``read`` call.
    :returns: pandas.DataFrame
    """
    cell_rows = _get_all_values(worksheet, evaluate_formulas)
    parser = TextParser(cell_rows, **options)
    row_limit = options.get('nrows')
    return parser.read(row_limit)
示例7: test_read_data_list
# 需要导入模块: from pandas.io import parsers [as 别名]
# 或者: from pandas.io.parsers import TextParser [as 别名]
def test_read_data_list(all_parsers):
    """Check ``TextParser.read()`` on a row list against ``read_csv``.

    The same table is supplied as CSV text to ``all_parsers.read_csv``
    and as a list of string rows to ``TextParser``; both are read with
    ``index_col=0`` and the resulting frames must be equal.
    """
    options = {"index_col": 0}
    data = "A,B,C\nfoo,1,2,3\nbar,4,5,6"
    rows = [
        ["A", "B", "C"],
        ["foo", "1", "2", "3"],
        ["bar", "4", "5", "6"],
    ]

    expected = all_parsers.read_csv(StringIO(data), **options)

    list_reader = TextParser(rows, chunksize=2, **options)
    tm.assert_frame_equal(list_reader.read(), expected)
示例8: test_read_text_list
# 需要导入模块: from pandas.io import parsers [as 别名]
# 或者: from pandas.io.parsers import TextParser [as 别名]
def test_read_text_list(self):
    """Check ``TextParser.read(None)`` on a row list against ``read_csv``.

    The same table is read from CSV text with ``self.read_csv`` and from
    a list of string rows with ``TextParser``, both with ``index_col=0``;
    the two frames must be equal.
    """
    data = """A,B,C\nfoo,1,2,3\nbar,4,5,6"""
    rows = [
        ['A', 'B', 'C'],
        ['foo', '1', '2', '3'],
        ['bar', '4', '5', '6'],
    ]

    expected = self.read_csv(StringIO(data), index_col=0)

    list_reader = TextParser(rows, index_col=0, chunksize=2)
    result = list_reader.read(None)
    tm.assert_frame_equal(result, expected)
示例9: _parse_options_data
# 需要导入模块: from pandas.io import parsers [as 别名]
# 或者: from pandas.io.parsers import TextParser [as 别名]
def _parse_options_data(table):
    """Convert a table element into a DataFrame.

    The first ``<tr>`` under ``table`` supplies the column names (its
    ``th`` cells, via ``_unpack``); every later ``<tr>`` supplies one data
    row (its ``td`` cells).  ``'N/A'`` is treated as missing.

    Returns the frame produced by ``TextParser(...).get_chunk()``.
    """
    table_rows = table.xpath('.//tr')
    column_names = _unpack(table_rows[0], kind='th')

    records = []
    for table_row in table_rows[1:]:
        records.append(_unpack(table_row, kind='td'))

    # Use ',' as a thousands separator as we're pulling from the US site.
    parser = TextParser(records, names=column_names, na_values=['N/A'],
                        thousands=',')
    return parser.get_chunk()
示例10: test_iterator
# 需要导入模块: from pandas.io import parsers [as 别名]
# 或者: from pandas.io.parsers import TextParser [as 别名]
def test_iterator(self):
    """Exercise iterator and chunked reads via ``read_csv``/``TextParser``.

    Covers, in order: ``iterator=True`` with explicit ``read(n)`` calls;
    ``TextParser`` over a list of rows, with and without ``skiprows``;
    the type returned by ``read_table(..., iterator=True)``; exhausting
    an ``iterator=True`` reader with ``list`` (gh-3967); ``chunksize=1``;
    and, for the python engine only, a ``ValueError`` from ``read`` on a
    reader created with ``skipfooter=1``.
    """
    # See gh-6607
    reader = self.read_csv(StringIO(self.data1), index_col=0,
                           iterator=True)
    full = self.read_csv(StringIO(self.data1), index_col=0)

    head_part = reader.read(3)
    tm.assert_frame_equal(head_part, full[:3])

    tail_part = reader.read(5)
    tm.assert_frame_equal(tail_part, full[3:])

    # pass list
    rows = list(csv.reader(StringIO(self.data1)))
    row_parser = TextParser(rows, index_col=0, chunksize=2)
    full = self.read_csv(StringIO(self.data1), index_col=0)

    pieces = list(row_parser)
    tm.assert_frame_equal(pieces[0], full[:2])
    tm.assert_frame_equal(pieces[1], full[2:4])
    tm.assert_frame_equal(pieces[2], full[4:])

    # pass skiprows
    row_parser = TextParser(rows, index_col=0, chunksize=2, skiprows=[1])
    pieces = list(row_parser)
    tm.assert_frame_equal(pieces[0], full[1:3])

    table_reader = self.read_table(StringIO(self.data1), sep=',',
                                   index_col=0, iterator=True)
    assert isinstance(table_reader, TextFileReader)

    # gh-3967: stopping iteration when chunksize is specified
    data = """A,B,C
foo,1,2,3
bar,4,5,6
baz,7,8,9
"""
    expected = DataFrame(
        {"A": [1, 4, 7], "B": [2, 5, 8], "C": [3, 6, 9]},
        index=["foo", "bar", "baz"],
    )

    reader = self.read_csv(StringIO(data), iterator=True)
    frames = list(reader)
    tm.assert_frame_equal(frames[0], expected)

    # chunksize = 1
    reader = self.read_csv(StringIO(data), chunksize=1)
    frames = list(reader)
    assert len(frames) == 3
    tm.assert_frame_equal(pd.concat(frames), expected)

    # skipfooter is not supported with the C parser yet
    if self.engine == 'python':
        # test bad parameter (skipfooter)
        reader = self.read_csv(StringIO(self.data1), index_col=0,
                               iterator=True, skipfooter=1)
        with pytest.raises(ValueError):
            reader.read(3)